├── LICENSE
├── README.md
├── config
├── catalog
│ ├── hive.properties
│ ├── jmx.properties
│ ├── tpcds.properties
│ └── tpch.properties
└── config.properties
├── packer
├── .atlanrc
├── Makefile
├── base_configs
│ ├── config.properties
│ ├── env.sh
│ ├── jvm.config
│ ├── log.properties
│ └── node.properties
├── presto.json
├── presto.sh
├── presto_metrics_cloudwatch.service
├── presto_metrics_prometheus.service
└── presto_scaling_service.service
├── presto.yaml
├── sample_presto_config.zip
└── scripts
├── autoscaling_termination_wait
└── lambda_function.py
├── graceful_shutdown_handler
└── lambda_function.py
└── ha_lambda
└── lambda_function.py
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Presto on AWS
2 |
3 |
4 | This is a cloudformation template for deploying [Presto](https://prestosql.io) on AWS. It deploys coordinators and workers in an autoscaling group.
5 |
6 | ## Features
7 |
8 | - Graceful shutdown of workers using Autoscaling lifecycle management. A Presto worker will not shut down until all the queries running on that worker have finished.
9 | - Highly available coordinator nodes.
10 | - Autoscaling of presto workers based on presto's memory and CPU usage.
11 | - A cloudwatch and prometheus agent which runs inside presto coordinator to push presto's metrics such as input data, CPU usage, running/blocked/failed queries.
12 | - A query logger which pushes completed queries and their stats to ElasticSearch.
13 | - A presto AMI creation packer script to easily update presto version.
14 | - Logs of presto coordinator and workers available in Cloudwatch.
15 | - Health check in Presto workers to remove unhealthy workers
16 |
17 | ## Architecture
18 |
19 | 
20 |
21 |
22 |
23 | ## Pre-requisites
24 |
25 | - A VPC and subnet
26 | - A user with following permissions. // TODO: Add permissions
27 |
28 | ### Modifying the presto configuration
29 |
30 | - New connectors: To add Presto connector configurations (such as the hive or postgres connector) to the deployment, create a directory with the following structure. Add a properties file for each connector and zip the directory. Then add the command that copies the connector files to the bootstrap script in the CFT.
31 | ```
32 | ├── catalog
33 | │ ├── hive.properties
34 | │ ├── jmx.properties
35 | │ ├── tpcds.properties
36 | │ └── tpch.properties
37 | └── config.properties
38 | ```
39 |
40 | - To modify core configuration, such as enabling spill or enabling/disabling the reserved pool, edit the config.properties file mentioned above. Memory-based settings such as JVM memory and max memory per node are handled automatically based on the selected instance types.
41 |
42 | Add the URL of above directory as zip file in `AdditionalConfigsUri` parameter in CFT.
43 |
44 | ### Creating Presto AMI using Packer
45 |
46 | - Go inside `packer` directory and change the parameters of `.atlanrc` file. The presto version is 330 by default. Source AMI is Amazon Linux 2 in the region you want to create the AMI in.
47 | - Run the following command
48 | ```bash
49 | make build_presto_image
50 | ```
51 | - Change the `presto.json` to modify the AMI further.
52 | - To use this AMI add the AMI ID in the mapping in `presto.yaml` with AMI's region.
53 |
54 | ### Deployment
55 |
56 | The CFT requires following parameters for deployment
57 | - VPC ID: VPC to deploy Presto cluster
58 | - Subnet ID: Subnet to deploy Presto cluster
59 | - Security groups ID: SGs to attach to presto coordinators and workers
60 | - Keyname: Private key to use to launch presto machines
61 | - Coordinator Instance type: EC2 Instance type for coordinator
62 | - Coordinator Instance Count: For HA Coordinator deployment set it to 2 else set it to 1.
63 | - Min workers count: Minimum numbers of EC2 machines in Presto workers ASG
64 | - Max workers count: Maximum numbers of EC2 machines in Presto workers ASG
65 | - Workers instance type: EC2 Instance type for workers
66 | - Presto Version: Presto version, required for compatibility before and after version 330
67 | - EC2 Root volume size: EBS Volume size (GB) for presto workers and coordinators. Increase the value to few hundred GBs if you have disk spill based workload.
68 | - Hive IP: Format `thrift://<hive-metastore-host>:9083`
69 | - Elasticsearch Host: Elasticsearch host for query logger to push SQL queries into.
70 | - Elasticsearch Port: Elasticsearch port for query logger to push SQL queries into
71 | - Environment: Identifier for Dev, Production presto clusters.
72 |
73 | Create the AMI and provide the ID with region in CFT. Now deploy the CFT by following the guide from AWS.
74 |
75 | ### Configuring autoscaling of workers
76 |
77 | You can configure autoscaling of Presto workers based on metrics from Presto such as running queries and heap usage. These metrics are pushed to Cloudwatch by the Presto coordinator. You can configure Cloudwatch alarms and autoscaling policies based on these Cloudwatch metrics.
78 |
79 | ### Limitations/Future work
80 | - Add support for TLS in the deployment.
81 | - Graceful shutdown lambda only waits for 1 hour for queries to finish. Add feature to wait to terminate the worker until all the queries finish on that worker.
82 | - The high availability feature only switches between the standby and live coordinator; it doesn't restart the failed coordinator.
83 | - No retention policy configuration for presto logs in Cloudwatch
84 |
85 | ### Contribute
86 |
87 | 1. Fork it
88 | 2. Create your feature branch (`git checkout -b my-new-feature`)
89 | 3. Commit your changes (`git commit -am 'Add some feature'`)
90 | 4. Push to the branch (`git push origin my-new-feature`)
91 | 5. Create new Pull Request
92 |
--------------------------------------------------------------------------------
/config/catalog/hive.properties:
--------------------------------------------------------------------------------
1 | connector.name=hive-hadoop2
2 | hive.metastore-refresh-interval=1s
3 | hive.metastore-cache-ttl=5s
4 | hive.non-managed-table-writes-enabled = true
5 | hive.max-partitions-per-writers=1000
6 | hive.orc.use-column-names = true
7 | hive.parquet.use-column-names = true
8 | hive.metastore-timeout=5m
9 |
--------------------------------------------------------------------------------
/config/catalog/jmx.properties:
--------------------------------------------------------------------------------
1 | connector.name=jmx
--------------------------------------------------------------------------------
/config/catalog/tpcds.properties:
--------------------------------------------------------------------------------
1 | connector.name=tpcds
--------------------------------------------------------------------------------
/config/catalog/tpch.properties:
--------------------------------------------------------------------------------
1 | connector.name=tpch
--------------------------------------------------------------------------------
/config/config.properties:
--------------------------------------------------------------------------------
1 | experimental.spill-enabled=false
2 | experimental.spill-order-by=true
3 | experimental.spill-window-operator=true
4 | experimental.spiller-spill-path=/var/lib/presto/spill/
5 | experimental.spiller-max-used-space-threshold=0.8
6 | experimental.max-spill-per-node=260GB
7 | experimental.query-max-spill-per-node=150GB
8 | experimental.reserved-pool-enabled=false
9 | query.low-memory-killer.policy=total-reservation-on-blocked-nodes
10 | shutdown.grace-period=60.00m
--------------------------------------------------------------------------------
/packer/.atlanrc:
--------------------------------------------------------------------------------
1 | AWS_ACCESS_KEY=
2 | AWS_SECRET_KEY=
3 | VPC_ID=
4 | SUBNET_ID=
5 | PRESTO_VERSION=330
6 | AMI_NAME=
7 | SOURCE_AMI=ami-0323c3dd2da7fb37d
8 | REGION=us-east-1
9 |
--------------------------------------------------------------------------------
/packer/Makefile:
--------------------------------------------------------------------------------
1 | # Packer helpers for building the Presto AMI.
2 | #
3 | # Build settings (AWS credentials, VPC/subnet, Presto version, AMI name,
4 | # source AMI, region) come from $(FILE); the leading "-" on the include
5 | # lets make continue when that file does not exist.
6 | FILE := .atlanrc
7 | -include ./$(FILE)
8 |
9 | # Neither target produces a file with its own name, so always run them.
10 | .PHONY: build_presto_image validate_config
11 |
12 | # Build the AMI from presto.json, forwarding the settings as packer vars.
13 | build_presto_image:
14 | 	@packer build \
15 | 		-var 'vpc_id=$(VPC_ID)' \
16 | 		-var 'subnet_id=$(SUBNET_ID)' \
17 | 		-var 'aws_access_key=$(AWS_ACCESS_KEY)' \
18 | 		-var 'aws_secret_key=$(AWS_SECRET_KEY)' \
19 | 		-var 'presto_version=$(PRESTO_VERSION)' \
20 | 		-var 'ami_name=$(AMI_NAME)' \
21 | 		-var 'source_ami=$(SOURCE_AMI)' \
22 | 		-var 'region=$(REGION)' \
23 | 		presto.json
24 |
25 | # Check presto.json for syntax/configuration errors without building.
26 | validate_config:
27 | 	@packer validate presto.json
28 |
--------------------------------------------------------------------------------
/packer/base_configs/config.properties:
--------------------------------------------------------------------------------
1 | coordinator={{isCoordinator}}
2 | node-scheduler.include-coordinator=false
3 | http-server.http.port=8080
4 | discovery.uri={{coordinatorDiscoveryUri}}
--------------------------------------------------------------------------------
/packer/base_configs/env.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atlanhq/presto-on-aws/a0f31d44b8d9729c9fdbccdb516969c735b148f3/packer/base_configs/env.sh
--------------------------------------------------------------------------------
/packer/base_configs/jvm.config:
--------------------------------------------------------------------------------
1 | -server
2 | -Xmx{{jvmMemory}}G
3 | -XX:-UseBiasedLocking
4 | -XX:+UseG1GC
5 | -XX:+ExplicitGCInvokesConcurrent
6 | -XX:+HeapDumpOnOutOfMemoryError
7 | -XX:+UseGCOverheadLimit
8 | -XX:+ExitOnOutOfMemoryError
9 | -XX:ReservedCodeCacheSize=512M
10 | -Djdk.attach.allowAttachSelf=true
11 |
--------------------------------------------------------------------------------
/packer/base_configs/log.properties:
--------------------------------------------------------------------------------
1 | # Enable verbose logging from Presto (uncomment the logger below)
2 | #io.prestosql=DEBUG
3 |
--------------------------------------------------------------------------------
/packer/base_configs/node.properties:
--------------------------------------------------------------------------------
1 | node.environment={{envName}}
2 | node.id={{instanceId}}
3 | node.data-dir=/var/lib/presto/data
4 | catalog.config-dir=/etc/presto/catalog
5 | plugin.dir=/usr/lib/presto/plugin
6 | node.server-log-file=/var/log/presto/server.log
7 | node.launcher-log-file=/var/log/presto/launcher.log
8 |
--------------------------------------------------------------------------------
/packer/presto.json:
--------------------------------------------------------------------------------
1 | {
2 | "variables": {
3 | "subnet_id": "",
4 | "vpc_id": "",
5 | "aws_access_key": "",
6 | "aws_secret_key": "",
7 | "presto_version": "",
8 | "ami_name": "",
9 | "environment": "dev",
10 | "source_ami": "",
11 | "region": ""
12 | },
13 | "builders": [{
14 | "type": "amazon-ebs",
15 | "region": "{{ user `region` }}",
16 | "source_ami": "{{ user `source_ami` }}",
17 | "instance_type": "t3a.large",
18 | "ssh_username": "ec2-user",
19 | "ssh_timeout": "5m",
20 | "ami_name": "{{ user `ami_name`}}",
21 | "vpc_id": "{{ user `vpc_id` }}",
22 | "subnet_id": "{{ user `subnet_id` }}",
23 | "force_deregister": true,
24 | "run_tags": {
25 | "Name": "atlan-presto"
26 | },
27 | "tags": {
28 | "Name": "Atlan-Presto",
29 | "Environment": "{{ user `environment` }}",
30 | "user": "arpit",
31 | "presto_version": "{{ user `presto_version` }}"
32 | },
33 | "ami_block_device_mappings": [
34 | {
35 | "device_name": "/dev/xvda",
36 | "volume_size": 8,
37 | "delete_on_termination": true
38 | }
39 | ]
40 | }],
41 | "provisioners": [
42 | {
43 | "type": "shell",
44 | "inline": [
45 | "sudo mkdir -p /etc/presto",
46 | "sudo mkdir -p /etc/presto_metrics",
47 | "sudo chown -R ec2-user:ec2-user /etc/presto /etc/presto_metrics"
48 | ]
49 | },
50 | {
51 | "type": "file",
52 | "source": "./presto",
53 | "destination": "/tmp/presto"
54 | },
55 | {
56 | "type": "file",
57 | "source": "./presto_metrics_prometheus.service",
58 | "destination": "/tmp/presto_metrics_prometheus.service"
59 | },
60 | {
61 | "type": "file",
62 | "source": "./presto_metrics_cloudwatch.service",
63 | "destination": "/tmp/presto_metrics_cloudwatch.service"
64 | },
65 | {
66 | "type": "file",
67 | "source": "./presto_scaling_service.service",
68 | "destination": "/tmp/presto_scaling_service.service"
69 | },
70 | {
71 | "type": "file",
72 | "source": "./base_configs/",
73 | "destination": "/etc/presto"
74 | },
75 | {
76 | "type": "shell",
77 | "environment_vars": [
78 | "aws_access_key={{ user `aws_access_key`}}",
79 | "aws_secret_key={{ user `aws_secret_key`}}",
80 | "presto_version={{ user `presto_version`}}"
81 | ],
82 | "scripts": [
83 | "presto.sh"
84 | ]
85 | }
86 | ]
87 | }
88 |
--------------------------------------------------------------------------------
/packer/presto.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 | #
3 | # Packer provisioning script for the Presto AMI.
4 | #
5 | # Installs Java 11, the Presto server and CLI for $presto_version, the
6 | # presto_metrics binary, and the systemd units/env files that run them.
7 | # Requires `presto_version` in the environment; the *.service files are
8 | # expected in /tmp before this script runs.
9 | #
10 | # -e/-u/-x plus pipefail: abort on any failed command, unset variable or
11 | # failed pipeline stage, and trace every command into the build log.
12 | set -euxo pipefail
13 | echo "HELLO WORLD"
14 |
15 | version=$presto_version
16 |
17 | echo "$version"
18 | # Install Java
19 | sudo amazon-linux-extras install java-openjdk11
20 | java -version
21 | sudo yum install -y awslogs aws-cfn-bootstrap
22 |
23 | sudo mkdir -p /usr/lib/presto /var/log/presto /var/lib/presto/data /var/lib/presto/spill /etc/presto_metrics /etc/presto_scaling_service /var/run/presto
24 |
25 | # Install presto
26 | wget -O /tmp/presto-server.tar.gz "https://repo1.maven.org/maven2/io/prestosql/presto-server/$version/presto-server-$version.tar.gz"
27 | tar -xvf /tmp/presto-server.tar.gz -C /tmp/
28 | sudo cp -r "/tmp/presto-server-$version"/* /usr/lib/presto/
29 | ls /usr/lib/presto
30 |
31 | # Hand the install, data, log and config directories to ec2-user, the
32 | # account the systemd units below run as.
33 | sudo chown -R ec2-user:ec2-user /etc/presto /usr/lib/presto /var/lib/presto /var/log/presto /etc/presto_metrics /etc/presto_scaling_service /var/run/presto
34 |
35 | # install presto cli
36 | wget -O /tmp/presto-cli "https://repo1.maven.org/maven2/io/prestosql/presto-cli/$version/presto-cli-$version-executable.jar"
37 | sudo mv /tmp/presto-cli /usr/local/bin/presto-cli
38 | sudo chmod +x /usr/local/bin/presto-cli
39 |
40 | # create additional services
41 |
42 | # presto metrics prometheus service
43 |
44 | sudo wget -O /usr/local/bin/presto_metrics https://github.com/atlanhq/presto-metrics/releases/download/v1.0.0/presto_metrics_v1.0.0_linux_amd64
45 | sudo chmod +x /usr/local/bin/presto_metrics
46 | sudo chown -R ec2-user:ec2-user /usr/local/bin/presto_metrics
47 |
48 | # Environment files read by the presto_metrics_*.service units.
49 | cat <<EOF > /etc/presto_metrics/env.prometheus
50 | PRESTO_HOST=localhost
51 | PRESTO_PORT=8080
52 | SERVICE_NAME=prometheus
53 | STACK_NAME=atlan-presto-test-stack
54 | CLOUDWATCH_NAMESPACE=presto
55 | EOF
56 |
57 | cat <<EOF > /etc/presto_metrics/env.cloudwatch
58 | PRESTO_HOST=localhost
59 | PRESTO_PORT=8080
60 | SERVICE_NAME=cloudwatch
61 | STACK_NAME=atlan-presto-test-stack
62 | CLOUDWATCH_NAMESPACE=presto
63 | EOF
64 |
65 | # Launcher options consumed by presto.service through EnvironmentFile.
66 | # The backslash-newlines are removed by the shell inside the double
67 | # quotes, so PRESTO_OPTS ends up as a single KEY=VALUE line. Only that
68 | # assignment belongs here; unit sections such as [Install] go in the
69 | # .service file.
70 | sudo touch /etc/default/presto && sudo chown ec2-user:ec2-user /etc/default/presto
71 | /usr/bin/printf "PRESTO_OPTS= \
72 | --pid-file=/var/run/presto/presto.pid \
73 | --node-config=/etc/presto/node.properties \
74 | --jvm-config=/etc/presto/jvm.config \
75 | --config=/etc/presto/config.properties \
76 | --launcher-log-file=/var/log/presto/launcher.log \
77 | --server-log-file=/var/log/presto/server.log \
78 | -Dhttp-server.log.path=/var/log/presto/http-request.log \
79 | -Dcatalog.config-dir=/etc/presto/catalog
80 | " >> /etc/default/presto
81 |
82 | sudo touch /etc/systemd/system/presto.service && sudo chown ec2-user:ec2-user /etc/systemd/system/presto.service
83 |
84 | # \$PRESTO_OPTS is escaped so the literal variable reference reaches the
85 | # unit file and is expanded by systemd, not by this script.
86 | /usr/bin/printf "
87 | [Unit]
88 | Description=Presto Server
89 | Documentation=https://prestosql.io/
90 | After=network-online.target
91 | [Service]
92 | User=ec2-user
93 | Restart=on-failure
94 | Type=forking
95 | PIDFile=/var/run/presto/presto.pid
96 | RuntimeDirectory=presto
97 | EnvironmentFile=/etc/default/presto
98 | ExecStart=/usr/lib/presto/bin/launcher start \$PRESTO_OPTS
99 | ExecStop=/usr/lib/presto/bin/launcher stop \$PRESTO_OPTS
100 | [Install]
101 | WantedBy=default.target
102 | " >> /etc/systemd/system/presto.service
103 |
104 |
105 | sudo cp /tmp/presto_metrics_prometheus.service /etc/systemd/system/presto_metrics_prometheus.service
106 | sudo cp /tmp/presto_metrics_cloudwatch.service /etc/systemd/system/presto_metrics_cloudwatch.service
107 |
108 | # Make systemd pick up the unit files written above.
109 | sudo systemctl daemon-reload
110 |
--------------------------------------------------------------------------------
/packer/presto_metrics_cloudwatch.service:
--------------------------------------------------------------------------------
1 | [Unit]
2 | Description=Presto Metrics CloudWatch exporter
3 | [Service]
4 | User=ec2-user
5 | # Environment file that supplies the ${...} variables used in ExecStart.
6 | EnvironmentFile=/etc/presto_metrics/env.cloudwatch
7 | WorkingDirectory=/etc/presto_metrics/
8 | # presto_metrics binary; every flag value comes from the EnvironmentFile.
9 | ExecStart=/usr/local/bin/presto_metrics --web.service-name=${SERVICE_NAME} \
10 | --web.presto-host=${PRESTO_HOST} \
11 | --web.presto-port=${PRESTO_PORT} \
12 | --web.stack-name=${STACK_NAME} \
13 | --web.cloudwatch-namespace=${CLOUDWATCH_NAMESPACE} \
14 | --web.api-prefix=${API_PREFIX} \
15 | --web.cloudwatch-region=${CLOUDWATCH_REGION}
16 | SuccessExitStatus=143
17 | TimeoutStopSec=10
18 | Restart=on-failure
19 | RestartSec=5
20 | [Install]
21 | WantedBy=multi-user.target
--------------------------------------------------------------------------------
/packer/presto_metrics_prometheus.service:
--------------------------------------------------------------------------------
1 | [Unit]
2 | Description=Presto Metrics Prometheus exporter
3 | [Service]
4 | User=ec2-user
5 | # Environment file that supplies the ${...} variables used in ExecStart.
6 | EnvironmentFile=/etc/presto_metrics/env.prometheus
7 | WorkingDirectory=/etc/presto_metrics/
8 | # presto_metrics binary; every flag value comes from the EnvironmentFile.
9 | ExecStart=/usr/local/bin/presto_metrics --web.service-name=${SERVICE_NAME} \
10 | --web.presto-host=${PRESTO_HOST} \
11 | --web.presto-port=${PRESTO_PORT} \
12 | --web.stack-name=${STACK_NAME} \
13 | --web.cloudwatch-namespace=${CLOUDWATCH_NAMESPACE} \
14 | --web.api-prefix=${API_PREFIX}
15 |
16 | SuccessExitStatus=143
17 | TimeoutStopSec=10
18 | Restart=on-failure
19 | RestartSec=5
20 | [Install]
21 | WantedBy=multi-user.target
--------------------------------------------------------------------------------
/packer/presto_scaling_service.service:
--------------------------------------------------------------------------------
1 | [Unit]
2 | Description=Presto Scaling Service
3 | [Service]
4 | User=ec2-user
5 | # Environment file that supplies the ${...} variables used in ExecStart.
6 | EnvironmentFile=/etc/presto_scaling_service/env
7 | WorkingDirectory=/etc/presto_scaling_service
8 | # presto_scaling_service binary; flag values come from the EnvironmentFile.
9 | ExecStart=/usr/local/bin/presto_scaling_service --web.presto-host=${PRESTO_HOST} \
10 | --web.presto-port=${PRESTO_PORT} \
11 | --web.workers-asg-name=${PRESTO_WORKERS_ASG_NAME} \
12 | --web.api-prefix=${API_PREFIX}
13 | SuccessExitStatus=143
14 | TimeoutStopSec=10
15 | Restart=on-failure
16 | RestartSec=5
17 | [Install]
18 | WantedBy=multi-user.target
--------------------------------------------------------------------------------
/presto.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | AWSTemplateFormatVersion: '2010-09-09'
3 | Description: 'CloudFormation Template for OpenSource Presto'
4 | Metadata:
5 | AWS::CloudFormation::Interface:
6 | ParameterGroups:
7 | -
8 | Label:
9 | default: "AWS Configuration"
10 | Parameters:
11 | - VPC
12 | - Subnet
13 | - KeyName
14 | - SecurityGroups
15 | -
16 | Label:
17 | default: "Presto Configuration"
18 | Parameters:
19 | - CoordinatorInstanceType
20 | - WorkersInstanceType
21 | - WorkersCount
22 | - Ec2RootVolumeSize
23 | - MaxWorkersCount
24 | -
25 | Label:
26 | default: "Additional Parameters"
27 | Parameters:
28 | - Environment
29 |
30 | Mappings:
31 | RegionMap:
32 | ap-south-1:
33 | PRESTOIMAGE: ami-0bcffb0a9872eb14c
34 | us-east-1:
35 | PRESTOIMAGE: ami-04254dc35836a5c71
36 |
37 | Parameters:
38 | VPC:
39 | Type: 'AWS::EC2::VPC::Id'
40 | #aws-permission @cft ec2:DescribeVpcs
41 | Description: VPC ID
42 | AllowedPattern: ".+"
43 | Subnet:
44 | Type: 'AWS::EC2::Subnet::Id'
45 | #aws-permission @cft ec2:DescribeSubnets
46 | Description: Subnet to use for Presto nodes (must belong to the selected VPC)
47 | AllowedPattern: ".+"
48 | KeyName:
49 | Description: EC2 Key Name
50 | Type: AWS::EC2::KeyPair::KeyName
51 | #aws-permission @cft ec2:DescribeKeyPairs
52 | AllowedPattern: ".+"
53 | SecurityGroups:
54 | Type: 'List<AWS::EC2::SecurityGroup::Id>'
55 | #aws-permission @cft ec2:DescribeSecurityGroups
56 | Description: 'Security Groups for Presto nodes (e.g: allowing SSH access). Must select at least one.'
57 | AllowedPattern: ".+"
58 | CoordinatorInstanceType:
59 | Type: String
60 | Default: m5.large
61 | Description: EC2 instance type of the coordinator
62 | CoordinatorInstanceCount:
63 | Type: String
64 | Default: 1
65 | Description: Number of Coordinator instances to deploy
66 | WorkersInstanceType:
67 | Type: String
68 | Default: m5.large
69 | Description: EC2 instance type of the workers
70 | ElasticsearchHost:
71 | Type: String
72 | Default: dev-admin-search.atlan.com
73 | ElasticsearchPort:
74 | Type: "String"
75 | Default: 443
76 | AdditionalConfigsUri:
77 | Type: "String"
78 | Description: Additional Configuration zip file to use, provide an https s3 public url to fetch the zip file from.
79 | WorkersCount:
80 | Description: Number of dedicated Presto worker nodes (apart from coordinator) to instantiate.
81 | Type: Number
82 | Default: 1
83 | MinValue: 1
84 | MaxWorkersCount:
85 | Description: Number of max dedicated Presto worker nodes.
86 | Type: Number
87 | Default: 5
88 | MinValue: 1
89 | Ec2RootVolumeSize:
90 | Type: String
91 | Default: 100
92 | Description: EC2 root volume size
93 | HiveIPAddress:
94 | Type: String
95 | Default: thrift://emr.dev.services:9083
96 | Description: Hive IP Address
97 | Environment:
98 | Type: String
99 | Description: Presto Launch Environment
100 | AllowedValues:
101 | - dev
102 | - prod
103 | Default: "dev"
104 | PrestoVersion:
105 | Type: String
106 | Default: 330
107 | Description: Presto Version which is being deployed
108 |
109 | Resources:
110 | PrestoSecurityGroup:
111 | Type: "AWS::EC2::SecurityGroup"
112 | #aws-permission @cft ec2:CreateSecurityGroup
113 | #aws-permission @cft ec2:DeleteSecurityGroup
114 | Properties:
115 | GroupDescription: Presto nodes Security Group
116 | VpcId: !Ref VPC
117 | Tags:
118 | - { Key: Name, Value: !Sub "${AWS::StackName}-presto-sg" }
119 | - { Key: "presto:opensource:identification:role", Value: "presto:security-group" }
120 | PrestoHttpsOutboundRule:
121 | Type: AWS::EC2::SecurityGroupEgress
122 | #aws-permission @cft ec2:AuthorizeSecurityGroupEgress
123 | #aws-permission @cft ec2:RevokeSecurityGroupEgress
124 | Properties:
125 | IpProtocol: tcp
126 | FromPort: '443'
127 | ToPort: '443'
128 | CidrIp: 0.0.0.0/0
129 | GroupId: !GetAtt PrestoSecurityGroup.GroupId
130 | PrestoOutboundRule:
131 | Type: AWS::EC2::SecurityGroupEgress
132 | #aws-permission @cft ec2:AuthorizeSecurityGroupEgress
133 | #aws-permission @cft ec2:RevokeSecurityGroupEgress
134 | Properties:
135 | IpProtocol: tcp
136 | FromPort: '8080'
137 | ToPort: '8080'
138 | DestinationSecurityGroupId: !GetAtt PrestoSecurityGroup.GroupId
139 | GroupId: !GetAtt PrestoSecurityGroup.GroupId
140 | PrestoInboundRule:
141 | Type: AWS::EC2::SecurityGroupIngress
142 | #aws-permission @cft ec2:AuthorizeSecurityGroupIngress
143 | #aws-permission @cft ec2:RevokeSecurityGroupIngress
144 | Properties:
145 | IpProtocol: tcp
146 | FromPort: '8080'
147 | ToPort: '8080'
148 | SourceSecurityGroupId: !GetAtt PrestoSecurityGroup.GroupId
149 | GroupId: !GetAtt PrestoSecurityGroup.GroupId
150 | PrestoClusterIAMRole:
151 | Type: AWS::IAM::Role
152 | #aws-permission @cft iam:CreateRole
153 | #aws-permission @cft iam:DeleteRole
154 | # Condition: CreateIamInstanceProfile
155 | Properties:
156 | RoleName: !Sub ${AWS::StackName}-presto-cluster-iam-role
157 | AssumeRolePolicyDocument:
158 | Statement:
159 | - Effect: Allow
160 | Principal:
161 | Service: [ec2.amazonaws.com, apigateway.amazonaws.com]
162 | Action: ['sts:AssumeRole']
163 | Policies:
164 | #aws-permission @cft iam:AttachRolePolicy
165 | #aws-permission @cft iam:DeleteRolePolicy
166 | #aws-permission @cft iam:DetachRolePolicy
167 | #aws-permission @cft iam:PutRolePolicy
168 | - PolicyName: !Sub ${AWS::StackName}-presto-cf-policy
169 | PolicyDocument:
170 | Version: "2012-10-17"
171 | Statement:
172 | - Effect: Allow
173 | Action:
174 | - "autoscaling:CompleteLifecycleAction"
175 | - "autoscaling:RecordLifecycleActionHeartbeat"
176 | - "autoscaling:DescribeAutoScalingGroups"
177 | - "autoscaling:PutScalingPolicy"
178 | - "autoscaling:DescribeAutoScalingInstances"
179 | - "autoscaling:DescribeLaunchConfigurations"
180 | - "autoscaling:DescribeScalingActivities"
181 | - "autoscaling:UpdateAutoScalingGroup"
182 | - "autoscaling:SetDesiredCapacity"
183 | - "cloudformation:SignalResource"
184 | - "ec2:DescribeInstances"
185 | - "glue:BatchGetPartition"
186 | - "glue:BatchCreatePartition"
187 | - "glue:CreateDatabase"
188 | - "glue:CreateTable"
189 | - "glue:DeleteDatabase"
190 | - "glue:DeletePartition"
191 | - "glue:DeleteTable"
192 | - "glue:GetDatabase"
193 | - "glue:GetDatabases"
194 | - "glue:GetPartition"
195 | - "glue:GetPartitions"
196 | - "glue:GetTable"
197 | - "glue:GetTables"
198 | - "glue:UpdateTable"
199 | - "glue:UpdatePartition"
200 | - "s3:GetObject"
201 | - "s3:ListBucket"
202 | - "s3:PutObject"
203 | - "sqs:ChangeMessageVisibility"
204 | - "sqs:DeleteMessage"
205 | - "sqs:GetQueueUrl"
206 | - "sqs:ReceiveMessage"
207 | - "logs:CreateLogGroup"
208 | - "logs:CreateLogStream"
209 | - "logs:PutLogEvents"
210 | - "logs:PutRetentionPolicy"
211 | - "logs:DescribeLogGroups"
212 | - "logs:DescribeLogStreams"
213 | - "cloudwatch:PutMetricData"
214 | - "ec2:CreateNetworkInterface"
215 | - "ec2:DescribeNetworkInterfaces"
216 | - "ec2:DeleteNetworkInterface"
217 | - "ec2:AttachNetworkInterface"
218 | - "ec2:DetachNetworkInterface"
219 | - "ec2:DescribeNetworkInterfaceAttribute"
220 | Resource:
221 | - "*"
222 | PrestoClusterInstanceProfile:
223 | Type: AWS::IAM::InstanceProfile
224 | #aws-permission @cft iam:CreateInstanceProfile
225 | #aws-permission @cft iam:DeleteInstanceProfile
226 | #aws-permission @cft iam:GetRole
227 | #aws-permission @cft iam:AddRoleToInstanceProfile
228 | #aws-permission @cft iam:RemoveRoleFromInstanceProfile
229 | Properties:
230 | Roles:
231 | - Ref: PrestoClusterIAMRole
232 | CoordinatorENI:
233 | Type: 'AWS::EC2::NetworkInterface'
234 | #aws-permission @cft ec2:CreateNetworkInterface
235 | #aws-permission @cft ec2:DescribeNetworkInterfaces
236 | #aws-permission @cft ec2:ModifyNetworkInterfaceAttribute
237 | #aws-permission @cft ec2:DeleteNetworkInterface
238 | Properties:
239 | Description: !Sub "${AWS::StackName} coordinator ENI"
240 | GroupSet: !Split
241 | - ','
242 | - !Join
243 | - ','
244 | - - !GetAtt PrestoSecurityGroup.GroupId
245 | - !Join
246 | - ','
247 | - !Ref SecurityGroups
248 | SubnetId: !Ref Subnet
249 | Tags:
250 | - { Key: Name, Value: !Sub "${AWS::StackName}-coordinator-ENI" }
251 | - { Key: "presto:opensource:identification:role", Value: "presto:coordinator-eni" }
252 |
253 | PrestoCoordinatorServerLogGroup:
254 | Type: AWS::Logs::LogGroup
255 | Properties:
256 | LogGroupName: !Sub /prestosql/presto/${AWS::StackName}/coordinators/server
257 | RetentionInDays: 7
258 | PrestoCoordinatorSyslogLogGroup:
259 | Type: AWS::Logs::LogGroup
260 | Properties:
261 | LogGroupName: !Sub /prestosql/presto/${AWS::StackName}/coordinators/syslog
262 | RetentionInDays: 3
263 | PrestoWorkerServerLogGroup:
264 | Type: AWS::Logs::LogGroup
265 | Properties:
266 | LogGroupName: !Sub /prestosql/presto/${AWS::StackName}/workers/server
267 | RetentionInDays: 1
268 | PrestoWorkerSyslogLogGroup:
269 | Type: AWS::Logs::LogGroup
270 | Properties:
271 | LogGroupName: !Sub /prestosql/presto/${AWS::StackName}/workers/syslog
272 | RetentionInDays: 1
273 |
274 | Coordinator:
275 | Type: 'AWS::AutoScaling::LaunchConfiguration'
276 | #aws-permission @cft autoscaling:CreateLaunchConfiguration
277 | #aws-permission @cft autoscaling:DeleteLaunchConfiguration
278 | #aws-permission @cft autoscaling:DescribeLaunchConfigurations
279 | Properties:
280 | InstanceType: !Ref CoordinatorInstanceType
281 | ImageId: !FindInMap [RegionMap, !Ref "AWS::Region", PRESTOIMAGE]
282 | BlockDeviceMappings:
283 | - DeviceName: /dev/xvda
284 | Ebs:
285 | DeleteOnTermination: true
286 | VolumeSize: !Ref Ec2RootVolumeSize
287 | VolumeType: gp2
288 | KeyName: !Ref KeyName
289 | #aws-permission @cft iam:GetInstanceProfile
290 | IamInstanceProfile: !Ref PrestoClusterInstanceProfile
291 | SecurityGroups: !Split
292 | - ','
293 | - !Join
294 | - ','
295 | - - !GetAtt PrestoSecurityGroup.GroupId
296 | - !Join
297 | - ','
298 | - !Ref SecurityGroups
299 | UserData:
300 | Fn::Base64:
301 | !Sub |
#!/bin/bash
# Coordinator bootstrap, part 1: fill in the Presto config templates and derive
# the memory limits from the instance's RAM.
# NOTE: this script is rendered through CloudFormation !Sub, so a dollar sign
# immediately followed by an opening brace is a template substitution. Use the
# plain $VAR form for shell variables.

# Trace every command as it runs. The long option needs `-o`; the spelling
# `-xtrace` is parsed as a bundle of one-letter flags and rejected by bash.
set -o xtrace

# This node is the coordinator and runs the discovery server itself.
sed -i -e "s/{{isCoordinator}}/true/g" /etc/presto/config.properties
sed -i -e "s#{{coordinatorDiscoveryUri}}#http://localhost:8080#g" /etc/presto/config.properties
echo "" >> /etc/presto/config.properties
echo discovery-server.enabled=true >> /etc/presto/config.properties

# R: total RAM in whole GB (MemTotal is reported in kB).
R=$(($(grep MemTotal /proc/meminfo | awk '{print $2}')/1048576))
# X: 80% of RAM, substituted for {{jvmMemory}} in jvm.config.
X=$(($R*8/10))
sed -i -e "s/{{jvmMemory}}/$X/g" /etc/presto/jvm.config
sed -i -e "s/{{envName}}/${Environment}/g" /etc/presto/node.properties
# The EC2 instance id (from the instance metadata service) replaces {{instanceId}}.
sed -i -e "s/{{instanceId}}/$(curl http://169.254.169.254/latest/meta-data/instance-id/)/g" /etc/presto/node.properties

# Z: 60% of X, the per-node user memory limit for a query.
Z=$(($X*6/10))
echo "query.max-memory-per-node="$Z"GB" >> /etc/presto/config.properties

# Y: 70% of X, the per-node total memory limit for a query.
Y=$(($X*7/10))
echo "query.max-total-memory-per-node="$Y"GB" >> /etc/presto/config.properties

echo "query.max-memory=1PB" >> /etc/presto/config.properties
echo "query.low-memory-killer.policy=total-reservation-on-blocked-nodes" >> /etc/presto/config.properties
324 |
325 | sudo tee /etc/awslogs/awslogs.conf > /dev/null < /dev/null < /dev/null < /dev/null <> /etc/presto/config.properties
362 | cat /tmp/config/catalog/hive.properties >> /etc/presto/catalog/hive.properties
363 | cp /tmp/config/catalog/jmx.properties /etc/presto/catalog/jmx.properties
364 | cp /tmp/config/catalog/tpch.properties /etc/presto/catalog/tpch.properties
365 | cp /tmp/config/catalog/tpcds.properties /etc/presto/catalog/tpcds.properties
366 |
367 | mkdir -p /usr/lib/presto/plugin/atlan-audit-logger-presto-experimental/
368 | rm -rf /usr/lib/presto/plugin/atlan-audit-logger-presto-experimental/*
369 | wget -O /usr/lib/presto/plugin/atlan-audit-logger-presto-experimental/QueryAuditEventListener-1.4-prestosql.jar https://github.com/atlanhq/presto-query-logger/releases/download/v1.3/presto-query-logger-1.3.jar
370 |
371 | mkdir /usr/lib/presto/etc/
372 | sudo tee /usr/lib/presto/etc/event-listener.properties > /dev/null < 329 )); then
384 | echo "Presto version greater than 329"
385 | apiPrefix='ui/api/'
386 | fi
387 |
388 |
389 | cat < /etc/presto_metrics/env.prometheus
390 | PRESTO_HOST=localhost
391 | PRESTO_PORT=8080
392 | SERVICE_NAME=prometheus
393 | STACK_NAME=${AWS::StackName}
394 | CLOUDWATCH_NAMESPACE=presto
395 | API_PREFIX=$apiPrefix
396 | EOF
397 |
398 | cat < /etc/presto_metrics/env.cloudwatch
399 | PRESTO_HOST=localhost
400 | PRESTO_PORT=8080
401 | SERVICE_NAME=cloudwatch
402 | STACK_NAME=${AWS::StackName}
403 | CLOUDWATCH_NAMESPACE=presto
404 | CLOUDWATCH_REGION=${AWS::Region}
405 | API_PREFIX=$apiPrefix
406 | EOF
407 |
408 |
409 | systemctl start awslogsd
410 | service presto start
411 |
412 | HTTP_URL="http://localhost:8080/v1/status"
413 | CURL_CMD="curl -w httpcode=%{http_code}"
414 |
415 | # -m, --max-time FOR curl operation
416 | CURL_MAX_CONNECTION_TIMEOUT="-m 5"
417 |
418 | # perform curl operation
419 |
420 | for i in {1..30}
421 | do
422 | sleep 5
423 | CURL_RETURN_CODE=0
424 | CURL_OUTPUT=`$CURL_CMD $CURL_MAX_CONNECTION_TIMEOUT $HTTP_URL 2> /dev/null` || CURL_RETURN_CODE=$?
425 | if [ $CURL_RETURN_CODE -ne 0 ]; then
426 | echo "Curl connection failed with return code - $CURL_RETURN_CODE"
427 | else
428 | echo "Success"
429 | break
430 | fi
431 | done
432 |
433 |
434 | if [ $CURL_RETURN_CODE -ne 0 ]; then
435 | /opt/aws/bin/cfn-signal -s 'false' --stack ${AWS::StackName} --resource Coordinators --region ${AWS::Region}
436 | else
437 | systemctl start presto_metrics_prometheus.service
438 | systemctl start presto_metrics_cloudwatch.service
439 | /opt/aws/bin/cfn-signal -s 'true' --stack ${AWS::StackName} --resource Coordinators --region ${AWS::Region}
440 |
441 | fi
442 | Coordinators:
443 | Type: 'AWS::AutoScaling::AutoScalingGroup'
444 | #aws-permission @cft autoscaling:CreateAutoScalingGroup
445 | #aws-permission @cft autoscaling:DeleteAutoScalingGroup
446 | #aws-permission @cft autoscaling:DescribeAutoScalingGroups
447 | #aws-permission @cft autoscaling:UpdateAutoScalingGroup
448 | #aws-permission @cft autoscaling:DescribeScalingActivities
449 | #aws-permission @cft autoscaling:DescribeLaunchConfigurations
450 | #aws-permission @cft autoscaling:DescribeAutoScalingInstances
451 | #aws-permission @cft ec2:CreateTags
452 | #aws-permission @cft ec2:RunInstances
453 | #aws-permission @cft ec2:TerminateInstances
454 | #aws-permission @cft ec2:DescribeInstances
455 | UpdatePolicy:
456 | # Updates to the LaunchConfiguration replace the whole coordinator Auto Scaling group (a replacing update, not a rolling update)
457 | AutoScalingReplacingUpdate:
458 | WillReplace: true
459 | CreationPolicy:
460 | ResourceSignal:
461 | Timeout: PT15M
462 | Count: !Ref CoordinatorInstanceCount
463 | Properties:
464 | LaunchConfigurationName: !Ref Coordinator
465 | VPCZoneIdentifier:
466 | - !Ref Subnet
467 | MinSize: !Ref CoordinatorInstanceCount
468 | MaxSize: !Ref CoordinatorInstanceCount
469 | DesiredCapacity: !Ref CoordinatorInstanceCount
470 | Tags:
471 | - { Key: Name, Value: !Sub "${AWS::StackName}-presto-coordinator", PropagateAtLaunch: true }
472 | - { Key: "presto:opensource:identification:role", Value: "presto:coordinator", PropagateAtLaunch: true }
473 | HealthCheckGracePeriod: 300
474 | HealthCheckType: ELB
475 | LoadBalancerNames: !Split
476 | - ','
477 | - !Join
478 | - ','
479 | - - !Ref PrestoCoordinatorsELB
480 | Worker:
481 | Type: 'AWS::AutoScaling::LaunchConfiguration'
482 | #aws-permission @cft autoscaling:CreateLaunchConfiguration
483 | #aws-permission @cft autoscaling:DeleteLaunchConfiguration
484 | #aws-permission @cft autoscaling:DescribeLaunchConfigurations
485 | #aws-permission @cft autoscaling:UpdateAutoScalingGroup
486 | Properties:
487 | InstanceType: !Ref WorkersInstanceType
488 | ImageId: !FindInMap [RegionMap, !Ref "AWS::Region", PRESTOIMAGE]
489 | BlockDeviceMappings:
490 | - DeviceName: /dev/xvda
491 | Ebs:
492 | DeleteOnTermination: true
493 | VolumeSize: !Ref Ec2RootVolumeSize
494 | VolumeType: gp2
495 | KeyName: !Ref KeyName
496 | #aws-permission @cft iam:GetInstanceProfile
497 | IamInstanceProfile: !Ref PrestoClusterInstanceProfile
498 | SecurityGroups: !Split
499 | - ','
500 | - !Join
501 | - ','
502 | - - !GetAtt PrestoSecurityGroup.GroupId
503 | - !Join
504 | - ','
505 | - !Ref SecurityGroups
506 | # When worker has private IP only, following things are problematic:
507 | # - S3 cannot be accessed (can be fixed with NAT box in VPC or "VPC endpoint for S3")
508 | # - EC2 boot is very long, as it includes `yum upgrade` which retries timeouts (can
509 | # be fixed with NAT box in VPC or "repo_upgrade: none" in cloud init)
510 | #AssociatePublicIpAddress: false
511 | UserData:
512 | Fn::Base64:
513 | !Sub |
#!/bin/bash
# Worker bootstrap, part 1: fill in the Presto config templates and derive the
# memory limits from the instance's RAM.
# NOTE: this script is rendered through CloudFormation !Sub, so a dollar sign
# immediately followed by an opening brace is a template substitution. Use the
# plain $VAR form for shell variables.

# Trace every command as it runs. The long option needs `-o`; the spelling
# `-xtrace` is parsed as a bundle of one-letter flags and rejected by bash.
set -o xtrace

# This node is a worker; discovery points at the coordinator ENI's private IP.
sed -i -e "s/{{isCoordinator}}/false/g" /etc/presto/config.properties
sed -i -e "s#{{coordinatorDiscoveryUri}}#http://${CoordinatorENI.PrimaryPrivateIpAddress}:8080#g" /etc/presto/config.properties

# R: total RAM in whole GB (MemTotal is reported in kB).
R=$(($(grep MemTotal /proc/meminfo | awk '{print $2}')/1048576))
# X: 80% of RAM, substituted for {{jvmMemory}} in jvm.config.
X=$(($R*8/10))
sed -i -e "s/{{jvmMemory}}/$X/g" /etc/presto/jvm.config
sed -i -e "s/{{envName}}/${Environment}/g" /etc/presto/node.properties
# The EC2 instance id (from the instance metadata service) replaces {{instanceId}}.
sed -i -e "s/{{instanceId}}/$(curl http://169.254.169.254/latest/meta-data/instance-id/)/g" /etc/presto/node.properties

echo "" >> /etc/presto/config.properties

# Z: 60% of X, the per-node user memory limit for a query.
Z=$(($X*6/10))
echo "query.max-memory-per-node="$Z"GB" >> /etc/presto/config.properties

# Y: 70% of X, the per-node total memory limit for a query.
Y=$(($X*7/10))
echo "query.max-total-memory-per-node="$Y"GB" >> /etc/presto/config.properties
echo "query.low-memory-killer.policy=total-reservation-on-blocked-nodes" >> /etc/presto/config.properties
534 |
535 | sudo tee /etc/awslogs/awslogs.conf > /dev/null < /dev/null < /dev/null <> /etc/presto/config.properties
568 | cat /tmp/config/catalog/hive.properties >> /etc/presto/catalog/hive.properties
569 | cp /tmp/config/catalog/jmx.properties /etc/presto/catalog/jmx.properties
570 | cp /tmp/config/catalog/tpch.properties /etc/presto/catalog/tpch.properties
571 | cp /tmp/config/catalog/tpcds.properties /etc/presto/catalog/tpcds.properties
572 |
573 | mkdir /usr/lib/presto/plugin/atlan-audit-logger-presto-experimental/
574 | mkdir /usr/lib/presto/etc/
575 | rm -rf /usr/lib/presto/plugin/atlan-audit-logger-presto-experimental/*
576 | wget -O /usr/lib/presto/plugin/atlan-audit-logger-presto-experimental/QueryAuditEventListener-1.4-prestosql.jar https://athena-cloudformation-templates.s3.ap-south-1.amazonaws.com/unilever/config/QueryAuditEventListener-1.4-prestosql-jar-with-dependencies.jar
577 |
578 | sudo tee /usr/lib/presto/etc/event-listener.properties > /dev/null < 329 )); then
590 | echo "Presto version greater than 329"
591 | apiPrefix='ui/api/'
592 | fi
593 |
594 |
595 | cat < /etc/presto_metrics/env.prometheus
596 | PRESTO_HOST=localhost
597 | PRESTO_PORT=8080
598 | SERVICE_NAME=prometheus
599 | STACK_NAME=${AWS::StackName}
600 | CLOUDWATCH_NAMESPACE=presto
601 | API_PREFIX=$apiPrefix
602 | EOF
603 |
604 | cat < /etc/presto_metrics/env.cloudwatch
605 | PRESTO_HOST=localhost
606 | PRESTO_PORT=8080
607 | SERVICE_NAME=cloudwatch
608 | STACK_NAME=${AWS::StackName}
609 | CLOUDWATCH_NAMESPACE=presto
610 | API_PREFIX=$apiPrefix
611 | EOF
612 |
613 | systemctl start awslogsd
614 | service presto start
615 |
616 | HTTP_URL="http://localhost:8080/v1/status"
617 | CURL_CMD="curl -w httpcode=%{http_code}"
618 |
619 | # -m, --max-time FOR curl operation
620 | CURL_MAX_CONNECTION_TIMEOUT="-m 5"
621 |
622 | # perform curl operation
623 |
624 | for i in {1..30}
625 | do
626 | sleep 5
627 | CURL_RETURN_CODE=0
628 | CURL_OUTPUT=`$CURL_CMD $CURL_MAX_CONNECTION_TIMEOUT $HTTP_URL 2> /dev/null` || CURL_RETURN_CODE=$?
629 | if [ $CURL_RETURN_CODE -ne 0 ]; then
630 | echo "Curl connection failed with return code - $CURL_RETURN_CODE"
631 | else
632 | echo "Success"
633 | break
634 | fi
635 | done
636 |
637 | if [ $CURL_RETURN_CODE -ne 0 ]; then
638 | /opt/aws/bin/cfn-signal -s 'false' --stack ${AWS::StackName} --resource Workers --region ${AWS::Region}
639 | else
640 | systemctl start presto_metrics_prometheus.service
641 | systemctl start presto_metrics_cloudwatch.service
642 | /opt/aws/bin/cfn-signal -s 'true' --stack ${AWS::StackName} --resource Workers --region ${AWS::Region}
643 | fi
644 |
645 | Workers:
646 | Type: 'AWS::AutoScaling::AutoScalingGroup'
647 | #aws-permission @cft autoscaling:CreateAutoScalingGroup
648 | #aws-permission @cft autoscaling:DeleteAutoScalingGroup
649 | #aws-permission @cft autoscaling:DescribeAutoScalingGroups
650 | #aws-permission @cft autoscaling:UpdateAutoScalingGroup
651 | #aws-permission @cft autoscaling:DescribeScalingActivities
652 | #aws-permission @cft autoscaling:DescribeLaunchConfigurations
653 | #aws-permission @cft autoscaling:DescribeAutoScalingInstances
654 | #aws-permission @cft ec2:CreateTags
655 | #aws-permission @cft ec2:RunInstances
656 | #aws-permission @cft ec2:TerminateInstances
657 | #aws-permission @cft ec2:DescribeInstances
658 | UpdatePolicy:
659 | # Updates to the LaunchConfiguration replace the whole worker Auto Scaling group (a replacing update, not a rolling update)
660 | AutoScalingReplacingUpdate:
661 | WillReplace: true
662 | CreationPolicy:
663 | ResourceSignal:
664 | Timeout: PT15M
665 | Count: !Ref WorkersCount
666 | Properties:
667 | LaunchConfigurationName: !Ref Worker
668 | MetricsCollection:
669 | - Granularity: "1Minute"
670 | VPCZoneIdentifier:
671 | - !Ref Subnet
672 | MinSize: !Ref WorkersCount
673 | MaxSize: !Ref MaxWorkersCount
674 | DesiredCapacity: !Ref WorkersCount
675 | Tags:
676 | - { Key: Name, Value: !Sub "${AWS::StackName}-presto-worker", PropagateAtLaunch: true }
677 | - { Key: "presto:opensource:identification:role", Value: "presto:worker", PropagateAtLaunch: true }
678 | HealthCheckGracePeriod: 180
679 | HealthCheckType: ELB
680 | LoadBalancerNames: !Split
681 | - ','
682 | - !Join
683 | - ','
684 | - - !Ref PrestoWorkersELB
WorkersScaleUpPolicy:
  # Adds one instance to the Workers group, with a 60-second cooldown.
  Type: AWS::AutoScaling::ScalingPolicy
  Properties:
    AutoScalingGroupName: !Ref Workers
    AdjustmentType: ChangeInCapacity
    ScalingAdjustment: '1'
    Cooldown: '60'
WorkersScaleDownPolicy:
  # Removes one instance from the Workers group, with a 60-second cooldown.
  Type: AWS::AutoScaling::ScalingPolicy
  Properties:
    AutoScalingGroupName: !Ref Workers
    AdjustmentType: ChangeInCapacity
    ScalingAdjustment: "-1"
    Cooldown: '60'
WorkersUserCPUAlarmHigh:
  # Triggers WorkersScaleUpPolicy when the stack's MeanWorkerUserCPUUtilisation
  # metric averages above 0.7 over a single 120-second period.
  Type: AWS::CloudWatch::Alarm
  Properties:
    # Description kept in step with Period x EvaluationPeriods (120s x 1).
    AlarmDescription: Scale-up if mean worker user CPU > 70% for 2 minutes
    MetricName: MeanWorkerUserCPUUtilisation
    Namespace: presto
    Statistic: Average
    Period: '120'
    EvaluationPeriods: '1'
    Threshold: '0.7'
    AlarmActions:
      - Ref: WorkersScaleUpPolicy
    Dimensions:
      - Name: prestoStackName
        Value:
          Ref: "AWS::StackName"
    ComparisonOperator: GreaterThanThreshold
WorkersUserCPUAlarmLow:
  # Triggers WorkersScaleDownPolicy when the stack's MeanWorkerUserCPUUtilisation
  # metric averages below 0.5 over a single 300-second period.
  # NOTE(review): the user-CPU and system-CPU alarms drive the same two
  # policies independently; confirm that a low reading here cannot undo a
  # scale-up requested by the other metric's alarm.
  Type: AWS::CloudWatch::Alarm
  Properties:
    AlarmDescription: Scale-down if CPU < 50% for 5 minutes
    Namespace: presto
    MetricName: MeanWorkerUserCPUUtilisation
    Dimensions:
      - Name: prestoStackName
        Value: !Ref "AWS::StackName"
    Statistic: Average
    Period: '300'
    EvaluationPeriods: '1'
    ComparisonOperator: LessThanThreshold
    Threshold: '0.5'
    AlarmActions:
      - !Ref WorkersScaleDownPolicy
735 |
WorkersSystemCPUAlarmHigh:
  # Triggers WorkersScaleUpPolicy when the stack's MeanWorkerSystemCPUUtilisation
  # metric averages above 0.7 over a single 120-second period.
  Type: AWS::CloudWatch::Alarm
  Properties:
    # Description kept in step with Period x EvaluationPeriods (120s x 1).
    AlarmDescription: Scale-up if mean worker system CPU > 70% for 2 minutes
    MetricName: MeanWorkerSystemCPUUtilisation
    Namespace: presto
    Statistic: Average
    Period: '120'
    EvaluationPeriods: '1'
    Threshold: '0.7'
    AlarmActions:
      - Ref: WorkersScaleUpPolicy
    Dimensions:
      - Name: prestoStackName
        Value:
          Ref: "AWS::StackName"
    ComparisonOperator: GreaterThanThreshold
WorkersSystemCPUAlarmLow:
  # Triggers WorkersScaleDownPolicy when the stack's MeanWorkerSystemCPUUtilisation
  # metric averages below 0.5 over a single 300-second period.
  Type: AWS::CloudWatch::Alarm
  Properties:
    AlarmDescription: Scale-down if CPU < 50% for 5 minutes
    Namespace: presto
    MetricName: MeanWorkerSystemCPUUtilisation
    Dimensions:
      - Name: prestoStackName
        Value: !Ref "AWS::StackName"
    Statistic: Average
    Period: '300'
    EvaluationPeriods: '1'
    ComparisonOperator: LessThanThreshold
    Threshold: '0.5'
    AlarmActions:
      - !Ref WorkersScaleDownPolicy
770 |
GracefulNodeShutdownQueueIAMRole:
  # Execution role used by GracefulNodeShutdownLambda (see its Role property).
  Type: AWS::IAM::Role
  #aws-permission @cft iam:CreateRole
  #aws-permission @cft iam:DeleteRole
  #Condition: CreateIamInstanceProfile
  Properties:
    RoleName: !Sub ${AWS::StackName}-graceful-shutdown-iam-role
    AssumeRolePolicyDocument:
      Statement:
        - Effect: Allow
          Principal:
            Service:
              - ec2.amazonaws.com
              - lambda.amazonaws.com
          Action: ['sts:AssumeRole']
    Policies:
      #aws-permission @cft iam:AttachRolePolicy
      #aws-permission @cft iam:DeleteRolePolicy
      #aws-permission @cft iam:DetachRolePolicy
      #aws-permission @cft iam:PutRolePolicy
      - PolicyName: !Sub ${AWS::StackName}-graceful-shutdown-iam-role
        PolicyDocument:
          Version: "2012-10-17"
          Statement:
            - Effect: Allow
              Action:
                - "ec2:DescribeInstances"
                - "sqs:ReceiveMessage"
                - "sqs:SendMessage"
                - "sqs:DeleteMessage"
                - "sqs:GetQueueAttributes"
                - "sqs:GetQueueUrl"
                - "logs:PutLogEvents"
                - "logs:CreateLogStream"
                - "logs:CreateLogGroup"
                - "ec2:CreateNetworkInterface"
                - "ec2:DescribeNetworkInterfaces"
                - "ec2:DeleteNetworkInterface"
                # The EC2 action name is singular; the plural form matches no
                # action and therefore grants nothing.
                - "ec2:AttachNetworkInterface"
              Resource:
                - "*"
812 |
AutoScalingTerminationWaitLambdaIAMRole:
  # Execution role used by AutoScalingTerminationWaitLambda (see its Role property).
  Type: AWS::IAM::Role
  #aws-permission @cft iam:CreateRole
  #aws-permission @cft iam:DeleteRole
  #Condition: CreateIamInstanceProfile
  Properties:
    RoleName: !Sub ${AWS::StackName}-asg-terminate-lambda-iam-role
    AssumeRolePolicyDocument:
      Statement:
        - Effect: Allow
          Principal:
            Service:
              - ec2.amazonaws.com
              - lambda.amazonaws.com
          Action: ['sts:AssumeRole']
    Policies:
      #aws-permission @cft iam:AttachRolePolicy
      #aws-permission @cft iam:DeleteRolePolicy
      #aws-permission @cft iam:DetachRolePolicy
      #aws-permission @cft iam:PutRolePolicy
      - PolicyName: !Sub ${AWS::StackName}-asg-terminate-lambda-iam-role
        PolicyDocument:
          Version: "2012-10-17"
          Statement:
            - Effect: Allow
              Action:
                - "autoscaling:CompleteLifecycleAction"
                - "sqs:ReceiveMessage"
                - "sqs:SendMessage"
                - "sqs:GetQueueUrl"
                - "sqs:GetQueueAttributes"
                - "sqs:DeleteMessage"
                - "logs:PutLogEvents"
                - "logs:CreateLogStream"
                - "logs:CreateLogGroup"
                - "ec2:CreateNetworkInterface"
                - "ec2:DescribeNetworkInterfaces"
                - "ec2:DeleteNetworkInterface"
                # The EC2 action name is singular; the plural form matches no
                # action and therefore grants nothing.
                - "ec2:AttachNetworkInterface"
                - "ec2:Describe*"
              Resource:
                - "*"
855 |
856 | GracefulNodeShutdownQueue:
857 | Type: 'AWS::SQS::Queue'
858 | #aws-permission @cft sqs:CreateQueue
859 | #aws-permission @cft sqs:DeleteQueue
860 | #aws-permission @cft sqs:GetQueueAttributes
861 | #aws-permission @cft sqs:TagQueue
862 | Properties:
863 | # This is required so that the threads handling these messages do not try to process the same message over and over again.
864 | # The value is higher than the usual graceful shutdown time, so that in most cases there is no need to request
865 | # more time. Otherwise, if the handling thread approaches 250s, it will tell SQS to keep its message hidden for longer.
866 | # See AWS docs on VisibilityTimeout for more details.
867 | VisibilityTimeout: 250
868 | AutoScalingTerminationWaitQueue:
869 | Type: 'AWS::SQS::Queue'
870 | Properties:
871 | VisibilityTimeout: 250
872 |
873 | AutoScalingNotificationIAMRole:
874 | Type: 'AWS::IAM::Role'
875 | Properties:
876 | AssumeRolePolicyDocument:
877 | Version: 2012-10-17
878 | Statement:
879 | - Effect: Allow
880 | Principal:
881 | Service:
882 | - autoscaling.amazonaws.com
883 | - ec2.amazonaws.com
884 | Action:
885 | - 'sts:AssumeRole'
886 | Path: /
887 | ManagedPolicyArns:
888 | - arn:aws:iam::aws:policy/service-role/AutoScalingNotificationAccessRole
889 | - arn:aws:iam::aws:policy/AmazonEC2ReadOnlyAccess
890 |
891 | GracefulPrestoNodeShutdownHook:
892 | Type: "AWS::AutoScaling::LifecycleHook"
893 | #aws-permission @cft autoscaling:PutLifecycleHook
894 | #aws-permission @cft autoscaling:DeleteLifecycleHook
895 | Properties:
896 | AutoScalingGroupName: !Ref Workers
897 | LifecycleTransition: 'autoscaling:EC2_INSTANCE_TERMINATING'
898 | NotificationTargetARN: !GetAtt GracefulNodeShutdownQueue.Arn
899 | RoleARN: !GetAtt AutoScalingNotificationIAMRole.Arn
900 | HeartbeatTimeout: 3600
901 | DefaultResult: CONTINUE
GracefulNodeShutdownLambda:
  # First stage of worker scale-in. Receives lifecycle-hook messages from
  # GracefulNodeShutdownQueue (via the event source mapping), asks the
  # terminating worker to enter SHUTTING_DOWN, then forwards the message to
  # AutoScalingTerminationWaitQueue.
  Type: "AWS::Lambda::Function"
  Properties:
    Code:
      ZipFile: |
        import json
        import os
        import urllib.request

        import boto3

        MAX_ATTEMPTS = 3
        # Seconds per attempt; all attempts must fit in the 10s function timeout.
        HTTP_TIMEOUT = 2


        def request_shutdown(ip):
            """PUT "SHUTTING_DOWN" to the worker's /v1/info/state.

            Returns True as soon as one attempt succeeds, False if all fail.
            """
            req = urllib.request.Request(
                'http://{}:8080/v1/info/state'.format(ip),
                data=b'"SHUTTING_DOWN"',
                headers={
                    'Content-Type': 'application/json',
                    'cache-control': 'no-cache',
                },
                method='PUT',
            )
            for attempt in range(1, MAX_ATTEMPTS + 1):
                try:
                    with urllib.request.urlopen(req, timeout=HTTP_TIMEOUT) as resp:
                        print(resp.read().decode('utf-8', 'replace'))
                    return True
                except Exception as e:
                    # Best effort: the worker may already be unreachable.
                    print('Shutdown attempt {} failed: {}'.format(attempt, e))
            return False


        def lambda_handler(event, context):
            """Handle one lifecycle-hook message delivered through SQS."""
            print(event)
            event_body = json.loads(event['Records'][0]['body'])
            # Messages that are not lifecycle transitions may lack this key,
            # so read it with .get() and ignore them instead of failing.
            transition = event_body.get('LifecycleTransition')
            if transition != 'autoscaling:EC2_INSTANCE_TERMINATING':
                print("Not a terminating condition return")
                return
            instance_id = event_body['EC2InstanceId']
            ip = boto3.resource('ec2').Instance(instance_id).private_ip_address
            print(ip)
            if ip:
                request_shutdown(ip)

            # Hand over to the wait queue regardless of the shutdown result.
            queue_url = os.getenv('QUEUE_URL')
            print(queue_url)
            response = boto3.client('sqs').send_message(
                QueueUrl=queue_url,
                MessageBody=json.dumps(event_body),
            )
            print(response)

            return {
                'statusCode': 200,
                'body': json.dumps('Hello from Lambda!')
            }
    Environment:
      Variables:
        QUEUE_URL: !Ref AutoScalingTerminationWaitQueue
        COORDINATOR_IP: !GetAtt CoordinatorENI.PrimaryPrivateIpAddress
    Role: !GetAtt GracefulNodeShutdownQueueIAMRole.Arn
    Timeout: 10
    Handler: index.lambda_handler
    # python3.6 is a retired Lambda runtime; the code above uses only the
    # standard library plus the boto3 bundled with the runtime.
    Runtime: python3.12
    VpcConfig:
      SubnetIds:
        - !Ref Subnet
      SecurityGroupIds: !Split
        - ','
        - !Join
          - ','
          - - !GetAtt PrestoSecurityGroup.GroupId
            - !Join
              - ','
              - !Ref SecurityGroups
972 |
973 | GracefulNodeShutdownLambdaEventSourceMapping:
974 | Type: AWS::Lambda::EventSourceMapping
975 | Properties:
976 | BatchSize: 1
977 | EventSourceArn: !GetAtt GracefulNodeShutdownQueue.Arn
978 | FunctionName: !GetAtt GracefulNodeShutdownLambda.Arn
979 | DependsOn:
980 | - GracefulNodeShutdownLambda
981 | - GracefulNodeShutdownQueue
982 |
AutoScalingTerminationWaitLambda:
  # Second stage of worker scale-in. Receives messages from
  # AutoScalingTerminationWaitQueue, checks the worker's task list, and either
  # re-queues the message (tasks still running) or completes the lifecycle
  # action so the instance can terminate.
  Type: "AWS::Lambda::Function"
  Properties:
    Code:
      ZipFile: |
        import json
        import os
        import urllib.request

        import boto3

        # Seconds to wait for the worker; must stay below the 10s function timeout.
        HTTP_TIMEOUT = 5


        def enqueue_message(event_body):
            """Put the message back on the queue, delayed by 60 seconds."""
            queue_url = os.getenv('QUEUE_URL')
            print(queue_url)
            response = boto3.client('sqs').send_message(
                QueueUrl=queue_url,
                MessageBody=json.dumps(event_body),
                DelaySeconds=60
            )
            print(response)


        def complete_lifecycle(event_body):
            """Complete the lifecycle action with result CONTINUE."""
            res = boto3.client('autoscaling').complete_lifecycle_action(
                LifecycleHookName=event_body["LifecycleHookName"],
                AutoScalingGroupName=event_body["AutoScalingGroupName"],
                LifecycleActionToken=event_body["LifecycleActionToken"],
                LifecycleActionResult='CONTINUE'
            )
            print(res)


        def lambda_handler(event, context):
            """Handle one lifecycle-hook message delivered through SQS."""
            event_body = json.loads(event['Records'][0]["body"])
            # Messages that are not lifecycle transitions may lack this key,
            # so read it with .get() and ignore them instead of failing.
            transition = event_body.get("LifecycleTransition")
            if transition != "autoscaling:EC2_INSTANCE_TERMINATING":
                print("Not a terminating condition return")
                return
            ec2_instance_id = event_body["EC2InstanceId"]
            ip = boto3.resource("ec2").Instance(ec2_instance_id).private_ip_address
            print(ec2_instance_id)
            request_url = "http://{ip}:8080/v1/task".format(ip=ip)
            # Only the worker probe is guarded, so that an SQS or Auto Scaling
            # failure is not reported as an unresponsive worker.
            try:
                print(request_url)
                with urllib.request.urlopen(request_url, timeout=HTTP_TIMEOUT) as resp:
                    worker_tasks = json.loads(resp.read().decode('utf-8'))
                print(len(worker_tasks))
                busy = any(
                    task['taskStatus']['state'] == 'RUNNING'
                    for task in worker_tasks
                )
            except Exception as e:
                print(str(e))
                print("Terminating instance because worker not responding")
                complete_lifecycle(event_body)
                return

            if busy:
                print('RUNNING QUEURIES FOUND')
                enqueue_message(event_body)
                return

            print('NO_QUERIES')
            complete_lifecycle(event_body)
            return
    Environment:
      Variables:
        QUEUE_URL: !Ref AutoScalingTerminationWaitQueue
        COORDINATOR_IP: !GetAtt CoordinatorENI.PrimaryPrivateIpAddress
    Role: !GetAtt AutoScalingTerminationWaitLambdaIAMRole.Arn
    Timeout: 10
    Handler: index.lambda_handler
    # python3.6 is a retired Lambda runtime; the code above uses only the
    # standard library plus the boto3 bundled with the runtime.
    Runtime: python3.12
    VpcConfig:
      SubnetIds:
        - !Ref Subnet
      SecurityGroupIds: !Split
        - ','
        - !Join
          - ','
          - - !GetAtt PrestoSecurityGroup.GroupId
            - !Join
              - ','
              - !Ref SecurityGroups
1067 | AutoScalingTerminationWaitLambdaEventSourceMapping:
1068 | Type: AWS::Lambda::EventSourceMapping
1069 | Properties:
1070 | BatchSize: 1
1071 | EventSourceArn: !GetAtt AutoScalingTerminationWaitQueue.Arn
1072 | FunctionName: !GetAtt AutoScalingTerminationWaitLambda.Arn
1073 | DependsOn:
1074 | - AutoScalingTerminationWaitLambda
1075 | - AutoScalingTerminationWaitQueue
1076 |
1077 | AutoScalingNotificationRole:
1078 | Type: 'AWS::IAM::Role'
1079 | Properties:
1080 | AssumeRolePolicyDocument:
1081 | Version: 2012-10-17
1082 | Statement:
1083 | - Effect: Allow
1084 | Principal:
1085 | Service:
1086 | - autoscaling.amazonaws.com
1087 | Action:
1088 | - 'sts:AssumeRole'
1089 | Path: /
1090 | ManagedPolicyArns:
1091 | - arn:aws:iam::aws:policy/service-role/AutoScalingNotificationAccessRole
1092 |
1093 | PrestoWorkersELB:
1094 | Type: AWS::ElasticLoadBalancing::LoadBalancer
1095 | Properties:
1096 | HealthCheck:
1097 | HealthyThreshold: 2
1098 | Interval: 15
1099 | Target: HTTP:8080/v1/status
1100 | Timeout: 10
1101 | UnhealthyThreshold: 2
1102 | Scheme: internal
1103 | Subnets: !Split
1104 | - ','
1105 | - !Join
1106 | - ','
1107 | - - !Ref Subnet
1108 | SecurityGroups: !Split
1109 | - ','
1110 | - !Join
1111 | - ','
1112 | - - !GetAtt PrestoSecurityGroup.GroupId
1113 | - !Join
1114 | - ','
1115 | - !Ref SecurityGroups
1116 | Listeners:
1117 | - InstancePort: 8080
1118 | InstanceProtocol: HTTP
1119 | LoadBalancerPort: 8080
1120 | Protocol: HTTP
1121 |
1122 | PrestoCoordinatorsELB:
1123 | Type: AWS::ElasticLoadBalancing::LoadBalancer
1124 | Properties:
1125 | HealthCheck:
1126 | HealthyThreshold: 2
1127 | Interval: 15
1128 | Target: HTTP:8080/v1/status
1129 | Timeout: 10
1130 | UnhealthyThreshold: 2
1131 | Scheme: internal
1132 | Subnets: !Split
1133 | - ','
1134 | - !Join
1135 | - ','
1136 | - - !Ref Subnet
1137 | SecurityGroups: !Split
1138 | - ','
1139 | - !Join
1140 | - ','
1141 | - - !GetAtt PrestoSecurityGroup.GroupId
1142 | - !Join
1143 | - ','
1144 | - !Ref SecurityGroups
1145 | Listeners:
1146 | - InstancePort: 8080
1147 | InstanceProtocol: HTTP
1148 | LoadBalancerPort: 8080
1149 | Protocol: HTTP
1150 |
1151 | HALambdaIamRole:
1152 | Type: AWS::IAM::Role
1153 | #aws-permission @cft iam:CreateRole
1154 | #aws-permission @cft iam:DeleteRole
1155 | Properties:
1156 | RoleName: !Sub ${AWS::StackName}-HA-lambda-role
1157 | AssumeRolePolicyDocument:
1158 | Statement:
1159 | - Effect: Allow
1160 | Principal:
1161 | Service: [lambda.amazonaws.com]
1162 | Action: ['sts:AssumeRole']
1163 | Policies:
1164 | #aws-permission @cft iam:AttachRolePolicy
1165 | #aws-permission @cft iam:DeleteRolePolicy
1166 | #aws-permission @cft iam:DetachRolePolicy
1167 | #aws-permission @cft iam:PutRolePolicy
1168 | - PolicyName: !Sub ${AWS::StackName}-HA-lambda-policy
1169 | PolicyDocument:
1170 | Version: "2012-10-17"
1171 | Statement:
1172 | - Effect: Allow
1173 | Action:
1174 | - "ec2:DescribeInstances"
1175 | - "ec2:DescribeNetworkInterfaces"
1176 | - "ec2:AttachNetworkInterface"
1177 | - "ec2:DetachNetworkInterface"
1178 | - "ec2:CreateNetworkInterface"
1179 | - "ec2:DeleteNetworkInterface"
1180 | - "logs:CreateLogGroup"
1181 | - "logs:CreateLogStream"
1182 | - "logs:PutLogEvents"
1183 | Resource:
1184 | - "*"
1185 | HALambda:
1186 | Type: AWS::Lambda::Function
1187 | DependsOn: Coordinators
1188 | Properties:
1189 | Role: !GetAtt HALambdaIamRole.Arn
1190 | Handler: index.lambda_handler
1191 | MemorySize: 128
1192 | Runtime: python3.7
1193 | Code:
1194 | ZipFile: |
1195 | import json
1196 | import boto3
1197 | import http.client
1198 | import time
1199 | import os
1200 |
def detach_eni_instance(eniAttachmentId):
    """Force-detach a network interface attachment.

    Calls EC2 ``detach_network_interface`` with ``Force=True`` for the given
    attachment id, sleeps five seconds, then logs the API response.

    Args:
        eniAttachmentId: Id of the ENI attachment to detach.

    Returns:
        None.
    """
    ec2_client = boto3.client('ec2')
    print("detaching eni {}".format(eniAttachmentId))
    detach_kwargs = {'AttachmentId': eniAttachmentId, 'Force': True}
    detach_result = ec2_client.detach_network_interface(**detach_kwargs)
    # Fixed pause before the caller goes on to re-attach the interface.
    time.sleep(5)
    print(detach_result)
    print("ENI detached")
1211 |
def attach_eni_instance(instanceId, eni_id):
    """Attach network interface ``eni_id`` to ``instanceId`` at device index 1.

    Args:
        instanceId: Id of the EC2 instance that receives the interface.
        eni_id: Id of the network interface to attach.

    Returns:
        The response of the EC2 ``attach_network_interface`` call.
    """
    ec2_client = boto3.client('ec2')
    print("attaching eni {} to instance {}".format(eni_id, instanceId))
    attach_kwargs = {
        'DeviceIndex': 1,
        'InstanceId': instanceId,
        'NetworkInterfaceId': eni_id,
    }
    return ec2_client.attach_network_interface(**attach_kwargs)
1221 |
def instance_health(instanceId):
    """Query ``/v1/info`` on port 8080 of an instance and return the payload.

    The instance's private IP is looked up through EC2 ``describe_instances``.
    Any failure while resolving the address, connecting, reading or decoding
    the response, as well as a payload without a ``starting`` key, is
    reported as ``{"starting": True}`` so callers can always read
    ``data['starting']``.

    Args:
        instanceId: Id of the EC2 instance to probe.

    Returns:
        dict: The decoded ``/v1/info`` document, or ``{"starting": True}``
        when the probe failed.
    """
    # Bound the probe so an unreachable host cannot stall the whole
    # invocation. NOTE(review): 5 s is an assumption; keep it well below the
    # function's configured timeout.
    probe_timeout = 5

    client = boto3.client('ec2')
    response = client.describe_instances(
        InstanceIds=[
            instanceId
        ]
    )
    print("checking health for instance {}".format(instanceId))
    conn = None
    try:
        private_ip = response['Reservations'][0]['Instances'][0]['PrivateIpAddress']
        conn = http.client.HTTPConnection(private_ip, 8080, timeout=probe_timeout)
        conn.request("GET", "/v1/info")
        r1 = conn.getresponse()
        print(r1.status, r1.reason)
        # NOTE(review): the quote replacement is kept from the original;
        # presumably it tolerates single-quoted payloads - confirm it is needed.
        data = json.loads(r1.read().decode('utf-8').replace("'", '"'))
        if not isinstance(data, dict) or 'starting' not in data:
            raise ValueError("unexpected /v1/info payload: {!r}".format(data))
    except Exception as e:
        print("AN EXCEPTION OCCURED", str(e))
        data = {
            "starting": True
        }
    finally:
        # Always release the socket, whether or not the probe succeeded.
        if conn is not None:
            conn.close()
    return data
1243 |
def attach_eni(eni_id):
    """Attach ``eni_id`` to the first running coordinator that is ready.

    Candidates are the EC2 instances tagged
    ``presto:opensource:identification:role = presto:coordinator`` that belong
    to the CloudFormation stack named by the ``STACK_NAME`` environment
    variable. Each running candidate is probed on ``/v1/info`` (port 8080);
    the interface is attached to the first one whose payload reports
    ``starting`` as false, and the search stops there.

    Args:
        eni_id: Id of the network interface to attach.

    Returns:
        None.
    """
    # Bound each probe so one unreachable candidate cannot stall the search.
    # NOTE(review): 5 s is an assumption; keep the total below the function's
    # configured timeout.
    probe_timeout = 5

    client = boto3.client('ec2')
    response = client.describe_instances(
        Filters=[
            {
                'Name': 'tag:presto:opensource:identification:role',
                'Values': [
                    'presto:coordinator'
                ]
            },
            {
                'Name': 'tag:aws:cloudformation:stack-name',
                'Values': [
                    os.environ['STACK_NAME']
                ]
            }
        ]
    )
    for reservation in response['Reservations']:
        for instance in reservation['Instances']:
            if instance['State']['Name'] != 'running':
                continue
            privateIpAddress = instance['PrivateIpAddress']
            instanceId = instance['InstanceId']
            print("Found instance to attach {}, {}".format(privateIpAddress, instanceId))
            conn = None
            try:
                conn = http.client.HTTPConnection(privateIpAddress, 8080, timeout=probe_timeout)
                conn.request("GET", "/v1/info")
                r1 = conn.getresponse()
                print(privateIpAddress, r1.status, r1.reason)
                data = json.loads(r1.read().decode('utf-8').replace("'", '"'))
                if not isinstance(data, dict) or 'starting' not in data:
                    raise ValueError("unexpected /v1/info payload: {!r}".format(data))
            except Exception as e:
                print("AN EXCEPTION OCCURED", str(e))
                data = {
                    "starting": True
                }
            finally:
                if conn is not None:
                    conn.close()
            if not data['starting']:
                print("Instance {} is healthy | Attaching ENI to Instance".format(instanceId))
                print(attach_eni_instance(instanceId, eni_id))
                # Stop the whole search: leaving only the inner loop would go
                # on to the next reservation and try to attach the interface
                # a second time.
                return
            print(instanceId + "Instance is unhealthy ...")
1286 |
def lambda_handler(event, context):
    """Keep the ENI named by ``ENI_ID`` attached to a ready coordinator.

    If the interface status is ``available`` a coordinator is looked up and
    the interface attached to it. Otherwise the instance currently holding
    the interface is probed; when its payload reports ``starting`` as truthy
    the interface is detached and attached elsewhere.

    Args:
        event: Lambda event (unused).
        context: Lambda context (unused).

    Returns:
        None.
    """
    ec2 = boto3.resource('ec2')
    eni_id = os.environ['ENI_ID']
    network_interface = ec2.NetworkInterface(eni_id)
    print("Network ENI status: ", network_interface.status)

    if network_interface.status == "available":
        print("ENI not attached to any coordinator | Looking for suitable coordinator")
        attach_eni(eni_id)
        return

    print("ENI is attached | Checking health of the coordinator")
    data = instance_health(network_interface.attachment['InstanceId'])
    if data['starting']:
        print("Coordinator is unhealthy | REPLACING")
        detach_eni_instance(network_interface.attachment['AttachmentId'])
        attach_eni(eni_id)
    else:
        print("Coordinator is healthy | EXITING")
1304 |
1305 |
1306 | VpcConfig:
1307 | SubnetIds:
1308 | - !Ref Subnet
1309 | SecurityGroupIds: !Split
1310 | - ','
1311 | - !Join
1312 | - ','
1313 | - - !GetAtt PrestoSecurityGroup.GroupId
1314 | - !Join
1315 | - ','
1316 | - !Ref SecurityGroups
1317 | Timeout: 60
1318 | Environment:
1319 | Variables:
1320 | ENI_ID: !Ref CoordinatorENI
1321 | STACK_NAME: !Sub ${AWS::StackName}
1322 | HALambdaTriggerRule:
1323 | Type: AWS::Events::Rule
1324 | #aws-permission @cft events:PutRule
1325 | #aws-permission @cft events:DeleteRule
1326 | #aws-permission @cft events:DescribeRule
1327 | #aws-permission @cft events:PutTargets
1328 | #aws-permission @cft events:RemoveTargets
1329 | DependsOn: Coordinators
1330 | Properties:
1331 | ScheduleExpression: rate(1 minute)
1332 | State: ENABLED
1333 | Targets:
1334 | -
1335 | Arn: !GetAtt HALambda.Arn
1336 | Id: 'HALambda'
1337 | PermissionForEventsToInvokeHALambda:
1338 | Type: AWS::Lambda::Permission
1339 | #aws-permission @cft lambda:AddPermission
1340 | #aws-permission @cft lambda:RemovePermission
1341 | Properties:
1342 | FunctionName:
1343 | Ref: HALambda
1344 | Action: 'lambda:InvokeFunction'
1345 | Principal: 'events.amazonaws.com'
1346 | SourceArn: !GetAtt HALambdaTriggerRule.Arn
1347 |
1348 | Outputs:
1349 | PrestoCoordinatorIp:
1350 | Description: Coordinator Instance Ip
1351 | Value: !GetAtt CoordinatorENI.PrimaryPrivateIpAddress
1352 | CoordinatorDashboard:
1353 | Description: Coordinator Dashboard URL
1354 | Value: !Sub "http://${CoordinatorENI.PrimaryPrivateIpAddress}:8080/ui"
--------------------------------------------------------------------------------
/sample_presto_config.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atlanhq/presto-on-aws/a0f31d44b8d9729c9fdbccdb516969c735b148f3/sample_presto_config.zip
--------------------------------------------------------------------------------
/scripts/autoscaling_termination_wait/lambda_function.py:
--------------------------------------------------------------------------------
1 | import json
2 | import boto3
3 | import os
4 | from botocore.vendored import requests
5 |
def lambda_handler(event, context):
    """Decide whether a terminating worker instance may be shut down yet.

    The lifecycle notification is read from the first SQS record of
    ``event``. For an ``autoscaling:EC2_INSTANCE_TERMINATING`` transition the
    worker is asked for its tasks at ``http://<private-ip>:8080/v1/task``:

    * a task in state ``RUNNING``: the notification is sent back to the queue
      named by the ``QUEUE_URL`` environment variable with a 60 second delay;
    * no running task, or the worker cannot be queried: the lifecycle action
      is completed with ``CONTINUE``.

    Any other message, including one without a ``LifecycleTransition`` key,
    is ignored.

    Args:
        event: SQS-triggered Lambda event; only ``event['Records'][0]`` is read.
        context: Lambda context (unused).

    Returns:
        None.
    """
    # Bound the worker probe so an unreachable worker is handled by the
    # "not responding" branch instead of stalling the invocation.
    # NOTE(review): 5 s is an assumption; keep it below the function's
    # configured timeout.
    probe_timeout = 5

    def enqueue_message(event_body):
        """Re-queue the notification so it is re-evaluated after 60 seconds."""
        queue_url = os.getenv('QUEUE_URL')
        print(queue_url)
        sqs = boto3.client('sqs')
        response = sqs.send_message(
            QueueUrl=queue_url,
            MessageBody=json.dumps(event_body),
            DelaySeconds=60
        )
        print(response)

    def complete_lifecycle(event_body):
        """Complete the lifecycle action with result ``CONTINUE``."""
        autoscaling = boto3.client('autoscaling')
        res = autoscaling.complete_lifecycle_action(
            LifecycleHookName=event_body["LifecycleHookName"],
            AutoScalingGroupName=event_body["AutoScalingGroupName"],
            LifecycleActionToken=event_body["LifecycleActionToken"],
            LifecycleActionResult='CONTINUE'
        )
        print(res)

    event_body = json.loads(event['Records'][0]["body"])
    # .get(): messages without a LifecycleTransition key (presumably e.g.
    # Auto Scaling test notifications) are skipped instead of raising KeyError.
    if event_body.get("LifecycleTransition") != "autoscaling:EC2_INSTANCE_TERMINATING":
        print("Not a terminating condition return")
        return

    ec2_instance_id = event_body["EC2InstanceId"]
    ec2_instance = boto3.resource("ec2").Instance(ec2_instance_id)
    ip = ec2_instance.private_ip_address
    print(ec2_instance_id)
    request_url = "http://{ip}:8080/v1/task".format(ip=ip)
    print(request_url)

    # Only the probe itself is guarded. A failure while re-queueing must not
    # fall through to complete_lifecycle(), which would terminate a worker
    # that still has running tasks.
    try:
        worker_tasks = requests.get(request_url, timeout=probe_timeout).json()
        print(len(worker_tasks))
        has_running_task = any(
            task['taskStatus']['state'] == 'RUNNING' for task in worker_tasks
        )
    except Exception as e:
        print(str(e))
        print("Terminating instance because worker not responding")
        complete_lifecycle(event_body)
        return

    if has_running_task:
        print('RUNNING QUEURIES FOUND')
        enqueue_message(event_body)
        return

    print('NO_QUERIES')
    complete_lifecycle(event_body)
    return
59 |
--------------------------------------------------------------------------------
/scripts/graceful_shutdown_handler/lambda_function.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import boto3
4 | from botocore.vendored import requests
5 |
def lambda_handler(event, context):
    """Ask a terminating worker to shut down, then forward the notification.

    The lifecycle notification is read from the first SQS record of
    ``event``. For an ``autoscaling:EC2_INSTANCE_TERMINATING`` transition the
    state ``"SHUTTING_DOWN"`` is PUT to
    ``http://<private-ip>:8080/v1/info/state`` (up to three attempts, best
    effort), and the notification is then sent to the queue named by the
    ``QUEUE_URL`` environment variable. Any other message, including one
    without a ``LifecycleTransition`` key, is ignored.

    Args:
        event: SQS-triggered Lambda event; only ``event['Records'][0]`` is read.
        context: Lambda context (unused).

    Returns:
        None for ignored messages, otherwise a dict with ``statusCode`` 200
        and a JSON-encoded ``body``.
    """
    # Upper bound per shutdown request so an unreachable worker cannot stall
    # the invocation. NOTE(review): 3 s per attempt is an assumption; keep
    # three attempts below the function's configured timeout.
    request_timeout = 3

    print(event)
    event_body = json.loads(event['Records'][0]["body"])
    # .get(): messages without a LifecycleTransition key (presumably e.g.
    # Auto Scaling test notifications) are skipped instead of raising KeyError.
    if event_body.get("LifecycleTransition") != "autoscaling:EC2_INSTANCE_TERMINATING":
        print("Not a terminating condition return")
        return

    instance_id = event_body["EC2InstanceId"]
    ec2 = boto3.resource('ec2')
    ec2_instance = ec2.Instance(instance_id)
    ip = ec2_instance.private_ip_address

    url = 'http://{}:8080/v1/info/state'.format(ip)
    payload = "\"SHUTTING_DOWN\""
    headers = {
        'Content-Type': "application/json",
        'cache-control': "no-cache"
    }
    for attempt in range(3):
        try:
            response = requests.request(
                "PUT", url, data=payload, headers=headers, timeout=request_timeout
            )
            print(response.text)
            if response.ok:
                # Accepted: no need to repeat the request.
                break
        except Exception as e:
            # Still best effort, but leave a trace of why the attempt failed.
            print("shutdown request attempt {} failed: {}".format(attempt + 1, e))

    print(ip)
    queue_url = os.getenv('QUEUE_URL')
    print(queue_url)
    sqs = boto3.client('sqs')
    response = sqs.send_message(
        QueueUrl=queue_url,
        MessageBody=json.dumps(event_body)
    )
    print(response)

    return {
        'statusCode': 200,
        'body': json.dumps('Hello from Lambda!')
    }
45 |
--------------------------------------------------------------------------------
/scripts/ha_lambda/lambda_function.py:
--------------------------------------------------------------------------------
1 | import json
2 | import boto3
3 | import http.client
4 | import time
5 | import os
6 |
def detach_eni_instance(eniAttachmentId):
    """Force-detach a network interface attachment.

    Calls EC2 ``detach_network_interface`` with ``Force=True`` for the given
    attachment id, sleeps five seconds, then logs the API response.

    Args:
        eniAttachmentId: Id of the ENI attachment to detach.

    Returns:
        None.
    """
    ec2_client = boto3.client('ec2')
    print("detaching eni {}".format(eniAttachmentId))
    detach_kwargs = {'AttachmentId': eniAttachmentId, 'Force': True}
    detach_result = ec2_client.detach_network_interface(**detach_kwargs)
    # Fixed pause before the caller goes on to re-attach the interface.
    time.sleep(5)
    print(detach_result)
    print("ENI detached")
17 |
def attach_eni_instance(instanceId, eni_id):
    """Attach network interface ``eni_id`` to ``instanceId`` at device index 1.

    Args:
        instanceId: Id of the EC2 instance that receives the interface.
        eni_id: Id of the network interface to attach.

    Returns:
        The response of the EC2 ``attach_network_interface`` call.
    """
    ec2_client = boto3.client('ec2')
    print("attaching eni {} to instance {}".format(eni_id, instanceId))
    attach_kwargs = {
        'DeviceIndex': 1,
        'InstanceId': instanceId,
        'NetworkInterfaceId': eni_id,
    }
    return ec2_client.attach_network_interface(**attach_kwargs)
27 |
def instance_health(instanceId):
    """Query ``/v1/info`` on port 8080 of an instance and return the payload.

    The instance's private IP is looked up through EC2 ``describe_instances``.
    Any failure while resolving the address, connecting, reading or decoding
    the response, as well as a payload without a ``starting`` key, is
    reported as ``{"starting": True}`` so callers can always read
    ``data['starting']``.

    Args:
        instanceId: Id of the EC2 instance to probe.

    Returns:
        dict: The decoded ``/v1/info`` document, or ``{"starting": True}``
        when the probe failed.
    """
    # Bound the probe so an unreachable host cannot stall the whole
    # invocation. NOTE(review): 5 s is an assumption; keep it well below the
    # function's configured timeout.
    probe_timeout = 5

    client = boto3.client('ec2')
    response = client.describe_instances(
        InstanceIds=[
            instanceId
        ]
    )
    print("checking health for instance {}".format(instanceId))
    conn = None
    try:
        private_ip = response['Reservations'][0]['Instances'][0]['PrivateIpAddress']
        conn = http.client.HTTPConnection(private_ip, 8080, timeout=probe_timeout)
        conn.request("GET", "/v1/info")
        r1 = conn.getresponse()
        print(r1.status, r1.reason)
        # NOTE(review): the quote replacement is kept from the original;
        # presumably it tolerates single-quoted payloads - confirm it is needed.
        data = json.loads(r1.read().decode('utf-8').replace("'", '"'))
        if not isinstance(data, dict) or 'starting' not in data:
            raise ValueError("unexpected /v1/info payload: {!r}".format(data))
    except Exception as e:
        print("AN EXCEPTION OCCURED", str(e))
        data = {
            "starting": True
        }
    finally:
        # Always release the socket, whether or not the probe succeeded.
        if conn is not None:
            conn.close()
    return data
49 |
def attach_eni(eni_id):
    """Attach ``eni_id`` to the first running coordinator that is ready.

    Candidates are the EC2 instances tagged
    ``presto:opensource:identification:role = presto:coordinator`` that belong
    to the CloudFormation stack named by the ``STACK_NAME`` environment
    variable. Each running candidate is probed on ``/v1/info`` (port 8080);
    the interface is attached to the first one whose payload reports
    ``starting`` as false, and the search stops there.

    Args:
        eni_id: Id of the network interface to attach.

    Returns:
        None.
    """
    # Bound each probe so one unreachable candidate cannot stall the search.
    # NOTE(review): 5 s is an assumption; keep the total below the function's
    # configured timeout.
    probe_timeout = 5

    client = boto3.client('ec2')
    response = client.describe_instances(
        Filters=[
            {
                'Name': 'tag:presto:opensource:identification:role',
                'Values': [
                    'presto:coordinator'
                ]
            },
            {
                'Name': 'tag:aws:cloudformation:stack-name',
                'Values': [
                    os.environ['STACK_NAME']
                ]
            }
        ]
    )
    for reservation in response['Reservations']:
        for instance in reservation['Instances']:
            if instance['State']['Name'] != 'running':
                continue
            privateIpAddress = instance['PrivateIpAddress']
            instanceId = instance['InstanceId']
            print("Found instance to attach {}, {}".format(privateIpAddress, instanceId))
            conn = None
            try:
                conn = http.client.HTTPConnection(privateIpAddress, 8080, timeout=probe_timeout)
                conn.request("GET", "/v1/info")
                r1 = conn.getresponse()
                print(privateIpAddress, r1.status, r1.reason)
                data = json.loads(r1.read().decode('utf-8').replace("'", '"'))
                if not isinstance(data, dict) or 'starting' not in data:
                    raise ValueError("unexpected /v1/info payload: {!r}".format(data))
            except Exception as e:
                print("AN EXCEPTION OCCURED", str(e))
                data = {
                    "starting": True
                }
            finally:
                if conn is not None:
                    conn.close()
            if not data['starting']:
                print("Instance {} is healthy | Attaching ENI to Instance".format(instanceId))
                print(attach_eni_instance(instanceId, eni_id))
                # Stop the whole search: leaving only the inner loop would go
                # on to the next reservation and try to attach the interface
                # a second time.
                return
            print(instanceId + "Instance is unhealthy ...")
92 |
def lambda_handler(event, context):
    """Keep the ENI named by ``ENI_ID`` attached to a ready coordinator.

    If the interface status is ``available`` a coordinator is looked up and
    the interface attached to it. Otherwise the instance currently holding
    the interface is probed; when its payload reports ``starting`` as truthy
    the interface is detached and attached elsewhere.

    Args:
        event: Lambda event (unused).
        context: Lambda context (unused).

    Returns:
        None.
    """
    ec2 = boto3.resource('ec2')
    eni_id = os.environ['ENI_ID']
    network_interface = ec2.NetworkInterface(eni_id)
    print("Network ENI status: ", network_interface.status)

    if network_interface.status == "available":
        print("ENI not attached to any coordinator | Looking for suitable coordinator")
        attach_eni(eni_id)
        return

    print("ENI is attached | Checking health of the coordinator")
    data = instance_health(network_interface.attachment['InstanceId'])
    if data['starting']:
        print("Coordinator is unhealthy | REPLACING")
        detach_eni_instance(network_interface.attachment['AttachmentId'])
        attach_eni(eni_id)
    else:
        print("Coordinator is healthy | EXITING")
110 |
111 |
--------------------------------------------------------------------------------