├── LICENSE
├── README.md
├── config
    ├── catalog
    │   ├── hive.properties
    │   ├── jmx.properties
    │   ├── tpcds.properties
    │   └── tpch.properties
    └── config.properties
├── packer
    ├── .atlanrc
    ├── Makefile
    ├── base_configs
    │   ├── config.properties
    │   ├── env.sh
    │   ├── jvm.config
    │   ├── log.properties
    │   └── node.properties
    ├── presto.json
    ├── presto.sh
    ├── presto_metrics_cloudwatch.service
    ├── presto_metrics_prometheus.service
    └── presto_scaling_service.service
├── presto.yaml
├── sample_presto_config.zip
└── scripts
    ├── autoscaling_termination_wait
        └── lambda_function.py
    ├── graceful_shutdown_handler
        └── lambda_function.py
    └── ha_lambda
        └── lambda_function.py


/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | <h1 align='center'>Presto on AWS</h1>
 2 | 
 3 | 
 4 | This is a cloudformation template for deploying [Presto](https://prestosql.io) on AWS. It deploys coordinators and workers in an autoscaling group.
 5 | 
 6 | ## Features
 7 | 
 8 | - Graceful shutdown of workers using Autoscaling lifecycle management. A Presto worker will not shut down until all the queries running on it have finished.
 9 | - Highly available coordinator nodes. 
10 | - Autoscaling of presto workers based on presto's memory and CPU usage. 
11 | - A cloudwatch and prometheus agent which runs inside presto coordinator to push presto's metrics such as input data, CPU usage, running/blocked/failed queries. 
12 | - A query logger which pushes completed queries and their stats to Elasticsearch.
13 | - A presto AMI creation packer script to easily update presto version. 
14 | - Logs of presto coordinator and workers available in Cloudwatch.
15 | - Health check in Presto workers to remove unhealthy workers
16 | 
17 | ## Architecture
18 | 
19 | ![Screen Shot 2020-06-04 at 8.52.37 PM](https://user-images.githubusercontent.com/10682054/83781909-c81ab280-a6ac-11ea-8e48-b36ec631f5ac.png)
20 | 
21 | 
22 | 
23 | ## Pre-requisites
24 | 
25 | - A VPC and subnet
26 | - A user with following permissions. // TODO: Add permissions
27 | 
28 | ### Modifying the presto configuration
29 | 
30 | - New connectors: To add Presto connector configurations (such as the Hive or Postgres connector) to the deployment, create a directory with the following structure. Add a properties file for each connector, then zip the directory. Add the command that copies the connector files to the bootstrap script in the CFT.
31 |   ```
32 |     ├── catalog 
33 |     │   ├── hive.properties
34 |     │   ├── jmx.properties
35 |     │   ├── tpcds.properties
36 |     │   └── tpch.properties
37 |     └── config.properties
38 |   ```
39 | 
40 | - To modify the core configuration, such as enabling spill or enabling/disabling the reserved pool, edit the `config.properties` file mentioned above. Memory-based settings such as JVM memory and max memory per node are handled automatically based on the selected instances.
41 | 
42 | Provide the URL of the zipped directory above in the `AdditionalConfigsUri` parameter of the CFT.
43 | 
44 | ### Creating Presto AMI using Packer
45 | 
46 |  - Go inside `packer` directory and change the parameters of `.atlanrc` file. The presto version is 330 by default. Source AMI is Amazon Linux 2 in the region you want to create the AMI in. 
47 |  - Run the following command
48 |   ```bash
49 |     make build_presto_image
50 |   ```
51 |  - Change the `presto.json` to modify the AMI further. 
52 |  - To use this AMI add the AMI ID in the mapping in `presto.yaml` with AMI's region. 
53 | 
54 | ### Deployment
55 | 
56 | The CFT requires the following parameters for deployment:
57 |  - VPC ID: VPC to deploy Presto cluster
58 |  - Subnet ID: Subnet to deploy Presto cluster
59 |  - Security groups ID: SGs to attach to presto coordinators and workers
60 |  - Keyname: Private key to use to launch presto machines
61 |  - Coordinator Instance type: EC2 Instance type for coordinator
62 |  - Coordinator Instance Count: For HA Coordinator deployment set it to 2 else set it to 1. 
63 |  - Min workers count: Minimum numbers of EC2 machines in Presto workers ASG
64 |  - Max workers count: Maximum numbers of EC2 machines in Presto workers ASG
65 |  - Workers instance type: EC2 Instance type for workers
66 |  - Presto Version: Presto version, required for compatibility before and after version 330
67 |  - EC2 Root volume size: EBS Volume size (GB) for presto workers and coordinators. Increase the value to few hundred GBs if you have disk spill based workload. 
68 |  - Hive IP: Format `thrift://<ip>:9083`
69 |  - Elasticsearch Host: Elasticsearch host for query logger to push SQL queries into. 
70 |  - Elasticsearch Port: Elasticsearch port for query logger to push SQL queries into
71 |  - Environment: Identifier for Dev, Production presto clusters.
72 |  
73 | Create the AMI and provide the ID with region in CFT. Now deploy the CFT by following the guide from AWS.
74 | 
75 | ### Configuring autoscaling of workers
76 | 
77 | You can configure autoscaling of Presto workers based on Presto metrics such as running queries and heap usage. These metrics are pushed to CloudWatch by the Presto coordinator. You can then configure CloudWatch alarms and autoscaling based on these CloudWatch metrics.
78 | 
79 | ### Limitations/Future work
80 |  - Add support for TLS in the deployment. 
81 |  - Graceful shutdown lambda only waits for 1 hour for queries to finish. Add feature to wait to terminate the worker until all the queries finish on that worker.
82 |  - The high-availability feature only switches between the standby and live coordinators; it doesn't restart the failed coordinator.
83 |  - No retention policy configuration for presto logs in Cloudwatch
84 | 
85 | ### Contribute
86 | 
87 | 1. Fork it
88 | 2. Create your feature branch (`git checkout -b my-new-feature`)
89 | 3. Commit your changes (`git commit -am 'Add some feature'`)
90 | 4. Push to the branch (`git push origin my-new-feature`)
91 | 5. Create new Pull Request
92 | 


--------------------------------------------------------------------------------
/config/catalog/hive.properties:
--------------------------------------------------------------------------------
1 | connector.name=hive-hadoop2
2 | hive.metastore-refresh-interval=1s
3 | hive.metastore-cache-ttl=5s
4 | hive.non-managed-table-writes-enabled = true
5 | hive.max-partitions-per-writers=1000
6 | hive.orc.use-column-names = true
7 | hive.parquet.use-column-names = true
8 | hive.metastore-timeout=5m
9 | 


--------------------------------------------------------------------------------
/config/catalog/jmx.properties:
--------------------------------------------------------------------------------
1 | connector.name=jmx


--------------------------------------------------------------------------------
/config/catalog/tpcds.properties:
--------------------------------------------------------------------------------
1 | connector.name=tpcds


--------------------------------------------------------------------------------
/config/catalog/tpch.properties:
--------------------------------------------------------------------------------
1 | connector.name=tpch


--------------------------------------------------------------------------------
/config/config.properties:
--------------------------------------------------------------------------------
 1 | experimental.spill-enabled=false
 2 | experimental.spill-order-by=true
 3 | experimental.spill-window-operator=true
 4 | experimental.spiller-spill-path=/var/lib/presto/spill/
 5 | experimental.spiller-max-used-space-threshold=0.8
 6 | experimental.max-spill-per-node=260GB
 7 | experimental.query-max-spill-per-node=150GB
 8 | experimental.reserved-pool-enabled=false
 9 | query.low-memory-killer.policy=total-reservation-on-blocked-nodes
10 | shutdown.grace-period=60.00m


--------------------------------------------------------------------------------
/packer/.atlanrc:
--------------------------------------------------------------------------------
1 | AWS_ACCESS_KEY=
2 | AWS_SECRET_KEY=
3 | VPC_ID=
4 | SUBNET_ID=
5 | PRESTO_VERSION=330
6 | AMI_NAME=
7 | SOURCE_AMI=ami-0323c3dd2da7fb37d
8 | REGION=us-east-1
9 | 


--------------------------------------------------------------------------------
/packer/Makefile:
--------------------------------------------------------------------------------
 1 | # Build settings (AWS credentials, VPC/subnet, Presto version, AMI name, source
 2 | # AMI, region) come from .atlanrc; the leading '-' lets make continue if the
 3 | # file is missing.
 4 | FILE := .atlanrc
 5 | -include ./$(FILE)
 6 | 
 7 | # These targets do not produce files, so always run them when requested.
 8 | .PHONY: build_presto_image validate_config
 9 | 
10 | # Build the Presto AMI, passing each .atlanrc value to Packer as a -var.
11 | build_presto_image:
12 | 	@packer build \
13 | 		-var 'vpc_id=$(VPC_ID)' \
14 | 		-var 'subnet_id=$(SUBNET_ID)' \
15 | 		-var 'aws_access_key=$(AWS_ACCESS_KEY)' \
16 | 		-var 'aws_secret_key=$(AWS_SECRET_KEY)' \
17 | 		-var 'presto_version=$(PRESTO_VERSION)' \
18 | 		-var 'ami_name=$(AMI_NAME)' \
19 | 		-var 'source_ami=$(SOURCE_AMI)' \
20 | 		-var 'region=$(REGION)' \
21 | 		presto.json
22 | 
23 | # Validate the Packer template.
24 | validate_config:
25 | 	@packer validate presto.json
26 | 


--------------------------------------------------------------------------------
/packer/base_configs/config.properties:
--------------------------------------------------------------------------------
1 | coordinator={{isCoordinator}}
2 | node-scheduler.include-coordinator=false
3 | http-server.http.port=8080
4 | discovery.uri={{coordinatorDiscoveryUri}}


--------------------------------------------------------------------------------
/packer/base_configs/env.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atlanhq/presto-on-aws/a0f31d44b8d9729c9fdbccdb516969c735b148f3/packer/base_configs/env.sh


--------------------------------------------------------------------------------
/packer/base_configs/jvm.config:
--------------------------------------------------------------------------------
 1 | -server
 2 | -Xmx{{jvmMemory}}G
 3 | -XX:-UseBiasedLocking
 4 | -XX:+UseG1GC
 5 | -XX:+ExplicitGCInvokesConcurrent
 6 | -XX:+HeapDumpOnOutOfMemoryError
 7 | -XX:+UseGCOverheadLimit
 8 | -XX:+ExitOnOutOfMemoryError
 9 | -XX:ReservedCodeCacheSize=512M
10 | -Djdk.attach.allowAttachSelf=true
11 | 


--------------------------------------------------------------------------------
/packer/base_configs/log.properties:
--------------------------------------------------------------------------------
1 | # Enable verbose logging from Presto
2 | #io.prestosql=DEBUG
3 | 


--------------------------------------------------------------------------------
/packer/base_configs/node.properties:
--------------------------------------------------------------------------------
1 | node.environment={{envName}}
2 | node.id={{instanceId}}
3 | node.data-dir=/var/lib/presto/data
4 | catalog.config-dir=/etc/presto/catalog
5 | plugin.dir=/usr/lib/presto/plugin
6 | node.server-log-file=/var/log/presto/server.log
7 | node.launcher-log-file=/var/log/presto/launcher.log
8 | 


--------------------------------------------------------------------------------
/packer/presto.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"variables": {
 3 | 		"subnet_id": "",
 4 | 		"vpc_id": "",
 5 | 		"aws_access_key": "",
 6 | 		"aws_secret_key": "",
 7 | 		"presto_version": "",
 8 | 		"ami_name": "",
 9 | 		"environment": "dev",
10 |         "source_ami": "",
11 |         "region": ""
12 | 	},
13 | 	"builders": [{
14 | 		"type": "amazon-ebs",
15 | 		"region": "{{ user `region` }}",
16 | 		"source_ami": "{{ user `source_ami` }}",
17 | 		"instance_type": "t3a.large",
18 | 		"ssh_username": "ec2-user",
19 | 		"ssh_timeout": "5m",
20 | 		"ami_name": "{{ user `ami_name`}}",
21 | 		"vpc_id": "{{ user `vpc_id` }}",
22 | 		"subnet_id": "{{ user `subnet_id` }}",
23 | 		"force_deregister": true,
24 | 		"run_tags": {
25 | 			"Name": "atlan-presto"
26 | 		},
27 | 		"tags": {
28 | 			"Name": "Atlan-Presto",
29 | 			"Environment": "{{ user `environment` }}",
30 | 			"user": "arpit",
31 | 			"presto_version": "{{ user `presto_version` }}"
32 | 		},
33 | 		"ami_block_device_mappings": [
34 | 			{
35 | 				"device_name": "/dev/xvda",
36 | 				"volume_size": 8,
37 | 				"delete_on_termination": true
38 | 			}
39 | 		]
40 | 	}],
41 | 	"provisioners": [
42 | 		{
43 | 			"type": "shell",
44 | 			"inline": [
45 | 				"sudo mkdir -p /etc/presto",
46 | 				"sudo mkdir -p /etc/presto_metrics",
47 | 				"sudo chown -R ec2-user:ec2-user /etc/presto /etc/presto_metrics"
48 | 			]
49 | 		},
50 | 		{
51 | 			"type": "file",
52 | 			"source": "./presto",
53 | 			"destination": "/tmp/presto"
54 | 		},
55 | 		{
56 | 			"type": "file",
57 | 			"source": "./presto_metrics_prometheus.service",
58 | 			"destination": "/tmp/presto_metrics_prometheus.service"
59 | 		},
60 | 		{
61 | 			"type": "file",
62 | 			"source": "./presto_metrics_cloudwatch.service",
63 | 			"destination": "/tmp/presto_metrics_cloudwatch.service"
64 | 		},
65 | 		{
66 | 			"type": "file",
67 | 			"source": "./presto_scaling_service.service",
68 | 			"destination": "/tmp/presto_scaling_service.service"
69 | 		},
70 | 		{
71 | 			"type": "file",
72 | 			"source": "./base_configs/",
73 | 			"destination": "/etc/presto"
74 | 		},
75 | 		{
76 | 		    "type": "shell",
77 | 			"environment_vars": [
78 | 				"aws_access_key={{ user `aws_access_key`}}",
79 | 				"aws_secret_key={{ user `aws_secret_key`}}",
80 | 				"presto_version={{ user `presto_version`}}"
81 |             ],
82 | 		    "scripts": [
83 | 		        "presto.sh"
84 | 		    ]
85 | 		}
86 | 	]
87 | }
88 | 


--------------------------------------------------------------------------------
/packer/presto.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | # Packer provisioning script for the Presto AMI: installs Java 11, the Presto
 3 | # server and CLI, the presto_metrics exporter, and the systemd units and
 4 | # environment files they use. Expects $presto_version in the environment.
 5 | # "-o" needs an option name; a bare "set -euxo" only prints the current options.
 6 | set -euxo pipefail
 7 | echo "HELLO WORLD"
 8 | 
 9 | version=$presto_version
10 | 
11 | echo "$version"
12 | # Install Java
13 | sudo amazon-linux-extras install java-openjdk11
14 | java -version
15 | sudo yum install -y awslogs aws-cfn-bootstrap
16 | 
17 | sudo mkdir -p /usr/lib/presto /var/log/presto /var/lib/presto/data /var/lib/presto/spill /etc/presto_metrics /etc/presto_scaling_service /var/run/presto
18 | 
19 | # Install presto
20 | wget -O /tmp/presto-server.tar.gz "https://repo1.maven.org/maven2/io/prestosql/presto-server/$version/presto-server-$version.tar.gz"
21 | tar -xvf /tmp/presto-server.tar.gz -C /tmp/
22 | sudo cp -r "/tmp/presto-server-$version"/* /usr/lib/presto/
23 | ls /usr/lib/presto
24 | 
25 | 
26 | sudo chown -R ec2-user:ec2-user /etc/presto /usr/lib/presto /var/lib/presto /var/log/presto /etc/presto_metrics /etc/presto_scaling_service /var/run/presto
27 | 
28 | # install presto cli
29 | wget -O /tmp/presto-cli "https://repo1.maven.org/maven2/io/prestosql/presto-cli/$version/presto-cli-$version-executable.jar"
30 | sudo mv /tmp/presto-cli /usr/local/bin/presto-cli
31 | sudo chmod +x /usr/local/bin/presto-cli
32 | 
33 | # create additional services
34 | 
35 | # presto_metrics binary, used by both the prometheus and cloudwatch units
36 | 
37 | sudo wget -O /usr/local/bin/presto_metrics https://github.com/atlanhq/presto-metrics/releases/download/v1.0.0/presto_metrics_v1.0.0_linux_amd64
38 | sudo chmod +x /usr/local/bin/presto_metrics
39 | sudo chown -R ec2-user:ec2-user /usr/local/bin/presto_metrics
40 | 
41 | 
42 | # Environment files read by the presto_metrics_* systemd units
43 | cat <<EOF > /etc/presto_metrics/env.prometheus
44 | PRESTO_HOST=localhost
45 | PRESTO_PORT=8080
46 | SERVICE_NAME=prometheus
47 | STACK_NAME=atlan-presto-test-stack
48 | CLOUDWATCH_NAMESPACE=presto
49 | EOF
50 | 
51 | cat <<EOF > /etc/presto_metrics/env.cloudwatch
52 | PRESTO_HOST=localhost
53 | PRESTO_PORT=8080
54 | SERVICE_NAME=cloudwatch
55 | STACK_NAME=atlan-presto-test-stack
56 | CLOUDWATCH_NAMESPACE=presto
57 | EOF
58 | 
59 | # EnvironmentFile for presto.service: it must hold only KEY=VALUE lines, so no
60 | # unit sections such as [Install] belong in it.
61 | sudo touch /etc/default/presto && sudo chown ec2-user:ec2-user /etc/default/presto
62 | /usr/bin/printf "PRESTO_OPTS= \
63 | --pid-file=/var/run/presto/presto.pid \
64 | --node-config=/etc/presto/node.properties \
65 | --jvm-config=/etc/presto/jvm.config \
66 | --config=/etc/presto/config.properties \
67 | --launcher-log-file=/var/log/presto/launcher.log \
68 | --server-log-file=/var/log/presto/server.log \
69 | -Dhttp-server.log.path=/var/log/presto/http-request.log \
70 | -Dcatalog.config-dir=/etc/presto/catalog
71 | " >> /etc/default/presto
72 | 
73 | sudo touch /etc/systemd/system/presto.service && sudo chown ec2-user:ec2-user /etc/systemd/system/presto.service
74 | 
75 | /usr/bin/printf "
76 | [Unit]
77 | Description=Presto Server
78 | Documentation=https://prestosql.io/
79 | After=network-online.target
80 | [Service]
81 | User=ec2-user
82 | Restart=on-failure
83 | Type=forking
84 | PIDFile=/var/run/presto/presto.pid
85 | RuntimeDirectory=presto
86 | EnvironmentFile=/etc/default/presto
87 | ExecStart=/usr/lib/presto/bin/launcher start \$PRESTO_OPTS
88 | ExecStop=/usr/lib/presto/bin/launcher stop \$PRESTO_OPTS
89 | [Install]
90 | WantedBy=default.target
91 | " >> /etc/systemd/system/presto.service
92 | 
93 | 
94 | sudo cp /tmp/presto_metrics_prometheus.service /etc/systemd/system/presto_metrics_prometheus.service
95 | sudo cp /tmp/presto_metrics_cloudwatch.service /etc/systemd/system/presto_metrics_cloudwatch.service
96 | 
97 | sudo systemctl daemon-reload
98 | 


--------------------------------------------------------------------------------
/packer/presto_metrics_cloudwatch.service:
--------------------------------------------------------------------------------
 1 | [Unit]
 2 | Description=Presto Metrics CloudWatch exporter
 3 | [Service]
 4 | User=ec2-user
 5 | # Supplies the ${...} variables used in ExecStart; presto.sh writes this file without API_PREFIX or CLOUDWATCH_REGION - TODO confirm where those are set.
 6 | EnvironmentFile=/etc/presto_metrics/env.cloudwatch
 7 | WorkingDirectory=/etc/presto_metrics/
 8 | # presto_metrics binary (downloaded by presto.sh) and its command-line flags.
 9 | ExecStart=/usr/local/bin/presto_metrics --web.service-name=${SERVICE_NAME} \
10 | 										--web.presto-host=${PRESTO_HOST} \
11 | 										--web.presto-port=${PRESTO_PORT} \
12 | 										--web.stack-name=${STACK_NAME} \
13 | 										--web.cloudwatch-namespace=${CLOUDWATCH_NAMESPACE} \
14 | 										--web.api-prefix=${API_PREFIX} \
15 | 										--web.cloudwatch-region=${CLOUDWATCH_REGION}
16 | SuccessExitStatus=143
17 | TimeoutStopSec=10
18 | Restart=on-failure
19 | RestartSec=5
20 | [Install]
21 | WantedBy=multi-user.target


--------------------------------------------------------------------------------
/packer/presto_metrics_prometheus.service:
--------------------------------------------------------------------------------
 1 | [Unit]
 2 | Description=Presto Metrics Prometheus exporter
 3 | [Service]
 4 | User=ec2-user
 5 | # Supplies the ${...} variables used in ExecStart; presto.sh writes this file without API_PREFIX - TODO confirm where that is set.
 6 | EnvironmentFile=/etc/presto_metrics/env.prometheus
 7 | WorkingDirectory=/etc/presto_metrics/
 8 | # presto_metrics binary (downloaded by presto.sh) and its command-line flags.
 9 | ExecStart=/usr/local/bin/presto_metrics --web.service-name=${SERVICE_NAME} \
10 | 										--web.presto-host=${PRESTO_HOST} \
11 | 										--web.presto-port=${PRESTO_PORT} \
12 | 										--web.stack-name=${STACK_NAME} \
13 | 										--web.cloudwatch-namespace=${CLOUDWATCH_NAMESPACE} \
14 | 										--web.api-prefix=${API_PREFIX}
15 | 
16 | SuccessExitStatus=143
17 | TimeoutStopSec=10
18 | Restart=on-failure
19 | RestartSec=5
20 | [Install]
21 | WantedBy=multi-user.target


--------------------------------------------------------------------------------
/packer/presto_scaling_service.service:
--------------------------------------------------------------------------------
 1 | [Unit]
 2 | Description=Presto Scaling Service
 3 | [Service]
 4 | User=ec2-user
 5 | # Change this to your workspace; the ${...} values used by ExecStart are read from this file.
 6 | EnvironmentFile=/etc/presto_scaling_service/env
 7 | WorkingDirectory=/etc/presto_scaling_service
 8 | # Path to the executable; every flag value below is filled from a variable in EnvironmentFile.
 9 | ExecStart=/usr/local/bin/presto_scaling_service --web.presto-host=${PRESTO_HOST} \
10 | 												--web.presto-port=${PRESTO_PORT} \
11 | 												--web.workers-asg-name=${PRESTO_WORKERS_ASG_NAME} \
12 | 												--web.api-prefix=${API_PREFIX}
13 | SuccessExitStatus=143
14 | TimeoutStopSec=10
15 | Restart=on-failure
16 | RestartSec=5
17 | [Install]
18 | WantedBy=multi-user.target


--------------------------------------------------------------------------------
/presto.yaml:
--------------------------------------------------------------------------------
   1 | ---
   2 | AWSTemplateFormatVersion: '2010-09-09'
   3 | Description: 'CloudFormation Template for OpenSource Presto'
   4 | Metadata:
   5 |   AWS::CloudFormation::Interface:  # Parameter grouping and labels for the CloudFormation console.
   6 |     ParameterGroups:  # NOTE(review): parameters such as CoordinatorInstanceCount, HiveIPAddress and PrestoVersion are declared but not listed in any group.
   7 |       -
   8 |         Label:
   9 |           default: "AWS Configuration"
  10 |         Parameters:
  11 |           - VPC
  12 |           - Subnet
  13 |           - KeyName
  14 |           - SecurityGroups
  15 |       -
  16 |         Label:
  17 |           default: "Presto Configuration"
  18 |         Parameters:
  19 |           - CoordinatorInstanceType
  20 |           - WorkersInstanceType
  21 |           - WorkersCount
  22 |           - Ec2RootVolumeSize
  23 |           - MaxWorkersCount
  24 |       -
  25 |         Label:
  26 |           default: "Additional Parameters"
  27 |         Parameters:
  28 |           - Environment
  29 | 
  30 | Mappings:
  31 |   RegionMap:  # Region -> Presto AMI; looked up via FindInMap by the Coordinator and Worker launch configurations.
  32 |     ap-south-1:
  33 |       PRESTOIMAGE: ami-0bcffb0a9872eb14c
  34 |     us-east-1:  # Only ap-south-1 and us-east-1 are mapped; other regions have no PRESTOIMAGE entry.
  35 |       PRESTOIMAGE: ami-04254dc35836a5c71
  36 | 
  37 | Parameters:
  38 |   VPC:
  39 |     Type: 'AWS::EC2::VPC::Id'
  40 |     #aws-permission @cft ec2:DescribeVpcs
  41 |     Description: VPC ID
  42 |     AllowedPattern: ".+"
  43 |   Subnet:
  44 |     Type: 'AWS::EC2::Subnet::Id'
  45 |     #aws-permission @cft ec2:DescribeSubnets
  46 |     Description: Subnet to use for Presto nodes (must belong to the selected VPC)
  47 |     AllowedPattern: ".+"
  48 |   KeyName:
  49 |     Description: EC2 Key Name
  50 |     Type: AWS::EC2::KeyPair::KeyName
  51 |     #aws-permission @cft ec2:DescribeKeyPairs
  52 |     AllowedPattern: ".+"
  53 |   SecurityGroups:
  54 |     Type: 'List<AWS::EC2::SecurityGroup::Id>'
  55 |     #aws-permission @cft ec2:DescribeSecurityGroups
  56 |     Description: 'Security Groups for Presto nodes (e.g: allowing SSH access). Must select at least one.'
  57 |     AllowedPattern: ".+"
  58 |   CoordinatorInstanceType:
  59 |     Type: String
  60 |     Default: m5.large
  61 |     Description: EC2 instance type of the coordinator
  62 |   CoordinatorInstanceCount:  # Used as MinSize/MaxSize/DesiredCapacity and signal count of the Coordinators group.
  63 |     Type: Number
  64 |     Default: 1
  65 |     Description: Number of Coordinator instances to deploy
  66 |   WorkersInstanceType:
  67 |     Type: String
  68 |     Default: m5.large
  69 |     Description: EC2 instance type of the workers
  70 |   ElasticsearchHost:  # Written to es-host in event-listener.properties by the node UserData.
  71 |     Type: String
  72 |     Default: dev-admin-search.atlan.com
  73 |   ElasticsearchPort:  # Written to es-port in event-listener.properties by the node UserData.
  74 |     Type: "String"
  75 |     Default: 443
  76 |   AdditionalConfigsUri:  # No default: a value must be supplied; the node UserData downloads and unzips it.
  77 |     Type: "String"
  78 |     Description: Additional Configuration zip file to use, provide an https s3 public url to fetch the zip file from.
  79 |   WorkersCount:
  80 |     Description: Number of dedicated Presto worker nodes (apart from coordinator) to instantiate.
  81 |     Type: Number
  82 |     Default: 1
  83 |     MinValue: 1
  84 |   MaxWorkersCount:
  85 |     Description: Maximum number of dedicated Presto worker nodes.
  86 |     Type: Number
  87 |     Default: 5
  88 |     MinValue: 1
  89 |   Ec2RootVolumeSize:
  90 |     Type: String
  91 |     Default: 100
  92 |     Description: EC2 root volume size
  93 |   HiveIPAddress:  # Written verbatim to hive.metastore.uri, so it must be a full URI rather than a bare IP.
  94 |     Type: String
  95 |     Default: thrift://emr.dev.services:9083
  96 |     Description: Hive metastore Thrift URI (e.g. thrift://host:9083)
  97 |   Environment:
  98 |     Type: String
  99 |     Description: Presto Launch Environment
 100 |     AllowedValues:
 101 |       - dev
 102 |       - prod
 103 |     Default: "dev"
 104 |   PrestoVersion:  # Compared as an integer against 329 in the node UserData to pick the API prefix.
 105 |     Type: String
 106 |     Default: 330
 107 |     Description: Presto Version which is being deployed
 108 | 
 109 | Resources:
 110 |   PrestoSecurityGroup:  # Security group attached to every Presto node; the three rules below reference it.
 111 |     Type: "AWS::EC2::SecurityGroup"
 112 |     #aws-permission @cft ec2:CreateSecurityGroup
 113 |     #aws-permission @cft ec2:DeleteSecurityGroup
 114 |     Properties:
 115 |       GroupDescription: Presto nodes Security Group
 116 |       VpcId: !Ref VPC
 117 |       Tags:
 118 |         - { Key: Name, Value: !Sub "${AWS::StackName}-presto-sg" }
 119 |         - { Key: "presto:opensource:identification:role", Value: "presto:security-group" }
 120 |   PrestoHttpsOutboundRule:  # Egress: TCP 443 from Presto nodes to any IPv4 address.
 121 |     Type: AWS::EC2::SecurityGroupEgress
 122 |     #aws-permission @cft ec2:AuthorizeSecurityGroupEgress
 123 |     #aws-permission @cft ec2:RevokeSecurityGroupEgress
 124 |     Properties:
 125 |       IpProtocol: tcp
 126 |       FromPort: '443'
 127 |       ToPort: '443'
 128 |       CidrIp: 0.0.0.0/0
 129 |       GroupId: !GetAtt PrestoSecurityGroup.GroupId
 130 |   PrestoOutboundRule:  # Egress: TCP 8080 to other members of the same security group.
 131 |     Type: AWS::EC2::SecurityGroupEgress
 132 |     #aws-permission @cft ec2:AuthorizeSecurityGroupEgress
 133 |     #aws-permission @cft ec2:RevokeSecurityGroupEgress
 134 |     Properties:
 135 |       IpProtocol: tcp
 136 |       FromPort: '8080'
 137 |       ToPort: '8080'
 138 |       DestinationSecurityGroupId: !GetAtt PrestoSecurityGroup.GroupId
 139 |       GroupId: !GetAtt PrestoSecurityGroup.GroupId
 140 |   PrestoInboundRule:  # Ingress: TCP 8080 from other members of the same security group.
 141 |     Type: AWS::EC2::SecurityGroupIngress
 142 |     #aws-permission @cft ec2:AuthorizeSecurityGroupIngress
 143 |     #aws-permission @cft ec2:RevokeSecurityGroupIngress
 144 |     Properties:
 145 |       IpProtocol: tcp
 146 |       FromPort: '8080'
 147 |       ToPort: '8080'
 148 |       SourceSecurityGroupId: !GetAtt PrestoSecurityGroup.GroupId
 149 |       GroupId: !GetAtt PrestoSecurityGroup.GroupId
 150 |   PrestoClusterIAMRole:  # Role placed in PrestoClusterInstanceProfile and used by both launch configurations.
 151 |     Type: AWS::IAM::Role
 152 |     #aws-permission @cft iam:CreateRole
 153 |     #aws-permission @cft iam:DeleteRole
 154 |     # Condition: CreateIamInstanceProfile
 155 |     Properties:
 156 |       RoleName: !Sub ${AWS::StackName}-presto-cluster-iam-role
 157 |       AssumeRolePolicyDocument:
 158 |         Statement:
 159 |           - Effect: Allow
 160 |             Principal:
 161 |               Service: [ec2.amazonaws.com, apigateway.amazonaws.com]
 162 |             Action: ['sts:AssumeRole']
 163 |       Policies:
 164 |         #aws-permission @cft iam:AttachRolePolicy
 165 |         #aws-permission @cft iam:DeleteRolePolicy
 166 |         #aws-permission @cft iam:DetachRolePolicy
 167 |         #aws-permission @cft iam:PutRolePolicy
 168 |         - PolicyName: !Sub ${AWS::StackName}-presto-cf-policy
 169 |           PolicyDocument:
 170 |             Version: "2012-10-17"
 171 |             Statement:
 172 |               - Effect: Allow
 173 |                 Action:  # Single statement covering autoscaling, glue, s3, sqs, logs, cloudwatch and ec2 actions.
 174 |                   - "autoscaling:CompleteLifecycleAction"
 175 |                   - "autoscaling:RecordLifecycleActionHeartbeat"
 176 |                   - "autoscaling:DescribeAutoScalingGroups"
 177 |                   - "autoscaling:PutScalingPolicy"
 178 |                   - "autoscaling:DescribeAutoScalingInstances"
 179 |                   - "autoscaling:DescribeLaunchConfigurations"
 180 |                   - "autoscaling:DescribeScalingActivities"
 181 |                   - "autoscaling:UpdateAutoScalingGroup"
 182 |                   - "autoscaling:SetDesiredCapacity"
 183 |                   - "cloudformation:SignalResource"
 184 |                   - "ec2:DescribeInstances"
 185 |                   - "glue:BatchGetPartition"
 186 |                   - "glue:BatchCreatePartition"
 187 |                   - "glue:CreateDatabase"
 188 |                   - "glue:CreateTable"
 189 |                   - "glue:DeleteDatabase"
 190 |                   - "glue:DeletePartition"
 191 |                   - "glue:DeleteTable"
 192 |                   - "glue:GetDatabase"
 193 |                   - "glue:GetDatabases"
 194 |                   - "glue:GetPartition"
 195 |                   - "glue:GetPartitions"
 196 |                   - "glue:GetTable"
 197 |                   - "glue:GetTables"
 198 |                   - "glue:UpdateTable"
 199 |                   - "glue:UpdatePartition"
 200 |                   - "s3:GetObject"
 201 |                   - "s3:ListBucket"
 202 |                   - "s3:PutObject"
 203 |                   - "sqs:ChangeMessageVisibility"
 204 |                   - "sqs:DeleteMessage"
 205 |                   - "sqs:GetQueueUrl"
 206 |                   - "sqs:ReceiveMessage"
 207 |                   - "logs:CreateLogGroup"
 208 |                   - "logs:CreateLogStream"
 209 |                   - "logs:PutLogEvents"
 210 |                   - "logs:PutRetentionPolicy"
 211 |                   - "logs:DescribeLogGroups"
 212 |                   - "logs:DescribeLogStreams"
 213 |                   - "cloudwatch:PutMetricData"
 214 |                   - "ec2:CreateNetworkInterface"
 215 |                   - "ec2:DescribeNetworkInterfaces"
 216 |                   - "ec2:DeleteNetworkInterface"
 217 |                   - "ec2:AttachNetworkInterface"
 218 |                   - "ec2:DetachNetworkInterface"
 219 |                   - "ec2:DescribeNetworkInterfaceAttribute"
 220 |                 Resource:  # NOTE(review): every action above is allowed on all resources; consider scoping.
 221 |                   - "*"
 222 |   PrestoClusterInstanceProfile:  # Instance profile referenced by the Coordinator and Worker launch configurations.
 223 |     Type: AWS::IAM::InstanceProfile
 224 |     #aws-permission @cft iam:CreateInstanceProfile
 225 |     #aws-permission @cft iam:DeleteInstanceProfile
 226 |     #aws-permission @cft iam:GetRole
 227 |     #aws-permission @cft iam:AddRoleToInstanceProfile
 228 |     #aws-permission @cft iam:RemoveRoleFromInstanceProfile
 229 |     Properties:
 230 |       Roles:
 231 |         - Ref: PrestoClusterIAMRole
 232 |   CoordinatorENI:  # Its primary private IP is what the Worker UserData writes as the coordinator discovery URI.
 233 |     Type: 'AWS::EC2::NetworkInterface'
 234 |     #aws-permission @cft ec2:CreateNetworkInterface
 235 |     #aws-permission @cft ec2:DescribeNetworkInterfaces
 236 |     #aws-permission @cft ec2:ModifyNetworkInterfaceAttribute
 237 |     #aws-permission @cft ec2:DeleteNetworkInterface
 238 |     Properties:  # NOTE(review): attaching this ENI to a coordinator instance is not visible in this part of the template.
 239 |       Description: !Sub "${AWS::StackName} coordinator ENI"
 240 |       GroupSet: !Split
 241 |         - ','  # Join the stack SG with the user-supplied SG list into one string, then split it back into a flat list.
 242 |         - !Join
 243 |           - ','
 244 |           - - !GetAtt PrestoSecurityGroup.GroupId
 245 |             - !Join
 246 |               - ','
 247 |               - !Ref SecurityGroups
 248 |       SubnetId: !Ref Subnet
 249 |       Tags:
 250 |         - { Key: Name, Value: !Sub "${AWS::StackName}-coordinator-ENI" }
 251 |         - { Key: "presto:opensource:identification:role", Value: "presto:coordinator-eni" }
 252 | 
 253 |   PrestoCoordinatorServerLogGroup:  # Target of the coordinator's /var/log/presto/server.log stream (see Coordinator UserData).
 254 |     Type: AWS::Logs::LogGroup
 255 |     Properties:
 256 |       LogGroupName: !Sub /prestosql/presto/${AWS::StackName}/coordinators/server
 257 |       RetentionInDays: 7
 258 |   PrestoCoordinatorSyslogLogGroup:  # Target of the coordinator's /var/log/messages stream (see Coordinator UserData).
 259 |     Type: AWS::Logs::LogGroup
 260 |     Properties:
 261 |       LogGroupName: !Sub /prestosql/presto/${AWS::StackName}/coordinators/syslog
 262 |       RetentionInDays: 3
 263 |   PrestoWorkerServerLogGroup:  # Target of each worker's /var/log/presto/server.log stream (see Worker UserData).
 264 |     Type: AWS::Logs::LogGroup
 265 |     Properties:
 266 |       LogGroupName: !Sub /prestosql/presto/${AWS::StackName}/workers/server
 267 |       RetentionInDays: 1
 268 |   PrestoWorkerSyslogLogGroup:  # Target of each worker's /var/log/messages stream (see Worker UserData).
 269 |     Type: AWS::Logs::LogGroup
 270 |     Properties:
 271 |       LogGroupName: !Sub /prestosql/presto/${AWS::StackName}/workers/syslog
 272 |       RetentionInDays: 1
 273 |   
 274 |   Coordinator:  # Launch configuration for coordinator nodes; UserData configures Presto, starts it and signals the Coordinators group.
 275 |     Type: 'AWS::AutoScaling::LaunchConfiguration'
 276 |     #aws-permission @cft autoscaling:CreateLaunchConfiguration
 277 |     #aws-permission @cft autoscaling:DeleteLaunchConfiguration
 278 |     #aws-permission @cft autoscaling:DescribeLaunchConfigurations
 279 |     Properties:
 280 |       InstanceType: !Ref CoordinatorInstanceType
 281 |       ImageId: !FindInMap [RegionMap, !Ref "AWS::Region", PRESTOIMAGE]
 282 |       BlockDeviceMappings:
 283 |         - DeviceName: /dev/xvda
 284 |           Ebs:
 285 |             DeleteOnTermination: true
 286 |             VolumeSize: !Ref Ec2RootVolumeSize
 287 |             VolumeType: gp2
 288 |       KeyName: !Ref KeyName
 289 |       #aws-permission @cft iam:GetInstanceProfile
 290 |       IamInstanceProfile: !Ref PrestoClusterInstanceProfile
 291 |       SecurityGroups: !Split
 292 |         - ','  # Join the stack SG with the user-supplied SG list into one string, then split it back into a flat list.
 293 |         - !Join
 294 |           - ','
 295 |           - - !GetAtt PrestoSecurityGroup.GroupId
 296 |             - !Join
 297 |               - ','
 298 |               - !Ref SecurityGroups
 299 |       UserData:
 300 |         Fn::Base64:
 301 |           !Sub |
 302 |             #!/bin/bash
 303 |             set -o xtrace
 304 |             # Configure this node as the coordinator, pointing discovery at itself.
 305 |             sed -i -e "s/{{isCoordinator}}/true/g" /etc/presto/config.properties
 306 |             sed -i -e "s#{{coordinatorDiscoveryUri}}#http://localhost:8080#g" /etc/presto/config.properties
 307 |             echo "" >> /etc/presto/config.properties
 308 |             echo discovery-server.enabled=true >> /etc/presto/config.properties
 309 |             # R = MemTotal (kB) converted to whole GB; X = 80% of R, substituted for the jvmMemory placeholder.
 310 |             R=$(($(grep MemTotal /proc/meminfo | awk '{print $2}')/1048576))
 311 |             X=$(($R*8/10))
 312 |             sed -i -e "s/{{jvmMemory}}/$X/g" /etc/presto/jvm.config
 313 |             sed -i -e "s/{{envName}}/${Environment}/g" /etc/presto/node.properties
 314 |             sed -i -e "s/{{instanceId}}/$(curl http://169.254.169.254/latest/meta-data/instance-id/)/g" /etc/presto/node.properties
 315 |             # Per-node query memory limits: 60% and 70% of X.
 316 |             Z=$(($X*6/10))
 317 |             echo "query.max-memory-per-node="$Z"GB" >> /etc/presto/config.properties
 318 | 
 319 |             Y=$(($X*7/10))
 320 |             echo "query.max-total-memory-per-node="$Y"GB" >> /etc/presto/config.properties
 321 | 
 322 |             echo "query.max-memory=1PB" >> /etc/presto/config.properties
 323 |             echo "query.low-memory-killer.policy=total-reservation-on-blocked-nodes" >> /etc/presto/config.properties
 324 |             # Ship the Presto server log and syslog to the coordinator log groups.
 325 |             sudo tee /etc/awslogs/awslogs.conf > /dev/null <<EOT
 326 |             [general]
 327 |             state_file = /tmp/awslogs.state
 328 |             [/var/log/presto/server.log]
 329 |             file = /var/log/presto/server.log
 330 |             datetime_format = %b %d %H:%M:%S
 331 |             initial_position = start_of_file
 332 |             log_group_name = ${PrestoCoordinatorServerLogGroup}
 333 |             log_stream_name = {instance_id}
 334 | 
 335 |             [/var/log/messages]
 336 |             datetime_format = %b %d %H:%M:%S
 337 |             file = /var/log/messages
 338 |             buffer_duration = 5000
 339 |             initial_position = start_of_file
 340 |             log_group_name = ${PrestoCoordinatorSyslogLogGroup}
 341 |             log_stream_name = {instance_id}
 342 |             EOT
 343 |             sudo tee /etc/awslogs/awscli.conf > /dev/null <<EOT
 344 |             [plugins]
 345 |             cwlogs = cwlogs
 346 |             [default]
 347 |             region = ${AWS::Region}
 348 |             EOT
 349 | 
 350 |             sudo tee /etc/presto/catalog/hive.properties > /dev/null <<EOT
 351 |             connector.name=hive-hadoop2
 352 |             hive.metastore.uri=${HiveIPAddress}
 353 |             EOT
 354 | 
 355 |             sudo tee /etc/presto_cw_agent.conf > /dev/null <<EOT
 356 |             PRESTO_STACK_NAME=${AWS::StackName}
 357 |             EOT
 358 |             # Fetch the additional configuration bundle; the URL is quoted so characters such as '&' survive the shell.
 359 |             wget -O /tmp/presto_config.zip "${AdditionalConfigsUri}"
 360 |             unzip /tmp/presto_config.zip -d /tmp/
 361 |             cat /tmp/config/config.properties >> /etc/presto/config.properties
 362 |             cat /tmp/config/catalog/hive.properties >> /etc/presto/catalog/hive.properties
 363 |             cp /tmp/config/catalog/jmx.properties /etc/presto/catalog/jmx.properties
 364 |             cp /tmp/config/catalog/tpch.properties /etc/presto/catalog/tpch.properties
 365 |             cp /tmp/config/catalog/tpcds.properties /etc/presto/catalog/tpcds.properties
 366 | 
 367 |             mkdir -p /usr/lib/presto/plugin/atlan-audit-logger-presto-experimental/
 368 |             rm -rf /usr/lib/presto/plugin/atlan-audit-logger-presto-experimental/*
 369 |             wget -O /usr/lib/presto/plugin/atlan-audit-logger-presto-experimental/QueryAuditEventListener-1.4-prestosql.jar https://github.com/atlanhq/presto-query-logger/releases/download/v1.3/presto-query-logger-1.3.jar
 370 | 
 371 |             mkdir -p /usr/lib/presto/etc/
 372 |             sudo tee /usr/lib/presto/etc/event-listener.properties > /dev/null <<EOT
 373 |             event-listener.name=atlan-audit-logger
 374 |             es-host=${ElasticsearchHost}
 375 |             es-port=${ElasticsearchPort}
 376 |             es-protocol=http
 377 |             es-type=_doc
 378 |             EOT
 379 | 
 380 |             prestoVersion=$((${PrestoVersion}+0))
 381 |             apiPrefix='v1/'
 382 | 
 383 |             if (( $prestoVersion > 329 )); then
 384 |                 echo "Presto version greater than 329"
 385 |                 apiPrefix='ui/api/'
 386 |             fi
 387 | 
 388 |             # Environment files read by the presto_metrics systemd units.
 389 |             cat <<EOF > /etc/presto_metrics/env.prometheus
 390 |             PRESTO_HOST=localhost
 391 |             PRESTO_PORT=8080
 392 |             SERVICE_NAME=prometheus
 393 |             STACK_NAME=${AWS::StackName}
 394 |             CLOUDWATCH_NAMESPACE=presto
 395 |             API_PREFIX=$apiPrefix
 396 |             EOF
 397 | 
 398 |             cat <<EOF > /etc/presto_metrics/env.cloudwatch
 399 |             PRESTO_HOST=localhost
 400 |             PRESTO_PORT=8080
 401 |             SERVICE_NAME=cloudwatch
 402 |             STACK_NAME=${AWS::StackName}
 403 |             CLOUDWATCH_NAMESPACE=presto
 404 |             CLOUDWATCH_REGION=${AWS::Region}
 405 |             API_PREFIX=$apiPrefix
 406 |             EOF
 407 | 
 408 | 
 409 |             systemctl start awslogsd
 410 |             service presto start
 411 | 
 412 |             HTTP_URL="http://localhost:8080/v1/status"
 413 |             CURL_CMD="curl -w httpcode=%{http_code}"
 414 | 
 415 |             # -m, --max-time <seconds> FOR curl operation
 416 |             CURL_MAX_CONNECTION_TIMEOUT="-m 5"
 417 | 
 418 |             # Poll the status endpoint up to 30 times, 5 seconds apart, until curl exits with 0.
 419 | 
 420 |             for i in {1..30}
 421 |               do
 422 |                 sleep 5
 423 |                 CURL_RETURN_CODE=0
 424 |                 CURL_OUTPUT=`$CURL_CMD $CURL_MAX_CONNECTION_TIMEOUT $HTTP_URL 2> /dev/null` || CURL_RETURN_CODE=$?
 425 |                 if [ $CURL_RETURN_CODE -ne 0 ]; then
 426 |                   echo "Curl connection failed with return code - $CURL_RETURN_CODE"
 427 |                 else
 428 |                   echo "Success"
 429 |                   break
 430 |                 fi
 431 |               done
 432 | 
 433 |             # Report the outcome to the Coordinators group; the metrics exporters start only on success.
 434 |             if [ $CURL_RETURN_CODE -ne 0 ]; then
 435 |                 /opt/aws/bin/cfn-signal -s 'false' --stack ${AWS::StackName} --resource Coordinators --region ${AWS::Region}
 436 |             else
 437 |                 systemctl start presto_metrics_prometheus.service
 438 |                 systemctl start presto_metrics_cloudwatch.service
 439 |                 /opt/aws/bin/cfn-signal -s 'true' --stack ${AWS::StackName} --resource Coordinators --region ${AWS::Region}
 440 | 
 441 |             fi
 442 |   Coordinators:  # Fixed-size Auto Scaling group of coordinators (Min = Max = Desired = CoordinatorInstanceCount).
 443 |     Type: 'AWS::AutoScaling::AutoScalingGroup'
 444 |     #aws-permission @cft autoscaling:CreateAutoScalingGroup
 445 |     #aws-permission @cft autoscaling:DeleteAutoScalingGroup
 446 |     #aws-permission @cft autoscaling:DescribeAutoScalingGroups
 447 |     #aws-permission @cft autoscaling:UpdateAutoScalingGroup
 448 |     #aws-permission @cft autoscaling:DescribeScalingActivities
 449 |     #aws-permission @cft autoscaling:DescribeLaunchConfigurations
 450 |     #aws-permission @cft autoscaling:DescribeAutoScalingInstances
 451 |     #aws-permission @cft ec2:CreateTags
 452 |     #aws-permission @cft ec2:RunInstances
 453 |     #aws-permission @cft ec2:TerminateInstances
 454 |     #aws-permission @cft ec2:DescribeInstances
 455 |     UpdatePolicy:
 456 |       # A LaunchConfiguration change replaces the whole group (AutoScalingReplacingUpdate), not a rolling update.
 457 |       AutoScalingReplacingUpdate:
 458 |         WillReplace: true
 459 |     CreationPolicy:
 460 |       ResourceSignal:  # Wait up to 15 minutes for one cfn-signal per coordinator (sent at the end of the Coordinator UserData).
 461 |         Timeout: PT15M
 462 |         Count: !Ref CoordinatorInstanceCount
 463 |     Properties:
 464 |       LaunchConfigurationName: !Ref Coordinator
 465 |       VPCZoneIdentifier:
 466 |         - !Ref Subnet
 467 |       MinSize: !Ref CoordinatorInstanceCount
 468 |       MaxSize: !Ref CoordinatorInstanceCount
 469 |       DesiredCapacity: !Ref CoordinatorInstanceCount
 470 |       Tags:
 471 |         - { Key: Name, Value: !Sub "${AWS::StackName}-presto-coordinator", PropagateAtLaunch: true }
 472 |         - { Key: "presto:opensource:identification:role", Value: "presto:coordinator", PropagateAtLaunch: true }
 473 |       HealthCheckGracePeriod: 300
 474 |       HealthCheckType: ELB
 475 |       LoadBalancerNames: !Split
 476 |         - ','  # The Join/Split pair yields a one-element list holding the PrestoCoordinatorsELB reference.
 477 |         - !Join
 478 |           - ','
 479 |           - - !Ref PrestoCoordinatorsELB
 480 |   Worker:  # Launch configuration for worker nodes; UserData configures Presto, starts it and signals the Workers group.
 481 |     Type: 'AWS::AutoScaling::LaunchConfiguration'
 482 |     #aws-permission @cft autoscaling:CreateLaunchConfiguration
 483 |     #aws-permission @cft autoscaling:DeleteLaunchConfiguration
 484 |     #aws-permission @cft autoscaling:DescribeLaunchConfigurations
 485 |     #aws-permission @cft autoscaling:UpdateAutoScalingGroup
 486 |     Properties:
 487 |       InstanceType: !Ref WorkersInstanceType
 488 |       ImageId: !FindInMap [RegionMap, !Ref "AWS::Region", PRESTOIMAGE]
 489 |       BlockDeviceMappings:
 490 |         - DeviceName: /dev/xvda
 491 |           Ebs:
 492 |             DeleteOnTermination: true
 493 |             VolumeSize: !Ref Ec2RootVolumeSize
 494 |             VolumeType: gp2
 495 |       KeyName: !Ref KeyName
 496 |       #aws-permission @cft iam:GetInstanceProfile
 497 |       IamInstanceProfile: !Ref PrestoClusterInstanceProfile
 498 |       SecurityGroups: !Split
 499 |         - ','  # Join the stack SG with the user-supplied SG list into one string, then split it back into a flat list.
 500 |         - !Join
 501 |           - ','
 502 |           - - !GetAtt PrestoSecurityGroup.GroupId
 503 |             - !Join
 504 |               - ','
 505 |               - !Ref SecurityGroups
 506 |       # When worker has private IP only, following things are problematic:
 507 |       #  - S3 cannot be accessed (can be fixed with NAT box in VPC or "VPC endpoint for S3")
 508 |       #  - EC2 boot is very long, as it includes `yum upgrade` which retries timeouts (can
 509 |       #    be fixed with NAT box in VPC or "repo_upgrade: none" in cloud init)
 510 |       #AssociatePublicIpAddress: false
 511 |       UserData:
 512 |         Fn::Base64:
 513 |           !Sub |
 514 |             #!/bin/bash
 515 |             set -o xtrace
 516 |             # Configure this node as a worker that discovers the coordinator through the coordinator ENI address.
 517 |             sed -i -e "s/{{isCoordinator}}/false/g" /etc/presto/config.properties
 518 |             sed -i -e "s#{{coordinatorDiscoveryUri}}#http://${CoordinatorENI.PrimaryPrivateIpAddress}:8080#g" /etc/presto/config.properties
 519 |             # R = MemTotal (kB) converted to whole GB; X = 80% of R, substituted for the jvmMemory placeholder.
 520 |             R=$(($(grep MemTotal /proc/meminfo | awk '{print $2}')/1048576))
 521 |             X=$(($R*8/10))
 522 |             sed -i -e "s/{{jvmMemory}}/$X/g" /etc/presto/jvm.config
 523 |             sed -i -e "s/{{envName}}/${Environment}/g" /etc/presto/node.properties
 524 |             sed -i -e "s/{{instanceId}}/$(curl http://169.254.169.254/latest/meta-data/instance-id/)/g" /etc/presto/node.properties
 525 |             
 526 |             echo "" >> /etc/presto/config.properties
 527 |             # Per-node query memory limits: 60% and 70% of X.
 528 |             Z=$(($X*6/10))
 529 |             echo "query.max-memory-per-node="$Z"GB" >> /etc/presto/config.properties
 530 | 
 531 |             Y=$(($X*7/10))
 532 |             echo "query.max-total-memory-per-node="$Y"GB" >> /etc/presto/config.properties
 533 |             echo "query.low-memory-killer.policy=total-reservation-on-blocked-nodes" >> /etc/presto/config.properties
 534 |             # Ship the Presto server log and syslog to the worker log groups.
 535 |             sudo tee /etc/awslogs/awslogs.conf > /dev/null <<EOT
 536 |             [general]
 537 |             state_file = /tmp/awslogs.state
 538 |             [/var/log/presto/server.log]
 539 |             file = /var/log/presto/server.log
 540 |             datetime_format = %b %d %H:%M:%S
 541 |             initial_position = start_of_file
 542 |             log_group_name = ${PrestoWorkerServerLogGroup}
 543 |             log_stream_name = {instance_id}
 544 | 
 545 |             [/var/log/messages]
 546 |             datetime_format = %b %d %H:%M:%S
 547 |             file = /var/log/messages
 548 |             buffer_duration = 5000
 549 |             initial_position = start_of_file
 550 |             log_group_name = ${PrestoWorkerSyslogLogGroup}
 551 |             log_stream_name = {instance_id}
 552 |             EOT
 553 |             sudo tee /etc/awslogs/awscli.conf > /dev/null <<EOT
 554 |             [plugins]
 555 |             cwlogs = cwlogs
 556 |             [default]
 557 |             region = ${AWS::Region}
 558 |             EOT
 559 | 
 560 |             sudo tee /etc/presto/catalog/hive.properties > /dev/null <<EOT
 561 |             connector.name=hive-hadoop2
 562 |             hive.metastore.uri=${HiveIPAddress}
 563 |             EOT
 564 |             # Fetch the additional configuration bundle; the URL is quoted so characters such as '&' survive the shell.
 565 |             wget -O /tmp/presto_config.zip "${AdditionalConfigsUri}"
 566 |             unzip /tmp/presto_config.zip -d /tmp/
 567 |             cat /tmp/config/config.properties >> /etc/presto/config.properties
 568 |             cat /tmp/config/catalog/hive.properties >> /etc/presto/catalog/hive.properties
 569 |             cp /tmp/config/catalog/jmx.properties /etc/presto/catalog/jmx.properties
 570 |             cp /tmp/config/catalog/tpch.properties /etc/presto/catalog/tpch.properties
 571 |             cp /tmp/config/catalog/tpcds.properties /etc/presto/catalog/tpcds.properties
 572 | 
 573 |             mkdir -p /usr/lib/presto/plugin/atlan-audit-logger-presto-experimental/
 574 |             mkdir -p /usr/lib/presto/etc/
 575 |             rm -rf /usr/lib/presto/plugin/atlan-audit-logger-presto-experimental/*
 576 |             wget -O /usr/lib/presto/plugin/atlan-audit-logger-presto-experimental/QueryAuditEventListener-1.4-prestosql.jar https://athena-cloudformation-templates.s3.ap-south-1.amazonaws.com/unilever/config/QueryAuditEventListener-1.4-prestosql-jar-with-dependencies.jar
 577 | 
 578 |             sudo tee /usr/lib/presto/etc/event-listener.properties > /dev/null <<EOT
 579 |             event-listener.name=atlan-audit-logger
 580 |             es-host=${ElasticsearchHost}
 581 |             es-port=${ElasticsearchPort}
 582 |             es-protocol=http
 583 |             es-type=_doc
 584 |             EOT
 585 | 
 586 |             prestoVersion=$((${PrestoVersion}+0))
 587 |             apiPrefix='v1/'
 588 | 
 589 |             if (( $prestoVersion > 329 )); then
 590 |                 echo "Presto version greater than 329"
 591 |                 apiPrefix='ui/api/'
 592 |             fi
 593 |             # Environment files read by the presto_metrics systemd units.
 594 |             cat <<EOF > /etc/presto_metrics/env.prometheus
 595 |             PRESTO_HOST=localhost
 596 |             PRESTO_PORT=8080
 597 |             SERVICE_NAME=prometheus
 598 |             STACK_NAME=${AWS::StackName}
 599 |             CLOUDWATCH_NAMESPACE=presto
 600 |             API_PREFIX=$apiPrefix
 601 |             EOF
 602 | 
 603 |             cat <<EOF > /etc/presto_metrics/env.cloudwatch
 604 |             PRESTO_HOST=localhost
 605 |             PRESTO_PORT=8080
 606 |             SERVICE_NAME=cloudwatch
 607 |             STACK_NAME=${AWS::StackName}
 608 |             CLOUDWATCH_NAMESPACE=presto
 609 |             CLOUDWATCH_REGION=${AWS::Region}
 610 |             API_PREFIX=$apiPrefix
 611 |             EOF
 612 |             
 613 |             systemctl start awslogsd
 614 |             service presto start
 615 | 
 616 |             HTTP_URL="http://localhost:8080/v1/status"
 617 |             CURL_CMD="curl -w httpcode=%{http_code}"
 618 | 
 619 |             # -m, --max-time <seconds> FOR curl operation
 620 |             CURL_MAX_CONNECTION_TIMEOUT="-m 5"
 621 | 
 622 |             # Poll the status endpoint up to 30 times, 5 seconds apart, until curl exits with 0.
 623 | 
 624 |             for i in {1..30}
 625 |               do
 626 |                 sleep 5
 627 |                 CURL_RETURN_CODE=0
 628 |                 CURL_OUTPUT=`$CURL_CMD $CURL_MAX_CONNECTION_TIMEOUT $HTTP_URL 2> /dev/null` || CURL_RETURN_CODE=$?
 629 |                 if [ $CURL_RETURN_CODE -ne 0 ]; then
 630 |                   echo "Curl connection failed with return code - $CURL_RETURN_CODE"
 631 |                 else
 632 |                   echo "Success"
 633 |                   break
 634 |                 fi
 635 |               done
 636 |             # Report the outcome to the Workers group; the metrics exporters start only on success.
 637 |             if [ $CURL_RETURN_CODE -ne 0 ]; then
 638 |                 /opt/aws/bin/cfn-signal -s 'false' --stack ${AWS::StackName} --resource Workers --region ${AWS::Region}
 639 |             else
 640 |                 systemctl start presto_metrics_prometheus.service
 641 |                 systemctl start presto_metrics_cloudwatch.service
 642 |                 /opt/aws/bin/cfn-signal -s 'true' --stack ${AWS::StackName} --resource Workers --region ${AWS::Region}
 643 |             fi
 644 | 
 645 |   Workers:  # Auto Scaling group of workers, sized between WorkersCount and MaxWorkersCount.
 646 |     Type: 'AWS::AutoScaling::AutoScalingGroup'
 647 |     #aws-permission @cft autoscaling:CreateAutoScalingGroup
 648 |     #aws-permission @cft autoscaling:DeleteAutoScalingGroup
 649 |     #aws-permission @cft autoscaling:DescribeAutoScalingGroups
 650 |     #aws-permission @cft autoscaling:UpdateAutoScalingGroup
 651 |     #aws-permission @cft autoscaling:DescribeScalingActivities
 652 |     #aws-permission @cft autoscaling:DescribeLaunchConfigurations
 653 |     #aws-permission @cft autoscaling:DescribeAutoScalingInstances
 654 |     #aws-permission @cft ec2:CreateTags
 655 |     #aws-permission @cft ec2:RunInstances
 656 |     #aws-permission @cft ec2:TerminateInstances
 657 |     #aws-permission @cft ec2:DescribeInstances
 658 |     UpdatePolicy:
 659 |       # A LaunchConfiguration change replaces the whole group (AutoScalingReplacingUpdate), not a rolling update.
 660 |       AutoScalingReplacingUpdate:
 661 |         WillReplace: true
 662 |     CreationPolicy:
 663 |       ResourceSignal:  # Wait up to 15 minutes for one cfn-signal per initial worker (sent at the end of the Worker UserData).
 664 |         Timeout: PT15M
 665 |         Count: !Ref WorkersCount
 666 |     Properties:
 667 |       LaunchConfigurationName: !Ref Worker
 668 |       MetricsCollection:
 669 |         - Granularity: "1Minute"
 670 |       VPCZoneIdentifier:
 671 |         - !Ref Subnet
 672 |       MinSize: !Ref WorkersCount  # Same value as DesiredCapacity, so the group cannot shrink below its initial size.
 673 |       MaxSize: !Ref MaxWorkersCount
 674 |       DesiredCapacity: !Ref WorkersCount
 675 |       Tags:
 676 |         - { Key: Name, Value: !Sub "${AWS::StackName}-presto-worker", PropagateAtLaunch: true }
 677 |         - { Key: "presto:opensource:identification:role", Value: "presto:worker", PropagateAtLaunch: true }
 678 |       HealthCheckGracePeriod: 180
 679 |       HealthCheckType: ELB
 680 |       LoadBalancerNames: !Split
 681 |         - ','  # The Join/Split pair yields a one-element list holding the PrestoWorkersELB reference.
 682 |         - !Join
 683 |           - ','
 684 |           - - !Ref PrestoWorkersELB
 685 |   WorkersScaleUpPolicy:  # adds one worker when an alarm that lists it in AlarmActions fires
 686 |     Type: AWS::AutoScaling::ScalingPolicy
 687 |     Properties:
 688 |       AdjustmentType: ChangeInCapacity
 689 |       AutoScalingGroupName:
 690 |         Ref: Workers
 691 |       Cooldown: '60'  # seconds
 692 |       ScalingAdjustment: '1'  # +1 instance per trigger
 693 |   WorkersScaleDownPolicy:  # removes one worker when an alarm that lists it in AlarmActions fires
 694 |     Type: AWS::AutoScaling::ScalingPolicy
 695 |     Properties:
 696 |       AdjustmentType: ChangeInCapacity
 697 |       AutoScalingGroupName:
 698 |         Ref: Workers
 699 |       Cooldown: '60'  # seconds
 700 |       ScalingAdjustment: "-1"  # -1 instance per trigger
 701 |   WorkersUserCPUAlarmHigh:  # scale-up trigger on the presto/MeanWorkerUserCPUUtilisation metric
 702 |     Type: AWS::CloudWatch::Alarm
 703 |     Properties:
 704 |       AlarmDescription: Scale-up if CPU > 70% for 2 minutes
 705 |       MetricName: MeanWorkerUserCPUUtilisation
 706 |       Namespace: presto
 707 |       Statistic: Average
 708 |       Period: '120'  # seconds; with one evaluation period the alarm window is 2 minutes
 709 |       EvaluationPeriods: '1'
 710 |       Threshold: '0.7'  # presumably the metric is a 0-1 fraction (0.7 = 70%); confirm against the publisher
 711 |       AlarmActions:
 712 |       - Ref: WorkersScaleUpPolicy
 713 |       Dimensions:
 714 |       - Name: prestoStackName
 715 |         Value:
 716 |           Ref: "AWS::StackName"
 717 |       ComparisonOperator: GreaterThanThreshold
 718 |   WorkersUserCPUAlarmLow:  # scale-down trigger on the presto/MeanWorkerUserCPUUtilisation metric
 719 |     Type: AWS::CloudWatch::Alarm
 720 |     Properties:
 721 |       AlarmDescription: Scale-down if CPU < 50% for 5 minutes
 722 |       MetricName: MeanWorkerUserCPUUtilisation
 723 |       Namespace: presto
 724 |       Statistic: Average
 725 |       Period: '300'  # seconds; with one evaluation period this is the 5 minutes in the description
 726 |       EvaluationPeriods: '1'
 727 |       Threshold: '0.5'  # presumably a 0-1 fraction (0.5 = 50%); confirm against the metric publisher
 728 |       AlarmActions:
 729 |       - Ref: WorkersScaleDownPolicy
 730 |       Dimensions:
 731 |       - Name: prestoStackName
 732 |         Value:
 733 |           Ref: "AWS::StackName"
 734 |       ComparisonOperator: LessThanThreshold
 735 | 
 736 |   WorkersSystemCPUAlarmHigh:  # scale-up trigger on the presto/MeanWorkerSystemCPUUtilisation metric
 737 |     Type: AWS::CloudWatch::Alarm
 738 |     Properties:
 739 |       AlarmDescription: Scale-up if CPU > 70% for 2 minutes
 740 |       MetricName: MeanWorkerSystemCPUUtilisation
 741 |       Namespace: presto
 742 |       Statistic: Average
 743 |       Period: '120'  # seconds; with one evaluation period the alarm window is 2 minutes
 744 |       EvaluationPeriods: '1'
 745 |       Threshold: '0.7'  # presumably the metric is a 0-1 fraction (0.7 = 70%); confirm against the publisher
 746 |       AlarmActions:
 747 |       - Ref: WorkersScaleUpPolicy
 748 |       Dimensions:
 749 |       - Name: prestoStackName
 750 |         Value:
 751 |           Ref: "AWS::StackName"
 752 |       ComparisonOperator: GreaterThanThreshold
 753 |   WorkersSystemCPUAlarmLow:  # scale-down trigger on the presto/MeanWorkerSystemCPUUtilisation metric
 754 |     Type: AWS::CloudWatch::Alarm
 755 |     Properties:
 756 |       AlarmDescription: Scale-down if CPU < 50% for 5 minutes
 757 |       MetricName: MeanWorkerSystemCPUUtilisation
 758 |       Namespace: presto
 759 |       Statistic: Average
 760 |       Period: '300'  # seconds; with one evaluation period this is the 5 minutes in the description
 761 |       EvaluationPeriods: '1'
 762 |       Threshold: '0.5'  # presumably a 0-1 fraction (0.5 = 50%); confirm against the metric publisher
 763 |       AlarmActions:
 764 |       - Ref: WorkersScaleDownPolicy  # NOTE(review): shares the policy with WorkersUserCPUAlarmLow; confirm low system CPU alone should remove workers
 765 |       Dimensions:
 766 |       - Name: prestoStackName
 767 |         Value:
 768 |           Ref: "AWS::StackName"
 769 |       ComparisonOperator: LessThanThreshold
 770 | 
 771 |   GracefulNodeShutdownQueueIAMRole:  # execution role of GracefulNodeShutdownLambda
 772 |     Type: AWS::IAM::Role
 773 |     #aws-permission @cft iam:CreateRole
 774 |     #aws-permission @cft iam:DeleteRole
 775 |     #Condition: CreateIamInstanceProfile
 776 |     Properties:
 777 |       RoleName: !Sub ${AWS::StackName}-graceful-shutdown-iam-role
 778 |       AssumeRolePolicyDocument:
 779 |         Statement:
 780 |           - Effect: Allow
 781 |             Principal:
 782 |               Service:
 783 |                 - ec2.amazonaws.com
 784 |                 - lambda.amazonaws.com
 785 |             Action: ['sts:AssumeRole']
 786 |       Policies:
 787 |         #aws-permission @cft iam:AttachRolePolicy
 788 |         #aws-permission @cft iam:DeleteRolePolicy
 789 |         #aws-permission @cft iam:DetachRolePolicy
 790 |         #aws-permission @cft iam:PutRolePolicy
 791 |         - PolicyName: !Sub ${AWS::StackName}-graceful-shutdown-iam-role
 792 |           PolicyDocument:
 793 |             Version: "2012-10-17"
 794 |             Statement:
 795 |               - Effect: Allow
 796 |                 Action:
 797 |                 - "ec2:DescribeInstances"  # the function looks up the worker's private IP
 798 |                 - "sqs:ReceiveMessage"
 799 |                 - "sqs:SendMessage"
 800 |                 - "sqs:DeleteMessage"
 801 |                 - "sqs:GetQueueAttributes"
 802 |                 - "sqs:GetQueueUrl"
 803 |                 - "logs:PutLogEvents"
 804 |                 - "logs:CreateLogStream"
 805 |                 - "logs:CreateLogGroup"
 806 |                 - "ec2:CreateNetworkInterface"  # network interface actions: the function runs with a VpcConfig
 807 |                 - "ec2:DescribeNetworkInterfaces"
 808 |                 - "ec2:DeleteNetworkInterface"
 809 |                 - "ec2:AttachNetworkInterfaces"  # NOTE(review): HALambdaIamRole spells this ec2:AttachNetworkInterface; confirm the name and whether it is needed
 810 |                 Resource:
 811 |                   - "*"
 812 |   
 813 |   AutoScalingTerminationWaitLambdaIAMRole:  # execution role of AutoScalingTerminationWaitLambda
 814 |     Type: AWS::IAM::Role
 815 |     #aws-permission @cft iam:CreateRole
 816 |     #aws-permission @cft iam:DeleteRole
 817 |     #Condition: CreateIamInstanceProfile
 818 |     Properties:
 819 |       RoleName: !Sub ${AWS::StackName}-asg-terminate-lambda-iam-role
 820 |       AssumeRolePolicyDocument:
 821 |         Statement:
 822 |           - Effect: Allow
 823 |             Principal:
 824 |               Service:
 825 |                 - ec2.amazonaws.com
 826 |                 - lambda.amazonaws.com
 827 |             Action: ['sts:AssumeRole']
 828 |       Policies:
 829 |         #aws-permission @cft iam:AttachRolePolicy
 830 |         #aws-permission @cft iam:DeleteRolePolicy
 831 |         #aws-permission @cft iam:DetachRolePolicy
 832 |         #aws-permission @cft iam:PutRolePolicy
 833 |         - PolicyName: !Sub ${AWS::StackName}-asg-terminate-lambda-iam-role
 834 |           PolicyDocument:
 835 |             Version: "2012-10-17"
 836 |             Statement:
 837 |               - Effect: Allow
 838 |                 Action:
 839 |                 - "autoscaling:CompleteLifecycleAction"  # lets the function release the termination hook
 840 |                 - "sqs:ReceiveMessage"
 841 |                 - "sqs:SendMessage"
 842 |                 - "sqs:GetQueueUrl"
 843 |                 - "sqs:GetQueueAttributes"
 844 |                 - "sqs:DeleteMessage"
 845 |                 - "logs:PutLogEvents"
 846 |                 - "logs:CreateLogStream"
 847 |                 - "logs:CreateLogGroup"
 848 |                 - "ec2:CreateNetworkInterface"  # network interface actions: the function runs with a VpcConfig
 849 |                 - "ec2:DescribeNetworkInterfaces"
 850 |                 - "ec2:DeleteNetworkInterface"
 851 |                 - "ec2:AttachNetworkInterfaces"  # NOTE(review): HALambdaIamRole spells this ec2:AttachNetworkInterface; confirm the name and whether it is needed
 852 |                 - "ec2:Describe*"  # the function looks up the worker's private IP
 853 |                 Resource:
 854 |                   - "*"
 855 | 
 856 |   GracefulNodeShutdownQueue:  # receives the termination lifecycle notifications of the Workers group
 857 |     Type: 'AWS::SQS::Queue'
 858 |     #aws-permission @cft sqs:CreateQueue
 859 |     #aws-permission @cft sqs:DeleteQueue
 860 |     #aws-permission @cft sqs:GetQueueAttributes
 861 |     #aws-permission @cft sqs:TagQueue
 862 |     Properties:
 863 |       # Required so that consumers handling these messages do not process the same message over and over again.
 864 |       # The value is higher than the usual graceful shutdown, so that in most cases there is no need to request
 865 |       # more time. Otherwise, a handler approaching 250s would have to ask SQS to keep its message hidden for longer.
 866 |       # See the AWS docs on VisibilityTimeout for more details.
 867 |       VisibilityTimeout: 250
 868 |   AutoScalingTerminationWaitQueue:  # feeds AutoScalingTerminationWaitLambda; both Lambdas send to it via QUEUE_URL
 869 |     Type: 'AWS::SQS::Queue'
 870 |     Properties:
 871 |       VisibilityTimeout: 250  # seconds; same value as GracefulNodeShutdownQueue
 872 | 
 873 |   AutoScalingNotificationIAMRole:  # role passed to GracefulPrestoNodeShutdownHook as RoleARN
 874 |     Type: 'AWS::IAM::Role'
 875 |     Properties:
 876 |       AssumeRolePolicyDocument:
 877 |         Version: 2012-10-17
 878 |         Statement:
 879 |           - Effect: Allow
 880 |             Principal:
 881 |               Service:
 882 |               - autoscaling.amazonaws.com
 883 |               - ec2.amazonaws.com
 884 |             Action:
 885 |               - 'sts:AssumeRole'
 886 |       Path: /
 887 |       ManagedPolicyArns:
 888 |         - arn:aws:iam::aws:policy/service-role/AutoScalingNotificationAccessRole  # AWS managed policy for lifecycle notifications
 889 |         - arn:aws:iam::aws:policy/AmazonEC2ReadOnlyAccess
 890 | 
 891 |   GracefulPrestoNodeShutdownHook:  # pauses worker termination until the lifecycle action is completed
 892 |     Type: "AWS::AutoScaling::LifecycleHook"
 893 |     #aws-permission @cft autoscaling:PutLifecycleHook
 894 |     #aws-permission @cft autoscaling:DeleteLifecycleHook
 895 |     Properties:
 896 |       AutoScalingGroupName: !Ref Workers
 897 |       LifecycleTransition: 'autoscaling:EC2_INSTANCE_TERMINATING'
 898 |       NotificationTargetARN: !GetAtt GracefulNodeShutdownQueue.Arn  # notifications are delivered to this queue
 899 |       RoleARN: !GetAtt AutoScalingNotificationIAMRole.Arn
 900 |       HeartbeatTimeout: 3600  # seconds the instance may stay in the wait state
 901 |       DefaultResult: CONTINUE  # result applied if nobody completes the action before the timeout
 902 |   GracefulNodeShutdownLambda:  # asks a terminating worker to drain, then forwards the event to the wait queue
 903 |     Type: "AWS::Lambda::Function"
 904 |     Properties:
 905 |       Code:
 906 |         ZipFile: |
 907 |           import os
 908 |           import json
 909 |           import boto3
 910 |           import urllib.request
 911 | 
 912 |           def lambda_handler(event, context):
 913 |               # Put the terminating Presto worker into SHUTTING_DOWN and queue a follow-up check.
 914 |               print(event)
 915 |               event_body = json.loads(event['Records'][0]["body"])
 916 |               if event_body.get("LifecycleTransition") != "autoscaling:EC2_INSTANCE_TERMINATING":
 917 |                   print("Not a terminating condition return")
 918 |                   return
 919 |               instance_id = event_body["EC2InstanceId"]
 920 |               ec2 = boto3.resource('ec2')
 921 |               ip = ec2.Instance(instance_id).private_ip_address
 922 |               url = 'http://{}:8080/v1/info/state'.format(ip)
 923 |               payload = "\"SHUTTING_DOWN\"".encode('utf-8')
 924 |               headers = {
 925 |                   'Content-Type': "application/json",
 926 |                   'cache-control': "no-cache"
 927 |               }
 928 |               for i in range(3):
 929 |                   try:
 930 |                       # The timeout keeps three failed attempts inside the 10 second function timeout.
 931 |                       req = urllib.request.Request(url, data=payload, headers=headers, method="PUT")
 932 |                       with urllib.request.urlopen(req, timeout=2) as response:
 933 |                           print(response.read().decode('utf-8'))
 934 |                       break
 935 |                   except Exception as e:
 936 |                       print("Shutdown request attempt {} failed: {}".format(i + 1, e))
 937 |               print(ip)
 938 |               queue_url = os.getenv('QUEUE_URL')
 939 |               print(queue_url)
 940 |               sqs = boto3.client('sqs')
 941 |               response = sqs.send_message(
 942 |                   QueueUrl=queue_url,
 943 |                   MessageBody=json.dumps(event_body)
 944 |               )
 945 |               print(response)
 946 |               
 947 |               return {
 948 |                   'statusCode': 200,
 949 |                   'body': json.dumps('Hello from Lambda!')
 950 |               }
 951 | 
 952 | 
 953 |       Environment:
 954 |         Variables:
 955 |           QUEUE_URL: !Ref AutoScalingTerminationWaitQueue  # Ref of an SQS queue resolves to its URL
 956 |           COORDINATOR_IP: !GetAtt CoordinatorENI.PrimaryPrivateIpAddress  # not read by the inline code above
 957 |       Role: !GetAtt GracefulNodeShutdownQueueIAMRole.Arn
 958 |       Timeout: 10  # seconds; the HTTP attempts above are bounded to fit inside it
 959 |       Handler: index.lambda_handler
 960 |       Runtime: python3.6  # NOTE(review): deprecated Lambda runtime; the inline code only needs the standard library and boto3
 961 |       VpcConfig:
 962 |         SubnetIds: 
 963 |           - !Ref Subnet
 964 |         SecurityGroupIds: !Split
 965 |           - ','
 966 |           - !Join
 967 |             - ','
 968 |             - - !GetAtt PrestoSecurityGroup.GroupId
 969 |               - !Join
 970 |                 - ','
 971 |                 - !Ref SecurityGroups
 972 | 
 973 |   GracefulNodeShutdownLambdaEventSourceMapping:  # invokes GracefulNodeShutdownLambda for messages on GracefulNodeShutdownQueue
 974 |     Type: AWS::Lambda::EventSourceMapping
 975 |     Properties:
 976 |       BatchSize: 1  # the handler only reads event['Records'][0]
 977 |       EventSourceArn: !GetAtt GracefulNodeShutdownQueue.Arn
 978 |       FunctionName: !GetAtt GracefulNodeShutdownLambda.Arn
 979 |     DependsOn: 
 980 |       - GracefulNodeShutdownLambda
 981 |       - GracefulNodeShutdownQueue
 982 | 
 983 |   AutoScalingTerminationWaitLambda:  # completes the termination hook once the worker has no running tasks
 984 |     Type: "AWS::Lambda::Function"
 985 |     Properties:
 986 |       Code:
 987 |         ZipFile: |
 988 |           import json
 989 |           import boto3
 990 |           import os
 991 |           import urllib.request
 992 | 
 993 |           def lambda_handler(event, context):
 994 |               # Re-queue the event while the worker still runs tasks; otherwise release the lifecycle hook.
 995 | 
 996 |               def enqueue_message(event_body):
 997 |                   queue_url = os.getenv('QUEUE_URL')
 998 |                   print(queue_url)
 999 |                   sqs = boto3.client('sqs')
1000 |                   response = sqs.send_message(
1001 |                       QueueUrl=queue_url,
1002 |                       MessageBody=json.dumps(event_body),
1003 |                       DelaySeconds=60
1004 |                   )
1005 |                   print(response)
1006 | 
1007 |               def complete_lifecycle(event_body):
1008 |                   res = autoscaling.complete_lifecycle_action(
1009 |                       LifecycleHookName=event_body["LifecycleHookName"],
1010 |                       AutoScalingGroupName=event_body["AutoScalingGroupName"],
1011 |                       LifecycleActionToken=event_body["LifecycleActionToken"],
1012 |                       LifecycleActionResult='CONTINUE'
1013 |                   )
1014 |                   print(res)
1015 | 
1016 |               event_body = json.loads(event['Records'][0]["body"])
1017 |               if event_body.get("LifecycleTransition") != "autoscaling:EC2_INSTANCE_TERMINATING":
1018 |                   print("Not a terminating condition return")
1019 |                   return
1020 |               ec2_instance_id = event_body["EC2InstanceId"]
1021 |               ec2 = boto3.resource("ec2")
1022 |               autoscaling = boto3.client('autoscaling')
1023 |               ec2_instance = ec2.Instance(ec2_instance_id)
1024 |               ip = ec2_instance.private_ip_address
1025 |               print(ec2_instance_id)
1026 |               request_url = "http://{ip}:8080/v1/task".format(ip=ip)
1027 |               try:
1028 |                   print(request_url)
1029 |                   with urllib.request.urlopen(request_url, timeout=5) as reply:
1030 |                       worker_tasks = json.loads(reply.read().decode('utf-8'))
1031 |                   print(len(worker_tasks))
1032 |                   busy = any(task['taskStatus']['state'] == 'RUNNING' for task in worker_tasks)
1033 |               except Exception as e:
1034 |                   print(str(e))
1035 |                   print("Terminating instance because worker not responding")
1036 |                   complete_lifecycle(event_body)
1037 |                   return
1038 |               # Outside the try: a failed re-queue must not fall into the "terminate" branch above.
1039 |               if busy:
1040 |                   print('RUNNING QUEURIES FOUND')
1041 |                   enqueue_message(event_body)
1042 |                   return
1043 |               print('NO_QUERIES')
1044 |               complete_lifecycle(event_body)
1045 |               return
1046 | 
1047 | 
1048 |       Environment:
1049 |         Variables:
1050 |           QUEUE_URL: !Ref AutoScalingTerminationWaitQueue  # the function re-queues onto its own source queue
1051 |           COORDINATOR_IP: !GetAtt CoordinatorENI.PrimaryPrivateIpAddress  # not read by the inline code above
1052 |       Role: !GetAtt AutoScalingTerminationWaitLambdaIAMRole.Arn
1053 |       Timeout: 10  # seconds; the task query above is bounded to 5 seconds to fit inside it
1054 |       Handler: index.lambda_handler
1055 |       Runtime: python3.6  # NOTE(review): deprecated Lambda runtime; the inline code only needs the standard library and boto3
1056 |       VpcConfig:
1057 |         SubnetIds: 
1058 |           - !Ref Subnet
1059 |         SecurityGroupIds: !Split
1060 |           - ','
1061 |           - !Join
1062 |             - ','
1063 |             - - !GetAtt PrestoSecurityGroup.GroupId
1064 |               - !Join
1065 |                 - ','
1066 |                 - !Ref SecurityGroups
1067 |   AutoScalingTerminationWaitLambdaEventSourceMapping:  # invokes AutoScalingTerminationWaitLambda for messages on AutoScalingTerminationWaitQueue
1068 |     Type: AWS::Lambda::EventSourceMapping
1069 |     Properties:
1070 |       BatchSize: 1  # the handler only reads event['Records'][0]
1071 |       EventSourceArn: !GetAtt AutoScalingTerminationWaitQueue.Arn
1072 |       FunctionName: !GetAtt AutoScalingTerminationWaitLambda.Arn
1073 |     DependsOn:
1074 |       - AutoScalingTerminationWaitLambda
1075 |       - AutoScalingTerminationWaitQueue
1076 | 
1077 |   AutoScalingNotificationRole:  # NOTE(review): the lifecycle hook uses AutoScalingNotificationIAMRole; confirm this role is still referenced anywhere
1078 |     Type: 'AWS::IAM::Role'
1079 |     Properties:
1080 |       AssumeRolePolicyDocument:
1081 |         Version: 2012-10-17
1082 |         Statement:
1083 |           - Effect: Allow
1084 |             Principal:
1085 |               Service:
1086 |               - autoscaling.amazonaws.com
1087 |             Action:
1088 |               - 'sts:AssumeRole'
1089 |       Path: /
1090 |       ManagedPolicyArns:
1091 |         - arn:aws:iam::aws:policy/service-role/AutoScalingNotificationAccessRole  # AWS managed policy for lifecycle notifications
1092 | 
1093 |   PrestoWorkersELB:  # classic load balancer attached to the Workers group; its check drives worker health
1094 |     Type: AWS::ElasticLoadBalancing::LoadBalancer
1095 |     Properties: 
1096 |       HealthCheck:
1097 |         HealthyThreshold: 2  # consecutive successes before an instance counts as healthy
1098 |         Interval: 15  # seconds between probes
1099 |         Target: HTTP:8080/v1/status
1100 |         Timeout: 10  # seconds to wait for each probe
1101 |         UnhealthyThreshold: 2  # consecutive failures before an instance counts as unhealthy
1102 |       Scheme: internal
1103 |       Subnets: !Split
1104 |         - ','
1105 |         - !Join
1106 |           - ','
1107 |           - - !Ref Subnet
1108 |       SecurityGroups: !Split
1109 |           - ','
1110 |           - !Join
1111 |             - ','
1112 |             - - !GetAtt PrestoSecurityGroup.GroupId
1113 |               - !Join
1114 |                 - ','
1115 |                 - !Ref SecurityGroups  # Join then Split flattens the stack's group plus the SecurityGroups list into one list
1116 |       Listeners:
1117 |         - InstancePort: 8080
1118 |           InstanceProtocol: HTTP
1119 |           LoadBalancerPort: 8080
1120 |           Protocol: HTTP
1121 | 
1122 |   PrestoCoordinatorsELB:  # classic load balancer with the same settings as PrestoWorkersELB
1123 |     Type: AWS::ElasticLoadBalancing::LoadBalancer
1124 |     Properties: 
1125 |       HealthCheck:
1126 |         HealthyThreshold: 2  # consecutive successes before an instance counts as healthy
1127 |         Interval: 15  # seconds between probes
1128 |         Target: HTTP:8080/v1/status
1129 |         Timeout: 10  # seconds to wait for each probe
1130 |         UnhealthyThreshold: 2  # consecutive failures before an instance counts as unhealthy
1131 |       Scheme: internal
1132 |       Subnets: !Split
1133 |         - ','
1134 |         - !Join
1135 |           - ','
1136 |           - - !Ref Subnet
1137 |       SecurityGroups: !Split
1138 |           - ','
1139 |           - !Join
1140 |             - ','
1141 |             - - !GetAtt PrestoSecurityGroup.GroupId
1142 |               - !Join
1143 |                 - ','
1144 |                 - !Ref SecurityGroups  # Join then Split flattens the stack's group plus the SecurityGroups list into one list
1145 |       Listeners:
1146 |         - InstancePort: 8080
1147 |           InstanceProtocol: HTTP
1148 |           LoadBalancerPort: 8080
1149 |           Protocol: HTTP
1150 | 
1151 |   HALambdaIamRole:  # execution role of HALambda
1152 |     Type: AWS::IAM::Role
1153 |     #aws-permission @cft iam:CreateRole
1154 |     #aws-permission @cft iam:DeleteRole
1155 |     Properties:
1156 |       RoleName: !Sub ${AWS::StackName}-HA-lambda-role
1157 |       AssumeRolePolicyDocument:
1158 |         Statement:
1159 |           - Effect: Allow
1160 |             Principal:
1161 |               Service: [lambda.amazonaws.com]
1162 |             Action: ['sts:AssumeRole']
1163 |       Policies:
1164 |         #aws-permission @cft iam:AttachRolePolicy
1165 |         #aws-permission @cft iam:DeleteRolePolicy
1166 |         #aws-permission @cft iam:DetachRolePolicy
1167 |         #aws-permission @cft iam:PutRolePolicy
1168 |         - PolicyName: !Sub ${AWS::StackName}-HA-lambda-policy
1169 |           PolicyDocument:
1170 |             Version: "2012-10-17"
1171 |             Statement:
1172 |               - Effect: Allow
1173 |                 Action:
1174 |                   - "ec2:DescribeInstances"  # used to find coordinators and their private IPs
1175 |                   - "ec2:DescribeNetworkInterfaces"  # used to read the coordinator ENI status
1176 |                   - "ec2:AttachNetworkInterface"  # used to move the ENI to a healthy coordinator
1177 |                   - "ec2:DetachNetworkInterface"  # used to take the ENI away from an unhealthy coordinator
1178 |                   - "ec2:CreateNetworkInterface"
1179 |                   - "ec2:DeleteNetworkInterface"
1180 |                   - "logs:CreateLogGroup"
1181 |                   - "logs:CreateLogStream"
1182 |                   - "logs:PutLogEvents"
1183 |                 Resource:
1184 |                   - "*"
1185 |   HALambda:  # keeps the coordinator ENI attached to a coordinator that answers /v1/info as started
1186 |     Type: AWS::Lambda::Function
1187 |     DependsOn: Coordinators
1188 |     Properties:
1189 |       Role: !GetAtt HALambdaIamRole.Arn
1190 |       Handler: index.lambda_handler
1191 |       MemorySize: 128
1192 |       Runtime: python3.7  # NOTE(review): deprecated Lambda runtime; the inline code only needs the standard library and boto3
1193 |       Code:
1194 |         ZipFile: |
1195 |           import json
1196 |           import boto3
1197 |           import http.client
1198 |           import time
1199 |           import os
1200 | 
1201 |           def detach_eni_instance(eniAttachmentId):  # force-detach the ENI attachment
1202 |               client = boto3.client('ec2')
1203 |               print("detaching eni {}".format(eniAttachmentId))
1204 |               response = client.detach_network_interface(
1205 |                   AttachmentId = eniAttachmentId,
1206 |                   Force = True
1207 |               )
1208 |               time.sleep(5)  # fixed pause after the detach call; the ENI state is not polled
1209 |               print(response)
1210 |               print("ENI detached")
1211 |               
1212 |           def attach_eni_instance(instanceId, eni_id):  # attach the ENI as the instance's second interface
1213 |               client = boto3.client('ec2')
1214 |               print("attaching eni {} to instance {}".format(eni_id, instanceId))
1215 |               response = client.attach_network_interface(
1216 |                   DeviceIndex=1,
1217 |                   InstanceId = instanceId,
1218 |                   NetworkInterfaceId = eni_id,
1219 |               )
1220 |               return response
1221 | 
1222 |           def instance_health(instanceId):  # returns the /v1/info document, or {"starting": True} on any failure
1223 |               client = boto3.client('ec2')
1224 |               response = client.describe_instances(
1225 |                   InstanceIds=[
1226 |                       instanceId
1227 |                   ]
1228 |               )
1229 |               print("checking health for instance {}".format(instanceId))
1230 |               try:
1231 |                   conn = http.client.HTTPConnection(response['Reservations'][0]['Instances'][0]['PrivateIpAddress'], 8080, timeout=5)
1232 |                   conn.request("GET", "/v1/info")
1233 |                   r1 = conn.getresponse()
1234 |                   print(r1.status, r1.reason)
1235 |                   data = json.loads(r1.read().decode('utf-8').replace("'", '"'))
1236 |               except Exception as e:
1237 |                   print("AN EXCEPTION OCCURED", str(e))
1238 |                   data = {
1239 |                       "starting": True
1240 |                   }
1241 |               return data
1242 |               # return response
1243 |               
1244 |           def attach_eni(eni_id):  # attach the ENI to the first running coordinator that reports starting == false
1245 |               client = boto3.client('ec2')
1246 |               response = client.describe_instances(
1247 |                   Filters=[
1248 |                       {
1249 |                           'Name': 'tag:presto:opensource:identification:role',
1250 |                           'Values': [
1251 |                               'presto:coordinator'
1252 |                           ]
1253 |                       },
1254 |                       {
1255 |                           'Name': 'tag:aws:cloudformation:stack-name',
1256 |                           'Values': [
1257 |                               os.environ['STACK_NAME']
1258 |                           ]
1259 |                       }
1260 |                   ]
1261 |               )
1262 |               for j in range(len(response['Reservations'])):
1263 |                   for i in range(len(response['Reservations'][j]['Instances'])):
1264 |                       if response['Reservations'][j]['Instances'][i]['State']['Name'] != 'running':
1265 |                           continue
1266 |                       privateIpAddress = response['Reservations'][j]['Instances'][i]['PrivateIpAddress']
1267 |                       instanceId = response['Reservations'][j]['Instances'][i]['InstanceId']
1268 |                       print("Found instance to attach {}, {}".format(privateIpAddress, instanceId))
1269 |                       try:
1270 |                           conn = http.client.HTTPConnection(privateIpAddress, 8080, timeout=5)
1271 |                           conn.request("GET", "/v1/info")
1272 |                           r1 = conn.getresponse()
1273 |                           print(privateIpAddress, r1.status, r1.reason)
1274 |                           data = json.loads(r1.read().decode('utf-8').replace("'", '"'))
1275 |                       except Exception as e:
1276 |                           print("AN EXCEPTION OCCURED", str(e))
1277 |                           data = {
1278 |                               "starting": True
1279 |                           }
1280 |                       if not data['starting']:
1281 |                           print("Instance {} is healthy | Attaching ENI to Instance".format(instanceId))
1282 |                           print(attach_eni_instance(instanceId, eni_id))
1283 |                           return  # stop after the first attach; a break would go on to the next reservation
1284 |                       else:
1285 |                           print(instanceId + "Instance is unhealthy ...")
1286 | 
1287 |           def lambda_handler(event, context):
1288 |               client = boto3.resource('ec2')
1289 |               network_interface = client.NetworkInterface(os.environ['ENI_ID'])
1290 |               print("Network ENI status: ", network_interface.status)
1291 |               if network_interface.status == "available":
1292 |                   print("ENI not attached to any coordinator | Looking for suitable coordinator")
1293 |                   attach_eni(os.environ['ENI_ID'])
1294 |               else:
1295 |                   print("ENI is attached | Checking health of the coordinator")
1296 |                   data = instance_health(network_interface.attachment['InstanceId'])
1297 |                   # instance_health reports starting=True when the probe fails, so both cases trigger a replacement
1298 |                   if not data['starting']:
1299 |                       print("Coordinator is healthy | EXITING")
1300 |                   else:
1301 |                       print("Coordinator is unhealthy | REPLACING")
1302 |                       detach_eni_instance(network_interface.attachment['AttachmentId'])
1303 |                       attach_eni(os.environ['ENI_ID'])
1304 | 
1305 | 
1306 |       VpcConfig:
1307 |         SubnetIds: 
1308 |           - !Ref Subnet
1309 |         SecurityGroupIds: !Split
1310 |           - ','
1311 |           - !Join
1312 |             - ','
1313 |             - - !GetAtt PrestoSecurityGroup.GroupId
1314 |               - !Join
1315 |                 - ','
1316 |                 - !Ref SecurityGroups
1317 |       Timeout: 60  # seconds; each health probe above is bounded to 5 seconds
1318 |       Environment:
1319 |         Variables:
1320 |           ENI_ID: !Ref CoordinatorENI
1321 |           STACK_NAME: !Sub ${AWS::StackName}
1322 |   HALambdaTriggerRule:  # scheduled rule that invokes HALambda
1323 |     Type: AWS::Events::Rule
1324 |     #aws-permission @cft events:PutRule
1325 |     #aws-permission @cft events:DeleteRule
1326 |     #aws-permission @cft events:DescribeRule
1327 |     #aws-permission @cft events:PutTargets
1328 |     #aws-permission @cft events:RemoveTargets
1329 |     DependsOn: Coordinators
1330 |     Properties:
1331 |       ScheduleExpression: rate(1 minute)  # the HA check runs once per minute
1332 |       State: ENABLED
1333 |       Targets:
1334 |         -
1335 |           Arn: !GetAtt HALambda.Arn
1336 |           Id: 'HALambda'
1337 |   PermissionForEventsToInvokeHALambda:  # lets the schedule rule invoke HALambda
1338 |     Type: AWS::Lambda::Permission
1339 |     #aws-permission @cft lambda:AddPermission
1340 |     #aws-permission @cft lambda:RemovePermission
1341 |     Properties:
1342 |       FunctionName:
1343 |         Ref: HALambda
1344 |       Action: 'lambda:InvokeFunction'
1345 |       Principal: 'events.amazonaws.com'
1346 |       SourceArn: !GetAtt HALambdaTriggerRule.Arn  # restricts the grant to this one rule
1347 | 
1348 | Outputs:  # both values come from the coordinator ENI that HALambda moves between coordinators
1349 |   PrestoCoordinatorIp:
1350 |     Description: Coordinator Instance Ip
1351 |     Value: !GetAtt CoordinatorENI.PrimaryPrivateIpAddress
1352 |   CoordinatorDashboard:
1353 |     Description: Coordinator Dashboard URL
1354 |     Value: !Sub "http://${CoordinatorENI.PrimaryPrivateIpAddress}:8080/ui"


--------------------------------------------------------------------------------
/sample_presto_config.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atlanhq/presto-on-aws/a0f31d44b8d9729c9fdbccdb516969c735b148f3/sample_presto_config.zip


--------------------------------------------------------------------------------
/scripts/autoscaling_termination_wait/lambda_function.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import boto3
 3 | import os
 4 | from botocore.vendored import requests
 5 | 
 6 | def lambda_handler(event, context):
 7 |     # TODO implement
 8 | 
 9 |     def enqueue_message(event_body):
10 |         queue_url = os.getenv('QUEUE_URL')
11 |         print(queue_url)
12 |         sqs = boto3.client('sqs')
13 |         response = sqs.send_message(
14 |             QueueUrl=queue_url,
15 |             MessageBody=json.dumps(event_body),
16 |             DelaySeconds=60
17 |         )
18 |         print(response)
19 | 
20 |     def complete_lifecycle(event_body):
21 |         res = autoscaling.complete_lifecycle_action(
22 |             LifecycleHookName=event_body["LifecycleHookName"],
23 |             AutoScalingGroupName=event_body["AutoScalingGroupName"],
24 |             LifecycleActionToken=event_body["LifecycleActionToken"],
25 |             LifecycleActionResult='CONTINUE'
26 |         )
27 |         print(res)
28 | 
29 |     event_body = json.loads(event['Records'][0]["body"])
30 |     if event_body["LifecycleTransition"] != "autoscaling:EC2_INSTANCE_TERMINATING":
31 |         print("Not a terminating condition return")
32 |         return
33 |     ec2_instance_id = event_body["EC2InstanceId"]
34 |     ec2 = boto3.resource("ec2")
35 |     autoscaling = boto3.client('autoscaling')
36 |     ec2_instance = ec2.Instance(ec2_instance_id)
37 |     ip = ec2_instance.private_ip_address
38 |     print(ec2_instance_id)
39 |     request_url = "http://{ip}:8080/v1/task".format(ip=ip, node_id=ec2_instance_id)
40 |     try:
41 |         print(request_url)
42 |         worker_tasks = requests.get(request_url)
43 |         worker_tasks = worker_tasks.json()
44 |         print(len(worker_tasks))
45 |         for task in worker_tasks:
46 |             if task['taskStatus']['state'] == 'RUNNING':
47 |                 print('RUNNING QUEURIES FOUND')
48 |                 enqueue_message(event_body)
49 |                 return
50 | 
51 |         print('NO_QUERIES')
52 |         complete_lifecycle(event_body)
53 |         return
54 |     except Exception as e:
55 |         print(str(e))
56 |         print("Terminating instance because worker not responding")
57 |         complete_lifecycle(event_body)
58 |         return
59 | 


--------------------------------------------------------------------------------
/scripts/graceful_shutdown_handler/lambda_function.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import json
 3 | import boto3
 4 | from botocore.vendored import requests
 5 | 
 6 | def lambda_handler(event, context):
 7 |     # TODO implement
 8 |     print(event)
 9 |     event_body = json.loads(event['Records'][0]["body"])
10 |     if event_body["LifecycleTransition"] != "autoscaling:EC2_INSTANCE_TERMINATING":
11 |         print("Not a terminating condition return")
12 |         return
13 |     ec2_instance_id = event_body["EC2InstanceId"]
14 |     instance_id = ec2_instance_id
15 |     ec2 = boto3.resource('ec2')
16 |     ec2_instance = ec2.Instance(instance_id)
17 |     ip = ec2_instance.private_ip_address
18 |     for i in range(3):
19 |         try:
20 |             url = 'http://{}:8080/v1/info/state'.format(ip)
21 |             payload = "\"SHUTTING_DOWN\""
22 |             headers = {
23 |                 'Content-Type': "application/json",
24 |                 'cache-control': "no-cache"
25 |             }
26 |         
27 |             response = requests.request("PUT", url, data=payload, headers=headers)
28 |             print(response.text)
29 |         except Exception as e:
30 |             pass
31 |     print(ip)
32 |     queue_url = os.getenv('QUEUE_URL')
33 |     print(queue_url)
34 |     sqs = boto3.client('sqs')
35 |     response = sqs.send_message(
36 |         QueueUrl=queue_url,
37 |         MessageBody=json.dumps(event_body)
38 |     )
39 |     print(response)
40 |     
41 |     return {
42 |         'statusCode': 200,
43 |         'body': json.dumps('Hello from Lambda!')
44 |     }
45 | 


--------------------------------------------------------------------------------
/scripts/ha_lambda/lambda_function.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import boto3
  3 | import http.client
  4 | import time
  5 | import os
  6 | 
  7 | def detach_eni_instance(eniAttachmentId):
  8 |     client = boto3.client('ec2')
  9 |     print("detaching eni {}".format(eniAttachmentId))
 10 |     response = client.detach_network_interface(
 11 |         AttachmentId = eniAttachmentId,
 12 |         Force = True
 13 |     )
 14 |     time.sleep(5)
 15 |     print(response)
 16 |     print("ENI detached")
 17 |     
 18 | def attach_eni_instance(instanceId, eni_id):
 19 |     client = boto3.client('ec2')
 20 |     print("attaching eni {} to instance {}".format(eni_id, instanceId))
 21 |     response = client.attach_network_interface(
 22 |         DeviceIndex=1,
 23 |         InstanceId = instanceId,
 24 |         NetworkInterfaceId = eni_id,
 25 |     )
 26 |     return response
 27 | 
 28 | def instance_health(instanceId):
 29 |     client = boto3.client('ec2')
 30 |     response = client.describe_instances(
 31 |         InstanceIds=[
 32 |             instanceId
 33 |         ]
 34 |     )
 35 |     print("checking health for instance {}".format(instanceId))
 36 |     try:
 37 |         conn = http.client.HTTPConnection(response['Reservations'][0]['Instances'][0]['PrivateIpAddress'], 8080)
 38 |         conn.request("GET", "/v1/info")
 39 |         r1 = conn.getresponse()
 40 |         print(r1.status, r1.reason)
 41 |         data = json.loads(r1.read().decode('utf-8').replace("'", '"'))
 42 |     except Exception as e:
 43 |         print("AN EXCEPTION OCCURED", str(e))
 44 |         data = {
 45 |             "starting": True
 46 |         }
 47 |     return data
 48 |     # return response
 49 |     
 50 | def attach_eni(eni_id):
 51 |     client = boto3.client('ec2')
 52 |     response = client.describe_instances(
 53 |         Filters=[
 54 |             {
 55 |                 'Name': 'tag:presto:opensource:identification:role',
 56 |                 'Values': [
 57 |                     'presto:coordinator'
 58 |                 ]
 59 |             },
 60 |             {
 61 |                 'Name': 'tag:aws:cloudformation:stack-name',
 62 |                 'Values': [
 63 |                     os.environ['STACK_NAME']
 64 |                 ]
 65 |             }
 66 |         ]
 67 |     )
 68 |     for j in range(len(response['Reservations'])):
 69 |         for i in range(len(response['Reservations'][j]['Instances'])):
 70 |             if response['Reservations'][j]['Instances'][i]['State']['Name'] != 'running':
 71 |                 continue
 72 |             privateIpAddress = response['Reservations'][j]['Instances'][i]['PrivateIpAddress']
 73 |             instanceId = response['Reservations'][j]['Instances'][i]['InstanceId']
 74 |             print("Found instance to attach {}, {}".format(privateIpAddress, instanceId))
 75 |             try:
 76 |                 conn = http.client.HTTPConnection(privateIpAddress, 8080)
 77 |                 conn.request("GET", "/v1/info")
 78 |                 r1 = conn.getresponse()
 79 |                 print(privateIpAddress, r1.status, r1.reason)
 80 |                 data = json.loads(r1.read().decode('utf-8').replace("'", '"'))
 81 |             except Exception as e:
 82 |                 print("AN EXCEPTION OCCURED", str(e))
 83 |                 data = {
 84 |                     "starting": True
 85 |                 }
 86 |             if not data['starting']:
 87 |                 print("Instance {} is healthy | Attaching ENI to Instance".format(instanceId))
 88 |                 print(attach_eni_instance(instanceId, eni_id))
 89 |                 break
 90 |             else:
 91 |                 print(instanceId + "Instance is unhealthy ...")
 92 | 
 93 | def lambda_handler(event, context):
 94 |     client = boto3.resource('ec2')
 95 |     network_interface = client.NetworkInterface(os.environ['ENI_ID'])
 96 |     print("Network ENI status: ", network_interface.status)
 97 |     if network_interface.status == "available":
 98 |         print("ENI not attached to any coordinator | Looking for suitable coordinator")
 99 |         attach_eni(os.environ['ENI_ID'])
100 |     else:
101 |         print("ENI is attached | Checking health of the coordinator")
102 |         data = instance_health(network_interface.attachment['InstanceId'])
103 |         # data = instance_health("i-0b9a126690a1fe099")
104 |         if not data['starting']:
105 |             print("Coordinator is healthy | EXITING")
106 |         else:
107 |             print("Coordinator is unhealthy | REPLACING")
108 |             detach_eni_instance(network_interface.attachment['AttachmentId'])
109 |             attach_eni(os.environ['ENI_ID'])
110 | 
111 | 


--------------------------------------------------------------------------------