├── .gitignore
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── main.tf
├── outputs.tf
├── templates
│   └── cloud-config
│       └── init.tpl
└── variables.tf
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Terraform
.terraform/
*.tfplan
*.tfstate
*.tfstate.backup
!.gitkeep

# macOS
*.DS_Store
.AppleDouble
.LSOverride
._*

# Linux
*~
.*.swp

# Windows
Desktop.ini
Thumbs.db
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
[author]: # (Frederico Martins)
[version]: # (1.0)
[license]: # (SPDX-License-Identifier: CC-BY-4.0)
[copyright]: # (2016-2022, Frederico Martins)

This project follows a No Code of Conduct (NCoC) philosophy. We are all human
beings. We should all be capable of getting along well.

# Contributor (No) Code of Conduct

Everyone is expected to behave like an adult and therefore be capable of
having adult discussions. Everyone's contributions are accepted regardless of
their level of experience, gender, gender identity and expression, sexual
orientation, disability, personal appearance, body size, race, ethnicity, age,
religion, or nationality. The owners or copyright holders of this project are
not members of a support group for human emotion. This is a community that
strives to focus on its topics. Anything else takes away from that.

Everyone should be able to freely express their ideas without offending, or
being offended by, others.

PROBLEMS, OR OTHER SITUATIONS, SHOULD BE ADDRESSED LIKE IN ANY OTHER PLATFORM,
PROJECT OR DISCUSSION FORUM. IN NO EVENT SHALL THE OWNERS OR COPYRIGHT HOLDERS
OF THIS PROJECT BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THIS PROJECT COMMUNITY.
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
# Contributing

If you are thinking of contributing code to this project, first of all, thank
you! All fixes, patches and enhancements are very warmly welcomed.

Please take a moment to review this document to find out how you can
contribute.

## Bugs and Feature Requests

The preferred way to report bugs or request features is to use
[GitHub issues](issues). For this project, however, that feature is not
available.

## Pull Requests

1. Create a [GitHub account](https://github.com/join) (if you don't have one already)
2. [Fork](https://help.github.com/articles/fork-a-repo) this project
3. Create your feature branch: `git checkout -b my-new-feature`
4. Make your changes
5. Commit your changes: `git commit -am 'Add some feature'`
6. Push to the branch: `git push origin my-new-feature`
7. Submit a pull request

Before you submit a
[pull request](https://help.github.com/articles/using-pull-requests/) from your
forked repo, check that it meets these guidelines:

- If the pull request adds or changes a functionality, make sure the
  documentation is also created or updated as part of the same pull request.
- Try not to put more than one feature or bug fix in a single pull request.
  Create a separate pull request for each feature or bug fix.
- Squash your commits into one for each pull request: `git reset --soft HEAD~ && git commit`.

## License

By contributing to this project, you agree that your contributions will be
licensed under its [LICENSE](LICENSE).
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2016-2022, Frederico Martins

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Apache Spark Terraform Module

A Terraform module to create and manage an Apache Spark cluster on AWS.

## Prerequisites

The Terraform and AWS Command Line Interface tools need to be installed on
your local computer.

A previously built AMI base image with Apache Spark is also required.

### Terraform

Terraform version 0.8 or higher is required.

Terraform installation instructions can be found
[here](https://www.terraform.io/intro/getting-started/install.html).

### AWS Command Line Interface

AWS Command Line Interface installation instructions can be found
[here](http://docs.aws.amazon.com/cli/latest/userguide/installing.html).

### Apache Spark AMI

This module requires an AMI base image with Apache Spark, built using the
recipe from [this](https://github.com/fscm/packer-aws-spark) project, to
already exist in your AWS account.

The ID of that AMI is the value to use for the required `ami_id` variable.

### AWS Route53 Service (optional)

If you wish to register the instances' FQDNs, the AWS Route53 service also
has to be enabled and properly configured.

To register the instances' FQDNs on the AWS Route53 service, set the
`private_zone_id` and/or `public_zone_id` variable(s).
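
If you do not know the zone IDs, they can be looked up with the
`aws_route53_zone` data source. A minimal sketch, assuming a private hosted
zone for the example domain `mydomain.tld` already exists:

```hcl
# Look up an existing private hosted zone by name (example domain).
data "aws_route53_zone" "selected" {
  name         = "mydomain.tld."
  private_zone = true
}

# The resulting ID can then be passed to the module:
#   private_zone_id = "${data.aws_route53_zone.selected.zone_id}"
```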

## Module Input Variables

- `ami_id` - **[required]** The ID of the AMI to use for the instance(s). See the [Apache Spark AMI](#apache-spark-ami) section for more information.
- `associate_public_ip_address` - Associate a public IP address with the Apache Spark Master instance(s). *[default value: false]*
- `domain` - **[required]** The domain name to use for the Apache Spark instance(s).
- `extra_security_group_id` - Extra security group to assign to the Apache Spark instance(s) (e.g.: 'sg-3f983f98'). See the sketch after this list. *[default value: '']*
- `keyname` - **[required]** The SSH key name to use for the Apache Spark instance(s).
- `name` - The main name that will be used for the Apache Spark instance(s). *[default value: 'spark']*
[comment]: # (- `number_of_masters` - Number of Apache Spark Master instances. NOT USED YET. *[default value: 1]*)
- `prefix` - A prefix to prepend to the Apache Spark instance(s) name. *[default value: '']*
- `private_zone_id` - The ID of the hosted zone for the private DNS record(s). *[default value: '']*
- `public_zone_id` - The ID of the hosted zone for the public DNS record(s). Requires `associate_public_ip_address` to be set to 'true'. *[default value: '']*
- `spark_master_heap_size` - The heap size for the Apache Spark Master instance(s) (e.g.: '1G'). *[default value: '']*
- `spark_master_instance_type` - The type of instance to use for the Apache Spark Master instance(s). *[default value: 't2.small']*
- `spark_master_root_volume_iops` - The amount of provisioned IOPS (for 'io1' type only). *[default value: 0]*
- `spark_master_root_volume_size` - The volume size in gigabytes. *[default value: '8']*
- `spark_master_root_volume_type` - The volume type. Must be one of 'standard' (magnetic), 'gp2' (general purpose SSD), or 'io1' (provisioned IOPS SSD). *[default value: 'gp2']*
- `spark_worker_heap_size` - The heap size for the Apache Spark Worker instance(s) (e.g.: '1G'). *[default value: '']*
- `spark_worker_instance_type` - The type of instance to use for the Apache Spark Worker instance(s). *[default value: 't2.small']*
- `spark_worker_max_instances` - Maximum number of Apache Spark Worker instances in the cluster. *[default value: '1']*
- `spark_worker_min_instances` - Minimum number of Apache Spark Worker instances in the cluster. *[default value: '1']*
- `spark_worker_root_volume_iops` - The amount of provisioned IOPS (for 'io1' type only). *[default value: 0]*
- `spark_worker_root_volume_size` - The volume size in gigabytes. *[default value: '8']*
- `spark_worker_root_volume_type` - The volume type. Must be one of 'standard' (magnetic), 'gp2' (general purpose SSD), or 'io1' (provisioned IOPS SSD). *[default value: 'gp2']*
- `subnet_ids` - **[required]** List of Subnet IDs to launch the instance(s) in (e.g.: ['subnet-0zfg04s2','subnet-6jm2z54q']).
- `ttl` - The TTL (in seconds) for the DNS record(s). *[default value: '600']*
- `vpc_id` - **[required]** The VPC ID for the security group(s).
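
As an illustration of the `extra_security_group_id` variable, the sketch below
creates a security group that allows SSH access and assigns it to the cluster
instances. All of the IDs, names, and CIDR values are example placeholders:

```hcl
# Hypothetical security group allowing SSH access from an internal network.
resource "aws_security_group" "ssh_access" {
  name   = "mycompany-spark-ssh"
  vpc_id = "vpc-3f0tb39m"
  ingress {
    from_port   = 22
    to_port     = 22
    protocol    = "tcp"
    cidr_blocks = ["10.0.0.0/16"]
  }
}

module "my_spark_cluster" {
  source                  = "github.com/fscm/terraform-module-aws-spark"
  ami_id                  = "ami-gxrd5hz0"
  domain                  = "mydomain.tld"
  extra_security_group_id = "${aws_security_group.ssh_access.id}"
  keyname                 = "my_ssh_key"
  subnet_ids              = ["subnet-0zfg04s2", "subnet-6jm2z54q"]
  vpc_id                  = "vpc-3f0tb39m"
}
```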

## Usage

```hcl
module "my_spark_cluster" {
  source                     = "github.com/fscm/terraform-module-aws-spark"
  ami_id                     = "ami-gxrd5hz0"
  domain                     = "mydomain.tld"
  keyname                    = "my_ssh_key"
  name                       = "spark"
  prefix                     = "mycompany-"
  private_zone_id            = "Z3K95H7K1S3F"
  spark_worker_max_instances = "3"
  spark_worker_min_instances = "2"
  subnet_ids                 = ["subnet-0zfg04s2", "subnet-6jm2z54q"]
  vpc_id                     = "vpc-3f0tb39m"
}
```

## Outputs

- `master_fqdn` - **[type: list]** List of FQDNs of the Apache Spark Master instance(s).
- `master_hostname` - **[type: list]** List of hostnames of the Apache Spark Master instance(s).
- `master_id` - **[type: list]** List of IDs of the Apache Spark Master instance(s).
- `master_ip` - **[type: list]** List of private IP addresses of the Apache Spark Master instance(s).
- `security_group` - **[type: string]** ID of the security group to be added to every instance that requires access to the Apache Spark cluster.
- `ssh_key` - **[type: string]** The name of the SSH key used.

## Cluster Access

This module provides a security group that allows access to the Apache Spark
cluster instances.

That group allows access to the following ports from all the AWS EC2
instances that belong to the group:

| Service           | Port | Protocol |
|:------------------|:----:|:--------:|
| Spark Application | 4040 | TCP      |
| Spark REST Server | 6066 | TCP      |
| Spark Master      | 7077 | TCP      |
| Spark Master UI   | 8080 | TCP      |
| Spark Worker UI   | 8081 | TCP      |

If access to other ports (like the SSH port) is required, you can create your
own security group and add it to the Apache Spark cluster instances using the
`extra_security_group_id` variable (see the sketch in the
[Module Input Variables](#module-input-variables) section).

## Contributing

1. Fork it!
2. Create your feature branch: `git checkout -b my-new-feature`
3. Commit your changes: `git commit -am 'Add some feature'`
4. Push to the branch: `git push origin my-new-feature`
5. Submit a pull request

Please read the [CONTRIBUTING.md](CONTRIBUTING.md) file for more details on
how to contribute to this project.

## Versioning

This project uses [SemVer](http://semver.org/) for versioning. For the
versions available, see the
[tags on this repository](https://github.com/fscm/terraform-module-aws-spark/tags).

## Authors

* **Frederico Martins** - [fscm](https://github.com/fscm)

See also the list of [contributors](https://github.com/fscm/terraform-module-aws-spark/contributors)
who participated in this project.

## License

This project is licensed under the MIT License - see the [LICENSE](LICENSE)
file for details.
--------------------------------------------------------------------------------
/main.tf:
--------------------------------------------------------------------------------
#
# Terraform module to create an Apache Spark cluster.
#
# Copyright 2016-2022, Frederico Martins
# Author: Frederico Martins
#
# SPDX-License-Identifier: MIT
#
# This program is free software. You can use it and/or modify it under the
# terms of the MIT License.
#

#
# Apache Spark instance(s).
#

resource "aws_instance" "master" {
  count                       = "${var.number_of_masters}"
  ami                         = "${var.ami_id}"
  associate_public_ip_address = "${var.associate_public_ip_address}"
  instance_type               = "${var.spark_master_instance_type}"
  key_name                    = "${var.keyname}"
  subnet_id                   = "${element(var.subnet_ids, count.index)}"
  user_data                   = "${element(data.template_file.master.*.rendered, count.index)}"
  vpc_security_group_ids      = ["${aws_security_group.spark.id}", "${aws_security_group.spark_intra.id}", "${var.extra_security_group_id}"]
  root_block_device {
    volume_size = "${var.spark_master_root_volume_size}"
    volume_type = "${var.spark_master_root_volume_type}"
    iops        = "${var.spark_master_root_volume_iops}"
  }
  tags {
    Name    = "${var.prefix}${var.name}-master${format("%02d", count.index + 1)}"
    Spark   = "true"
    Service = "Spark Master"
  }
}

data "template_file" "master" {
  count    = "${var.number_of_masters}"
  template = "${file("${path.module}/templates/cloud-config/init.tpl")}"
  vars {
    domain   = "${var.domain}"
    hostname = "${var.prefix}${var.name}-master${format("%02d", count.index + 1)}"
    # Pass the heap size flag only when a heap size was set.
    spark_args          = "${var.spark_master_heap_size == "" ? "" : format("-m %s", var.spark_master_heap_size)}"
    spark_instance_type = "master"
  }
}

resource "aws_launch_configuration" "worker" {
  associate_public_ip_address = "${var.associate_public_ip_address}"
  image_id                    = "${var.ami_id}"
  instance_type               = "${var.spark_worker_instance_type}"
  key_name                    = "${var.keyname}"
  name_prefix                 = "${var.prefix}${var.name}-worker-"
  security_groups             = ["${aws_security_group.spark.id}", "${aws_security_group.spark_intra.id}", "${var.extra_security_group_id}"]
  user_data                   = "${data.template_file.worker.rendered}"
  lifecycle {
    create_before_destroy = true
  }
  root_block_device {
    volume_size = "${var.spark_worker_root_volume_size}"
    volume_type = "${var.spark_worker_root_volume_type}"
    iops        = "${var.spark_worker_root_volume_iops}"
  }
}

resource "aws_autoscaling_group" "worker" {
  depends_on                = ["aws_launch_configuration.worker"]
  health_check_grace_period = 600
  health_check_type         = "EC2"
  launch_configuration      = "${aws_launch_configuration.worker.name}"
  max_size                  = "${var.spark_worker_max_instances}"
  min_size                  = "${var.spark_worker_min_instances}"
  name                      = "${var.prefix}${var.name}-worker"
  termination_policies      = ["OldestInstance"]
  vpc_zone_identifier       = ["${var.subnet_ids}"]
  lifecycle {
    create_before_destroy = true
  }
  tag {
    key                 = "Name"
    value               = "${var.prefix}${var.name}-worker"
    propagate_at_launch = true
  }
  tag {
    key                 = "Spark"
    value               = "true"
    propagate_at_launch = true
  }
  tag {
    key                 = "Service"
    value               = "Spark Worker"
    propagate_at_launch = true
  }
}

data "template_file" "worker" {
  template = "${file("${path.module}/templates/cloud-config/init.tpl")}"
  vars {
    domain   = "${var.domain}"
    hostname = "${var.prefix}${var.name}-worker"
    # Workers connect to the first master, by DNS name when one was registered
    # on Route53 and by private IP address otherwise.
    spark_args          = "${var.spark_worker_heap_size == "" ? "" : format("-m %s", var.spark_worker_heap_size)} -s ${var.private_zone_id != "" ? element(aws_route53_record.private.*.fqdn, 0) : element(aws_instance.master.*.private_ip, 0)} -W 60"
    spark_instance_type = "worker"
  }
}

#
# Apache Spark Master DNS record(s).
#

resource "aws_route53_record" "private" {
  count   = "${var.private_zone_id != "" ? var.number_of_masters : 0}"
  name    = "${var.prefix}${var.name}-master${format("%02d", count.index + 1)}"
  records = ["${element(aws_instance.master.*.private_ip, count.index)}"]
  ttl     = "${var.ttl}"
  type    = "A"
  zone_id = "${var.private_zone_id}"
}

resource "aws_route53_record" "public" {
  count   = "${var.public_zone_id != "" && var.associate_public_ip_address ? var.number_of_masters : 0}"
  name    = "${var.prefix}${var.name}-master${format("%02d", count.index + 1)}"
  records = ["${element(aws_instance.master.*.public_ip, count.index)}"]
  ttl     = "${var.ttl}"
  type    = "A"
  zone_id = "${var.public_zone_id}"
}

#
# Apache Spark security group(s).
#

resource "aws_security_group" "spark" {
  name   = "${var.prefix}${var.name}"
  vpc_id = "${var.vpc_id}"
  ingress {
    from_port = 4040
    to_port   = 4040
    protocol  = "tcp"
    self      = true
  }
  ingress {
    from_port = 6066
    to_port   = 6066
    protocol  = "tcp"
    self      = true
  }
  ingress {
    from_port = 7077
    to_port   = 7077
    protocol  = "tcp"
    self      = true
  }
  ingress {
    from_port = 8080
    to_port   = 8081
    protocol  = "tcp"
    self      = true
  }
  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }
  lifecycle {
    create_before_destroy = true
  }
  tags {
    Name    = "${var.prefix}${var.name}"
    Spark   = "true"
    Service = "Spark"
  }
}

resource "aws_security_group" "spark_intra" {
  name   = "${var.prefix}${var.name}-intra"
  vpc_id = "${var.vpc_id}"
  ingress {
    from_port = 0
    to_port   = 0
    protocol  = "-1"
    self      = true
  }
  egress {
    from_port = 0
    to_port   = 0
    protocol  = "-1"
    self      = true
  }
  lifecycle {
    create_before_destroy = true
  }
  tags {
    Name    = "${var.prefix}${var.name}-intra"
    Spark   = "true"
    Service = "Spark"
  }
}
--------------------------------------------------------------------------------
/outputs.tf:
--------------------------------------------------------------------------------
#
# Outputs for the Apache Spark Terraform module.
#
# Copyright 2016-2022, Frederico Martins
# Author: Frederico Martins
#
# SPDX-License-Identifier: MIT
#
# This program is free software. You can use it and/or modify it under the
# terms of the MIT License.
#

output "master_fqdn" {
  sensitive = false
  value     = ["${aws_route53_record.private.*.fqdn}"]
}

output "master_hostname" {
  sensitive = false
  value     = ["${aws_instance.master.*.private_dns}"]
}

output "master_id" {
  sensitive = false
  value     = ["${aws_instance.master.*.id}"]
}

output "master_ip" {
  sensitive = false
  value     = ["${aws_instance.master.*.private_ip}"]
}

output "security_group" {
  sensitive = false
  value     = "${aws_security_group.spark.id}"
}

output "ssh_key" {
  sensitive = false
  value     = "${var.keyname}"
}
--------------------------------------------------------------------------------
/templates/cloud-config/init.tpl:
--------------------------------------------------------------------------------
#cloud-config
#
# Cloud-Config template for the Apache Spark instances.
#
# Copyright 2016-2022, Frederico Martins
# Author: Frederico Martins
#
# SPDX-License-Identifier: MIT
#

fqdn: ${hostname}.${domain}
hostname: ${hostname}
manage_etc_hosts: true

write_files:
  - content: |
      #!/bin/bash
      echo "=== Setting up Apache Spark Instance ==="
      echo " instance: ${hostname}.${domain}"
      sudo /usr/local/bin/spark_config ${spark_args} -E -S ${spark_instance_type}
      echo "=== All Done ==="
    path: /tmp/setup_spark.sh
    permissions: '0755'

runcmd:
  - /tmp/setup_spark.sh
  - rm /tmp/setup_spark.sh
--------------------------------------------------------------------------------
/variables.tf:
--------------------------------------------------------------------------------
#
# Variables for the Apache Spark Terraform module.
#
# Copyright 2016-2022, Frederico Martins
# Author: Frederico Martins
#
# SPDX-License-Identifier: MIT
#
# This program is free software. You can use it and/or modify it under the
# terms of the MIT License.
#

variable "ami_id" {
  description = "The ID of the AMI to use for the instance(s)."
  type        = "string"
}

variable "associate_public_ip_address" {
  description = "Associate a public IP address with the Apache Spark instance(s)."
  default     = false
  type        = "string"
}

variable "domain" {
  description = "The domain name to use for the Apache Spark instance(s)."
  type        = "string"
}

variable "extra_security_group_id" {
  description = "Extra security group to assign to the Apache Spark instance(s) (e.g.: 'sg-3f983f98')."
  default     = ""
  type        = "string"
}

variable "keyname" {
  description = "The SSH key name to use for the Apache Spark instance(s)."
  type        = "string"
}

variable "name" {
  description = "The main name that will be used for the Apache Spark instance(s)."
  default     = "spark"
  type        = "string"
}

variable "number_of_masters" {
  description = "Number of Apache Spark Master instances. NOT USED YET."
  default     = "1"
  type        = "string"
}

variable "prefix" {
  description = "A prefix to prepend to the Apache Spark instance(s) name."
  default     = ""
  type        = "string"
}

variable "private_zone_id" {
  description = "The ID of the hosted zone for the private DNS record(s)."
  default     = ""
  type        = "string"
}

variable "public_zone_id" {
  description = "The ID of the hosted zone for the public DNS record(s)."
  default     = ""
  type        = "string"
}

variable "spark_master_heap_size" {
  description = "The heap size for the Apache Spark Master instance(s) (e.g.: '1G')."
  default     = ""
  type        = "string"
}

variable "spark_master_instance_type" {
  description = "The type of instance to use for the Apache Spark Master instance(s)."
  default     = "t2.small"
  type        = "string"
}

variable "spark_master_root_volume_iops" {
  description = "The amount of provisioned IOPS (for 'io1' type only)."
  default     = 0
  type        = "string"
}

variable "spark_master_root_volume_size" {
  description = "The volume size in gigabytes."
  default     = "8"
  type        = "string"
}

variable "spark_master_root_volume_type" {
  description = "The volume type. Must be one of 'standard' (magnetic), 'gp2' (general purpose SSD), or 'io1' (provisioned IOPS SSD)."
  default     = "gp2"
  type        = "string"
}

variable "spark_worker_heap_size" {
  description = "The heap size for the Apache Spark Worker instance(s) (e.g.: '1G')."
  default     = ""
  type        = "string"
}

variable "spark_worker_instance_type" {
  description = "The type of instance to use for the Apache Spark Worker instance(s)."
  default     = "t2.small"
  type        = "string"
}

variable "spark_worker_max_instances" {
  description = "Maximum number of Apache Spark Worker instances in the cluster."
  default     = "1"
  type        = "string"
}

variable "spark_worker_min_instances" {
  description = "Minimum number of Apache Spark Worker instances in the cluster."
  default     = "1"
  type        = "string"
}

variable "spark_worker_root_volume_iops" {
  description = "The amount of provisioned IOPS (for 'io1' type only)."
  default     = 0
  type        = "string"
}

variable "spark_worker_root_volume_size" {
  description = "The volume size in gigabytes."
  default     = "8"
  type        = "string"
}

variable "spark_worker_root_volume_type" {
  description = "The volume type. Must be one of 'standard' (magnetic), 'gp2' (general purpose SSD), or 'io1' (provisioned IOPS SSD)."
  default     = "gp2"
  type        = "string"
}

variable "subnet_ids" {
  description = "List of Subnet IDs to launch the instance(s) in (e.g.: ['subnet-0zfg04s2','subnet-6jm2z54q'])."
  type        = "list"
}

variable "ttl" {
  description = "The TTL (in seconds) for the DNS record(s)."
  default     = "600"
  type        = "string"
}

variable "vpc_id" {
  description = "The VPC ID for the security group(s)."
  type        = "string"
}
--------------------------------------------------------------------------------
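
As a closing sketch of how the module's outputs can be consumed, the
hypothetical client instance below joins the security group exported by the
module in order to gain access to the cluster (the AMI and subnet IDs are the
example values used throughout the README):

```hcl
# Hypothetical client instance that is granted access to the Spark cluster
# by being placed in the security group exported by the module.
resource "aws_instance" "spark_client" {
  ami                    = "ami-gxrd5hz0"
  instance_type          = "t2.small"
  key_name               = "${module.my_spark_cluster.ssh_key}"
  subnet_id              = "subnet-0zfg04s2"
  vpc_security_group_ids = ["${module.my_spark_cluster.security_group}"]
}
```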