├── .terraform-version ├── core-components ├── ansible │ ├── scripts │ │ ├── requirements.txt │ │ ├── cluster-init.sh │ │ └── refresh_inventory.py │ └── playbooks │ │ ├── cluster-unmount-volumes.yml │ │ ├── cluster-stop.yml │ │ ├── cluster-mount-volumes.yml │ │ ├── cluster-init.yml │ │ ├── opscenter-attach-cluster.yml │ │ ├── cluster-start.yml │ │ ├── cluster-update-datastax-agent.yml │ │ ├── cluster-restart.yml │ │ ├── opscenter-install-alerts-dashboards.yml │ │ └── cluster-restack.yml ├── packer │ ├── cassandra │ │ ├── include │ │ │ ├── pip.conf │ │ │ ├── trusted.conf │ │ │ ├── datastax.repo │ │ │ ├── ntp.conf │ │ │ ├── update_auditd.sh │ │ │ ├── sysctl.conf │ │ │ ├── limits.conf │ │ │ ├── remote.yaml │ │ │ └── remote-objects.yaml │ │ └── scripts │ │ │ ├── jmxremote_access.config │ │ │ ├── jmxremote.password │ │ │ ├── cassandra_cqlshrc │ │ │ ├── get_dse_start_time.sh │ │ │ ├── nodetool-ssl.properties │ │ │ ├── update_jmxremote_access.sh │ │ │ ├── disable_eth1.sh │ │ │ ├── unmount_volumes.sh │ │ │ ├── cas_get_tag_values.py │ │ │ ├── enable_eth1.sh │ │ │ ├── create_volume.sh │ │ │ └── cas_ec2_mgr.py │ ├── opscenter │ │ ├── include │ │ │ ├── pip.conf │ │ │ ├── datastax.repo │ │ │ ├── ntp.conf │ │ │ ├── update_auditd.sh │ │ │ ├── sysctl.conf │ │ │ └── limits.conf │ │ ├── Berksfile │ │ ├── scripts │ │ │ ├── disable_eth1.sh │ │ │ ├── register_cluster_with_opscenter.sh │ │ │ ├── enable_eth1.sh │ │ │ ├── cluster_conf.templ │ │ │ ├── bootstrap.sh │ │ │ └── ops_ec2_mgr.py │ │ └── ssl │ │ │ ├── opscenter.crt.pem │ │ │ └── opscenter.key.pem │ ├── fetch-credentials.sh │ └── init-packer-instance-profile.sh ├── terraform │ ├── versions.tf │ ├── layers │ │ ├── opscenter-resources │ │ │ ├── _outputs.tf │ │ │ ├── _variables.tf │ │ │ └── main.tf │ │ ├── vpc-resources │ │ │ ├── vpc-info.tf │ │ │ ├── vpc-create.tf │ │ │ ├── _outputs.tf │ │ │ ├── _variables.tf │ │ │ └── vpc-shared.tf │ │ ├── cluster-resources │ │ │ ├── _outputs.tf │ │ │ └── _variables.tf │ │ └── account-resources │ │ │ ├── _outputs.tf │ │ │ ├── main.tf │ │ │ └── _variables.tf │ ├── modules │ │ ├── bucket-object │ │ │ ├── _variables.tf │ │ │ └── bucket_object.tf │ │ ├── vpc-shared │ │ │ ├── _variables.tf │ │ │ ├── _outputs.tf │ │ │ └── opscenter_common_sg.tf │ │ ├── vpc-info │ │ │ ├── _variables.tf │ │ │ ├── _outputs.tf │ │ │ └── vpc-info.tf │ │ ├── opscenter │ │ │ ├── _outputs.tf │ │ │ ├── amis.tf │ │ │ ├── route53.tf │ │ │ ├── scripts │ │ │ │ └── opscenter-init.tpl │ │ │ ├── _variables.tf │ │ │ ├── lb.tf │ │ │ ├── instances.tf │ │ │ └── sg_opscenter.tf │ │ ├── iam-resources │ │ │ ├── assume-role.tf │ │ │ ├── _outputs.tf │ │ │ ├── _variables.tf │ │ │ ├── policies.tf │ │ │ ├── bastion-role.tf │ │ │ ├── opscenter-role.tf │ │ │ └── cassandra-role.tf │ │ ├── vpc-create │ │ │ ├── _variables.tf │ │ │ ├── _outputs.tf │ │ │ └── vpc.tf │ │ ├── parameter-store │ │ │ ├── parameters.tf │ │ │ └── _variables.tf │ │ ├── bastion │ │ │ ├── _outputs.tf │ │ │ ├── data │ │ │ │ └── bastion-init.tpl │ │ │ ├── cloud_init.tf │ │ │ ├── _variables.tf │ │ │ ├── lb.tf │ │ │ ├── ingress_sg.tf │ │ │ └── asg.tf │ │ └── cassandra │ │ │ ├── _outputs.tf │ │ │ ├── files │ │ │ ├── tuning_changes.sh │ │ │ └── dse-init.tpl │ │ │ ├── post_deploy.tf │ │ │ ├── templates.tf │ │ │ ├── enis.tf │ │ │ ├── _variables.tf │ │ │ ├── security_groups.tf │ │ │ └── instances.tf │ ├── shared │ │ ├── user-keys.yaml.tpl-ansible │ │ └── user-keys.yaml.tpl-extra │ └── aws_backend.tf ├── roles │ ├── assume-role.json.tpl │ ├── packer.json │ └── terraform.json ├── scripts │ └── ssh │ │ ├── 
ssh_config.tpl │ │ ├── init-ansible-key.sh │ │ └── build-ssh-config.sh └── operations.sh ├── docs ├── images │ ├── pronto-logo.png │ └── cassandra_ips.png ├── 2.PACKER.md ├── 4.ANSIBLE.md └── 3.TERRAFORM.md ├── .github ├── CODEOWNERS ├── PULL_REQUEST_TEMPLATE.md ├── ISSUE_TEMPLATE │ ├── question.md │ ├── bug.md │ ├── feature.md │ └── regression.md └── CONTRIBUTING.md ├── configurations ├── default-account │ ├── default-vpc │ │ ├── vpc-resources │ │ │ ├── tags.tfvars │ │ │ ├── vpc-existing.tfvars │ │ │ └── vpc-new.tfvars │ │ ├── default-cluster │ │ │ ├── tags.tfvars │ │ │ ├── cluster-configs │ │ │ │ └── post-deploy-scripts │ │ │ │ │ └── empty.sh │ │ │ └── cluster.tfvars │ │ └── opscenter-resources │ │ │ ├── tags.tfvars │ │ │ └── opscenter.tfvars │ ├── account-resources │ │ ├── tags.tfvars │ │ └── account.tfvars │ ├── user-keys.yaml.tpl │ ├── packer-resources │ │ └── cassandra │ │ │ └── gen_ca_cert.conf │ └── variables.yaml └── tfvars │ └── cidrs.tfvars ├── CHANGELOG.md ├── .gitignore ├── Dockerfile ├── create-configuration.sh ├── README.md └── init-roles.sh /.terraform-version: -------------------------------------------------------------------------------- 1 | 0.12.24 2 | -------------------------------------------------------------------------------- /core-components/ansible/scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | requests_toolbelt -------------------------------------------------------------------------------- /core-components/packer/cassandra/include/pip.conf: -------------------------------------------------------------------------------- 1 | [global] 2 | index-url = https://pypi.org/simple -------------------------------------------------------------------------------- /core-components/packer/cassandra/scripts/jmxremote_access.config: -------------------------------------------------------------------------------- 1 | cassandra readwrite 2 | 3 | -------------------------------------------------------------------------------- /core-components/packer/opscenter/include/pip.conf: -------------------------------------------------------------------------------- 1 | [global] 2 | index-url = https://pypi.org/simple -------------------------------------------------------------------------------- /core-components/terraform/versions.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_version = ">= 0.12" 3 | } 4 | -------------------------------------------------------------------------------- /docs/images/pronto-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intuit/dse-pronto/HEAD/docs/images/pronto-logo.png -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # List of source code paths and code owners 2 | # common services & repos 3 | * @bencovi 4 | -------------------------------------------------------------------------------- /docs/images/cassandra_ips.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intuit/dse-pronto/HEAD/docs/images/cassandra_ips.png -------------------------------------------------------------------------------- /core-components/packer/cassandra/scripts/jmxremote.password: 
-------------------------------------------------------------------------------- 1 | monitorRole QED 2 | controlRole R&D 3 | cassandra cassandra -------------------------------------------------------------------------------- /core-components/terraform/layers/opscenter-resources/_outputs.tf: -------------------------------------------------------------------------------- 1 | output "opscenter_elb" { 2 | value = module.opscenter.opscenter_elb 3 | } -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 6 | 7 | ## What does this change do and why -------------------------------------------------------------------------------- /core-components/terraform/modules/bucket-object/_variables.tf: -------------------------------------------------------------------------------- 1 | variable "bucket_name" {} 2 | variable "key_prefix" {} 3 | variable "file_source" {} 4 | -------------------------------------------------------------------------------- /core-components/terraform/modules/vpc-shared/_variables.tf: -------------------------------------------------------------------------------- 1 | variable "vpc_id" { type = string } 2 | variable "region" {} 3 | variable "account_id" {} 4 | -------------------------------------------------------------------------------- /core-components/packer/cassandra/include/trusted.conf: -------------------------------------------------------------------------------- 1 | server=/internal/169.254.169.253 2 | server=/amazonaws.com/169.254.169.253 3 | server=8.8.8.8 4 | server=8.8.4.4 5 | -------------------------------------------------------------------------------- /configurations/default-account/default-vpc/vpc-resources/tags.tfvars: -------------------------------------------------------------------------------- 1 | # Specify additional "vpc" layer tags here 2 | 3 | # vpc_tags = { 4 | # "vpc_tag_key" = "vpc_tag_value" 5 | # } -------------------------------------------------------------------------------- /core-components/terraform/modules/vpc-info/_variables.tf: -------------------------------------------------------------------------------- 1 | # existing vpc 2 | variable "vpc_id" {} 3 | variable "ingress_subnet_tag_prefix" {} 4 | variable "data_subnet_tag_prefix" {} 5 | -------------------------------------------------------------------------------- /configurations/default-account/account-resources/tags.tfvars: -------------------------------------------------------------------------------- 1 | # Specify additional "account" layer tags here 2 | 3 | # account_tags = { 4 | # "account_tag_key" = "account_tag_value" 5 | # } -------------------------------------------------------------------------------- /core-components/packer/cassandra/include/datastax.repo: -------------------------------------------------------------------------------- 1 | [datastax] 2 | name=DataStax Repo for DataStax Enterprise 3 | baseurl=https://rpm.datastax.com/enterprise/ 4 | enabled=1 5 | gpgcheck=0 -------------------------------------------------------------------------------- /core-components/packer/opscenter/include/datastax.repo: -------------------------------------------------------------------------------- 1 | [datastax] 2 | name=DataStax Repo for DataStax Enterprise 3 | baseurl=https://rpm.datastax.com/enterprise/ 4 | enabled=1 5 | gpgcheck=0 -------------------------------------------------------------------------------- 
/.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 💬 Questions / Help 3 | labels: 'question' 4 | about: If you have questions, please check our Slack 5 | --- 6 | 7 | ## 💬 Questions and Help 8 | -------------------------------------------------------------------------------- /configurations/default-account/default-vpc/default-cluster/tags.tfvars: -------------------------------------------------------------------------------- 1 | # Specify additional "cluster" layer tags here 2 | 3 | # cluster_tags = { 4 | # "cluster_tag_key" = "cluster_tag_value" 5 | # } -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # v1.0.0 (Thu Jun 18 2020) 2 | 3 | #### Initial Revision 4 | 5 | - [1.0.0](https://github.com/intuit/dse-pronto/releases/tag/1.0.0): added changelog + other docs 6 | 7 | #### Authors: @bencovi 8 | -------------------------------------------------------------------------------- /configurations/default-account/default-vpc/opscenter-resources/tags.tfvars: -------------------------------------------------------------------------------- 1 | # Specify additional "opscenter" layer tags here 2 | 3 | # opscenter_tags = { 4 | # "opscenter_tag_key" = "opscenter_tag_value" 5 | # } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | __pycache__/ 3 | core-components/roles/assume-role.json 4 | core-components/terraform/build 5 | user-keys.yaml 6 | *.crt 7 | *.key 8 | *.tfstate 9 | ssh_config 10 | hosts 11 | .DS_Store 12 | -------------------------------------------------------------------------------- /core-components/terraform/modules/vpc-shared/_outputs.tf: -------------------------------------------------------------------------------- 1 | output "sg_ops_nodes_to_cas" { 2 | description = "Security group to be used by cassandra nodes to connect to opscenter" 3 | value = aws_security_group.ops_to_cas.id 4 | } 5 | -------------------------------------------------------------------------------- /core-components/terraform/modules/opscenter/_outputs.tf: -------------------------------------------------------------------------------- 1 | output "opscenter_elb" { 2 | value = aws_lb.opscenter.dns_name 3 | } 4 | 5 | output "opscenter_primary_private_ip" { 6 | value = aws_network_interface.opscenter-eni.private_ip 7 | } 8 | -------------------------------------------------------------------------------- /configurations/tfvars/cidrs.tfvars: -------------------------------------------------------------------------------- 1 | ################## 2 | # CIDR lists 3 | ################## 4 | 5 | # CIDRs for bastion SSH (port 22) and OpsCenter ELB (port 443) ingress 6 | ingress_cidrs = [ 7 | "10.11.12.13/22", 8 | "192.168.0.1/32" 9 | ] 10 | -------------------------------------------------------------------------------- /core-components/terraform/modules/bucket-object/bucket_object.tf: -------------------------------------------------------------------------------- 1 | resource "aws_s3_bucket_object" "bucket-object" { 2 | bucket = var.bucket_name 3 | key = var.key_prefix 4 | source = var.file_source 5 | etag = filemd5(var.file_source) 6 | } 7 | --------------------------------------------------------------------------------
/core-components/terraform/modules/opscenter/amis.tf: -------------------------------------------------------------------------------- 1 | data "aws_ami" "opscenter" { 2 | most_recent = true 3 | owners = ["${var.ami_owner_id}"] 4 | 5 | filter { 6 | name = "name" 7 | values = ["${var.ami_prefix}*"] 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /core-components/packer/cassandra/include/ntp.conf: -------------------------------------------------------------------------------- 1 | driftfile /var/lib/ntp/drift 2 | 3 | server 0.amazon.pool.ntp.org iburst 4 | server 1.amazon.pool.ntp.org iburst 5 | server 2.amazon.pool.ntp.org iburst 6 | server 3.amazon.pool.ntp.org iburst 7 | 8 | disable monitor 9 | -------------------------------------------------------------------------------- /core-components/packer/opscenter/include/ntp.conf: -------------------------------------------------------------------------------- 1 | driftfile /var/lib/ntp/drift 2 | 3 | server 0.amazon.pool.ntp.org iburst 4 | server 1.amazon.pool.ntp.org iburst 5 | server 2.amazon.pool.ntp.org iburst 6 | server 3.amazon.pool.ntp.org iburst 7 | 8 | disable monitor 9 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Steps to contribute: 2 | 3 | 1. [Fork the repo](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) 4 | 2. Make your change (fix or feature) 5 | 3. Submit a PR 6 | 7 | And please follow the [Code of Conduct](./CODE_OF_CONDUCT.md). -------------------------------------------------------------------------------- /core-components/packer/cassandra/scripts/cassandra_cqlshrc: -------------------------------------------------------------------------------- 1 | [connection] 2 | hostname = ##PRIVATE_IP## 3 | port = 9142 4 | factory = cqlshlib.ssl.ssl_transport_factory 5 | 6 | [ssl] 7 | certfile=/etc/dse/cassandra/keys/##CERT## 8 | # Optional, true by default 9 | validate = false 10 | -------------------------------------------------------------------------------- /core-components/packer/opscenter/Berksfile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | #^syntax detection 3 | 4 | load "#{__dir__}/../../scripts/berkshelf_helper.rb" 5 | 6 | source 'https://supermarket.chef.io' 7 | 8 | 9 | 10 | df_cookbook 'yum_s3_repository' 11 | df_cookbook 'java_jdk' 12 | df_cookbook 'dp_opscenter' 13 | -------------------------------------------------------------------------------- /core-components/terraform/layers/vpc-resources/vpc-info.tf: -------------------------------------------------------------------------------- 1 | variable "vpc_id" {} 2 | 3 | module "vpc" { 4 | source = "../../modules/vpc-info" 5 | 6 | vpc_id = var.vpc_id 7 | 8 | ingress_subnet_tag_prefix = var.ingress_subnet_tag_prefix 9 | data_subnet_tag_prefix = var.data_subnet_tag_prefix 10 | } 11 | -------------------------------------------------------------------------------- /core-components/terraform/layers/cluster-resources/_outputs.tf: -------------------------------------------------------------------------------- 1 | output "sg_cas_internode" { 2 | value = module.cassandra.sg_cas_internode 3 | } 4 | 5 | output "sg_cas_client_access" { 6 | value = module.cassandra.sg_cas_client_access 7 | } 8 | 9 | output "cluster_name" { 10 | value = var.cluster_name 11 | } 12 | 
-------------------------------------------------------------------------------- /core-components/terraform/modules/iam-resources/assume-role.tf: -------------------------------------------------------------------------------- 1 | data "aws_iam_policy_document" "ec2-assume-role-trusted-policy" { 2 | statement { 3 | effect = "Allow" 4 | actions = ["sts:AssumeRole"] 5 | principals { 6 | type = "Service" 7 | identifiers = ["ec2.amazonaws.com"] 8 | } 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /core-components/packer/cassandra/scripts/get_dse_start_time.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #set -x 4 | uptime=$(nodetool info | grep -i uptime | awk -F ':' '{print $2}') 5 | start_time=$(TZ=America/Los_Angeles date --date "- $uptime seconds") 6 | uptime_in_days=$(awk "BEGIN {print $uptime / 86400 }") 7 | echo "DSE was started at: $start_time and has been up for $uptime_in_days days" 8 | -------------------------------------------------------------------------------- /core-components/terraform/layers/account-resources/_outputs.tf: -------------------------------------------------------------------------------- 1 | output "bastion_profile_arn" { 2 | value = module.iam-resources.bastion_profile_arn 3 | } 4 | 5 | output "cassandra_profile_arn" { 6 | value = module.iam-resources.cassandra_profile_arn 7 | } 8 | 9 | output "opscenter_profile_arn" { 10 | value = module.iam-resources.opscenter_profile_arn 11 | } 12 | -------------------------------------------------------------------------------- /core-components/terraform/shared/user-keys.yaml.tpl-ansible: -------------------------------------------------------------------------------- 1 | # cloud-config 2 | system_info: 3 | default_user: 4 | name: ec2-user 5 | 6 | groups: 7 | - default: [users] 8 | 9 | users: 10 | - name: ansible 11 | groups: default 12 | shell: /bin/bash 13 | sudo: ['ALL=(ALL) NOPASSWD:ALL'] 14 | ssh-authorized-keys: 15 | - ##ANSIBLE_PUB_KEY## 16 | -------------------------------------------------------------------------------- /core-components/terraform/modules/vpc-create/_variables.tf: -------------------------------------------------------------------------------- 1 | # new vpc 2 | variable "vpc_cidr" {} 3 | variable "vpc_name" {} 4 | variable "region" {} 5 | variable "azs" { type = list(string) } 6 | variable "ingress_subnets" { type = list(string) } 7 | variable "data_subnets" { type = list(string) } 8 | 9 | variable "ingress_subnet_tag_prefix" {} 10 | variable "data_subnet_tag_prefix" {} -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 🐛 Bug report 3 | labels: 'bug' 4 | about: Create a report to help us improve 5 | --- 6 | 7 | ## 🐛 Bug Report 8 | 9 | A clear and concise description of what the bug is. 10 | 11 | ## To Reproduce 12 | 13 | Steps to reproduce the behavior. 14 | 15 | ## Expected Behavior 16 | 17 | A clear and concise description of what you expected to happen.
18 | -------------------------------------------------------------------------------- /core-components/terraform/modules/iam-resources/_outputs.tf: -------------------------------------------------------------------------------- 1 | output "bastion_profile_arn" { 2 | value = aws_iam_instance_profile.bastion-profile.arn 3 | } 4 | 5 | output "cassandra_profile_arn" { 6 | value = aws_iam_instance_profile.cassandra-profile.arn 7 | } 8 | 9 | output "opscenter_profile_arn" { 10 | value = aws_iam_instance_profile.opscenter-profile.arn 11 | } 12 | -------------------------------------------------------------------------------- /configurations/default-account/default-vpc/default-cluster/cluster-configs/post-deploy-scripts/empty.sh: -------------------------------------------------------------------------------- 1 | # NOTE: additional post-deploy shell scripts can be placed in this directory... 2 | # ${account_name}/${vpc_name}/${cluster_name}/cluster-configs/post-deploy-scripts/ 3 | 4 | # They will be transferred to your cluster at deploy time, and executed at the end of 5 | # the bootstrap.sh script during cloud-init. 6 | -------------------------------------------------------------------------------- /core-components/ansible/playbooks/cluster-unmount-volumes.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # This playbook unmounts data volumes. 3 | 4 | - name: unmount DSE data volumes 5 | gather_facts: false 6 | hosts: 7 | - '{{ host_list }}' 8 | serial: 1 9 | 10 | tasks: 11 | - name: unmount volumes 12 | become: true 13 | shell: | 14 | cd /opt/dse/cassandra/scripts 15 | ./unmount_volumes.sh 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 🚀 Feature Proposal 3 | labels: 'feature' 4 | about: Submit a proposal for a new feature 5 | --- 6 | 7 | ## 🚀 Feature Proposal 8 | 9 | A clear and concise description of what the feature is. 10 | 11 | ## Motivation 12 | 13 | Please outline the motivation for the proposal. 14 | 15 | ## Example 16 | 17 | Please provide an example for how this feature would be used. 
18 | -------------------------------------------------------------------------------- /core-components/packer/cassandra/scripts/nodetool-ssl.properties: -------------------------------------------------------------------------------- 1 | -Djavax.net.ssl.keyStore=/etc/dse/cassandra/keystores/server-keystore.jks 2 | -Djavax.net.ssl.keyStorePassword=awesomekeypass 3 | -Djavax.net.ssl.trustStore=/etc/dse/cassandra/keystores/server-truststore.jks 4 | -Djavax.net.ssl.trustStorePassword=truststorepass 5 | -Dcom.sun.management.jmxremote.ssl.need.client.auth=true 6 | -Dcom.sun.management.jmxremote.registry.ssl=true -------------------------------------------------------------------------------- /core-components/roles/assume-role.json.tpl: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Principal": { 7 | "Service": "ec2.amazonaws.com" 8 | }, 9 | "Action": "sts:AssumeRole" 10 | }, 11 | { 12 | "Effect": "Allow", 13 | "Principal": { 14 | "AWS": "##IAM_PROFILE_ARN##" 15 | }, 16 | "Action": "sts:AssumeRole" 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /core-components/terraform/modules/parameter-store/parameters.tf: -------------------------------------------------------------------------------- 1 | resource "aws_ssm_parameter" "parameter" { 2 | count = var.parameter_count 3 | type = "String" 4 | name = format("/dse/%s/%s/%s/%s", var.account_name, var.vpc_name, var.cluster_name, lookup(var.parameters[count.index], "key")) 5 | value = lookup(var.parameters[count.index], "value") 6 | tier = lookup(var.parameters[count.index], "tier", "Standard") 7 | overwrite = "true" 8 | } 9 | -------------------------------------------------------------------------------- /core-components/terraform/modules/iam-resources/_variables.tf: -------------------------------------------------------------------------------- 1 | variable "region" { type = string } 2 | variable "account_id" { type = string } 3 | variable "tfstate_bucket" { type = string } 4 | 5 | variable "account_name" { type = string } 6 | variable "vpc_name" { type = string } 7 | variable "cluster_name" { type = string } 8 | 9 | # salt for iam resource naming 10 | variable "prefix" { type = string } 11 | variable "suffix" { type = string } 12 | -------------------------------------------------------------------------------- /configurations/default-account/user-keys.yaml.tpl: -------------------------------------------------------------------------------- 1 | # cloud-config 2 | system_info: 3 | default_user: 4 | name: ec2-user 5 | 6 | groups: 7 | - users: [ansible, ec2-user] 8 | 9 | users: 10 | - name: ansible 11 | groups: default 12 | shell: /bin/bash 13 | sudo: ['ALL=(ALL) NOPASSWD:ALL'] 14 | ssh-authorized-keys: 15 | - ##ANSIBLE_PUB_KEY## 16 | - name: ec2-user 17 | ssh-authorized-keys: 18 | - ##PERSONAL_PUB_KEY## 19 | -------------------------------------------------------------------------------- /core-components/terraform/aws_backend.tf: -------------------------------------------------------------------------------- 1 | # default provider (no alias) for current deployment 2 | provider "aws" { 3 | region = "#region#" 4 | } 5 | 6 | # aliased provider for interacting with tfstate bucket 7 | provider "aws" { 8 | alias = "tfstate" 9 | region = "#tfstate_region#" 10 | } 11 | 12 | terraform { 13 | backend "s3" { 14 | bucket = "#bucket#" 15 | key = "#key#" 16 | region = "#tfstate_region#" 17 
| } 18 | } 19 | -------------------------------------------------------------------------------- /core-components/terraform/layers/account-resources/main.tf: -------------------------------------------------------------------------------- 1 | module "iam-resources" { 2 | source = "../../modules/iam-resources" 3 | 4 | region = var.region 5 | tfstate_bucket = var.tfstate_bucket 6 | account_id = var.account_id 7 | account_name = var.account_name 8 | vpc_name = var.vpc_name 9 | cluster_name = var.cluster_name 10 | 11 | prefix = var.iam_resource_prefix 12 | suffix = var.iam_resource_suffix 13 | } -------------------------------------------------------------------------------- /core-components/terraform/shared/user-keys.yaml.tpl-extra: -------------------------------------------------------------------------------- 1 | # cloud-config 2 | system_info: 3 | default_user: 4 | name: ec2-user 5 | 6 | groups: 7 | - users: [ansible, ec2-user] 8 | 9 | users: 10 | - name: ansible 11 | groups: default 12 | shell: /bin/bash 13 | sudo: ['ALL=(ALL) NOPASSWD:ALL'] 14 | ssh-authorized-keys: 15 | - ##ANSIBLE_PUB_KEY## 16 | - name: ec2-user 17 | ssh-authorized-keys: 18 | - ##PERSONAL_PUB_KEY## 19 | -------------------------------------------------------------------------------- /core-components/ansible/playbooks/cluster-stop.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # This playbook stops the DSE service on the seed nodes, then the non-seeds, one at a time. 3 | 4 | - name: stop DSE service 5 | gather_facts: false 6 | hosts: 7 | - '{{ host_list }}' 8 | serial: 1 9 | 10 | tasks: 11 | - name: nodetool drain 12 | shell: "nodetool drain" 13 | ignore_errors: true 14 | 15 | - name: stop DSE service 16 | service: 17 | name: dse 18 | state: stopped 19 | -------------------------------------------------------------------------------- /configurations/default-account/account-resources/account.tfvars: -------------------------------------------------------------------------------- 1 | # IAM resources created in the account layer are named generically (e.g. "cassandra-role"). If you're worried about 2 | # naming collisions, you can add a prefix or suffix here (e.g. prefix "dpp-" would result in "dpp-cassandra-role"). 3 | # By default, these are set to empty strings. Whether you create a prefix/suffix or not, these resources are still 4 | # deployed on an account-wide basis, and shared across all clusters you deploy.
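# (Hypothetical example: iam_resource_prefix="dpp-" combined with iam_resource_suffix="-prod" would yield a name like "dpp-cassandra-role-prod".)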
5 | 6 | iam_resource_prefix="" 7 | iam_resource_suffix="" -------------------------------------------------------------------------------- /core-components/packer/cassandra/scripts/update_jmxremote_access.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #set -x 4 | 5 | # splice jmxremote_access.config into the monitorRole/controlRole section of jmxremote.access 6 | java_home=$(dirname $(dirname $(/usr/sbin/alternatives --list | awk '{ if ($1 == "java" && $2 == "auto") print $3 }'))) 7 | file=${java_home}/lib/management/jmxremote.access 8 | cp "$file" "$file.bak" 9 | lead='^monitorRole readonly' 10 | tail='^controlRole readwrite' 11 | sed -e "/$lead/,/$tail/{ /$lead/{p; 12 | r ./jmxremote_access.config 13 | }; /$tail/p; 14 | d 15 | }" <"$file.bak" > "$file" 16 | -------------------------------------------------------------------------------- /core-components/terraform/modules/vpc-info/_outputs.tf: -------------------------------------------------------------------------------- 1 | output "vpc_id" { 2 | value = var.vpc_id 3 | } 4 | 5 | output "vpc_cidr" { 6 | value = data.aws_vpc.vpc.cidr_block 7 | } 8 | 9 | output "data_subnet_ids" { 10 | value = data.aws_subnet.data.*.id 11 | } 12 | 13 | output "data_subnet_cidr_blocks" { 14 | value = data.aws_subnet.data.*.cidr_block 15 | } 16 | 17 | output "ingress_subnet_ids" { 18 | value = data.aws_subnet.ingress.*.id 19 | } 20 | 21 | output "ingress_subnet_cidr_blocks" { 22 | value = data.aws_subnet.ingress.*.cidr_block 23 | } 24 | -------------------------------------------------------------------------------- /core-components/terraform/modules/vpc-create/_outputs.tf: -------------------------------------------------------------------------------- 1 | output "vpc_id" { 2 | value = aws_vpc.dse-vpc.id 3 | } 4 | 5 | output "vpc_cidr" { 6 | value = aws_vpc.dse-vpc.cidr_block 7 | } 8 | 9 | output "data_subnet_ids" { 10 | value = aws_subnet.dse-vpc-data.*.id 11 | } 12 | 13 | output "data_subnet_cidr_blocks" { 14 | value = aws_subnet.dse-vpc-data.*.cidr_block 15 | } 16 | 17 | output "ingress_subnet_ids" { 18 | value = aws_subnet.dse-vpc-ingress.*.id 19 | } 20 | 21 | output "ingress_subnet_cidr_blocks" { 22 | value = aws_subnet.dse-vpc-ingress.*.cidr_block 23 | } 24 | -------------------------------------------------------------------------------- /core-components/scripts/ssh/ssh_config.tpl: -------------------------------------------------------------------------------- 1 | Host * 2 | StrictHostKeyChecking no 3 | GlobalKnownHostsFile /dev/null 4 | UserKnownHostsFile /dev/null 5 | LogLevel ERROR 6 | ServerAliveInterval 30 7 | TCPKeepAlive yes 8 | ForwardAgent yes 9 | 10 | ########################################################## 11 | 12 | Host bastion 13 | HostName ##BASTION_DNS## 14 | User ##USER## 15 | ProxyCommand none 16 | IdentityFile ##SSH_KEY_PATH## 17 | 18 | Host ##SEED_IP## 19 | User ##USER## 20 | IdentityFile ##SSH_KEY_PATH## 21 | ProxyCommand ssh -F ssh_config bastion -W %h:%p 22 | -------------------------------------------------------------------------------- /core-components/ansible/playbooks/cluster-mount-volumes.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # This playbook mounts data volumes.
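# create_volume.sh takes the data volume settings from cluster.tfvars, in order: volume size (GB), number of stripes, RAID block size, and RAID level (-1 signifies no disk striping; 1 volume = 1 mount point).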
3 | 4 | - name: mount DSE data volumes 5 | gather_facts: false 6 | hosts: 7 | - '{{ host_list }}' 8 | 9 | vars: 10 | volume_size: { volume_size } 11 | stripes: { stripes } 12 | block_size: { block_size } 13 | raid_level: { raid_level } 14 | 15 | tasks: 16 | - name: mount volumes 17 | become: true 18 | shell: | 19 | cd /opt/dse/cassandra/scripts 20 | ./create_volume.sh {{ volume_size }} {{ stripes }} {{ block_size }} {{ raid_level }} 21 | -------------------------------------------------------------------------------- /core-components/terraform/modules/bastion/_outputs.tf: -------------------------------------------------------------------------------- 1 | output "bastion_lb_dns" { 2 | # grotesque hack to get around the fact that the HCL interpolation handler can't short-circuit a nonexistent resource 3 | value = length(var.existing_bastion_sg_id) == 0 ? join("", aws_lb.bastion-nlb.*.dns_name) : "" 4 | } 5 | 6 | output "bastion_sg_id" { 7 | # grotesque hack to get around the fact that the HCL interpolation handler can't short-circuit a nonexistent resource 8 | value = length(var.existing_bastion_sg_id) == 0 ? join("", aws_security_group.bastion-sg.*.id) : var.existing_bastion_sg_id 9 | } 10 | 11 | -------------------------------------------------------------------------------- /core-components/terraform/modules/parameter-store/_variables.tf: -------------------------------------------------------------------------------- 1 | # list of objects (key, value, and optional 'tier' to set a param as Advanced if it's > 4096 bytes) 2 | variable "parameters" { type = list } 3 | 4 | # using a static parameter_count prevents issues with list interpolation when terraform calculates "count" 5 | variable "parameter_count" { type = string } 6 | 7 | # parameters will be stored under key /dse/${account_name}/${vpc_name}/${cluster_name}/${parameters[].key} 8 | variable "cluster_name" { type = string } 9 | variable "vpc_name" { type = string } 10 | variable "account_name" { type = string } 11 | -------------------------------------------------------------------------------- /core-components/packer/cassandra/scripts/disable_eth1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd /etc/sysconfig/network-scripts 4 | if [[ -e /etc/sysconfig/network-scripts/ifcfg-eth1 ]]; then 5 | sed -i 's/ONBOOT=no/ONBOOT=yes/g' ifcfg-eth0 6 | 7 | /sbin/ifup eth0 8 | sleep 10 9 | /sbin/ifdown eth1 10 | sleep 10 11 | 12 | my_eni_ip=$(ifconfig -a eth0 | grep -w inet | awk '{print $2}' | sed 's,/.*$,,' | sed 's/\./-/g') 13 | my_eni_hostname=ip-${my_eni_ip}.us-west-2.compute.internal 14 | hostnamectl set-hostname ${my_eni_hostname} 15 | echo "preserve_hostname: true" >> /etc/cloud/cloud.cfg 16 | rm ifcfg-eth1 17 | fi 18 | -------------------------------------------------------------------------------- /core-components/packer/opscenter/scripts/disable_eth1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd /etc/sysconfig/network-scripts 4 | if [[ -e /etc/sysconfig/network-scripts/ifcfg-eth1 ]]; then 5 | sed -i 's/ONBOOT=no/ONBOOT=yes/g' ifcfg-eth0 6 | 7 | /sbin/ifup eth0 8 | sleep 10 9 | /sbin/ifdown eth1 10 | sleep 10 11 | 12 | my_eni_ip=$(ifconfig -a eth0 | grep -w inet | awk '{print $2}' | sed 's,/.*$,,' | sed 's/\./-/g') 13 | my_eni_hostname=ip-${my_eni_ip}.us-west-2.compute.internal 14 | hostnamectl set-hostname ${my_eni_hostname} 15 | echo "preserve_hostname: true" >> /etc/cloud/cloud.cfg 16 | rm ifcfg-eth1 17 
| fi 18 | -------------------------------------------------------------------------------- /core-components/packer/opscenter/scripts/register_cluster_with_opscenter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | source ./register_cluster_func.sh 4 | 5 | region=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone | sed 's/\(.*\)[a-z]/\1/') 6 | 7 | bucket=$1 8 | cluster_to_register=$2 9 | vpc_name=$3 10 | account_name=$4 11 | 12 | if [[ ! -z "$cluster_to_register" ]] && [[ ! -z "$bucket" ]] && [[ ! -z "$account_name" ]] && [[ ! -z "$vpc_name" ]]; then 13 | register_with_opscenter ${bucket} ${cluster_to_register} ${vpc_name} ${account_name} 14 | else 15 | echo "cluster info not provided" 16 | fi 17 | 18 | -------------------------------------------------------------------------------- /configurations/default-account/packer-resources/cassandra/gen_ca_cert.conf: -------------------------------------------------------------------------------- 1 | [req] 2 | distinguished_name = req_distinguished_name 3 | prompt = no 4 | output_password = ##CERT_OUPUT_PWD## 5 | default_bits = 2048 6 | 7 | [req_distinguished_name] 8 | C = <<< YOUR_COUNTRY >>> 9 | ST = <<< YOUR_STATE >>> 10 | L = <<< YOUR_LOCALITY >>> 11 | O = <<< YOUR_ORGANIZATION >>> 12 | OU = <<< YOUR_ORGANIZATIONAL_UNIT >>> 13 | CN = <<< YOUR_COMMON_NAME >>> 14 | emailAddress = <<< YOUR_EMAIL >>> 15 | -------------------------------------------------------------------------------- /core-components/terraform/layers/account-resources/_variables.tf: -------------------------------------------------------------------------------- 1 | variable "region" { type = string } 2 | variable "tfstate_bucket" { type = string } 3 | variable "account_id" { type = string } 4 | variable "account_name" { type = string } 5 | variable "vpc_name" { type = string } 6 | variable "cluster_name" { type = string } 7 | 8 | # salt for iam resource naming; default to empty string 9 | variable "iam_resource_prefix" { default = "" } 10 | variable "iam_resource_suffix" { default = "" } 11 | 12 | variable "profile" {} 13 | variable "ami_owner_id" {} 14 | variable "tfstate_region" {} 15 | variable "role_arn" {} 16 | 17 | variable "account_tags" { default = {} } -------------------------------------------------------------------------------- /core-components/terraform/modules/cassandra/_outputs.tf: -------------------------------------------------------------------------------- 1 | output "cassandra_seed_node_ips" { 2 | value = aws_network_interface.cas-seed-eni.*.private_ip 3 | } 4 | 5 | output "cassandra_non_seed_node_ips" { 6 | value = aws_network_interface.cas-non-seed-eni.*.private_ip 7 | } 8 | 9 | output "sg_cas_client_access" { 10 | value = aws_security_group.cas-client-access.id 11 | } 12 | 13 | output "sg_cas_internode" { 14 | value = aws_security_group.cas-internode.id 15 | } 16 | 17 | output "cassandra_asgs" { 18 | value = [concat( 19 | aws_autoscaling_group.cassandra-seed-node.*.name, 20 | aws_autoscaling_group.cassandra-non-seed-node.*.name, 21 | )] 22 | } 23 | 24 | -------------------------------------------------------------------------------- /core-components/packer/cassandra/include/update_auditd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | file=/etc/audit/auditd.conf 3 | cp "$file" "$file.bak" 4 | sed -i -r "s/num_logs = .*/num_logs = 5/g" $file 5 | sed -i -r "s/max_log_file = .*/max_log_file = 100/g" $file 6 | 
sed -i -r "s/max_log_file_action = .*/max_log_file_action = rotate/g" $file 7 | sed -i -r "s/space_left = .*/space_left = 2048/g" $file 8 | sed -i -r "s/space_left_action = .*/space_left_action = syslog/g" $file 9 | sed -i -r "s/action_mail_acct = .*/action_mail_acct = root/g" $file 10 | sed -i -r "s/admin_space_left = .*/admin_space_left = 1024/g" $file 11 | sed -i -r "s/admin_space_left_action = .*/admin_space_left_action = SUSPEND/g" $file 12 | -------------------------------------------------------------------------------- /core-components/packer/opscenter/include/update_auditd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | file=/etc/audit/auditd.conf 3 | cp "$file" "$file.bak" 4 | sed -i -r "s/num_logs = .*/num_logs = 5/g" $file 5 | sed -i -r "s/max_log_file = .*/max_log_file = 100/g" $file 6 | sed -i -r "s/max_log_file_action = .*/max_log_file_action = rotate/g" $file 7 | sed -i -r "s/space_left = .*/space_left = 2048/g" $file 8 | sed -i -r "s/space_left_action = .*/space_left_action = syslog/g" $file 9 | sed -i -r "s/action_mail_acct = .*/action_mail_acct = root/g" $file 10 | sed -i -r "s/admin_space_left = .*/admin_space_left = 1024/g" $file 11 | sed -i -r "s/admin_space_left_action = .*/admin_space_left_action = SUSPEND/g" $file 12 | -------------------------------------------------------------------------------- /configurations/default-account/default-vpc/opscenter-resources/opscenter.tfvars: -------------------------------------------------------------------------------- 1 | # opscenter variables 2 | instance_type = "m5.xlarge" 3 | availability_zones = ["a", "b", "c"] 4 | opscenter_storage_cluster = "<<< YOUR_STORAGE_CLUSTER_NAME_HERE >>>" 5 | ssl_certificate_id = "<<< YOUR_SSL_CERT_ARN_HERE >>>" 6 | studio_enabled = "0" 7 | 8 | # any security group IDs in this list will be given access to the opscenter master node on ports 8443 and 9091 9 | ops_additional_sg_ids = [] 10 | 11 | # optional hosted zone parameter; if specified, opscenter will be given a record in Route 53 12 | hosted_zone_name = "" 13 | private_hosted_zone = "true" 14 | -------------------------------------------------------------------------------- /core-components/terraform/layers/vpc-resources/vpc-create.tf: -------------------------------------------------------------------------------- 1 | # for vpc-create module (new vpc) 2 | variable "vpc_cidr" {} 3 | variable "data_subnets" { type = list(string) } 4 | variable "ingress_subnets" { type = list(string) } 5 | variable "azs" { type = list(string) } 6 | 7 | module "vpc" { 8 | source = "../../modules/vpc-create" 9 | 10 | vpc_cidr = var.vpc_cidr 11 | vpc_name = var.vpc_name 12 | region = var.region 13 | azs = var.azs 14 | ingress_subnets = var.ingress_subnets 15 | data_subnets = var.data_subnets 16 | 17 | ingress_subnet_tag_prefix = var.ingress_subnet_tag_prefix 18 | data_subnet_tag_prefix = var.data_subnet_tag_prefix 19 | } 20 | -------------------------------------------------------------------------------- /core-components/ansible/playbooks/cluster-init.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # This playbook initializes the DSE application, and changes the default cassandra user password. 
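# The replacement password is read by cluster-init.sh from SSM Parameter Store at {{ secrets_ssm_location }}/cassandra_pass (stored base64-encoded) in the given region.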
3 | 4 | - name: initialize DSE application 5 | gather_facts: false 6 | hosts: 7 | - all 8 | serial: 1 9 | 10 | vars: 11 | secrets_ssm_location: { secrets_ssm_location } 12 | region: { region } 13 | 14 | tasks: 15 | - name: copy script 16 | copy: 17 | src: ../scripts/cluster-init.sh 18 | dest: /tmp/cluster-init.sh 19 | mode: a+x 20 | 21 | - name: change cassandra password 22 | become: true 23 | shell: 24 | cmd: /tmp/cluster-init.sh {{ secrets_ssm_location }} {{ region }} 25 | register: result 26 | -------------------------------------------------------------------------------- /core-components/terraform/modules/opscenter/route53.tf: -------------------------------------------------------------------------------- 1 | data "aws_route53_zone" "zone" { 2 | count = "${var.hosted_zone_name != "" ? 1 : 0}" 3 | name = var.hosted_zone_name 4 | private_zone = var.private_hosted_zone 5 | } 6 | 7 | resource "aws_route53_record" "route53_record" { 8 | count = var.hosted_zone_name != "" ? 1 : 0 9 | zone_id = data.aws_route53_zone.zone[0].zone_id 10 | name = "${var.hosted_zone_record_prefix}.${data.aws_route53_zone.zone[0].name}" 11 | type = "A" 12 | alias { 13 | name = aws_lb.opscenter.dns_name 14 | zone_id = aws_lb.opscenter.zone_id 15 | evaluate_target_health = false 16 | } 17 | } -------------------------------------------------------------------------------- /core-components/terraform/layers/vpc-resources/_outputs.tf: -------------------------------------------------------------------------------- 1 | output "vpc_id" { 2 | value = module.vpc.vpc_id 3 | } 4 | 5 | output "data_subnet_ids" { 6 | value = module.vpc.data_subnet_ids 7 | } 8 | 9 | output "data_subnet_cidr_blocks" { 10 | value = module.vpc.data_subnet_cidr_blocks 11 | } 12 | 13 | output "ingress_subnet_ids" { 14 | value = module.vpc.ingress_subnet_ids 15 | } 16 | 17 | output "ingress_subnet_cidr_blocks" { 18 | value = module.vpc.ingress_subnet_cidr_blocks 19 | } 20 | 21 | output "bastion_lb_dns" { 22 | value = module.bastion.bastion_lb_dns 23 | } 24 | 25 | output "bastion_sg_id" { 26 | value = module.bastion.bastion_sg_id 27 | } 28 | 29 | output "sg_ops_nodes_to_cas" { 30 | value = module.vpc-shared.sg_ops_nodes_to_cas 31 | } 32 | -------------------------------------------------------------------------------- /core-components/terraform/modules/cassandra/files/tuning_changes.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | function make_tuning_changes() { 4 | # k1 - change the read_ahead_kb for the ssd drives 5 | lsblk | awk '$6 == "disk" {print "/sys/class/block/"$1"/queue/read_ahead_kb"}' | xargs -I {} -n 1 sh -c 'echo 8 > {}' 6 | # k2 7 | lsblk | awk '$6 == "disk" {print "/sys/block/"$1"/queue/nomerges"}' | xargs -I {} -n 1 sh -c 'echo 1 > {}' 8 | # k3 9 | echo never > /sys/kernel/mm/transparent_hugepage/defrag 10 | # k5 11 | sysctl -w net.core.rmem_max=16777216 net.core.wmem_max=16777216 net.core.rmem_default=16777216 net.core.wmem_default=16777216 net.core.optmem_max=40960 net.ipv4.tcp_rmem="4096 87380 16777216" net.ipv4.tcp_wmem="4096 87380 16777216" 12 | } 13 | 14 | make_tuning_changes 15 | -------------------------------------------------------------------------------- /configurations/default-account/default-vpc/default-cluster/cluster.tfvars: -------------------------------------------------------------------------------- 1 | # deployment info 2 | availability_zones = ["a", "b", "c"] 3 | 4 | # cassandra configuration 5 | keyspace = "<<< YOUR_KEYSPACE_NAME_HERE >>>" 
6 | instance_type = "m5.4xlarge" 7 | 8 | # data volume configuration 9 | volume_type = "gp2" 10 | commitlog_size = "30" # gigabytes 11 | data_volume_size = "1024" # gigabytes 12 | iops = "1000" 13 | number_of_stripes = "1" 14 | raid_level = "-1" # RAID -1 signifies no disk striping; 1 volume = 1 mount point 15 | raid_block_size = "128" 16 | 17 | # settings for cassandra.yaml, cassandra-env.sh 18 | num_tokens = "256" 19 | max_heap_size = "8" # gigabytes 20 | -------------------------------------------------------------------------------- /core-components/terraform/modules/cassandra/post_deploy.tf: -------------------------------------------------------------------------------- 1 | ################### 2 | # additional scripts required by bootstrap process can be defined in 3 | # the cluster-specific configuration profile, and will be copied to the 4 | # node and executed by bootstrap.sh 5 | ################### 6 | 7 | locals { 8 | cluster_configs = "${path.module}/../../../../configurations/${local.cluster_key}/cluster-configs" 9 | key_prefix = "${local.cluster_key}/files" 10 | } 11 | 12 | resource "aws_s3_bucket_object" "post-deploy-scripts" { 13 | for_each = fileset(local.cluster_configs, "post-deploy-scripts/*.sh") 14 | bucket = var.tfstate_bucket 15 | key = "${local.key_prefix}/${each.value}" 16 | source = "${local.cluster_configs}/${each.value}" 17 | etag = filemd5("${local.cluster_configs}/${each.value}") 18 | } 19 | -------------------------------------------------------------------------------- /core-components/packer/cassandra/include/sysctl.conf: -------------------------------------------------------------------------------- 1 | net.core.rmem_max=134217728 2 | net.core.wmem_max=134217728 3 | net.core.somaxconn = 65000 4 | net.core.netdev_max_backlog=300000 5 | net.ipv4.tcp_rmem="4096 87380 134217728" 6 | net.ipv4.tcp_wmem="4096 65536 134217728" 7 | net.ipv4.tcp_moderate_rcvbuf=1 8 | net.ipv4.tcp_congestion_control=htcp 9 | net.ipv4.tcp_mtu_probing=1 10 | net.ipv4.tcp_ecn = 0 11 | vm.swappiness=0 12 | vm.dirty_background_bytes = 16777216 # 16MB 13 | vm.dirty_bytes = 4294967296 # 4G 14 | vm.max_map_count = 1073741824 15 | vm.min_free_kbytes=2097152 16 | vm.zone_reclaim_mode=0 17 | fs.file-max = 1073741824 18 | kernel.nmi_watchdog = 1 19 | kernel.panic_on_io_nmi = 1 20 | kernel.panic_on_unrecovered_nmi = 1 21 | kernel.pid_max = 999999 22 | kernel.unknown_nmi_panic = 1 23 | kernel.shmmax = 33554432 24 | kernel.msgmax = 33554432 25 | kernel.msgmnb = 33554432 -------------------------------------------------------------------------------- /core-components/packer/opscenter/include/sysctl.conf: -------------------------------------------------------------------------------- 1 | net.core.rmem_max=134217728 2 | net.core.wmem_max=134217728 3 | net.core.somaxconn = 65000 4 | net.core.netdev_max_backlog=300000 5 | net.ipv4.tcp_rmem="4096 87380 134217728" 6 | net.ipv4.tcp_wmem="4096 65536 134217728" 7 | net.ipv4.tcp_moderate_rcvbuf=1 8 | net.ipv4.tcp_congestion_control=htcp 9 | net.ipv4.tcp_mtu_probing=1 10 | net.ipv4.tcp_ecn = 0 11 | vm.swappiness=0 12 | vm.dirty_background_bytes = 16777216 # 16MB 13 | vm.dirty_bytes = 4294967296 # 4G 14 | vm.max_map_count = 1073741824 15 | vm.min_free_kbytes=2097152 16 | vm.zone_reclaim_mode=0 17 | fs.file-max = 1073741824 18 | kernel.nmi_watchdog = 1 19 | kernel.panic_on_io_nmi = 1 20 | kernel.panic_on_unrecovered_nmi = 1 21 | kernel.pid_max = 999999 22 | kernel.unknown_nmi_panic = 1 23 | kernel.shmmax = 33554432 24 | kernel.msgmax = 33554432 25 | 
kernel.msgmnb = 33554432 -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/regression.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 💥 Regression Report 3 | labels: 'regression' 4 | about: Report unexpected behavior that worked in previous versions 5 | --- 6 | 7 | ## 💥 Regression Report 8 | 9 | A clear and concise description of what the regression is. 10 | 11 | ## Last working version 12 | 13 | Worked up to version: 14 | 15 | Stopped working in version: 16 | 17 | ## To Reproduce 18 | 19 | Steps to reproduce the behavior: 20 | 21 | ## Expected behavior 22 | 23 | A clear and concise description of what you expected to happen. 24 | 25 | ## Link to repl or repo (highly encouraged) 26 | 27 | Please provide either a [repl.it demo](https://repl.it/languages/nodejs) or a minimal repository on GitHub. 28 | 29 | Issues without a reproduction link are likely to stall. 30 | 31 | ## Run `npx envinfo` 32 | 33 | Paste the results here: 34 | 35 | ```bash 36 | 37 | ``` 38 | -------------------------------------------------------------------------------- /core-components/ansible/playbooks/opscenter-attach-cluster.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # This playbook registers a cluster with opscenter. 3 | 4 | - name: register cluster with opscenter 5 | gather_facts: false 6 | hosts: 7 | - '{{ opscenter_ip }}' 8 | serial: 1 9 | 10 | vars: 11 | tfstate_bucket: { tfstate_bucket } 12 | target_cluster: { target_cluster } 13 | vpc_name: { vpc_name } 14 | account_name: { account_name } 15 | 16 | tasks: 17 | - name: register cluster 18 | become: true 19 | action: shell /etc/opscenter/scripts/register_cluster_with_opscenter.sh {{ tfstate_bucket }} {{ target_cluster }} {{ vpc_name }} {{ account_name }} 20 | register: result 21 | 22 | - debug: msg="{{ result.stdout }}" 23 | 24 | - name: restart opscenterd service 25 | become: true 26 | service: 27 | name: opscenterd 28 | state: restarted 29 | -------------------------------------------------------------------------------- /core-components/packer/fetch-credentials.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if ! type jq; then 4 | sudo yum install -y jq 5 | fi 6 | 7 | REGION=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone/ | sed 's/[a-z]$//') 8 | PROFILE=$(curl -s http://169.254.169.254/latest/meta-data/iam/security-credentials/) 9 | CREDS=$(curl -s http://169.254.169.254/latest/meta-data/iam/security-credentials/${PROFILE}) 10 | 11 | ACCESS_KEY=$(echo ${CREDS} | jq -r '.AccessKeyId') 12 | SECRET_KEY=$(echo ${CREDS} | jq -r '.SecretAccessKey') 13 | TOKEN=$(echo ${CREDS} | jq -r '.Token') 14 | 15 | AWS_DIR="/home/ec2-user/.aws" 16 | mkdir -p ${AWS_DIR} 17 | 18 | cat > ${AWS_DIR}/credentials << EOM 19 | [default] 20 | aws_access_key_id=${ACCESS_KEY} 21 | aws_secret_access_key=${SECRET_KEY} 22 | aws_session_token=${TOKEN} 23 | EOM 24 | 25 | cat > ${AWS_DIR}/config << EOM 26 | [default] 27 | region=${REGION} 28 | output=json 29 | EOM 30 | -------------------------------------------------------------------------------- /core-components/packer/opscenter/scripts/enable_eth1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd /etc/sysconfig/network-scripts 4 | if [[ ! 
-e /etc/sysconfig/network-scripts/ifcfg-eth1 ]]; then 5 | cp ifcfg-eth0 ifcfg-eth1 6 | cp ifcfg-eth0 ifcfg-eth0.bak 7 | sed -i 's/eth0/eth1/g' ifcfg-eth1 8 | sed -i 's/ONBOOT=yes/ONBOOT=no/g' ifcfg-eth0 9 | 10 | echo "FYI: if running this manually while SSHing on eth0, your session is about to hang..." 11 | 12 | /sbin/ifup eth1 13 | sleep 10 14 | /sbin/ifdown eth0 15 | sleep 3 16 | 17 | my_eni_ip=$(ifconfig -a eth1 | grep -w inet | awk '{print $2}' | sed 's,/.*$,,' | sed 's/\./-/g') 18 | my_eni_hostname=ip-${my_eni_ip}.compute.internal 19 | ip=$(ifconfig -a eth1 | grep -w inet | awk '{print $2}') 20 | hostnamectl set-hostname ${my_eni_hostname} 21 | echo "${ip} ${my_eni_hostname} ip-${my_eni_ip}" >> /etc/hosts 22 | echo "preserve_hostname: true" >> /etc/cloud/cloud.cfg 23 | fi 24 | -------------------------------------------------------------------------------- /core-components/terraform/modules/bastion/data/bastion-init.tpl: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | # Ensure our PATH is set correctly (on Amazon Linux, cfn-signal is in /opt/aws/bin) 4 | . ~/.bash_profile 5 | 6 | # Apply all available security updates 7 | yum update -y --security 8 | 9 | ############# 10 | ## authorized_keys 11 | ############# 12 | 13 | cat <<EOF > /usr/local/bin/sync_authorized_keys 14 | #!/bin/bash -ex 15 | TZ=America/Los_Angeles date 16 | timeout 60 aws s3api get-object --region ${region} --bucket ${ssh_bucket} --key ${ssh_prefix} /tmp/user-keys.yaml 17 | cat /tmp/user-keys.yaml | sed -n '/- name: ec2-user/,/- name:/p' | grep "ssh-rsa" | tr -s ' ' | cut -d ' ' -f3- > /home/ec2-user/.ssh/authorized_keys 18 | EOF 19 | 20 | chmod 744 /usr/local/bin/sync_authorized_keys 21 | /usr/local/bin/sync_authorized_keys || true 22 | cat <<EOF > /etc/cron.d/sync_authorized_keys 23 | */5 * * * * root /usr/local/bin/sync_authorized_keys >> /var/log/sync_authorized_keys.log 2>&1 24 | EOF 25 | -------------------------------------------------------------------------------- /configurations/default-account/default-vpc/vpc-resources/vpc-existing.tfvars: -------------------------------------------------------------------------------- 1 | 2 | # if TERRAFORM_MANAGED_VPC in variables.yaml is set to "false", this file will be used. 3 | # 4 | # false = no vpc will be created, and the following vars must be set in order to locate the existing vpc: 5 | # - vpc_id 6 | # - ingress_subnet_tag_prefix (default "Ingress", used for bastion) 7 | # - data_subnet_tag_prefix (default "Data", used for C* nodes) 8 | # 9 | # true = vpc will be created from scratch, and the following vars must be set: 10 | # - vpc_cidr (full vpc CIDR) 11 | # - vpc_prefix (name prefix for all vpc resources) 12 | # - azs (list of availability zones) 13 | # - ingress_subnets (list of CIDRs for ingress subnets, used for bastion) 14 | # - data_subnets (list of CIDRs for data subnets, used for C* nodes) 15 | 16 | # settings for EXISTING vpc 17 | vpc_id = "<<< YOUR_VPC_ID_HERE >>>" 18 | ingress_subnet_tag_prefix = "Ingress" 19 | data_subnet_tag_prefix = "Data" 20 | -------------------------------------------------------------------------------- /core-components/packer/cassandra/scripts/unmount_volumes.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script unmounts the primary and secondary data volumes 4 | 5 | v=$(df | grep "/mnt/cassandra-data-secondary" | awk '{print $1}') 6 | if [[ ! 
-z ${v//} ]]; then 7 | sudo umount /mnt/cassandra-data-secondary 8 | fi 9 | 10 | v=$(df | grep "/mnt/cassandra-data-primary" | awk '{print $1}') 11 | if [[ ! -z ${v//} ]]; then 12 | sudo umount /mnt/cassandra-data-primary 13 | 14 | lvm_devices=$(sudo pvscan | grep cas-data-vg | wc -l) 15 | 16 | if [[ ${lvm_devices} -gt 1 ]]; then 17 | # Marking the volume group inactive removes it from the kernel and prevents any 18 | # further activity on it. 19 | sudo vgchange -an cas-data-vg 20 | 21 | # We export the layout during attach but doing it again to make sure that no more 22 | # disks are added export the volume group. This prevents it from being accessed on 23 | # the "old" host system and prepares it to be removed. 24 | sudo vgexport cas-data-vg 25 | fi 26 | fi 27 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:latest 2 | RUN apt-get update && apt-get install -y \ 3 | alien \ 4 | unzip \ 5 | wget \ 6 | curl \ 7 | python-setuptools \ 8 | python2.7 \ 9 | python-pip \ 10 | jq \ 11 | ansible \ 12 | vim \ 13 | keychain \ 14 | bash \ 15 | sudo \ 16 | git-core \ 17 | --no-install-recommends \ 18 | && rm -rf /var/lib/apt/lists/* 19 | 20 | RUN pip install \ 21 | botocore \ 22 | boto3 \ 23 | awsretry \ 24 | awscli 25 | 26 | RUN chsh -s /bin/bash 27 | 28 | RUN wget --no-check-certificate https://releases.hashicorp.com/terraform/0.12.24/terraform_0.12.24_linux_amd64.zip \ 29 | && unzip terraform_0.12.24_linux_amd64.zip \ 30 | && mv terraform /usr/local/bin/ 31 | 32 | RUN export VER="1.4.1" \ 33 | && wget https://releases.hashicorp.com/packer/${VER}/packer_${VER}_linux_amd64.zip \ 34 | && unzip packer_${VER}_linux_amd64.zip \ 35 | && mv packer /usr/local/bin 36 | 37 | RUN pip install awscli --upgrade 38 | 39 | COPY credentials /root/.aws/credentials.tpl 40 | -------------------------------------------------------------------------------- /core-components/terraform/layers/vpc-resources/_variables.tf: -------------------------------------------------------------------------------- 1 | ################### 2 | # these are common vars; vpc-info.tf and vpc-create.tf contain their own var definitions 3 | ################### 4 | 5 | variable "region" {} 6 | variable "tfstate_region" {} 7 | variable "role_arn" {} 8 | variable "profile" {} 9 | variable "ami_owner_id" {} 10 | 11 | # for vpc-info/vpc-create module (either) 12 | variable "ingress_subnet_tag_prefix" { default = "Ingress" } 13 | variable "data_subnet_tag_prefix" { default = "Data" } 14 | 15 | # for parameter-store module 16 | variable "cluster_name" { type = string } 17 | variable "vpc_name" { type = string } 18 | variable "account_name" { type = string } 19 | 20 | # for bastion module 21 | variable "account_id" { type = string } 22 | variable "tfstate_bucket" { type = string } 23 | variable "bastion_ami_prefix" { default = "amzn2-ami-hvm-2.0" } 24 | variable "ingress_cidrs" { type = list } 25 | variable "existing_bastion_sg_id" { default = "" } 26 | variable "vpc_tags" { default = {} } 27 | variable "account_tags" { default = {} } 28 | -------------------------------------------------------------------------------- /core-components/ansible/scripts/cluster-init.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #set -x 3 | secrets_ssm_location="${1}" 4 | region="${2}" 5 | 6 | cassandra_pass=$(aws --region ${region} ssm get-parameter --with-decryption --name 
"${secrets_ssm_location}/cassandra_pass" | jq -r '.[].Value' | base64 -d) 7 | 8 | # Attempt to login with default cassandra password 9 | cqlsh --ssl -u cassandra -p cassandra -e exit 10 | if [[ $? -eq 0 ]]; then 11 | UPDATE_STMT="ALTER USER cassandra WITH PASSWORD '${cassandra_pass}';" 12 | cqlsh --ssl -u cassandra -p cassandra -e "${UPDATE_STMT}" 13 | echo "Password for cassandra user updated" 14 | else 15 | cqlsh --ssl -u cassandra -p ${cassandra_pass} -e exit 16 | if [[ $? -eq 0 ]]; then 17 | echo "Password for cassandra user already updated" 18 | else 19 | echo "ERROR: Unable to access cqlsh via ssl with either the default or provided password (from ${secrets_s3_location})." 20 | echo " - This may indicate a problem with the password, or an issue with ssl certs, or it may indicate DSE isn't running." 21 | return 1 22 | fi 23 | fi 24 | -------------------------------------------------------------------------------- /core-components/terraform/modules/vpc-info/vpc-info.tf: -------------------------------------------------------------------------------- 1 | data "aws_vpc" "vpc" { 2 | id = var.vpc_id 3 | } 4 | 5 | ############# 6 | # The data sources in this module will be gathered if terraform_managed_vpc is 7 | # set to false, and an existing vpc_id is provided. 8 | ############# 9 | 10 | data "aws_subnet_ids" "ingress_subnet_ids" { 11 | vpc_id = var.vpc_id 12 | tags = { 13 | Name = "${var.ingress_subnet_tag_prefix}*" 14 | } 15 | } 16 | 17 | data "aws_subnet_ids" "data_subnet_ids" { 18 | vpc_id = var.vpc_id 19 | tags = { 20 | Name = "${var.data_subnet_tag_prefix}*" 21 | } 22 | } 23 | 24 | ######### 25 | # data subnets 26 | ######### 27 | 28 | data "aws_subnet" "data" { 29 | count = length(data.aws_subnet_ids.data_subnet_ids.ids) 30 | id = tolist(data.aws_subnet_ids.data_subnet_ids.ids)[count.index] 31 | } 32 | 33 | ######### 34 | # ingress subnets 35 | ######### 36 | 37 | data "aws_subnet" "ingress" { 38 | count = length(data.aws_subnet_ids.ingress_subnet_ids.ids) 39 | id = tolist(data.aws_subnet_ids.ingress_subnet_ids.ids)[count.index] 40 | } 41 | -------------------------------------------------------------------------------- /configurations/default-account/default-vpc/vpc-resources/vpc-new.tfvars: -------------------------------------------------------------------------------- 1 | 2 | # if TERRAFORM_MANAGED_VPC in variables.yaml is set to "true", this file will be used. 
3 | # 4 | # false = no vpc will be created, and the following vars must be set in order to locate the existing vpc: 5 | # - vpc_id 6 | # - ingress_subnet_tag_prefix (default "Ingress", used for bastion) 7 | # - data_subnet_tag_prefix (default "Data", used for C* nodes) 8 | # 9 | # true = vpc will be created from scratch, and the following vars must be set: 10 | # - vpc_cidr (full vpc CIDR) 11 | # - vpc_prefix (name prefix for all vpc resources) 12 | # - azs (list of availability zones) 13 | # - ingress_subnets (list of CIDRs for ingress subnets, used for bastion) 14 | # - data_subnets (list of CIDRs for data subnets, used for C* nodes) 15 | 16 | # settings for NEW vpc 17 | vpc_cidr = "172.0.0.0/16" 18 | azs = ["a", "b", "c"] 19 | ingress_subnets = ["172.0.10.0/24", "172.0.11.0/24", "172.0.12.0/24"] 20 | data_subnets = ["172.0.20.0/24", "172.0.21.0/24", "172.0.22.0/24"] 21 | -------------------------------------------------------------------------------- /core-components/terraform/modules/cassandra/templates.tf: -------------------------------------------------------------------------------- 1 | data "template_file" "dse-init" { 2 | template = file("${path.module}/files/dse-init.tpl") 3 | 4 | vars = { 5 | dc_name = var.datacenter 6 | auto_start_dse = var.auto_start_dse 7 | region = var.region 8 | ssh_bucket = var.tfstate_bucket 9 | ssh_prefix = "${local.cluster_key}/files/ssh/ec2-user/user-keys.yaml" 10 | graph_enabled = var.graph_enabled 11 | solr_enabled = var.solr_enabled 12 | spark_enabled = var.spark_enabled 13 | ec2_tag_map = jsonencode(merge(var.ec2_tags, local.required_ec2_tags)) 14 | } 15 | } 16 | 17 | data "template_cloudinit_config" "cassandra" { 18 | gzip = false 19 | base64_encode = false 20 | 21 | part { 22 | content_type = "text/cloud-config" 23 | content = file( 24 | "${path.module}/../../../../configurations/${local.cluster_key}/user-keys.yaml", 25 | ) 26 | } 27 | 28 | part { 29 | filename = "dse-init.sh" 30 | content_type = "text/x-shellscript" 31 | content = data.template_file.dse-init.rendered 32 | } 33 | } 34 | 35 | -------------------------------------------------------------------------------- /core-components/ansible/playbooks/cluster-start.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # This playbook starts the DSE service on the seed nodes, then the non-seeds, one at a time.
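# Each host is handled serially (serial: 1); a node must answer on the ssl client port (9142) and report UN in nodetool status before the play moves on to the next host.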
3 | 4 | - name: start DSE service 5 | gather_facts: false 6 | hosts: 7 | - '{{ host_list }}' 8 | serial: 1 9 | 10 | tasks: 11 | - name: capture host IP 12 | shell: hostname -I | awk {'print $1'} 13 | register: host_ip 14 | 15 | - name: gather number of volumes mounted 16 | action: shell df -h | grep -i /mnt/cassandra | wc -l 17 | register: cas_vols_mounted 18 | 19 | - debug: msg="{{ cas_vols_mounted.stdout }}" 20 | 21 | - name: start DSE service 22 | become: true 23 | shell: service dse start 24 | when: cas_vols_mounted.stdout == "2" 25 | 26 | # 9142 = dse client port (ssl) 27 | - wait_for: 28 | port: 9142 29 | host: "{{ host_ip.stdout }}" 30 | delay: 10 31 | timeout: 600 32 | 33 | - name: wait for UN 34 | shell: nodetool status | grep "{{ host_ip.stdout }}" | awk {'print $1'} 35 | register: result 36 | until: result.stdout.find("UN") != -1 37 | retries: 5 38 | delay: 10 39 | -------------------------------------------------------------------------------- /core-components/packer/cassandra/scripts/cas_get_tag_values.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import sys 4 | from optparse import OptionParser 5 | import boto3 6 | 7 | if __name__ == "__main__": 8 | 9 | tag_found = False 10 | 11 | parser = OptionParser() 12 | parser.add_option("-n", "--node", dest="node", help="ip address of the node") 13 | parser.add_option("-t", "--tag", dest="tag", help="tag to query") 14 | parser.add_option("-r", "--region", dest="region", help="region", default="us-west-2") 15 | 16 | (options, args) = parser.parse_args(sys.argv) 17 | session = boto3.Session(region_name=options.region) 18 | 19 | client = session.client("ec2") 20 | response = client.describe_instances(Filters=[{ 'Name': "network-interface.addresses.private-ip-address", "Values": [options.node]}]) 21 | for reservation in (response["Reservations"]): 22 | for instance in reservation["Instances"]: 23 | for tag in instance["Tags"]: 24 | if tag["Key"] == options.tag: 25 | tag_found = True 26 | print(tag["Value"]) 27 | 28 | if not tag_found: 29 | print("Null") 30 | -------------------------------------------------------------------------------- /core-components/terraform/modules/bastion/cloud_init.tf: -------------------------------------------------------------------------------- 1 | ############################################# 2 | # User data (cloud-init) from template 3 | ############################################# 4 | 5 | data "template_file" "bastion-tpl" { 6 | count = length(var.existing_bastion_sg_id) == 0 ? 1 : 0 7 | template = file("${path.module}/data/bastion-init.tpl") 8 | vars = { 9 | account_id = var.account_id 10 | region = var.region 11 | ssh_bucket = var.tfstate_bucket 12 | ssh_prefix = "${var.account_name}/${var.vpc_name}/vpc-resources/files/ssh/ec2-user/user-keys.yaml" 13 | } 14 | } 15 | 16 | data "template_cloudinit_config" "bastion-init" { 17 | count = length(var.existing_bastion_sg_id) == 0 ? 
1 : 0 18 | gzip = false 19 | base64_encode = false 20 | 21 | part { 22 | content_type = "text/cloud-config" 23 | content = file( 24 | "${path.module}/../../../../configurations/${var.account_name}/${var.vpc_name}/vpc-resources/user-keys.yaml", 25 | ) 26 | } 27 | part { 28 | filename = "bastion-init.sh" 29 | content_type = "text/x-shellscript" 30 | content = data.template_file.bastion-tpl[0].rendered 31 | } 32 | } 33 | 34 | -------------------------------------------------------------------------------- /core-components/terraform/modules/cassandra/enis.tf: -------------------------------------------------------------------------------- 1 | resource "aws_network_interface" "cas-seed-eni" { 2 | count = length(var.availability_zones) 3 | subnet_id = element(var.cluster_subnet_ids, count.index) 4 | security_groups = [ 5 | aws_security_group.cas-bastion-access.id, 6 | aws_security_group.cas-client-access.id, 7 | aws_security_group.cas-internode.id, 8 | var.sg_ops_nodes_to_cas, 9 | ] 10 | 11 | tags = merge(map("Name", "${var.cluster_name}-seed-${count.index}"), var.ec2_tags, local.required_ec2_tags) 12 | } 13 | 14 | resource "aws_network_interface" "cas-non-seed-eni" { 15 | count = (var.dse_nodes_per_az - 1) * length(var.availability_zones) 16 | 17 | # round-robin the non-seeds into the available subnets 18 | subnet_id = element( 19 | var.cluster_subnet_ids, 20 | count.index % length(var.availability_zones), 21 | ) 22 | security_groups = [ 23 | aws_security_group.cas-bastion-access.id, 24 | aws_security_group.cas-client-access.id, 25 | aws_security_group.cas-internode.id, 26 | var.sg_ops_nodes_to_cas, 27 | ] 28 | 29 | tags = merge(map("Name", "${var.cluster_name}-non-seed-${count.index}"), var.ec2_tags, local.required_ec2_tags) 30 | 31 | } 32 | 33 | -------------------------------------------------------------------------------- /core-components/terraform/modules/bastion/_variables.tf: -------------------------------------------------------------------------------- 1 | variable "region" {} 2 | variable "tfstate_bucket" {} 3 | variable "account_id" {} 4 | variable "account_name" {} 5 | 6 | variable "vpc_id" { type = string } 7 | variable "vpc_name" {} 8 | variable "vpc_cidr" {} 9 | 10 | variable "ami_prefix" { 11 | description = "Baseline AMI to use." 12 | } 13 | variable "instance_type" { 14 | description = "The instance type for bastion nodes." 
15 | default = "t3.micro" 16 | } 17 | 18 | variable "data_subnet_ids" { type = list(string) } 19 | variable "ingress_subnet_ids" { type = list(string) } 20 | variable "ingress_sg_port" { default = 22 } 21 | variable "ingress_sg_protocol" { default = "tcp"} 22 | variable "ingress_sg_prefix" { default = "bastion-ssh-ingress" } 23 | 24 | # created by account-resources layer 25 | variable "bastion_role_arn" { type = string } 26 | 27 | # cidr list for SSH ingress 28 | variable "bastion_ingress_cidrs" { type = list(string) } 29 | 30 | # if this is provided, do no work; output this variable to tfstate and exit 31 | variable "existing_bastion_sg_id" { default = "" } 32 | 33 | # tags 34 | variable "ec2_tags" {} 35 | 36 | locals { 37 | required_ec2_tags = { 38 | "Role" = "bastion" 39 | } 40 | } 41 | 42 | -------------------------------------------------------------------------------- /core-components/packer/opscenter/scripts/cluster_conf.templ: -------------------------------------------------------------------------------- 1 | [jmx] 2 | username = 3 | password = 4 | port = 7199 5 | 6 | [kerberos_client_principals] 7 | 8 | [kerberos] 9 | 10 | [storage_cassandra] 11 | username = cassandra 12 | keyspace = OpsCenter_##CLUSTER## 13 | seed_hosts = ##STORAGE_SEEDS## 14 | cql_port = 9142 15 | password = ##STORAGE_PASS## 16 | connect_timeout = 6.0 17 | ssl_keystore = ##STORAGE_KEYSTORE## 18 | ssl_keystore_password = ##STORAGE_KEYPASS## 19 | ssl_truststore = ##STORAGE_TRUSTSTORE## 20 | ssl_truststore_password = ##STORAGE_TRUSTPASS## 21 | 22 | [collection] 23 | nodelist_poll_period = 1800 24 | 25 | [agents] 26 | 27 | [kerberos_hostnames] 28 | 29 | [kerberos_services] 30 | 31 | [cassandra] 32 | username = cassandra 33 | seed_hosts = ##CAS_CLUSTER_SEEDS## 34 | cql_port = 9142 35 | password = ##CAS_CLUSTER_PASS## 36 | ssl_keystore = ##CAS_CLUSTER_KEYSTORE## 37 | ssl_keystore_password = ##CAS_CLUSTER_KEYPASS## 38 | ssl_truststore = ##CAS_CLUSTER_TRUSTSTORE## 39 | ssl_truststore_password = ##CAS_CLUSTER_TRUSTPASS## 40 | 41 | [agents] 42 | ssl_keystore = /etc/dse/cassandra/keystores/server-keystore.jks 43 | ssl_keystore_password = ##CAS_CLUSTER_KEYPASS## 44 | storage_ssl_keystore = /etc/dse/cassandra/keystores/storage-server-keystore.jks 45 | storage_ssl_keystore_password = ##STORAGE_KEYPASS## 46 | -------------------------------------------------------------------------------- /core-components/terraform/modules/bastion/lb.tf: -------------------------------------------------------------------------------- 1 | ############################################# 2 | # NLB for bastion nodes 3 | ############################################# 4 | 5 | locals { 6 | bastion_lb_tags = { 7 | "Region" = "${var.region}" 8 | } 9 | } 10 | resource "aws_lb" "bastion-nlb" { 11 | count = length(var.existing_bastion_sg_id) == 0 ? 1 : 0 12 | name_prefix = "bast-" 13 | 14 | # ALBs don't support TCP (22), use NLB instead 15 | load_balancer_type = "network" 16 | 17 | internal = false 18 | subnets = var.ingress_subnet_ids 19 | 20 | enable_cross_zone_load_balancing = true 21 | 22 | tags = merge(map("Name", "bastion-lb"), var.ec2_tags, local.required_ec2_tags, local.bastion_lb_tags) 23 | } 24 | 25 | resource "aws_lb_target_group" "bastion-targets" { 26 | count = length(var.existing_bastion_sg_id) == 0 ? 
1 : 0 27 | name_prefix = "bast-" 28 | port = 22 29 | protocol = "TCP" 30 | vpc_id = var.vpc_id 31 | 32 | # stickiness doesn't work for NLB, but terraform won't let us disable it; therefore, empty list 33 | } 34 | 35 | resource "aws_lb_listener" "bastion-listener" { 36 | count = length(var.existing_bastion_sg_id) == 0 ? 1 : 0 37 | load_balancer_arn = aws_lb.bastion-nlb[0].id 38 | port = "22" 39 | protocol = "TCP" 40 | 41 | default_action { 42 | target_group_arn = aws_lb_target_group.bastion-targets[0].id 43 | type = "forward" 44 | } 45 | } 46 | 47 | -------------------------------------------------------------------------------- /core-components/roles/packer.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Action": [ 7 | "ec2:AttachVolume", 8 | "ec2:AuthorizeSecurityGroupIngress", 9 | "ec2:CopyImage", 10 | "ec2:CreateImage", 11 | "ec2:CreateKeypair", 12 | "ec2:CreateSecurityGroup", 13 | "ec2:CreateSnapshot", 14 | "ec2:CreateTags", 15 | "ec2:CreateVolume", 16 | "ec2:DeleteKeyPair", 17 | "ec2:DeleteSecurityGroup", 18 | "ec2:DeleteSnapshot", 19 | "ec2:DeleteVolume", 20 | "ec2:DeregisterImage", 21 | "ec2:Describe*", 22 | "ec2:DetachVolume", 23 | "ec2:GetPasswordData", 24 | "ec2:ModifyImageAttribute", 25 | "ec2:ModifyInstanceAttribute", 26 | "ec2:ModifySnapshotAttribute", 27 | "ec2:RegisterImage", 28 | "ec2:RunInstances", 29 | "ec2:StopInstances", 30 | "ec2:TerminateInstances" 31 | ], 32 | "Resource": "*" 33 | }, 34 | { 35 | "Effect": "Allow", 36 | "Action": "sts:AssumeRole", 37 | "Resource": "*" 38 | }, 39 | { 40 | "Effect": "Allow", 41 | "Action": [ 42 | "iam:AddRoleToInstanceProfile", 43 | "iam:CreateInstanceProfile", 44 | "iam:CreateRole", 45 | "iam:Get*", 46 | "iam:List*", 47 | "iam:PassRole", 48 | "iam:PutRolePolicy" 49 | ], 50 | "Resource": "*" 51 | } 52 | ] 53 | } 54 | -------------------------------------------------------------------------------- /core-components/roles/terraform.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Action": [ 7 | "s3:CreateBucket", 8 | "s3:DeleteBucketPolicy", 9 | "s3:DeleteObject", 10 | "s3:DeleteObjectVersion", 11 | "s3:Get*", 12 | "s3:List*", 13 | "s3:PutBucket*", 14 | "s3:PutObject" 15 | ], 16 | "Resource": "*" 17 | }, 18 | { 19 | "Effect": "Allow", 20 | "Action": [ 21 | "iam:AddRoleToInstanceProfile", 22 | "iam:AttachRolePolicy", 23 | "iam:CreateInstanceProfile", 24 | "iam:CreatePolicy", 25 | "iam:CreatePolicyVersion", 26 | "iam:CreateRole", 27 | "iam:DeletePolicy", 28 | "iam:DetachRolePolicy", 29 | "iam:Get*", 30 | "iam:List*", 31 | "iam:PassRole" 32 | ], 33 | "Resource": [ 34 | "*" 35 | ] 36 | }, 37 | { 38 | "Effect": "Allow", 39 | "Action": [ 40 | "ssm:DeleteParameter", 41 | "ssm:DeleteParameters", 42 | "ssm:DescribeParameters", 43 | "ssm:GetParameter*", 44 | "ssm:ListTagsForResource", 45 | "ssm:PutParameter" 46 | ], 47 | "Resource": "*" 48 | }, 49 | { 50 | "Effect": "Allow", 51 | "Action": [ 52 | "route53:ChangeResourceRecordSets", 53 | "route53:Get*", 54 | "route53:List*" 55 | ], 56 | "Resource": "*" 57 | } 58 | ] 59 | } 60 | -------------------------------------------------------------------------------- /core-components/terraform/modules/vpc-shared/opscenter_common_sg.tf: -------------------------------------------------------------------------------- 1 | # a single copy of this SG should be 
created for the vpc and shared across all clusters 2 | resource "aws_security_group" "ops_to_cas" { 3 | name = "sg_ops-nodes-to-cas-${var.account_id}" 4 | description = "Allows inbound opscenter management access for Cassandra nodes" 5 | vpc_id = var.vpc_id 6 | 7 | ingress { 8 | # JMX Monitoring port on node 9 | from_port = 7199 10 | to_port = 7199 11 | protocol = "tcp" 12 | self = true 13 | } 14 | ingress { 15 | # The native transport port for the cluster configured in native_transport_port in cassandra.yaml. 16 | from_port = 9042 17 | to_port = 9042 18 | protocol = "tcp" 19 | self = true 20 | } 21 | ingress { 22 | from_port = 9142 23 | to_port = 9142 24 | protocol = "tcp" 25 | self = true 26 | } 27 | ingress { 28 | from_port = 61620 29 | to_port = 61621 30 | protocol = "tcp" 31 | self = true 32 | } 33 | 34 | egress { 35 | from_port = 0 36 | to_port = 0 37 | protocol = "-1" 38 | cidr_blocks = ["0.0.0.0/0"] 39 | } 40 | 41 | lifecycle { 42 | create_before_destroy = true 43 | } 44 | 45 | tags = { 46 | Name = "sg-ops-nodes-to-cas" 47 | managedBy = "Terraform" 48 | account = var.account_id 49 | region = var.region 50 | pool = "opscenter" 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /core-components/ansible/playbooks/cluster-update-datastax-agent.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # This playbook registers the cluster with opscenter via the DS agent. 3 | 4 | - name: update Datastax Agent to connect with OpsCenter 5 | gather_facts: false 6 | hosts: 7 | - all 8 | 9 | vars: 10 | account_name: { account_name } 11 | vpc_name: { vpc_name } 12 | tfstate_bucket: { tfstate_bucket } 13 | 14 | tasks: 15 | - name: add opscenter IP to datastax agent address.yaml 16 | become: true 17 | shell: | 18 | export region=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone | sed 's/\(.*\)[a-z]/\1/') 19 | export opscenter_ip=$(aws ssm get-parameter --name "/dse/{{ account_name }}/{{ vpc_name }}/opscenter-resources/opscenter_primary_private_ip" --region ${region} | jq -r '.[].Value') 20 | sudo sed -i "s/[# ]*stomp_interface:.*/stomp_interface: ${opscenter_ip}/" /var/lib/datastax-agent/conf/address.yaml 21 | aws s3 cp s3://{{ tfstate_bucket }}/{{ account_name }}/{{ vpc_name }}/opscenter-resources/files/etc/keystores/storage-server-keystore.jks /etc/dse/cassandra/keystores/storage-server-keystore.jks 22 | chmod 755 /etc/dse/cassandra/keystores/storage-server-keystore.jks 23 | chown cassandra:cassandra /etc/dse/cassandra/keystores/storage-server-keystore.jks 24 | register: result 25 | 26 | - debug: msg="{{ result.stdout }}" 27 | 28 | - name: restart datastax-agent service 29 | become: true 30 | service: 31 | name: datastax-agent 32 | state: restarted 33 | -------------------------------------------------------------------------------- /core-components/terraform/modules/cassandra/_variables.tf: -------------------------------------------------------------------------------- 1 | variable "tfstate_bucket" {} 2 | variable "region" {} 3 | variable "account_name" {} 4 | variable "vpc_name" {} 5 | variable "cluster_name" {} 6 | 7 | locals { 8 | cluster_key = "${var.account_name}/${var.vpc_name}/${var.cluster_name}" 9 | } 10 | 11 | locals { 12 | required_ec2_tags = { 13 | "Account" = "${var.account_id}", 14 | "AccountName" = "${var.account_name}", 15 | "VpcName" = "${var.vpc_name}", 16 | "ClusterName" = "${var.cluster_name}", 17 | "Tfstate" = "${var.tfstate_bucket}", 18 | "ManagedBy" = 
"terraform", 19 | "Region" = "${var.region}" 20 | } 21 | } 22 | 23 | variable "ami_owner_id" {} 24 | variable "ami_prefix" {} 25 | variable "instance_type" {} 26 | variable "account_id" {} 27 | variable "availability_zones" { type = list(string) } 28 | variable "vpc_id" {} 29 | variable "datacenter" {} 30 | 31 | variable "sg_ops_nodes_to_cas" {} 32 | variable "sg_bas_nodes_to_all" {} 33 | 34 | variable "dse_nodes_per_az" {} 35 | 36 | variable "cluster_subnet_cidrs" { type = list(string) } 37 | variable "cluster_subnet_ids" { type = list(string) } 38 | 39 | variable "graph_enabled" { default = 0 } 40 | variable "solr_enabled" { default = 0 } 41 | variable "spark_enabled" { default = 0 } 42 | variable "auto_start_dse" {} 43 | 44 | # created by account-resources layer 45 | variable "cassandra_profile_arn" { type = string } 46 | 47 | # settings for cassandra node root volume 48 | variable "root_volume_type" {} 49 | variable "root_volume_size" {} 50 | variable "root_volume_iops" {} 51 | 52 | # tags 53 | variable "ec2_tags" {} 54 | -------------------------------------------------------------------------------- /core-components/ansible/playbooks/cluster-restart.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # This playbook restarts the DSE service on the seed nodes, then the non-seeds, one at time. 3 | 4 | - name: restart DSE service 5 | gather_facts: false 6 | hosts: 7 | - '{{ host_list }}' 8 | order: inventory 9 | serial: 1 10 | 11 | tasks: 12 | - name: capture host IP 13 | shell: hostname -I | awk {'print $1'} 14 | register: host_ip 15 | 16 | - name: gather number of volumes mounted 17 | action: shell df -h | grep -i /mnt/cassandra | wc -l 18 | register: cas_vols_mounted 19 | 20 | - name: nodetool flush 21 | shell: "nodetool flush" 22 | ignore_errors: true 23 | 24 | - name: gather drain time 25 | action: shell date 26 | register: drain_time 27 | 28 | - debug: msg="{{ drain_time.stdout }}" 29 | 30 | - name: nodetool drain 31 | shell: "nodetool drain" 32 | ignore_errors: true 33 | 34 | - name: restart DSE service 35 | service: 36 | name: dse 37 | state: restarted 38 | when: cas_vols_mounted.stdout == "2" 39 | 40 | # 9142 = dse client port (ssl) 41 | - wait_for: 42 | port: 9142 43 | host: "{{ host_ip.stdout }}" 44 | delay: 10 45 | timeout: 600 46 | 47 | - name: wait for UN 48 | shell: nodetool status | grep "{{ host_ip.stdout }}" | awk {'print $1'} 49 | register: result 50 | until: result.stdout.find("UN") != -1 51 | retries: 30 52 | delay: 10 53 | 54 | - name: restart datastax agent 55 | become: true 56 | service: 57 | name: datastax-agent 58 | state: restarted 59 | 60 | - name: sleep for 3 min 61 | become: true 62 | action: shell sleep 180 63 | -------------------------------------------------------------------------------- /core-components/terraform/layers/opscenter-resources/_variables.tf: -------------------------------------------------------------------------------- 1 | variable "tfstate_bucket" {} 2 | variable "tfstate_region" {} 3 | variable "profile" {} 4 | variable "region" {} 5 | 6 | variable "account_name" {} 7 | variable "vpc_name" {} 8 | variable "cluster_name" {} 9 | 10 | variable "ami_owner_id" {} 11 | variable "ami_opscenter_prefix" { default = "dse-opscenter" } 12 | variable "instance_type" { default = "m5.xlarge" } 13 | 14 | variable "account_id" {} 15 | variable "availability_zones" { type = list(string) } 16 | 17 | variable "role_arn" {} 18 | 19 | variable "ops_additional_sg_ids" { 20 | type = list(string) 21 | default = [] 
22 | } 23 | variable "ssl_certificate_id" {} 24 | variable "studio_enabled" { default = "0" } 25 | 26 | # cidr list for HTTPS ingress 27 | variable "ingress_cidrs" { type = list(string) } 28 | 29 | variable "opscenter_storage_cluster" {} 30 | 31 | # opscenter alert configuration 32 | variable "alert_email_enabled" { default = "0" } 33 | variable "alert_levels" { default = "ERROR,CRITICAL,ALERT" } 34 | variable "alert_clusters" { default = "" } 35 | variable "alert_email_smtp_host" { default = "" } 36 | variable "alert_email_smtp_user" { default = "" } 37 | variable "alert_email_smtp_pass" { default = "" } 38 | variable "alert_email_from_addr" { default = "" } 39 | variable "alert_email_to_addr" { default = "" } 40 | variable "alert_email_env" { default = "" } 41 | 42 | # optional hosted zone configuration 43 | variable "hosted_zone_name" { default = "" } 44 | variable "private_hosted_zone" { default = "false" } 45 | variable "hosted_zone_record_prefix" { default = "opscenter" } 46 | 47 | # tags 48 | variable "account_tags" { default = {} } 49 | variable "vpc_tags" { default = {} } 50 | variable "opscenter_tags" { default = {} } 51 | -------------------------------------------------------------------------------- /docs/2.PACKER.md: -------------------------------------------------------------------------------- 1 | # Baking AMIs 2 | 3 | Ensure credentials in your default profile are refreshed and ready to go, then bake an AMI with the [bake-ami.sh](../core-components/bake-ami.sh) 4 | script: 5 | ``` 6 | $ ./core-components/bake-ami.sh 7 | Usage: 8 | bake-ami.sh 9 | -a : [Required] account name 10 | -t : [Required] ami type -> cassandra | opscenter 11 | -i : base ami_id 12 | ``` 13 | * Running with `-t cassandra` will produce a **DSE Cassandra** image. 14 | * Running with `-t opscenter` will produce a **DSE OpsCenter** image. 15 | * In both cases, the base AMI should be Amazon Linux 2.0, and can be provided in one of two ways: 16 | * With the `-i` option 17 | * As `PACKER_BASE_AMI_ID` in [variables.yaml](../configurations/default-account/variables.yaml) 18 | * If not specified, the script will simply use awscli to look up the newest amzn2 AMI. 19 | * **Note:** the `PACKER_SUBNET_ID` variable must be a public subnet with a route to an Internet Gateway. Otherwise, Packer 20 | will time out while waiting for SSH to become available. 21 | 22 | Known issues: 23 | 24 | > `Error launching source instance: PendingVerification: Your request for accessing resources in this region is being validated...` 25 | 26 | A _brand new_ AWS account will sometimes return this error. Just wait it out and try again later. 27 | 28 | > 404 error while running bake-ami.sh 29 | 30 | The script uses `curl -s -4 ifconfig.co` to get your external IP address (for use in a security group's inbound rules). 31 | Sometimes ifconfig doesn't respond. Just run the command again. 32 | 33 | > `Another app is currently holding the yum lock; waiting for it to exit...` 34 | 35 | This happens (often) at the start of a Packer build. Just wait it out, the yum lock should be cleared after a few seconds. 
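 35 | 36 | For reference, a typical invocation looks like the following (the account name matches the sample configuration under `configurations/`; the base AMI ID is purely illustrative): 37 | ``` 38 | # bake a DSE Cassandra image, letting the script look up the newest amzn2 base AMI 39 | $ ./core-components/bake-ami.sh -a default-account -t cassandra 40 | 41 | # bake an OpsCenter image from a pinned base AMI 42 | $ ./core-components/bake-ami.sh -a default-account -t opscenter -i ami-0123456789abcdef0 43 | ```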
-------------------------------------------------------------------------------- /core-components/terraform/layers/cluster-resources/_variables.tf: -------------------------------------------------------------------------------- 1 | variable "tfstate_bucket" {} 2 | variable "tfstate_region" {} 3 | variable "profile" {} 4 | variable "region" {} 5 | variable "role_arn" {} 6 | 7 | variable "account_name" {} 8 | variable "vpc_name" {} 9 | variable "cluster_name" {} 10 | 11 | variable "ami_owner_id" {} 12 | variable "ami_prefix" { default = "dse-cassandra" } 13 | variable "instance_type" { default = "m5.2xlarge" } 14 | 15 | variable "account_id" {} 16 | variable "availability_zones" { type = list(string) } 17 | 18 | variable "dse_nodes_per_az" { default = 1 } 19 | variable "auto_start_dse" { default = 1 } 20 | variable "graph_enabled" { default = 0 } 21 | variable "solr_enabled" { default = 0 } 22 | variable "spark_enabled" { default = 0 } 23 | 24 | # settings for cassandra node root volume 25 | variable "root_volume_type" { default = "gp2" } 26 | variable "root_volume_size" { default = "100" } 27 | variable "root_volume_iops" { default = "300" } 28 | 29 | # tags 30 | variable "account_tags" { default = {} } 31 | variable "vpc_tags" { default = {} } 32 | variable "cluster_tags" { default = {} } 33 | 34 | # the following vars are passed directly through to parameter-store, and are not required by the 35 | # module implementation otherwise. 36 | variable "keyspace" {} 37 | variable "volume_type" {} 38 | variable "iops" {} 39 | variable "data_volume_size" {} 40 | variable "commitlog_size" {} 41 | variable "commitlog_volume_type" { default = "gp2" } 42 | variable "commitlog_iops" { default = "null" } 43 | variable "number_of_stripes" { default = "1" } 44 | variable "raid_level" { default = "-1" } 45 | variable "raid_block_size" { default = "128" } 46 | variable "max_heap_size" { default = "8" } 47 | variable "num_tokens" { default = "256" } 48 | variable "aio_enabled" { default = "true" } 49 | variable "max_queued_native_transport_requests" { default = "-1" } 50 | variable "native_transport_max_threads" { default = "-1" } 51 | -------------------------------------------------------------------------------- /core-components/terraform/modules/bastion/ingress_sg.tf: -------------------------------------------------------------------------------- 1 | ############################################# 2 | # OIP ingress-sg pattern 3 | ############################################# 4 | 5 | resource "aws_security_group" "bastion-ssh-ingress" { 6 | count = length(var.existing_bastion_sg_id) == 0 ? 
1 : 0 7 | name_prefix = "${var.ingress_sg_prefix}-${var.ingress_sg_protocol}-${var.ingress_sg_port}-" 8 | description = "Allows ingress from configured CIDR blocks" 9 | vpc_id = var.vpc_id 10 | revoke_rules_on_delete = true 11 | 12 | ingress { 13 | from_port = var.ingress_sg_port 14 | to_port = var.ingress_sg_port 15 | protocol = var.ingress_sg_protocol 16 | cidr_blocks = concat(var.bastion_ingress_cidrs, tolist([var.vpc_cidr])) 17 | } 18 | 19 | egress { 20 | from_port = 0 21 | to_port = 0 22 | protocol = "-1" 23 | cidr_blocks = ["0.0.0.0/0"] 24 | } 25 | 26 | lifecycle { 27 | create_before_destroy = true 28 | } 29 | 30 | tags = merge(map("Name", "${var.ingress_sg_prefix}-${var.ingress_sg_protocol}-${var.ingress_sg_port}"), var.ec2_tags, local.required_ec2_tags) 31 | } 32 | 33 | ############################################# 34 | # Reference group for bastion nodes 35 | ############################################# 36 | 37 | resource "aws_security_group" "bastion-sg" { 38 | count = length(var.existing_bastion_sg_id) == 0 ? 1 : 0 39 | name_prefix = "bastion-elb-nodes-" 40 | description = "Ref group for use in inbound rules, to allow ssh from bastion nodes" 41 | vpc_id = var.vpc_id 42 | revoke_rules_on_delete = true 43 | 44 | egress { 45 | from_port = 0 46 | to_port = 0 47 | protocol = "-1" 48 | cidr_blocks = ["0.0.0.0/0"] 49 | } 50 | 51 | lifecycle { 52 | create_before_destroy = true 53 | } 54 | 55 | tags = merge(map("Name", "bastion-elb-nodes"), var.ec2_tags, local.required_ec2_tags) 56 | } 57 | 58 | -------------------------------------------------------------------------------- /core-components/packer/opscenter/ssl/opscenter.crt.pem: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIFQjCCAyoCCQDLMsBbC235eTANBgkqhkiG9w0BAQsFADBjMQswCQYDVQQGEwJV 3 | UzELMAkGA1UECAwCQ0ExEjAQBgNVBAcMCVNhbiBEZWlnbzEPMA0GA1UECgwGSW50 4 | dWl0MSIwIAYJKoZIhvcNAQkBFhNzZ3VwdGEyM0BpbnR1aXQuY29tMB4XDTE4MDQy 5 | NDIyMDMzN1oXDTE5MDQyNDIyMDMzN1owYzELMAkGA1UEBhMCVVMxCzAJBgNVBAgM 6 | AkNBMRIwEAYDVQQHDAlTYW4gRGVpZ28xDzANBgNVBAoMBkludHVpdDEiMCAGCSqG 7 | SIb3DQEJARYTc2d1cHRhMjNAaW50dWl0LmNvbTCCAiIwDQYJKoZIhvcNAQEBBQAD 8 | ggIPADCCAgoCggIBAKmdqmA57XKq3soBYY2+ccIlfliF4giVmrsqNX8UruH6B1IV 9 | QRsmv9mEqtjuQXT+ckl3mM+jleH55nGEc3rZH0vNsK7A4uU34tHqVQhkuZk7pI13 10 | S9S0y440X/D+EmmBsIno7LdjP69zTECpgwnqGFz6FL6WRky0pndDqh1w0pxLiDqX 11 | XkCLGtAIM0Z1q5p2dZPeqxIB8LXrGxKkeWxG2Qyj9P5I3Gql63xxoVuXWEunM9eb 12 | bTUzuQvCaT1M+SLApm1tOsyfrYKgJEiqOEdo7l40dVjpQnVOtlHaJsY/IRD41fC+ 13 | aiOMZWvT85+xj5X1lsswEIHdYLtlKE/ESKteO02rfxBd/5N41s/o48R2SMmgrfY/ 14 | jLxMVgnKNb1KN34hW46YSkZgv6b9fUZ3+vGL3SG/W7lVhgB7aeJG9FKgliob4Aww 15 | 56GJZXanBXv3G9JQSIqZIThUyz2LgjvgEeDtDXBd2+CbpS6pgoM4yUYhyHE866cX 16 | DWSrW2AjMQtKofStOlgUmSNtEhEojsAKY3yo3Qg+TqU0/d9NqA77vvZ/4EOxMXtX 17 | yUcMj2Oop3hjXlxBZuTflm+lpo/m4visvjNg4oegTrXmfhy8fC5Q1zoT1MPApqc+ 18 | bmpadPYwI02hnIQ/ah8NpC4Z1fCPxJ5+XFspKktR5vgWKn8AGCEpdxzaSgTJAgMB 19 | AAEwDQYJKoZIhvcNAQELBQADggIBAKJPrfgKZvG+PBIp4/T9APlSw9oUOiJ72BlW 20 | fZ3rY+WAS631jyT5eBb2EzzbRh6DEbuqahwJY1c4HkimlaUioID3Px2cMU6Hhpn2 21 | mj+SY9hP6Uq5UDqhCVVZ4Pu0Z3PL87fkCszOZUzW8Fn/tEmrCxjXEIuvc1VXVoCZ 22 | Ih4jgyHhc2oXUIMq7twYzktV5JvlhYzk8jRUdWgCUxGLGqBdMWMwhTHNd/PG5Wfl 23 | BuQ3XhxkfJ7BQ9UQZTlyvS4MmNFJ20Vt3y/1neF3nrK9RTdiU2w7B+44TNleNZuS 24 | J3zrXIMa1yyaWGgxEon18421kK+QUs2ypvAhe0RqzPtkz5ERVl+6O9Mf9oRhmiNE 25 | I3Y+VYUNf53p0T/K/GKUBXXos0pvioJje39aOVFjBMILRAGZDbpBKGNxjlyNmTuN 26 | KfnHpdPIG9kCmnbaLMSv5pFtPTs1vmLyaeH/GbxZctQQGyqZR+vjDWuPT1Wphe0s 27 | 
1SPCxmRFqRd/rhFwMHRvnT7/6CD25yC/gakl8nFahgMTRoBDiByQoa5G9+TfHyTq 28 | 5HwC6LcbHTCcT0CfiUxHSim7W0SKMSXdAgSibkt1jwxbjgCrXKOkdZGVVLyipVHA 29 | 8UbyqXDcf7XeXapKNNQmB8NHLs7FtdWz2uDCbLpAoBguBX3/31gw4OPnxyK5UoRI 30 | 21K0uXz+ 31 | -----END CERTIFICATE----- 32 | -------------------------------------------------------------------------------- /core-components/terraform/modules/iam-resources/policies.tf: -------------------------------------------------------------------------------- 1 | ################### 2 | # policy granting access to SSM Parameter Store 3 | ################### 4 | 5 | data "aws_iam_policy_document" "ssm-parameterstore-doc" { 6 | statement { 7 | effect = "Allow" 8 | actions = [ 9 | "ssm:DescribeParameters" 10 | ] 11 | resources = ["*"] 12 | } 13 | statement { 14 | effect = "Allow" 15 | actions = [ 16 | "ssm:GetParameters", 17 | "ssm:GetParametersByPath" 18 | ] 19 | resources = ["arn:aws:ssm:*:${var.account_id}:parameter/dse*"] 20 | } 21 | } 22 | 23 | ################### 24 | # policy granting permissions for bootstrap and self-heal 25 | ################### 26 | 27 | data "aws_iam_policy_document" "ec2-autoscaling-doc" { 28 | statement { 29 | effect = "Allow" 30 | actions = [ 31 | "autoscaling:AttachInstances", 32 | "autoscaling:DescribeAutoScalingGroups", 33 | "autoscaling:DescribeTags", 34 | "autoscaling:SetDesiredCapacity", 35 | "autoscaling:TerminateInstanceInAutoScalingGroup" 36 | ] 37 | resources = ["*"] 38 | } 39 | statement { 40 | effect = "Allow" 41 | actions = [ 42 | "ec2:AttachNetworkInterface", 43 | "ec2:AttachVolume", 44 | "ec2:CreateVolume", 45 | "ec2:CreateTags", 46 | "ec2:DescribeInstanceStatus", 47 | "ec2:DescribeInstances", 48 | "ec2:DescribeNetworkInterfaces", 49 | "ec2:DescribeTags", 50 | "ec2:DescribeVolumes", 51 | "ec2:DetachVolume" 52 | ] 53 | resources = ["*"] 54 | } 55 | } 56 | 57 | ################### 58 | # policy granting read access to tfstate bucket 59 | ################### 60 | 61 | data "aws_iam_policy_document" "read-tfstate-doc" { 62 | statement { 63 | effect = "Allow" 64 | actions = [ 65 | "s3:Get*", 66 | "s3:ListObjects*", 67 | "s3:ListBucket*", 68 | "s3:HeadObject", 69 | "s3:PutObject" 70 | ] 71 | resources = [ 72 | "arn:aws:s3:::${var.tfstate_bucket}", 73 | "arn:aws:s3:::${var.tfstate_bucket}/*" 74 | ] 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /core-components/terraform/modules/opscenter/scripts/opscenter-init.tpl: -------------------------------------------------------------------------------- 1 | #!/bin/bash -vx 2 | 3 | # Fix OpsCenter configs 4 | sudo chmod -R a+rx /etc/opscenter/scripts 5 | sudo chmod a+x /etc/opscenter/scripts/*.sh 6 | sudo sed -i -r "s/#ssl/ssl/g" /etc/opscenter/opscenterd.conf 7 | 8 | # If DSE studio is enabled, start it 9 | 10 | if [[ "${studio_enabled}" = "1" ]] ; then 11 | echo "Setting DSE studio..." 12 | sudo sed -i -r "s/httpBindAddress: localhost/httpBindAddress: 0.0.0.0/g" /etc/datastax-studio/conf/configuration.yaml 13 | sudo chmod +x /etc/datastax-studio/bin/server.sh 14 | sudo /etc/datastax-studio/bin/server.sh & 15 | else 16 | echo "DSE studio is not needed..." 
17 | fi 18 | 19 | # Setup config to send email alerts from OpsCenter 20 | if [[ "${alert_email_enabled}" = "1" ]] ; then 21 | sudo sed -i "s/enabled=.*/enabled=1/" /etc/opscenter/event-plugins/email.conf 22 | sudo sed -i "s/levels=.*/levels=${alert_levels}/" /etc/opscenter/event-plugins/email.conf 23 | sudo sed -i "s/clusters=.*/clusters=${alert_clusters}/" /etc/opscenter/event-plugins/email.conf 24 | sudo sed -i "s/smtp_host=.*/smtp_host=${alert_email_smtp_host}/" /etc/opscenter/event-plugins/email.conf 25 | sudo sed -i "s/smtp_user=.*/smtp_user=${alert_email_smtp_user}/" /etc/opscenter/event-plugins/email.conf 26 | sudo sed -i "s/smtp_pass=.*/smtp_pass=${alert_email_smtp_pass}/" /etc/opscenter/event-plugins/email.conf 27 | sudo sed -i "s/from_addr=.*/from_addr=${alert_email_from_addr}/" /etc/opscenter/event-plugins/email.conf 28 | sudo sed -i "s/to_addr=.*/to_addr=${alert_email_to_addr}/" /etc/opscenter/event-plugins/email.conf 29 | sudo sed -i "s/OpsCenter Event on/OpsCenter Event on ${alert_email_env}/" /etc/opscenter/event-plugins/email.conf 30 | 31 | sudo chmod 755 /etc/opscenter/event-plugins/email.conf 32 | sudo chown opscenter:opscenter /etc/opscenter/event-plugins/email.conf 33 | else 34 | echo "Email alerts from opscenter are not needed..." 35 | fi 36 | 37 | # Run bootstrap script 38 | cd /etc/opscenter/scripts/ 39 | 40 | ./bootstrap.sh ${bucket} ${vpc_name} ${account_name} >> /var/log/bootstrap_opscenter.log 2>&1 41 | -------------------------------------------------------------------------------- /core-components/terraform/modules/opscenter/_variables.tf: -------------------------------------------------------------------------------- 1 | variable "tfstate_bucket" {} 2 | variable "region" {} 3 | 4 | variable "account_id" {} 5 | variable "account_name" {} 6 | variable "vpc_id" {} 7 | variable "vpc_name" {} 8 | 9 | variable "availability_zones" { type = list(string) } 10 | variable "subnet_id" {} 11 | variable "public_subnet_ids" { type = list(string)} 12 | 13 | variable "ami_prefix" {} 14 | variable "ami_owner_id" {} 15 | variable "instance_type" {} 16 | 17 | # cidr list for HTTPS ingress 18 | variable "opscenter_ingress_cidrs" { type = list(string)} 19 | 20 | variable "sg_ops_nodes_to_cas" {} 21 | variable "sg_bas_nodes_to_all" {} 22 | variable "ops_additional_sg_ids" { 23 | type = list(string) 24 | default = [] 25 | } 26 | 27 | # created by account-resources layer 28 | variable "opscenter_profile_arn" { type = string } 29 | 30 | variable "ssl_certificate_id" {} 31 | 32 | variable "studio_enabled" { default = "0" } 33 | 34 | # opscenter alert configuration 35 | variable "alert_email_enabled" { default = "0" } 36 | variable "alert_levels" { default = "ERROR,CRITICAL,ALERT" } 37 | variable "alert_clusters" { default = "" } 38 | variable "alert_email_smtp_host" { default = "" } 39 | variable "alert_email_smtp_user" { default = "" } 40 | variable "alert_email_smtp_pass" { default = "" } 41 | variable "alert_email_from_addr" { default = "" } 42 | variable "alert_email_to_addr" { default = "" } 43 | variable "alert_email_env" { default = "" } 44 | 45 | # optional hosted zone configuration 46 | variable "hosted_zone_name" { default = "" } 47 | variable "private_hosted_zone" { default = "false" } 48 | variable "hosted_zone_record_prefix" { default = "opscenter" } 49 | 50 | locals { 51 | required_ec2_tags = { 52 | "Name" = "opscenter-primary" 53 | "Account" = var.account_id 54 | "AccountName" = var.account_name 55 | "VpcName" = var.vpc_name 56 | "Tfstate" = var.tfstate_bucket 57 | 
"ManagedBy" = "terraform" 58 | "Region" = var.region 59 | "pool" = "opscenter" 60 | } 61 | } 62 | 63 | variable "ec2_tags" {} 64 | -------------------------------------------------------------------------------- /core-components/terraform/modules/opscenter/lb.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | opscenter_lb_tags = { 3 | "Name" = "elb-ops-${var.account_id}" 4 | } 5 | } 6 | 7 | resource "aws_lb" "opscenter" { 8 | name = "lb-ops-${var.account_id}" 9 | 10 | load_balancer_type = "application" 11 | internal = false 12 | 13 | security_groups = [ 14 | aws_security_group.ops_elb_443.id, 15 | aws_security_group.ops_elb_9091.id 16 | ] 17 | 18 | # TODO limit to two? Third will always be empty. 19 | subnets = var.public_subnet_ids 20 | 21 | # TODO access logs to S3 22 | tags = merge(var.ec2_tags, local.required_ec2_tags, local.opscenter_lb_tags) 23 | } 24 | 25 | resource "aws_lb_target_group" "opscenter-targets" { 26 | port = 8443 27 | protocol = "HTTPS" 28 | vpc_id = var.vpc_id 29 | 30 | health_check { 31 | healthy_threshold = 2 32 | unhealthy_threshold = 2 33 | timeout = 3 34 | protocol = "HTTPS" 35 | path = "/opscenter/login.html" 36 | interval = 30 37 | } 38 | } 39 | 40 | resource "aws_lb_target_group" "studio-targets" { 41 | port = 9091 42 | protocol = "HTTP" 43 | vpc_id = var.vpc_id 44 | 45 | health_check { 46 | healthy_threshold = 2 47 | unhealthy_threshold = 2 48 | timeout = 3 49 | path = "/" 50 | interval = 30 51 | } 52 | } 53 | 54 | resource "aws_lb_listener" "opscenter-listener" { 55 | load_balancer_arn = aws_lb.opscenter.id 56 | port = "443" 57 | protocol = "HTTPS" 58 | ssl_policy = "ELBSecurityPolicy-TLS-1-2-2017-01" 59 | certificate_arn = var.ssl_certificate_id 60 | 61 | default_action { 62 | target_group_arn = aws_lb_target_group.opscenter-targets.id 63 | type = "forward" 64 | } 65 | } 66 | 67 | resource "aws_lb_listener" "studio-listener" { 68 | load_balancer_arn = aws_lb.opscenter.id 69 | port = "9091" 70 | protocol = "HTTP" 71 | 72 | default_action { 73 | target_group_arn = aws_lb_target_group.studio-targets.id 74 | type = "forward" 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /core-components/terraform/modules/bastion/asg.tf: -------------------------------------------------------------------------------- 1 | ############################################# 2 | # LC and ASG for bastion nodes 3 | ############################################# 4 | 5 | data "aws_ami" "bastion-ami" { 6 | count = length(var.existing_bastion_sg_id) == 0 ? 1 : 0 7 | most_recent = true 8 | owners = ["amazon"] 9 | 10 | filter { 11 | name = "name" 12 | values = ["${var.ami_prefix}*"] 13 | } 14 | } 15 | 16 | resource "aws_launch_configuration" "bastion-lc" { 17 | count = length(var.existing_bastion_sg_id) == 0 ? 1 : 0 18 | name_prefix = "bastion-lc-" 19 | image_id = data.aws_ami.bastion-ami[0].id 20 | instance_type = var.instance_type 21 | placement_tenancy = "default" 22 | associate_public_ip_address = true 23 | 24 | security_groups = [ 25 | aws_security_group.bastion-sg[0].id, 26 | aws_security_group.bastion-ssh-ingress[0].id, 27 | ] 28 | 29 | user_data = data.template_cloudinit_config.bastion-init[0].rendered 30 | iam_instance_profile = var.bastion_role_arn 31 | 32 | lifecycle { 33 | create_before_destroy = true 34 | } 35 | } 36 | 37 | resource "aws_autoscaling_group" "bastion-asg" { 38 | count = length(var.existing_bastion_sg_id) == 0 ? 
1 : 0 39 | depends_on = [aws_launch_configuration.bastion-lc] 40 | name_prefix = "bastion-asg-" 41 | max_size = 1 42 | min_size = 1 43 | health_check_grace_period = 600 44 | health_check_type = "EC2" 45 | desired_capacity = 1 46 | launch_configuration = aws_launch_configuration.bastion-lc[0].name 47 | vpc_zone_identifier = var.ingress_subnet_ids 48 | target_group_arns = [aws_lb_target_group.bastion-targets[0].id] 49 | 50 | lifecycle { 51 | create_before_destroy = true 52 | } 53 | 54 | 55 | tag { 56 | key = "Name" 57 | value = "bastion" 58 | propagate_at_launch = true 59 | } 60 | 61 | dynamic "tag" { 62 | for_each = merge(var.ec2_tags, local.required_ec2_tags) 63 | 64 | content { 65 | key = tag.key 66 | value = tag.value 67 | propagate_at_launch = true 68 | } 69 | } 70 | } 71 | 72 | -------------------------------------------------------------------------------- /core-components/packer/cassandra/scripts/enable_eth1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ "$(uname -a)" =~ amzn2 ]]; then 4 | # ensure eth1 config is present, and used on next boot 5 | cd /etc/sysconfig/network-scripts 6 | cp ifcfg-eth0 ifcfg-eth1 7 | cp ifcfg-eth0 ifcfg-eth0.bak 8 | sed -i 's/eth0/eth1/g' ifcfg-eth1 9 | sed -i 's/ONBOOT=yes/ONBOOT=no/g' ifcfg-eth0 10 | 11 | # make sure eth1 is UP 12 | if [[ $(ip link show | grep -c "eth1.*state UP") == 0 ]]; then 13 | /sbin/ifup eth1 14 | sleep 10 15 | fi 16 | 17 | # make sure eth0 is DOWN 18 | if [[ $(ip link show | grep -c "eth0.*state DOWN") == 0 ]]; then 19 | echo "FYI: if running this manually while SSHing on eth0, your session is about to hang..." 20 | /sbin/ifdown eth0 21 | sleep 3 22 | fi 23 | 24 | # capture current IP 25 | my_eni_ip=$(ifconfig -a eth1 | grep -w inet | awk '{print $2}' | sed 's,/.*$,,' | sed 's/\./-/g') 26 | my_eni_hostname=ip-${my_eni_ip}.compute.internal 27 | ip=$(ifconfig -a eth1 | grep -w inet | awk '{print $2}') 28 | 29 | # make sure hostname is set properly in /etc/hosts 30 | if [[ ! $(hostname -i) =~ "${ip}" ]]; then 31 | hostnamectl set-hostname ${my_eni_hostname} 32 | echo "${ip} ${my_eni_hostname} ip-${my_eni_ip}" >> /etc/hosts 33 | echo "preserve_hostname: true" >> /etc/cloud/cloud.cfg 34 | fi 35 | else 36 | # this is the old (rhel-7.4) version of the script, this whole "else" can be removed if/when we completely deprecate RHEL 37 | cd /etc/sysconfig/network-scripts 38 | if [[ ! -e /etc/sysconfig/network-scripts/ifcfg-eth1 ]]; then 39 | cp ifcfg-eth0 ifcfg-eth1 40 | cp ifcfg-eth0 ifcfg-eth0.bak 41 | sed -i 's/eth0/eth1/g' ifcfg-eth1 42 | sed -i 's/ONBOOT=yes/ONBOOT=no/g' ifcfg-eth0 43 | 44 | echo "FYI: if running this manually while SSHing on eth0, your session is about to hang..." 
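 # bring eth1 up first, then take eth0 down; the sleeps give each interface change time to settle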
45 | 46 | /sbin/ifup eth1 47 | sleep 10 48 | /sbin/ifdown eth0 49 | sleep 3 50 | 51 | my_eni_ip=$(ifconfig -a eth1 | grep -w inet | awk '{print $2}' | sed 's,/.*$,,' | sed 's/\./-/g') 52 | my_eni_hostname=ip-${my_eni_ip}.compute.internal 53 | ip=$(ifconfig -a eth1 | grep -w inet | awk '{print $2}') 54 | hostnamectl set-hostname ${my_eni_hostname} 55 | echo "${ip} ${my_eni_hostname} ip-${my_eni_ip}" >> /etc/hosts 56 | echo "preserve_hostname: true" >> /etc/cloud/cloud.cfg 57 | fi 58 | fi 59 | -------------------------------------------------------------------------------- /core-components/packer/opscenter/scripts/bootstrap.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | source ./register_cluster_func.sh 3 | 4 | bucket=$1 5 | vpc_name=$2 6 | account_name=$3 7 | 8 | region=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone | sed 's/\(.*\)[a-z]/\1/') 9 | 10 | storage_cluster=$(aws ssm get-parameters --names "/dse/${account_name}/${vpc_name}/opscenter-resources/opscenter_storage_cluster" \ 11 | --query Parameters[0].Value --output text \ 12 | --region ${region}) 13 | 14 | function sync_configs_from_s3() { 15 | s3_file_path="${account_name}/${vpc_name}/opscenter-resources/files" 16 | aws s3 ls s3://${bucket}/${s3_file_path}/etc/opscenterd.conf 17 | file_exists=$? 18 | 19 | if [[ ${file_exists} -eq 0 ]]; then 20 | echo "opscenterd.conf exists in s3" 21 | else 22 | echo "opscenterd.conf does not exist in s3" 23 | if test -f "/etc/opscenter/opscenterd.conf"; then 24 | # enable authentication 25 | sed -i -r "s/enabled = False/enabled = True/g" /etc/opscenter/opscenterd.conf 26 | # write the modified opscenterd.conf to s3 27 | aws s3 cp /etc/opscenter/opscenterd.conf s3://${bucket}/${s3_file_path}/etc/opscenterd.conf 28 | fi 29 | fi 30 | 31 | echo ${storage_cluster} 32 | register_with_opscenter ${bucket} ${storage_cluster} ${vpc_name} ${account_name} 33 | 34 | # sync /etc/opscenter from s3 35 | aws s3 sync "s3://${bucket}/${s3_file_path}/etc/" /etc/opscenter/ --region ${region} 36 | 37 | # sync varlib from s3 38 | aws s3 sync "s3://${bucket}/${s3_file_path}/varlib/" /var/lib/opscenter/ --region ${region} 39 | chown -R opscenter:opscenter /etc/opscenter 40 | } 41 | 42 | function update_limits() { 43 | echo "opscenter hard nofile 500000" >> /etc/security/limits.conf 44 | echo "opscenter soft nofile 500000" >> /etc/security/limits.conf 45 | } 46 | 47 | function attach_network() 48 | { 49 | echo "FUNC: attach_network" 50 | echo "USER: `whoami`" 51 | echo "CWD: $PWD" 52 | PRIVATE_IP=$(curl -L 169.254.169.254/latest/meta-data/local-ipv4) 53 | REGION=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone | sed 's/\(.*\)[a-z]/\1/') 54 | ./ops_eni_mgr.py -r ${REGION} -o attach -n ${PRIVATE_IP} 55 | sudo ./enable_eth1.sh 56 | PRIVATE_IP=$(ifconfig eth1 | grep -w "inet" | awk '{print $2}') 57 | echo "New ip: ${PRIVATE_IP}" 58 | } 59 | 60 | attach_network 61 | sync_configs_from_s3 62 | update_limits 63 | sudo service opscenterd restart -------------------------------------------------------------------------------- /configurations/default-account/variables.yaml: -------------------------------------------------------------------------------- 1 | ################################################## 2 | # these vars are required for packer AMI baking 3 | ################################################## 4 | 5 | # AWS credentials for the AMI baking account 6 | PACKER_AWS_PROFILE: "packer" 7 |
PACKER_AWS_REGION: "<<< YOUR_AMI_BAKING_VPC_REGION_HERE >>>" 8 | 9 | # packer will bake AMIs in the following account/vpc/subnet 10 | PACKER_ACCOUNT_ID: "<<< YOUR_AMI_BAKING_ACCOUNT_ID_HERE >>>" 11 | PACKER_VPC_ID: "<<< YOUR_AMI_BAKING_VPC_ID_HERE >>>" 12 | PACKER_SUBNET_ID: "<<< YOUR_AMI_BAKING_SUBNET_ID_HERE >>>" 13 | 14 | # source AMI to use as the baseline. if not set, a suitable amzn2 image will be found when packer runs. 15 | PACKER_BASE_AMI_ID: "" 16 | 17 | # versions of DSE and OpsCenter artifacts to install 18 | PACKER_DSE_FULL_VER: "5.1.11-1" 19 | PACKER_DS_AGENT_VER: "6.1.7-1" 20 | PACKER_OPSCENTER_FULL_VER: "6.7.4-1" 21 | PACKER_DS_STUDIO_VER: "6.7.0" 22 | 23 | ################################################## 24 | # these vars are required for terraform deployments 25 | ################################################## 26 | 27 | # AWS credentials for the deployment target account 28 | TERRAFORM_AWS_CRED_PATH: "~/.aws/credentials" 29 | TERRAFORM_AWS_PROFILE: "terraform" 30 | TERRAFORM_AWS_REGION: "<<< YOUR_TARGET_VPC_REGION_HERE >>>" 31 | 32 | # bucket name and region for terraform's TFSTATE location 33 | TERRAFORM_STATE_BUCKET: "<<< YOUR_TFSTATE_BUCKET_NAME_HERE >>>" 34 | TERRAFORM_STATE_REGION: "<<< YOUR_TFSTATE_REGION_HERE >>>" 35 | 36 | # the account terraform will deploy into 37 | TERRAFORM_ACCOUNT_ID: "<<< YOUR_TARGET_ACCOUNT_ID_HERE >>>" 38 | 39 | # if this is set to true, TF will deploy a suitable vpc. if false, TF will expect you to have your own vpc. 40 | TERRAFORM_MANAGED_VPC: "true" 41 | 42 | # role for terraform to assume in the target account 43 | TERRAFORM_ASSUME_ROLE: "terraform-role" 44 | 45 | ################################################## 46 | # these vars are used to generate user-keys.yaml 47 | ################################################## 48 | 49 | # path on disk to ansible public SSH key; if it doesn't exist, a new key will be created at this path 50 | TERRAFORM_ANSIBLE_KEY_PATH: "~/.ssh/ansible_id_rsa.pub" 51 | 52 | # path on disk to a public key (for ec2-user); if it doesn't exist, only the ansible SSH key will be included 53 | TERRAFORM_SSH_KEY_PATH: "~/.ssh/id_rsa.pub" 54 | -------------------------------------------------------------------------------- /core-components/terraform/modules/iam-resources/bastion-role.tf: -------------------------------------------------------------------------------- 1 | ################### 2 | # role and instance profile for bastion 3 | ################### 4 | 5 | resource "aws_iam_role" "bastion-role" { 6 | name = "${var.prefix}bastion-role${var.suffix}" 7 | assume_role_policy = data.aws_iam_policy_document.ec2-assume-role-trusted-policy.json 8 | } 9 | 10 | resource "aws_iam_instance_profile" "bastion-profile" { 11 | name = aws_iam_role.bastion-role.name 12 | role = aws_iam_role.bastion-role.name 13 | } 14 | 15 | ################### 16 | # policy granting bastion access to SSM Parameter Store 17 | ################### 18 | 19 | resource "aws_iam_policy" "bastion-ssm-policy" { 20 | depends_on = [aws_iam_role.bastion-role] 21 | name = "${var.prefix}bastion-ssm-policy${var.suffix}" 22 | description = "Allow bastion instances access to parameter store" 23 | policy = data.aws_iam_policy_document.ssm-parameterstore-doc.json 24 | } 25 | 26 | resource "aws_iam_role_policy_attachment" "bastion-ssm-attach" { 27 | role = aws_iam_role.bastion-role.name 28 | policy_arn = aws_iam_policy.bastion-ssm-policy.arn 29 | } 30 | 31 | ################### 32 | # policy granting bastion read access to tfstate bucket 33 | 
################### 34 | 35 | resource "aws_iam_policy" "bastion-readbucket-policy" { 36 | depends_on = [aws_iam_role.bastion-role] 37 | name = "${var.prefix}bastion-readbucket-policy${var.suffix}" 38 | description = "Allow bastion instances read access to tfstate" 39 | policy = data.aws_iam_policy_document.read-tfstate-doc.json 40 | } 41 | 42 | resource "aws_iam_role_policy_attachment" "bastion-readbucket-attach" { 43 | role = aws_iam_role.bastion-role.name 44 | policy_arn = aws_iam_policy.bastion-readbucket-policy.arn 45 | } 46 | 47 | ################### 48 | # policy granting bastion permissions for bootstrap 49 | ################### 50 | 51 | data "aws_iam_policy_document" "bastion-bootstrap-doc" { 52 | statement { 53 | effect = "Allow" 54 | actions = [ 55 | "ec2:AssociateAddress" 56 | ] 57 | resources = ["*"] 58 | } 59 | } 60 | 61 | resource "aws_iam_policy" "bastion-bootstrap-policy" { 62 | depends_on = [aws_iam_role.bastion-role] 63 | name = "${var.prefix}bastion-bootstrap-policy${var.suffix}" 64 | policy = data.aws_iam_policy_document.bastion-bootstrap-doc.json 65 | } 66 | 67 | resource "aws_iam_role_policy_attachment" "bastion-bootstrap-attach" { 68 | role = aws_iam_role.bastion-role.name 69 | policy_arn = aws_iam_policy.bastion-bootstrap-policy.arn 70 | } 71 | -------------------------------------------------------------------------------- /core-components/operations.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | ROOT=$(git rev-parse --show-toplevel) 5 | CORE="$ROOT/core-components" 6 | CONFIGS="$ROOT/configurations" 7 | 8 | usage() { 9 | echo "Usage:" 10 | echo " operations.sh" 11 | echo " -a : [Required] account name" 12 | echo " -v : [Required] vpc name" 13 | echo " -c : [Required] cluster name" 14 | echo " -o : [Required] operation -> init | restart | restack | etc." 15 | echo " -h : host ip (default: all)" 16 | } 17 | 18 | parse() { 19 | grep ^$1 ${variables_path} | awk {'print $NF'} | tr -d '"' 20 | } 21 | 22 | get_tfvar() { 23 | grep "^${1}" ${terraform_var_file} | tr -d '" ' | awk -F'=' {'print $NF'} 24 | } 25 | 26 | while getopts ":o:h:a:v:c:" opt; do 27 | case "${opt}" in 28 | a) 29 | account_name=${OPTARG};; 30 | v) 31 | vpc_name=${OPTARG};; 32 | c) 33 | cluster_name=${OPTARG};; 34 | o) 35 | OPERATION=${OPTARG} ;; 36 | h) 37 | HOST_IP=${OPTARG} ;; 38 | *) 39 | usage; exit 1 ;; 40 | esac 41 | done 42 | shift "$((OPTIND-1))" 43 | 44 | if [[ -z "${OPERATION// }" ]]; then usage; exit 1; fi 45 | if [[ -z "${account_name// }" ]]; then usage; exit 1; fi 46 | if [[ -z "${vpc_name// }" ]]; then usage; exit 1; fi 47 | if [[ -z "${cluster_name// }" ]]; then usage; exit 1; fi 48 | 49 | variables_path=${CONFIGS}/${account_name}/variables.yaml 50 | 51 | if ! command -v ansible > /dev/null; then 52 | echo "Ansible is required." 53 | exit 1 54 | fi 55 | 56 | terraform_var_file="${CONFIGS}/${account_name}/${vpc_name}/vpc-resources/vpc.tfvars" 57 | REGION="$(get_tfvar region)" 58 | if [[ -z ${REGION} ]]; then 59 | REGION="$(parse TERRAFORM_AWS_REGION)" 60 | fi 61 | PROFILE="$(parse TERRAFORM_AWS_PROFILE)" 62 | BUCKET="$(parse TERRAFORM_STATE_BUCKET)" 63 | BUCKET_REGION="$(parse TERRAFORM_STATE_REGION)" 64 | ROLE_NAME="$(parse TERRAFORM_ASSUME_ROLE)" 65 | ACCOUNT="$(aws --profile ${PROFILE} sts get-caller-identity --query 'Account' --output text)" 66 | 67 | if [[ "${OPERATION}" != "" ]]; then 68 | echo "Running OPERATION: ${OPERATION}" 69 | else 70 | echo "Must specify an OPERATION to run."
71 | echo " - Options are: mount, unmount, start, stop, restart" 72 | echo " (see ansible.sh and 'playbooks' dir)" 73 | exit 1 74 | fi 75 | 76 | # Output ssh_config for Ansible 77 | ${CORE}/scripts/ssh/build-ssh-config.sh -a ${account_name} -v ${vpc_name} -c ${cluster_name} 78 | 79 | pushd ${CORE}/ansible > /dev/null 80 | 81 | HOST_ARG="-h ${HOST_IP}" 82 | if [[ -z "${HOST_IP}" ]]; then 83 | HOST_ARG="" 84 | fi 85 | 86 | ./ansible.sh -o ${OPERATION} -p ${PROFILE} -r ${REGION} -b ${BUCKET} -a ${account_name} -v ${vpc_name} -c ${cluster_name} -i ${ACCOUNT} -n ${ROLE_NAME} ${HOST_ARG} 87 | 88 | popd > /dev/null 89 | -------------------------------------------------------------------------------- /core-components/packer/cassandra/include/limits.conf: -------------------------------------------------------------------------------- 1 | /etc/security/limits.conf 2 | # 3 | #This file sets the resource limits for the users logged in via PAM. 4 | #It does not affect resource limits of the system services. 5 | # 6 | #Also note that configuration files in /etc/security/limits.d directory, 7 | #which are read in alphabetical order, override the settings in this 8 | #file in case the domain is the same or more specific. 9 | #That means for example that setting a limit for wildcard domain here 10 | #can be overriden with a wildcard setting in a config file in the 11 | #subdirectory, but a user specific setting here can be overriden only 12 | #with a user specific setting in the subdirectory. 13 | # 14 | #Each line describes a limit for a user in the form: 15 | # 16 | #<domain>        <type>  <item>  <value> 17 | # 18 | #Where: 19 | #<domain> can be: 20 | # - a user name 21 | # - a group name, with @group syntax 22 | # - the wildcard *, for default entry 23 | # - the wildcard %, can be also used with %group syntax, 24 | # for maxlogin limit 25 | # 26 | #<type> can have the two values: 27 | # - "soft" for enforcing the soft limits 28 | # - "hard" for enforcing hard limits 29 | # 30 | #<item> can be one of the following: 31 | # - core - limits the core file size (KB) 32 | # - data - max data size (KB) 33 | # - fsize - maximum filesize (KB) 34 | # - memlock - max locked-in-memory address space (KB) 35 | # - nofile - max number of open file descriptors 36 | # - rss - max resident set size (KB) 37 | # - stack - max stack size (KB) 38 | # - cpu - max CPU time (MIN) 39 | # - nproc - max number of processes 40 | # - as - address space limit (KB) 41 | # - maxlogins - max number of logins for this user 42 | # - maxsyslogins - max number of logins on the system 43 | # - priority - the priority to run user process with 44 | # - locks - max number of file locks the user can hold 45 | # - sigpending - max number of pending signals 46 | # - msgqueue - max memory used by POSIX message queues (bytes) 47 | # - nice - max nice priority allowed to raise to values: [-20, 19] 48 | # - rtprio - max realtime priority 49 | # 50 | #<domain>      <type>  <item>         <value> 51 | # 52 | 53 | * - nofile 1048576 54 | * - memlock unlimited 55 | * - fsize unlimited 56 | * - data unlimited 57 | * - rss unlimited 58 | * - stack unlimited 59 | * - cpu unlimited 60 | * - nproc unlimited 61 | * - as unlimited 62 | * - locks unlimited 63 | * - sigpending unlimited 64 | * - msgqueue unlimited -------------------------------------------------------------------------------- /core-components/packer/opscenter/include/limits.conf: -------------------------------------------------------------------------------- 1 | /etc/security/limits.conf 2 | # 3 | #This file sets the resource limits for the users logged in via PAM.
4 | #It does not affect resource limits of the system services. 5 | # 6 | #Also note that configuration files in /etc/security/limits.d directory, 7 | #which are read in alphabetical order, override the settings in this 8 | #file in case the domain is the same or more specific. 9 | #That means for example that setting a limit for wildcard domain here 10 | #can be overriden with a wildcard setting in a config file in the 11 | #subdirectory, but a user specific setting here can be overriden only 12 | #with a user specific setting in the subdirectory. 13 | # 14 | #Each line describes a limit for a user in the form: 15 | # 16 | #<domain>        <type>  <item>  <value> 17 | # 18 | #Where: 19 | #<domain> can be: 20 | # - a user name 21 | # - a group name, with @group syntax 22 | # - the wildcard *, for default entry 23 | # - the wildcard %, can be also used with %group syntax, 24 | # for maxlogin limit 25 | # 26 | #<type> can have the two values: 27 | # - "soft" for enforcing the soft limits 28 | # - "hard" for enforcing hard limits 29 | # 30 | #<item> can be one of the following: 31 | # - core - limits the core file size (KB) 32 | # - data - max data size (KB) 33 | # - fsize - maximum filesize (KB) 34 | # - memlock - max locked-in-memory address space (KB) 35 | # - nofile - max number of open file descriptors 36 | # - rss - max resident set size (KB) 37 | # - stack - max stack size (KB) 38 | # - cpu - max CPU time (MIN) 39 | # - nproc - max number of processes 40 | # - as - address space limit (KB) 41 | # - maxlogins - max number of logins for this user 42 | # - maxsyslogins - max number of logins on the system 43 | # - priority - the priority to run user process with 44 | # - locks - max number of file locks the user can hold 45 | # - sigpending - max number of pending signals 46 | # - msgqueue - max memory used by POSIX message queues (bytes) 47 | # - nice - max nice priority allowed to raise to values: [-20, 19] 48 | # - rtprio - max realtime priority 49 | # 50 | #<domain>      <type>  <item>         <value> 51 | # 52 | 53 | * - nofile 1048576 54 | * - memlock unlimited 55 | * - fsize unlimited 56 | * - data unlimited 57 | * - rss unlimited 58 | * - stack unlimited 59 | * - cpu unlimited 60 | * - nproc unlimited 61 | * - as unlimited 62 | * - locks unlimited 63 | * - sigpending unlimited 64 | * - msgqueue unlimited -------------------------------------------------------------------------------- /core-components/terraform/modules/cassandra/security_groups.tf: -------------------------------------------------------------------------------- 1 | resource "aws_security_group" "cas-internode" { 2 | name_prefix = "cas-internode-${var.cluster_name}-" 3 | description = "Allows cassandra nodes to talk to one another" 4 | vpc_id = var.vpc_id 5 | revoke_rules_on_delete = true 6 | 7 | ingress { 8 | from_port = 7000 9 | to_port = 7001 10 | protocol = "tcp" 11 | self = true 12 | } 13 | ingress { 14 | from_port = 8609 15 | to_port = 8609 16 | protocol = "tcp" 17 | self = true 18 | } 19 | ingress { 20 | from_port = 7198 21 | to_port = 7199 22 | protocol = "tcp" 23 | self = true 24 | } 25 | ingress { 26 | from_port = 9042 27 | to_port = 9042 28 | protocol = "tcp" 29 | self = true 30 | } 31 | ingress { 32 | from_port = 9142 33 | to_port = 9142 34 | protocol = "tcp" 35 | self = true 36 | } 37 | ingress { 38 | from_port = 9160 39 | to_port = 9161 40 | protocol = "tcp" 41 | self = true 42 | } 43 | 44 | egress { 45 | from_port = 0 46 | to_port = 0 47 | protocol = "-1" 48 | cidr_blocks = ["0.0.0.0/0"] 49 | } 50 | 51 | lifecycle { 52 | create_before_destroy = true 53 | } 54 | 55 | tags = merge(map("Name",
"cas-internode-${var.cluster_name}"), var.ec2_tags, local.required_ec2_tags) 56 | } 57 | 58 | resource "aws_security_group" "cas-client-access" { 59 | name_prefix = "cas-client-access-${var.cluster_name}-" 60 | description = "Allows inbound access to cassandra clients" 61 | vpc_id = var.vpc_id 62 | revoke_rules_on_delete = true 63 | 64 | # this SG is created when Terraform applies the cassandra module, but it has no ingress rules yet. 65 | # those can be added later, when the clients are determined. 66 | 67 | egress { 68 | from_port = 0 69 | to_port = 0 70 | protocol = "-1" 71 | cidr_blocks = ["0.0.0.0/0"] 72 | } 73 | 74 | lifecycle { 75 | create_before_destroy = true 76 | } 77 | 78 | tags = merge(map("Name", "cas-client-access-${var.cluster_name}"), var.ec2_tags, local.required_ec2_tags) 79 | } 80 | 81 | resource "aws_security_group" "cas-bastion-access" { 82 | name_prefix = "cas-bastion-access-${var.cluster_name}-" 83 | description = "Allows SSH access via bastion" 84 | vpc_id = var.vpc_id 85 | revoke_rules_on_delete = true 86 | 87 | ingress { 88 | from_port = 22 89 | to_port = 22 90 | protocol = "tcp" 91 | security_groups = [var.sg_bas_nodes_to_all] 92 | } 93 | 94 | egress { 95 | from_port = 0 96 | to_port = 0 97 | protocol = "-1" 98 | cidr_blocks = ["0.0.0.0/0"] 99 | } 100 | 101 | lifecycle { 102 | create_before_destroy = true 103 | } 104 | 105 | tags = merge(map("Name", "cas-bastion-access-${var.cluster_name}"), var.ec2_tags, local.required_ec2_tags) 106 | } 107 | 108 | -------------------------------------------------------------------------------- /docs/4.ANSIBLE.md: -------------------------------------------------------------------------------- 1 | # Runtime Operations 2 | 3 | Some of the prep for Ansible will require awscli, Python3, and some supporting libraries. If running on a Macbook, 4 | [here](https://docs.python-guide.org/starting/install3/osx/) are instructions for installing Python3: 5 | 6 | ``` 7 | brew install awscli python 8 | python3 -m pip install --user boto3 nose tornado simple-yaml six awsretry 9 | ``` 10 | * **Note:** a previously installed "six" library may have root permissions; if you get an error, try the following: 11 | ``` 12 | sudo chown -R ${YOUR_USERNAME} $(python -m site --user-site) 13 | python3 -m pip install --user --ignore-installed six 14 | ``` 15 | **If you've just gotten done deploying your cluster,** you'll need to wait a few minutes before proceeding to any remote 16 | Ansible operations. The AMI runs a lot of scripts during cloud-init, including locating the appropriate network device 17 | and attaching it, then setting it as the `active_interface`. Ansible can't connect to any nodes before this has happened. 18 | 19 | The runtime operations of a Cassandra cluster tend to be specific to the application, schema, and overall automation tendencies. 20 | That said, a few playbooks have been included here as examples. They can be run via the [operations script](../core-components/operations.sh): 21 | ``` 22 | $ ./core-components/operations.sh 23 | Usage: 24 | operations.sh 25 | -a : [Required] account name 26 | -v : [Required] vpc name 27 | -c : [Required] cluster name 28 | -o : [Required] operation 29 | -h : host ip (default: all) 30 | ``` 31 | The script will: 32 | 33 | 1. Find the bastion LB DNS name and active IP addresses of your cluster, and build an ssh_config file for Ansible (and you) 34 | to use. 
35 | * The script which generates the SSH config is [here](../core-components/scripts/ssh/build-ssh-config.sh) if you wish to run it separately. 36 | * To SSH, something along the lines of `ssh -F ./ssh_config <IP_ADDRESS>` should work, provided you use the active ENI 37 | after Terraform has run. 38 | 2. Build a host list for Ansible to consume. 39 | 3. Run the specified playbook. 40 | * Included options are init, start/stop/restart, mount/unmount, and restack. Playbooks are [here](../core-components/ansible/playbooks). 41 | 42 | **Note:** The "init" operation should be run first. This will change the password for the cassandra user, provided you've 43 | run the `init-secrets.sh` script. See **"Setup - Passwords"** [here](1.INITIAL_SETUP.md). 44 | ``` 45 | [ec2-user@ip-10-0-0-104 ~]$ nodetool status 46 | Datacenter: us-west-2 47 | ===================== 48 | Status=Up/Down 49 | |/ State=Normal/Leaving/Joining/Moving 50 | -- Address Load Tokens Owns Host ID Rack 51 | UN 172.0.0.1 404.28 KiB 256 ? 95ba1239-94cc-4809-9535-f6f620f54122 us-west-2b 52 | UN 172.0.0.2 403.35 KiB 256 ? 54f13ae9-bf55-485a-9dfb-663daa7b0a79 us-west-2a 53 | UN 172.0.0.3 371.37 KiB 256 ? 1710316c-33f0-4a81-befa-7504a6e41592 us-west-2c 54 | 55 | Note: Non-system keyspaces don't have the same replication settings, effective ownership information is meaningless 56 | ``` -------------------------------------------------------------------------------- /docs/3.TERRAFORM.md: -------------------------------------------------------------------------------- 1 | # Deploying 2 | 3 | Ensure credentials in your default profile are refreshed and ready to go, then deploy AWS resources with the [deploy.sh](../core-components/deploy.sh) 4 | script: 5 | * This script will create a temporary build directory containing all required variables, then initialize Terraform and run. 6 | * The command (`-m` option) should generally be set to `plan` first, to review [what will be deployed](https://www.terraform.io/docs/commands/plan.html), 7 | and then `apply`. 8 | ``` 9 | $ ./core-components/deploy.sh 10 | Usage: 11 | deploy.sh 12 | -a : [Required] account name 13 | -v : [Required] vpc name 14 | -c : [Required] cluster name 15 | -m : [Required] command -> apply | plan | show | destroy 16 | -l : layer -> account | vpc | cluster | opscenter (default: all) 17 | -h : Display this help message. 18 | ``` 19 | You must run this script for each layer: 20 | 21 | 1. Running with `-l account` will deploy any resources shared across the account (such as IAM roles). 22 | 2. Running with `-l vpc` will (depending on `TERRAFORM_MANAGED_VPC` in [variables.yaml](../configurations/default-account/variables.yaml)) 23 | _either:_ 24 | * deploy a new VPC, or 25 | * gather and output required info about an existing VPC. 26 | * Then, it will deploy any resources shared across the VPC (such as a bastion instance). 27 | 3. Running with `-l cluster` will deploy the Cassandra cluster named in the `-c` option. 28 | 4. Running with `-l opscenter` will deploy a primary OpsCenter instance. 29 | * There are more detailed instructions for deploying OpsCenter [here](OPSCENTER.md). 30 | 31 | Known issues: 32 | 33 | > `Error loading state: InternalError: We encountered an internal error. Please try again.` 34 | 35 | Sometimes, when deploying for the first time, this error will appear between Terraform commands. Just give it a retry.
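For example, just re-run the same command unchanged (the account/vpc/cluster names here match the default configuration shipped under `configurations/`):
```
$ ./core-components/deploy.sh -a default-account -v default-vpc -c default-cluster -m apply -l vpc
```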
36 | 37 | > `aws_ssm_parameter.parameter: value of 'count' cannot be computed` 38 | 39 | Terraform has an issue calculating the size of lists passed to [dynamic resources](../core-components/terraform/modules/parameter-store/parameters.tf), 40 | if the list contains any elements that are interpolated while the plan is being assembled. The workaround is to specify 41 | a static list size, like [this](../core-components/terraform/layers/vpc-resources/vpc-shared.tf#L89). 42 | 43 | ### SSH 44 | 45 | After deploying the `account`, `vpc`, and `cluster` layers (producing bastion & cassandra nodes) you'll be able to SSH. Your DSE 46 | nodes should end up with a second EIP attached, after the bootstrap process has finished: 47 | 48 | ![IP](./images/cassandra_ips.png) 49 | 50 | If not, check the "bootstrap debugging" section [here](MORE_DETAILS.md). 51 | 52 | The second EIP is the address you'll want to SSH to. First, create ssh_config with the [build-ssh-config.sh](../core-components/scripts/ssh/build-ssh-config.sh) 53 | script: 54 | ``` 55 | $ ./core-components/scripts/ssh/build-ssh-config.sh 56 | Usage: 57 | build-ssh-config.sh 58 | -a : [Required] account name 59 | -v : [Required] vpc name 60 | -c : [Required] cluster name 61 | ``` 62 | Then SSH using the generated config: 63 | ``` 64 | $ ssh -F ssh_config ${IP_ADDRESS} 65 | ``` 66 | -------------------------------------------------------------------------------- /core-components/terraform/modules/iam-resources/opscenter-role.tf: -------------------------------------------------------------------------------- 1 | ################### 2 | # role and instance profile for OpsCenter 3 | ################### 4 | 5 | resource "aws_iam_role" "opscenter-role" { 6 | name = "${var.prefix}opscenter-role${var.suffix}" 7 | assume_role_policy = data.aws_iam_policy_document.ec2-assume-role-trusted-policy.json 8 | } 9 | 10 | resource "aws_iam_instance_profile" "opscenter-profile" { 11 | name = aws_iam_role.opscenter-role.name 12 | role = aws_iam_role.opscenter-role.name 13 | } 14 | 15 | ################### 16 | # policy granting OpsCenter node access to SSM Parameter Store 17 | ################### 18 | 19 | resource "aws_iam_policy" "opscenter-ssm-policy" { 20 | depends_on = [aws_iam_role.opscenter-role] 21 | name = "${var.prefix}opscenter-ssm-policy${var.suffix}" 22 | description = "Allow OpsCenter instances access to parameter store" 23 | policy = data.aws_iam_policy_document.ssm-parameterstore-doc.json 24 | } 25 | 26 | resource "aws_iam_role_policy_attachment" "opscenter-ssm-attach" { 27 | role = aws_iam_role.opscenter-role.name 28 | policy_arn = aws_iam_policy.opscenter-ssm-policy.arn 29 | } 30 | 31 | ################### 32 | # policy granting OpsCenter node read access to tfstate bucket 33 | ################### 34 | 35 | resource "aws_iam_policy" "opscenter-readbucket-policy" { 36 | depends_on = [aws_iam_role.opscenter-role] 37 | name = "${var.prefix}opscenter-readbucket-policy${var.suffix}" 38 | description = "Allow OpsCenter instances read access to tfstate" 39 | policy = data.aws_iam_policy_document.read-tfstate-doc.json 40 | } 41 | 42 | resource "aws_iam_role_policy_attachment" "opscenter-readbucket-attach" { 43 | role = aws_iam_role.opscenter-role.name 44 | policy_arn = aws_iam_policy.opscenter-readbucket-policy.arn 45 | } 46 | 47 | ################### 48 | # policy granting OpsCenter node scoped write access to tfstate bucket 49 | ################### 50 | 51 | data "aws_iam_policy_document" "opscenter-bucket-doc" { 52 | statement { 53 | effect = "Allow" 54 | actions
= [ 55 | "s3:Put*" 56 | ] 57 | resources = [ 58 | "arn:aws:s3:::${var.tfstate_bucket}/${var.account_name}/*/opscenter-resources/*" 59 | ] 60 | } 61 | } 62 | 63 | resource "aws_iam_policy" "opscenter-bucket-policy" { 64 | name = "${var.prefix}opscenter-bucket-policy${var.suffix}" 65 | description = "Allow OpsCenter instances scoped write access to tfstate bucket" 66 | policy = data.aws_iam_policy_document.opscenter-bucket-doc.json 67 | } 68 | 69 | resource "aws_iam_role_policy_attachment" "opscenter-bucket-attach" { 70 | role = aws_iam_role.opscenter-role.name 71 | policy_arn = aws_iam_policy.opscenter-bucket-policy.arn 72 | } 73 | 74 | ################### 75 | # policy granting OpsCenter node permissions for bootstrap and self-heal 76 | ################### 77 | 78 | resource "aws_iam_policy" "opscenter-bootstrap-policy" { 79 | name = "${var.prefix}opscenter-bootstrap-policy${var.suffix}" 80 | description = "Allow OpsCenter instances to bootstrap" 81 | policy = data.aws_iam_policy_document.ec2-autoscaling-doc.json 82 | } 83 | 84 | resource "aws_iam_role_policy_attachment" "opscenter-bootstrap-attach" { 85 | role = aws_iam_role.opscenter-role.name 86 | policy_arn = aws_iam_policy.opscenter-bootstrap-policy.arn 87 | } 88 | -------------------------------------------------------------------------------- /core-components/terraform/modules/cassandra/files/dse-init.tpl: -------------------------------------------------------------------------------- 1 | #!/bin/bash -vx 2 | 3 | AZ=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone) 4 | 5 | # stop services 6 | 7 | systemctl stop dse 8 | systemctl stop datastax-agent 9 | 10 | agent_pid=$(ps -ef | grep -i "[d]atastax-agent" | awk '{print $2}') 11 | kill -9 $agent_pid 12 | 13 | echo "Beginning preparation of cassandra directories" 14 | 15 | chown -R cassandra:cassandra /var/run/dse 16 | chown -R cassandra:cassandra /var/run/datastax-agent 17 | 18 | # fix configs 19 | 20 | sed -i "s/rack=.*/rack=$AZ/" /etc/dse/cassandra/cassandra-rackdc.properties 21 | sed -i "s/dc=.*/dc=${dc_name}/" /etc/dse/cassandra/cassandra-rackdc.properties 22 | 23 | sed -i "s/GRAPH_ENABLED=.*/GRAPH_ENABLED=${graph_enabled}/" /etc/default/dse 24 | sed -i "s/SOLR_ENABLED=.*/SOLR_ENABLED=${solr_enabled}/" /etc/default/dse 25 | sed -i "s/SPARK_ENABLED=.*/SPARK_ENABLED=${spark_enabled}/" /etc/default/dse 26 | 27 | sed -i 's@\*/10@*/1@' /etc/cron.d/sysstat 28 | echo "Modified /etc/cron.d/sysstat to gather metrics every minute" 29 | 30 | # run bootstrap script 31 | 32 | echo "auto_start_dse = ${auto_start_dse}" > /var/log/bootstrap_cassandra.log 2>&1 33 | echo "graph_enabled = ${graph_enabled}" >> /var/log/bootstrap_cassandra.log 2>&1 34 | echo "solr_enabled = ${solr_enabled}" >> /var/log/bootstrap_cassandra.log 2>&1 35 | echo "spark_enabled = ${spark_enabled}" >> /var/log/bootstrap_cassandra.log 2>&1 36 | 37 | chmod +x /opt/dse/cassandra/scripts 38 | /opt/dse/cassandra/scripts/bootstrap.sh ${auto_start_dse} >> /var/log/bootstrap_cassandra.log 2>&1 39 | 40 | ############# 41 | ## reasonable block device settings for SSD (post-mount) 42 | ############# 43 | 44 | for dev in $(ls /sys/block); do 45 | echo 4 > /sys/block/$dev/queue/read_ahead_kb 46 | echo 1 > /sys/block/$dev/queue/nomerges 47 | #echo deadline > /sys/block/$dev/queue/scheduler 48 | done 49 | 50 | ############# 51 | ## authorized_keys 52 | ############# 53 | 54 | cat <<EOF > /usr/local/bin/sync_authorized_keys 55 | #!/bin/bash -ex 56 | TZ=America/Los_Angeles date 57 | timeout 60 aws s3api get-object --region
${region} --bucket ${ssh_bucket} --key ${ssh_prefix} /tmp/user-keys.yaml 58 | cat /tmp/user-keys.yaml | sed -n '/- name: ec2-user/,/- name:/p' | grep "ssh-rsa" | tr -s ' ' | cut -d ' ' -f3- > /home/ec2-user/.ssh/authorized_keys 59 | EOF 60 | 61 | chmod 744 /usr/local/bin/sync_authorized_keys 62 | /usr/local/bin/sync_authorized_keys || true 63 | cat <<EOF > /etc/cron.d/sync_authorized_keys 64 | */5 * * * * root /usr/local/bin/sync_authorized_keys >> /var/log/sync_authorized_keys.log 2>&1 65 | EOF 66 | 67 | # set the ansible user's passwd to not expire 68 | chage -I -1 -m 0 -M 99999 -E -1 ansible 69 | 70 | ################################### 71 | # Tag root volume with tags 72 | ################################### 73 | instance_id=`curl http://169.254.169.254/latest/meta-data/instance-id/` 74 | volume_id=`aws ec2 describe-instances --instance-id $instance_id --query "Reservations[].Instances[].BlockDeviceMappings[0].{VolumeID: Ebs.VolumeId}" --region ${region} | jq -r '.[0] | .VolumeID'` 75 | tags="" 76 | tag_keys=$(echo '${ec2_tag_map}' | jq -r '.|keys | .[]') 77 | for k in $tag_keys; do 78 | echo "$k = $(echo '${ec2_tag_map}' | jq -r --arg k "$k" '.[$k]')" 79 | tags="$tags Key=$k,Value=$(echo '${ec2_tag_map}' | jq -r --arg k "$k" '.[$k]')" 80 | done 81 | aws ec2 create-tags --region ${region} --resources $volume_id --tags $tags 82 | 83 | -------------------------------------------------------------------------------- /core-components/packer/opscenter/ssl/opscenter.key.pem: -------------------------------------------------------------------------------- 1 | -----BEGIN PRIVATE KEY----- 2 | MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQCpnapgOe1yqt7K 3 | AWGNvnHCJX5YheIIlZq7KjV/FK7h+gdSFUEbJr/ZhKrY7kF0/nJJd5jPo5Xh+eZx 4 | hHN62R9LzbCuwOLlN+LR6lUIZLmZO6SNd0vUtMuONF/w/hJpgbCJ6Oy3Yz+vc0xA 5 | qYMJ6hhc+hS+lkZMtKZ3Q6odcNKcS4g6l15AixrQCDNGdauadnWT3qsSAfC16xsS 6 | pHlsRtkMo/T+SNxqpet8caFbl1hLpzPXm201M7kLwmk9TPkiwKZtbTrMn62CoCRI 7 | qjhHaO5eNHVY6UJ1TrZR2ibGPyEQ+NXwvmojjGVr0/OfsY+V9ZbLMBCB3WC7ZShP 8 | xEirXjtNq38QXf+TeNbP6OPEdkjJoK32P4y8TFYJyjW9Sjd+IVuOmEpGYL+m/X1G 9 | d/rxi90hv1u5VYYAe2niRvRSoJYqG+AMMOehiWV2pwV79xvSUEiKmSE4VMs9i4I7 10 | 4BHg7Q1wXdvgm6UuqYKDOMlGIchxPOunFw1kq1tgIzELSqH0rTpYFJkjbRIRKI7A 11 | CmN8qN0IPk6lNP3fTagO+772f+BDsTF7V8lHDI9jqKd4Y15cQWbk35ZvpaaP5uL4 12 | rL4zYOKHoE615n4cvHwuUNc6E9TDwKanPm5qWnT2MCNNoZyEP2ofDaQuGdXwj8Se 13 | flxbKSpLUeb4Fip/ABghKXcc2koEyQIDAQABAoICAEyk0A43XtKH37BMO3a/bcwb 14 | ASKjLOZQI+EbS50nDXdTcednb0oDkoGJ72GjF7T0HVSRVF3PP7UjJTx6a2xBnGC2 15 | i3YL9vs466eLSe1HBo71YoACPiPZpnmVtRzC0jMWxV0sAXtEv8xaP0Sg1HOwcl3h 16 | K2qLoSycHPSbgM1Y4/J4NxCywYZM1UrXycO2/FoJ2Ut+PLMgaVLq8ArWg3+n7ABI 17 | PjaBWghe/S/4cL9gfieBYG227/wwaRelO8TVeWZ4wzuDNiSsqszx7DNhp5itE7lB 18 | 3xzUGpHDhaMq/oZqlFiE+LeWc4Oot1OoLNgmdgl/NiqOh4cHL+S96x9QRL4bEJRl 19 | uPWd7Fdl0fIrtJE1ISKgA4s2pUL2LhPz2gXQd/G+mZVDuFeRxOFcOqBdQ8cgILCA 20 | WrGBBk/9uaepeQAT7Qe1DMIGvmFJZFJhMAxTF5MVgPOSk7bRhLklwahb1ZUhlr/N 21 | A7HmkzNCkiaZ4pjt3ZFDsfHH2x42jE9UR+KJxN0UmZlCJx7ea9ZsNO8pbSfuqldw 22 | aQ43tHPsoWL/vmyMRNWWZDqRbxAoy6bE4wq0qQOYiyJTsfOW+TTqwOwFFPgyrw5C 23 | M8M9q7Ar8BYnC9+QtIXS3wlqhjDuFzAcNR5/aQCfccrtVZTZiJlEmoyTvjoPMj0O 24 | ESbFzRpPeo0eqxKge7eZAoIBAQDaX+dvl7KpC6iXI9s+KRxLxqulIDNSHJ13hsFD 25 | RRY4eCA5IDTEYkZviRrT9Tx3wXm/KWSEu0/zQV0QdwKweL/Bum0idRWMvtokPBhB 26 | TbWALGEail22UBW9D+RNuX+neov8h6WzYlLEXCJlOqkiFnnzomCVlOaMUUofA1S1 27 | B2YR56ubk4QaHDaJLlZvXyRmuV56pJauVBOhpwppUMplNZ+ll+OsvU1pTjBYj6pN 28 | 4sVbNYq0SsVZz+6bTpJBnLGU2mAcB5NakYvxQq0Ct8KcHNPOZs1R8aTGO54cJaRB 29 | 
3FYCz5ERfy3fPrezuAEI9NyA84sbhw+jBJGOPK544mwHz7VPAoIBAQDG1xrDi+FG 30 | 8EX+RXqWuhIvuvxv/gsvxuqIvy/xLpZlPNEq9F/7PlnI7VYNRvhrpPpxjRDawHrv 31 | beO9cVFHS1sVzI5F4+ByPM3OcK7ociIGYVgGCdT0clCjob9y90sP+oLWRujZzEY5 32 | pZ2zebQlGsNJd0lKgDP4meXg2K2hsapO/FDXsaPo3kOrvm+rwAYRQranzqcCbia9 33 | 2jCQIsoIxxUoTS78RjVbBbrJJy22bfsPE0L6LL29DtaC8NsIl3WpvJe/LoUiUGlg 34 | vRvt5DYpYXhGt3R1LbafGQvWmpa/wvJTeUCogz4HIDJmfB6j8Iwc7h33+fbPDuW+ 35 | /msxau0hZk5nAoIBAQCP89yawYNy0E6oo2hph6wxf5uy1JFANcDQBDLuZ2KIgxeq 36 | 0MI4GxiXeegHhYgA4beh/mVW4iuIDbSNoAZT0qWFzaEXr9su1rjyXfMXSWS2Q4sX 37 | ddLk1dIeuZUkb33pJ22UYM8oe05YRuwdkxffcrWdl5LXyDpakVOtMgLq+x+b+f40 38 | JoBUvfhtYfB0pmFWnifb7Revf5kZXQYN+sYRBdJh1MivHHMlUfeoybvYb/7ta1Qu 39 | GYQS+nEuTBM50VN27qWglJXchEmeFvf57K0LQC2XyEjdFdArJS3LHD01dUv1umLF 40 | B/NzUOLERC3ae5/IGcO/t77lFaPmAbZ51bilRZUdAoIBADR3nANPTLFYQ0/V64QM 41 | MzJPB79RbIg917Mfp7nfTiYGhzbrZsw35p8jKRq1yGU3EjKWQ4VFVusItxhM52KF 42 | mtJEuKHugFIEg1Xe2/hP7XR7iSuGup+J7odO7eKbKD5eHtoFufmGcl1RuywZiBRm 43 | XHNtO3SiIZSOn17WLfLFm6fd4kMYB6UxyO9vf631LGSvktzypZT5ldVw1nGiDGeG 44 | iKmqvCQrLncHKwNbCC/kE2Ql8/1pguN5H9dLULCSrqSMNAppbw3f4/53x//cwQHo 45 | S0F1tRAacN7IUBxfjDmrah/mlc4hW2Or9kCercMafQ5BMr01Qw/d4XelUImoqkuu 46 | 4C8CggEBALWYs7rOH8ynGykhZxJovlIy4N6L8WbdHSS0N06Ggp/LgItozgWMJjRG 47 | RmSpTfhZEfQ1EPIGAxeqYqWCYUaU2ez9jAbNv0uQM55ZVLbz+9lcS8pdfEUxyoig 48 | 60G+yggmdPozPuVw1Ex0p3I1DIm4a6Qnw6Q8evtcdsjCtal7xEv5YUBnXyK+g00P 49 | z6EYxT0ri/AmqHPtDPFYzLHUr3Cp6Ah9mnGDgZ11HVJNF50QaZgYVUwm0QEBx+uO 50 | wtmJrvU1xtxghuIJtM2ptLGMKhZzbfVfezZLRqg6E9GoRgyZazButGVq0oFuc9td 51 | JYWQEINj5961m0mxat6Z++xk5ag9y1E= 52 | -----END PRIVATE KEY----- 53 | -------------------------------------------------------------------------------- /core-components/terraform/layers/vpc-resources/vpc-shared.tf: -------------------------------------------------------------------------------- 1 | data "terraform_remote_state" "account-resources" { 2 | backend = "s3" 3 | config = { 4 | role_arn = var.role_arn 5 | bucket = var.tfstate_bucket 6 | key = "${var.account_name}/account-resources/account.tfstate" 7 | region = var.tfstate_region 8 | } 9 | } 10 | 11 | ################### 12 | # deploy resources shared across vpc 13 | ################### 14 | 15 | module "vpc-shared" { 16 | source = "../../modules/vpc-shared" 17 | 18 | vpc_id = module.vpc.vpc_id 19 | region = var.region 20 | account_id = var.account_id 21 | } 22 | 23 | ################### 24 | # deploy 1 bastion per vpc 25 | ################### 26 | 27 | module "bastion" { 28 | source = "../../modules/bastion" 29 | region = var.region 30 | tfstate_bucket = var.tfstate_bucket 31 | account_id = var.account_id 32 | account_name = var.account_name 33 | vpc_id = module.vpc.vpc_id 34 | vpc_name = var.vpc_name 35 | vpc_cidr = module.vpc.vpc_cidr 36 | 37 | ingress_sg_prefix = "bastion-ingress" 38 | ingress_sg_port = 22 39 | ingress_sg_protocol = "tcp" 40 | ami_prefix = var.bastion_ami_prefix 41 | bastion_role_arn = data.terraform_remote_state.account-resources.outputs.bastion_profile_arn 42 | ingress_subnet_ids = module.vpc.ingress_subnet_ids 43 | data_subnet_ids = module.vpc.data_subnet_ids 44 | 45 | # cidr list for SSH ingress 46 | bastion_ingress_cidrs = var.ingress_cidrs 47 | 48 | # if this is provided, do no work; output this variable to tfstate and exit 49 | existing_bastion_sg_id = var.existing_bastion_sg_id 50 | 51 | # this order is important: duplicate tags will be overwritten in argument order 52 | ec2_tags = merge(var.account_tags, var.vpc_tags) 53 | } 54 | 55 | ################### 56 | # upload ssh keys to tfstate bucket 
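# (at runtime, nodes re-pull this user-keys.yaml every 5 minutes via the
#  sync_authorized_keys cron installed by dse-init.tpl)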
57 | ################### 58 | 59 | locals { 60 | bastion_key = "${var.account_name}/${var.vpc_name}/vpc-resources" 61 | } 62 | 63 | module "bastion-ssh-keys" { 64 | source = "../../modules/bucket-object" 65 | bucket_name = var.tfstate_bucket 66 | key_prefix = "${local.bastion_key}/files/ssh/ec2-user/user-keys.yaml" 67 | file_source = "${path.module}/../../../../configurations/${local.bastion_key}/user-keys.yaml" 68 | 69 | # use specific provider for tfstate bucket, as it may not be the same as the deployment region 70 | providers = { 71 | aws = aws.tfstate 72 | } 73 | } 74 | 75 | ################### 76 | # vars required outside terraform -> Parameter Store 77 | ################### 78 | 79 | module "parameter-store" { 80 | source = "../../modules/parameter-store" 81 | 82 | cluster_name = var.cluster_name 83 | vpc_name = var.vpc_name 84 | account_name = var.account_name 85 | 86 | # remember to update this when adding/removing parameters from the list below! 87 | # dynamic list sizes can screw with terraform (it's a known bug) and result in the error: 88 | # aws_ssm_parameter.parameter: value of 'count' cannot be computed 89 | parameter_count=1 90 | 91 | parameters = [ 92 | { 93 | # storing vpc_id in Parameter Store, in order to access it from 94 | # non-terraform scripts (without having to examine tfstate) 95 | key = "vpc_id", 96 | value = module.vpc.vpc_id 97 | } 98 | ] 99 | } 100 | -------------------------------------------------------------------------------- /core-components/packer/init-packer-instance-profile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | usage() { 4 | echo "Usage:" 5 | echo " init-packer-instance-profile.sh -p <profile> -r <region> -n <role_name>" 6 | } 7 | 8 | while getopts ":p:r:n:" opt; do 9 | case "${opt}" in 10 | p) 11 | PROFILE=${OPTARG} ;; 12 | r) 13 | REGION=${OPTARG} ;; 14 | n) 15 | PACKER_ROLE_NAME=${OPTARG} ;; 16 | *) 17 | usage; exit 1 ;; 18 | esac 19 | done 20 | shift $((OPTIND -1)) 21 | 22 | if [[ -z "${PROFILE// }" ]]; then usage; exit 1; fi 23 | if [[ -z "${REGION// }" ]]; then usage; exit 1; fi 24 | if [[ -z "${PACKER_ROLE_NAME// }" ]]; then usage; exit 1; fi 25 | 26 | ASSUME_ROLE_DOC=/tmp/assume-role.json 27 | cat > ${ASSUME_ROLE_DOC} << EOM 28 | { 29 | "Version": "2012-10-17", 30 | "Statement": [ 31 | { 32 | "Effect": "Allow", 33 | "Principal": { 34 | "Service": "ec2.amazonaws.com" 35 | }, 36 | "Action": "sts:AssumeRole" 37 | } 38 | ] 39 | } 40 | EOM 41 | 42 | aws iam get-role --role-name ${PACKER_ROLE_NAME} > /dev/null 2>&1 43 | if [[ $? -ne 0 ]]; then 44 | echo "Creating role: ${PACKER_ROLE_NAME}" 45 | aws iam create-role --profile ${PROFILE} --region ${REGION} \ 46 | --role-name ${PACKER_ROLE_NAME} --assume-role-policy-document file://${ASSUME_ROLE_DOC} > /dev/null 47 | fi 48 | 49 | echo "Waiting for role..." 50 | aws iam wait role-exists --role-name ${PACKER_ROLE_NAME} --profile ${PROFILE} --region ${REGION} 51 | 52 | read -r -d '' PACKER_POLICY << EOM 53 | { 54 | "Version": "2012-10-17", 55 | "Statement": [ 56 | { 57 | "Effect":"Allow", 58 | "Action":"iam:PassRole", 59 | "Resource": [ 60 | "*" 61 | ] 62 | } 63 | ] 64 | } 65 | EOM 66 | 67 | echo "Attaching policy..."
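# (the ${PACKER_POLICY//[$'\t\r\n ']} expansion below strips all whitespace
#  from the heredoc JSON so the policy can be passed as one compact argument)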
68 | 69 | aws iam put-role-policy --profile ${PROFILE} --region ${REGION} \ 70 | --role-name ${PACKER_ROLE_NAME} \ 71 | --policy-name ${PACKER_ROLE_NAME} \ 72 | --policy-document "${PACKER_POLICY//[$'\t\r\n ']}" 73 | 74 | aws iam wait instance-profile-exists --profile ${PROFILE} --region ${REGION} \ 75 | --instance-profile-name ${PACKER_ROLE_NAME} > /dev/null 2>&1 76 | if [[ $? -ne 0 ]]; then 77 | echo "Creating instance profile: ${PACKER_ROLE_NAME}" 78 | aws iam create-instance-profile --profile ${PROFILE} --region ${REGION} \ 79 | --instance-profile-name ${PACKER_ROLE_NAME} > /dev/null 80 | fi 81 | 82 | echo "Waiting for instance profile..." 83 | aws iam wait instance-profile-exists --profile ${PROFILE} --region ${REGION} \ 84 | --instance-profile-name ${PACKER_ROLE_NAME} 85 | 86 | INSTANCE_PROFILES=$(aws iam list-instance-profiles-for-role --role-name ${PACKER_ROLE_NAME} \ 87 | --query 'InstanceProfiles[*].InstanceProfileName' --profile ${PROFILE} --region ${REGION} --output text) 88 | if [[ ! "${INSTANCE_PROFILES}" =~ "${PACKER_ROLE_NAME}" ]]; then 89 | echo "Adding instance profile to role..." 90 | aws iam add-role-to-instance-profile --profile ${PROFILE} --region ${REGION} \ 91 | --instance-profile-name ${PACKER_ROLE_NAME} --role-name ${PACKER_ROLE_NAME} 92 | fi 93 | 94 | # no convenient "iam wait" command for this (yet) 95 | echo "Waiting for role-profile attachment..." 96 | sleep 5 97 | while ! [[ $(aws iam list-instance-profiles-for-role \ 98 | --role-name ${PACKER_ROLE_NAME} --query 'InstanceProfiles[] | length(@)' \ 99 | --profile ${PROFILE} --region ${REGION}) -ge 1 ]]; 100 | do echo "Still waiting..."; sleep 5; done 101 | -------------------------------------------------------------------------------- /core-components/packer/cassandra/include/remote.yaml: -------------------------------------------------------------------------------- 1 | # username/password can be set in this file as per TinkerPop documentation, however it will be stored in plain text. 2 | # prefer passing those arguments from the command line via -u and -p or by specifying them in 3 | # .cassandra/gremlinshrc as shown below and then setting file permissions to prevent unauthorized access: 4 | # ----------------------------- 5 | # [authentication] 6 | # username = fred 7 | # password = !!bang!!$ 8 | # ----------------------------- 9 | hosts: [##PRIVATE_IP##] 10 | port: 8182 11 | username: cassandra 12 | password: cassandra 13 | serializer: { className: org.apache.tinkerpop.gremlin.driver.ser.GryoMessageSerializerV3d0, 14 | config: { serializeResultToString: true, ioRegistries: [org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerIoRegistryV3d0] }} 15 | connectionPool: { 16 | enableSsl: true, 17 | # JSSE keystore file path. Similar to setting JSSE property javax.net.ssl.keyStore. 18 | # keyStore:, 19 | # JSSE keystore password. Similar to setting JSSE property javax.net.ssl.keyStorePassword. 20 | # keyStorePassword:, 21 | # JSSE truststore file path. Similar to setting JSSE property javax.net.ssl.trustStore. 22 | # trustStore:, 23 | # JSSE truststore password. Similar to setting JSSE property javax.net.ssl.trustStorePassword. 24 | # trustStorePassword:, 25 | # JSSE keystore format. 'jks' or 'pkcs12'. Similar to setting JSSE property javax.net.ssl.keyStoreType. 
26 | # keyStoreType:, 27 | # https://docs.oracle.com/javase/8/docs/technotes/guides/security/SunProviders.html#SunJSSE_Protocols 28 | # sslEnabledProtocols:, 29 | # https://docs.oracle.com/javase/8/docs/technotes/guides/security/SunProviders.html#SupportedCipherSuites 30 | # sslCipherSuites:, 31 | # If true, trust all certificates and do not perform any validation. 32 | sslSkipCertValidation: true, 33 | # The maximum length in bytes that a message can be sent to the server. This number can be no greater than the 34 | # setting of the same name in the server configuration. 35 | maxContentLength: 65536000, 36 | # The maximum number of in-flight requests that can occur on a connection. 37 | maxInProcessPerConnection: 4, 38 | # The maximum number of times that a connection can be borrowed from the pool simultaneously. 39 | maxSimultaneousUsagePerConnection: 16, 40 | # The maximum size of a connection pool for a host. 41 | maxSize: 8, 42 | # The amount of time in milliseconds to wait for a new connection before timing out. 43 | maxWaitForConnection: 3000, 44 | # The amount of time in milliseconds to wait for a session to close before timing out (does not apply to 45 | # sessionless connections). 46 | maxWaitForSessionClose: 3000, 47 | # The minimum number of in-flight requests that can occur on a connection. 48 | minInProcessPerConnection: 1, 49 | # The minimum number of times that a connection can be borrowed from the pool simultaneously. 50 | minSimultaneousUsagePerConnection: 8, 51 | # The minimum size of a connection pool for a host. 52 | minSize: 2, 53 | # The amount of time in milliseconds to wait before trying to reconnect to a dead host. 54 | reconnectInterval: 1000, 55 | # The override value for the size of the result batches to be returned from the server. 56 | resultIterationBatchSize: 64 57 | } 58 | # Sets the AuthProperties.Property.JAAS_ENTRY properties for authentication to Gremlin Server. 59 | # jaasEntry: 60 | # Sets the AuthProperties.Property.PROTOCOL properties for authentication to Gremlin Server. 61 | # protocol: 62 | # username: xxx 63 | # password: xxx -------------------------------------------------------------------------------- /core-components/packer/cassandra/include/remote-objects.yaml: -------------------------------------------------------------------------------- 1 | # username/password can be set in this file as per TinkerPop documentation, however it will be stored in plain text. 2 | # prefer passing those arguments from the command line via -u and -p or by specifying them in 3 | # .cassandra/gremlinshrc as shown below and then setting file permissions to prevent unauthorized access: 4 | # ----------------------------- 5 | # [authentication] 6 | # username = fred 7 | # password = !!bang!!$ 8 | # ----------------------------- 9 | hosts: [##PRIVATE_IP##] 10 | port: 8182 11 | serializer: { className: org.apache.tinkerpop.gremlin.driver.ser.GryoMessageSerializerV3d0, 12 | config: { ioRegistries: [org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerIoRegistryV3d0] }} 13 | connectionPool: { 14 | # Determines if SSL should be enabled or not. If enabled on the server then it must be enabled on the client. 15 | enableSsl: false, 16 | # JSSE keystore file path. Similar to setting JSSE property javax.net.ssl.keyStore. 17 | # keyStore:, 18 | # JSSE keystore password. Similar to setting JSSE property javax.net.ssl.keyStorePassword. 19 | # keyStorePassword:, 20 | # JSSE truststore file path. Similar to setting JSSE property javax.net.ssl.trustStore.
21 | # trustStore:, 22 | # JSSE truststore password. Similar to setting JSSE property javax.net.ssl.trustStorePassword. 23 | # trustStorePassword:, 24 | # JSSE keystore format. 'jks' or 'pkcs12'. Similar to setting JSSE property javax.net.ssl.keyStoreType. 25 | # keyStoreType:, 26 | # https://docs.oracle.com/javase/8/docs/technotes/guides/security/SunProviders.html#SunJSSE_Protocols 27 | # sslEnabledProtocols:, 28 | # https://docs.oracle.com/javase/8/docs/technotes/guides/security/SunProviders.html#SupportedCipherSuites 29 | # sslCipherSuites:, 30 | # If true, trust all certificates and do not perform any validation. 31 | # sslSkipCertValidation:, 32 | # The maximum length in bytes that a message can be sent to the server. This number can be no greater than the 33 | # setting of the same name in the server configuration. 34 | maxContentLength: 65536000, 35 | # The maximum number of in-flight requests that can occur on a connection. 36 | maxInProcessPerConnection: 4, 37 | # The maximum number of times that a connection can be borrowed from the pool simultaneously. 38 | maxSimultaneousUsagePerConnection: 16, 39 | # The maximum size of a connection pool for a host. 40 | maxSize: 8, 41 | # The amount of time in milliseconds to wait for a new connection before timing out. 42 | maxWaitForConnection: 3000, 43 | # The amount of time in milliseconds to wait for a session to close before timing out (does not apply to 44 | # sessionless connections). 45 | maxWaitForSessionClose: 3000, 46 | # The minimum number of in-flight requests that can occur on a connection. 47 | minInProcessPerConnection: 1, 48 | # The minimum number of times that a connection can be borrowed from the pool simultaneously. 49 | minSimultaneousUsagePerConnection: 8, 50 | # The minimum size of a connection pool for a host. 51 | minSize: 2, 52 | # The amount of time in milliseconds to wait before trying to reconnect to a dead host. 53 | reconnectInterval: 1000, 54 | # The override value for the size of the result batches to be returned from the server. 55 | resultIterationBatchSize: 64 56 | } 57 | # Sets the AuthProperties.Property.JAAS_ENTRY properties for authentication to Gremlin Server. 58 | # jaasEntry: 59 | # Sets the AuthProperties.Property.PROTOCOL properties for authentication to Gremlin Server. 60 | # protocol: 61 | # username: xxx 62 | # password: xxx -------------------------------------------------------------------------------- /core-components/ansible/playbooks/opscenter-install-alerts-dashboards.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # This playbook installs a basic set of alerts and dashboards on opscenter.
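# Normally run via operations.sh -> ansible.sh, which substitute the templated vars below;
# a direct invocation might look like this (illustrative values only):
#   ansible-playbook opscenter-install-alerts-dashboards.yml -e opscenter_ip=10.0.0.5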
3 | 4 | - name: install alerts and dashboards 5 | gather_facts: false 6 | hosts: 7 | - '{{ opscenter_ip }}' 8 | serial: 1 9 | 10 | vars: 11 | tfstate_bucket: { tfstate_bucket } 12 | target_cluster: { target_cluster } 13 | target_vpc: { target_vpc } 14 | target_account: { target_account } 15 | region: { region } 16 | opscenter_config_path: { opscenter_config_path } 17 | 18 | tasks: 19 | - name: copy scripts 20 | become: true 21 | copy: 22 | src: ../scripts/ 23 | dest: /tmp/scripts/ 24 | directory_mode: yes 25 | 26 | - name: copy configs 27 | become: true 28 | copy: 29 | src: "{{ opscenter_config_path }}" 30 | dest: /tmp/scripts/ 31 | directory_mode: yes 32 | 33 | - name: add alerts 34 | become: true 35 | shell: | 36 | cd /tmp/scripts 37 | export secrets_ssm_path=/dse/{{ target_account }}/{{ target_vpc }}/opscenter-resources/secrets 38 | ops_admin_pass=$(aws --region {{ region }} ssm get-parameter --name ${secrets_ssm_path}/ops_admin --with-decryption | jq -r '.[].Value' | base64 -d) 39 | export OPS_ADMIN_PWD=${ops_admin_pass} 40 | python3 ./cluster-opscenter-mgr.py -p ./opscenter-configs/opscenter-alerts.json -c {{ target_cluster }} -o add_alerts -u https://localhost:8443 41 | aws s3 sync /var/lib/opscenter/ "s3://{{ tfstate_bucket }}/{{ target_account }}/{{ target_vpc }}/opscenter-resources/files/varlib/" --region {{ region }} 42 | register: result 43 | 44 | - debug: msg="{{ result.stdout }}" 45 | 46 | - name: add dashboards 47 | become: true 48 | shell: | 49 | cd /tmp/scripts 50 | export region=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone | sed 's/\(.*\)[a-z]/\1/') 51 | export secrets_ssm_path=/dse/{{ target_account }}/{{ target_vpc }}/opscenter-resources/secrets 52 | ops_admin_pass=$(aws --region {{ region }} ssm get-parameter --name ${secrets_ssm_path}/ops_admin --with-decryption | jq -r '.[].Value' | base64 -d) 53 | export OPS_ADMIN_PWD=${ops_admin_pass} 54 | python3 ./cluster-opscenter-mgr.py -p ./opscenter-configs/opscenter-dashboard.json -c {{ target_cluster }} -o add_dashboards -u https://localhost:8443 55 | aws s3 sync /var/lib/opscenter/ "s3://{{ tfstate_bucket }}/{{ target_account }}/{{ target_vpc }}/opscenter-resources/files/varlib/" --region {{ region }} 56 | register: result 57 | 58 | - debug: msg="{{ result.stdout }}" 59 | 60 | - name: disable subset of best-practice rules 61 | become: true 62 | shell: | 63 | cd /tmp/scripts 64 | export region=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone | sed 's/\(.*\)[a-z]/\1/') 65 | export secrets_ssm_path=/dse/{{ target_account }}/{{ target_vpc }}/opscenter-resources/secrets 66 | ops_admin_pass=$(aws --region {{ region }} ssm get-parameter --name ${secrets_ssm_path}/ops_admin --with-decryption | jq -r '.[].Value' | base64 -d) 67 | export OPS_ADMIN_PWD=${ops_admin_pass} 68 | python3 ./cluster-opscenter-mgr.py -p ./opscenter-configs/opscenter-enabled-bestpractices.json -c {{ target_cluster }} -o update_bestpractices -u https://localhost:8443 69 | aws s3 sync /var/lib/opscenter/ "s3://{{ tfstate_bucket }}/{{ target_account }}/{{ target_vpc }}/opscenter-resources/files/varlib/" --region {{ region }} 70 | register: result 71 | 72 | - debug: msg="{{ result.stdout }}" 73 | -------------------------------------------------------------------------------- /core-components/packer/cassandra/scripts/create_volume.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # This script mounts the primary and secondary-data for 
the cassandra_seed_node_ips 5 | 6 | VOL_SIZE=$1 7 | STRIPES=$2 8 | BLOCK_SIZE=$3 9 | RAID_LEVEL=$4 10 | 11 | sudo mkdir -p /mnt/{cassandra-data-primary,cassandra-data-secondary} 12 | 13 | function check_device_names() 14 | { 15 | # the "nitro" hypervisor uses the NVMe specification, remaps "xvd" device names to "nvme" 16 | if ls -l /dev/xvda | grep -q nvme; then 17 | ROOT_DEVICE="nvme0n1" 18 | SECONDARY_DEVICE="nvme1n1" 19 | PRIMARY_DEVICE="nvme2n1" 20 | else 21 | ROOT_DEVICE="xvda" 22 | SECONDARY_DEVICE="xvdb" 23 | PRIMARY_DEVICE="xvdc" 24 | fi 25 | } 26 | 27 | function mount_secondary() 28 | { 29 | # Mount the secondary-data 30 | v=$(df | grep "/mnt/cassandra-data-secondary" | awk '{print $1}') 31 | if [[ -z ${v//} ]]; then 32 | disk="/dev/${SECONDARY_DEVICE}" 33 | fs_type=$(blkid ${disk} | awk '{print $3}' | awk -F '=' '{print $2}') 34 | if [[ -z ${fs_type//} ]]; then 35 | sudo mkfs -t ext4 ${disk} 36 | fi 37 | sudo mount ${disk} /mnt/cassandra-data-secondary 38 | sudo mkdir -p /mnt/cassandra-data-secondary/{commitlog,saved_caches,hints,cdc_raw} 39 | sudo echo "${disk} /mnt/cassandra-data-secondary ext4 rw,auto 0 0" >> /etc/fstab 40 | sudo chown -R cassandra:cassandra /mnt/cassandra-data-secondary 41 | fi 42 | } 43 | 44 | function mount_primary() 45 | { 46 | # Mount the primary-data 47 | v=$(df | grep "/mnt/cassandra-data-primary" | awk '{print $1}') 48 | if [[ -z ${v//} ]]; then 49 | if [[ ${RAID_LEVEL} = -1 ]]; then 50 | # mounting a single volume 51 | disk="/dev/${PRIMARY_DEVICE}" 52 | fs_type=$(blkid ${disk} | awk '{print $3}' | awk -F '=' '{print $2}') 53 | if [[ -z ${fs_type//} ]]; then 54 | sudo mkfs -t ext4 ${disk} 55 | fi 56 | sudo mount ${disk} /mnt/cassandra-data-primary 57 | else 58 | # try to do pvimport if there is data in pvscan 59 | lvm_devices=$(pvscan | grep cas-data-vg | wc -l) 60 | 61 | # There could be 2 or more disks 62 | if [[ ${lvm_devices} -gt 1 ]]; then 63 | # re-import volume group to avoid "unable to export cas-data-vg" 64 | vgchange -an cas-data-vg 65 | vgexport cas-data-vg 66 | vgimport cas-data-vg 67 | vgchange -ay cas-data-vg 68 | fi 69 | 70 | fs_type=$(blkid /dev/cas-data-vg/cas-data | awk ' { print $3 }' | awk -F '=' '{print $2}') 71 | if [[ -z ${fs_type//} ]]; then 72 | lsblk | awk -v rootdev="${ROOT_DEVICE}" '$7 == "" && $6 == "disk" && $1 != rootdev {print "/dev/"$1}' | xargs pvcreate 73 | lsblk | awk -v rootdev="${ROOT_DEVICE}" '$7 == "" && $6 == "disk" && $1 != rootdev {print "/dev/"$1}' | xargs vgcreate cas-data-vg 74 | lvcreate --name cas-data --type raid0 -i ${STRIPES} -I ${BLOCK_SIZE} --size ${VOL_SIZE}GB cas-data-vg 75 | # Make volume group inactive 76 | vgchange -an cas-data-vg 77 | # Write the volume group info 78 | vgexport cas-data-vg 79 | # Import it back again (redundant but does not hurt) 80 | vgimport cas-data-vg 81 | # Make volumegroup active 82 | vgchange -ay cas-data-vg 83 | # format filesystem 84 | mkfs.ext4 /dev/cas-data-vg/cas-data 85 | fi 86 | mount /dev/cas-data-vg/cas-data /mnt/cassandra-data-primary 87 | fi 88 | sudo chown -R cassandra:cassandra /mnt/cassandra-data-primary 89 | sudo echo "/dev/cas-data-vg/cas-data /mnt/cassandra-data-primary ext4 rw,auto 0 0" >> /etc/fstab 90 | fi 91 | } 92 | 93 | check_device_names 94 | mount_secondary 95 | mount_primary 96 | -------------------------------------------------------------------------------- /core-components/ansible/playbooks/cluster-restack.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # This playbook restacks the cluster, 
performing the following actions on one node at a time: 3 | # 1. Drain and stop the current node 4 | # 2. Terminate the old node 5 | # 3. (cloud-init bootstrap) => new node will initialize, attaching ENI and storage, starting DSE service 6 | # 4. Wait for UN (up normal) 7 | # 5. Sleep 5 minutes, allowing the cluster time to settle before moving on to next node 8 | 9 | - name: restack DSE cluster 10 | gather_facts: false 11 | any_errors_fatal: true 12 | hosts: 13 | - '{{ host_list }}' 14 | order: inventory 15 | serial: 1 16 | 17 | vars: 18 | account: { account } 19 | role_name: { role_name } 20 | region: { region } 21 | cluster: { cluster } 22 | 23 | tasks: 24 | - name: capture hostip 25 | shell: hostname -I | awk {'print $1'} 26 | register: host_ip 27 | 28 | - debug: msg="using hostname {{ host_ip.stdout }}" 29 | 30 | - name: gather drain time 31 | shell: TZ=":US/Pacific" date 32 | register: drain_time 33 | 34 | - debug: msg="drain time is {{ drain_time.stdout }}" 35 | 36 | - name: 1 Drain 37 | shell: "nodetool drain" 38 | ignore_errors: true 39 | 40 | - name: 2 Stop the DSE service 41 | service: 42 | name: dse 43 | state: stopped 44 | ignore_errors: true 45 | 46 | - name: 3 Unmount the volumes 47 | become: true 48 | shell: | 49 | umount /mnt/cassandra-data-primary && sleep 120 50 | umount /mnt/cassandra-data-secondary && sleep 120 51 | ignore_errors: true 52 | 53 | - name: 4 Detach the storage from current 54 | become: true 55 | shell: "/opt/dse/cassandra/scripts/cas_ebs_mgr.py -a {{ account }} -r {{ region }} -c {{ cluster }} -o detach -l {{ inventory_hostname }}" 56 | register: result_detach_storage 57 | 58 | - name: 5 Assume role for local action 59 | shell: aws sts assume-role --role-arn "arn:aws:iam::{{ account }}:role/{{ role_name }}" --role-session-name ansible 60 | delegate_to: localhost 61 | register: assumed_role 62 | 63 | - set_fact: 64 | sts_output: "{{ assumed_role.stdout|from_json }}" 65 | 66 | - name: 6 (running locally) Terminate the node in ASG, wait for replacement 67 | shell: | 68 | export AWS_ACCESS_KEY_ID="{{ sts_output|json_query('Credentials.AccessKeyId') }}" 69 | export AWS_SECRET_ACCESS_KEY="{{ sts_output|json_query('Credentials.SecretAccessKey') }}" 70 | export AWS_SESSION_TOKEN="{{ sts_output|json_query('Credentials.SessionToken') }}" 71 | ../../packer/cassandra/scripts/cas_asg_mgr.py -a {{ account }} -c {{ cluster }} -o reset -n {{ inventory_hostname }} 72 | unset AWS_ACCESS_KEY_ID 73 | unset AWS_SECRET_ACCESS_KEY 74 | unset AWS_SESSION_TOKEN 75 | delegate_to: localhost 76 | register: result 77 | - debug: msg="{{ result.stdout }}" 78 | 79 | # 9142 = dse client port (ssl) 80 | - wait_for: 81 | port: 9142 82 | host: "{{ host_ip.stdout }}" 83 | delay: 10 84 | timeout: 600 85 | 86 | - name: 7 Wait for UN 87 | shell: nodetool status | grep "{{ host_ip.stdout }}" | awk {'print $1'} 88 | register: result 89 | until: result.stdout.find("UN") != -1 90 | retries: 30 91 | delay: 10 92 | 93 | - name: gather end time 94 | action: shell TZ=":US/Pacific" date 95 | register: end_time 96 | 97 | - debug: msg="{{ end_time.stdout }}" 98 | 99 | - name: sleep for 5 mins 100 | become: true 101 | action: shell sleep 300 -------------------------------------------------------------------------------- /core-components/terraform/modules/iam-resources/cassandra-role.tf: -------------------------------------------------------------------------------- 1 | ################### 2 | # role and instance profile for DSE nodes 3 | ################### 4 | 5 | resource "aws_iam_role" 
"cassandra-role" { 6 | name = "${var.prefix}cassandra-role${var.suffix}" 7 | assume_role_policy = data.aws_iam_policy_document.ec2-assume-role-trusted-policy.json 8 | } 9 | 10 | resource "aws_iam_instance_profile" "cassandra-profile" { 11 | name = aws_iam_role.cassandra-role.name 12 | role = aws_iam_role.cassandra-role.name 13 | } 14 | 15 | ################### 16 | # policy granting DSE node access to SSM Parameter Store 17 | ################### 18 | 19 | resource "aws_iam_policy" "cassandra-ssm-policy" { 20 | depends_on = [aws_iam_role.cassandra-role] 21 | name = "${var.prefix}cassandra-ssm-policy${var.suffix}" 22 | description = "Allow DSE instances access to parameter store" 23 | policy = data.aws_iam_policy_document.ssm-parameterstore-doc.json 24 | } 25 | 26 | resource "aws_iam_role_policy_attachment" "cassandra-ssm-attach" { 27 | role = aws_iam_role.cassandra-role.name 28 | policy_arn = aws_iam_policy.cassandra-ssm-policy.arn 29 | } 30 | 31 | ################### 32 | # policy granting DSE node read access to tfstate bucket 33 | ################### 34 | 35 | resource "aws_iam_policy" "cassandra-readbucket-policy" { 36 | depends_on = [aws_iam_role.opscenter-role] 37 | name = "${var.prefix}cassandra-readbucket-policy${var.suffix}" 38 | description = "Allow DSE instances read access to tfstate" 39 | policy = data.aws_iam_policy_document.read-tfstate-doc.json 40 | } 41 | 42 | resource "aws_iam_role_policy_attachment" "cassandra-readbucket-attach" { 43 | role = aws_iam_role.cassandra-role.name 44 | policy_arn = aws_iam_policy.cassandra-readbucket-policy.arn 45 | } 46 | 47 | ################### 48 | # policy granting DSE node scoped write access to tfstate bucket 49 | ################### 50 | 51 | data "aws_iam_policy_document" "cassandra-bucket-permissions-doc" { 52 | statement { 53 | effect = "Allow" 54 | actions = [ 55 | "s3:Put*" 56 | ] 57 | resources = [ 58 | "arn:aws:s3:::${var.tfstate_bucket}/${var.account_name}/*/files", 59 | "arn:aws:s3:::${var.tfstate_bucket}/${var.account_name}/*/files/*" 60 | ] 61 | } 62 | statement { 63 | effect = "Allow" 64 | actions = [ 65 | "s3:DeleteObject" 66 | ] 67 | resources = [ 68 | "arn:aws:s3:::${var.tfstate_bucket}/${var.account_name}/*/files/lock", 69 | "arn:aws:s3:::${var.tfstate_bucket}/${var.account_name}/*/files/lock/*" 70 | ] 71 | } 72 | } 73 | 74 | resource "aws_iam_policy" "cassandra-bucket-permissions-policy" { 75 | name = "${var.prefix}cassandra-bucket-policy${var.suffix}" 76 | description = "Allow DSE instances scoped write access to tfstate bucket" 77 | policy = data.aws_iam_policy_document.cassandra-bucket-permissions-doc.json 78 | } 79 | 80 | resource "aws_iam_role_policy_attachment" "cassandra-bucket-attach" { 81 | role = aws_iam_role.cassandra-role.name 82 | policy_arn = aws_iam_policy.cassandra-bucket-permissions-policy.arn 83 | } 84 | 85 | ################### 86 | # policy granting DSE node permissions for bootstrap and self-heal 87 | ################### 88 | 89 | resource "aws_iam_policy" "cassandra-bootstrap-policy" { 90 | name = "${var.prefix}cassandra-bootstrap-policy${var.suffix}" 91 | description = "Allow DSE instances to bootstrap" 92 | policy = data.aws_iam_policy_document.ec2-autoscaling-doc.json 93 | } 94 | 95 | resource "aws_iam_role_policy_attachment" "cassandra-bootstrap-attach" { 96 | role = aws_iam_role.cassandra-role.name 97 | policy_arn = aws_iam_policy.cassandra-bootstrap-policy.arn 98 | } 99 | -------------------------------------------------------------------------------- 
/core-components/terraform/layers/opscenter-resources/main.tf: -------------------------------------------------------------------------------- 1 | data "terraform_remote_state" "account-resources" { 2 | backend = "s3" 3 | config = { 4 | role_arn = var.role_arn 5 | bucket = var.tfstate_bucket 6 | key = "${var.account_name}/account-resources/account.tfstate" 7 | region = var.tfstate_region 8 | } 9 | } 10 | 11 | data "terraform_remote_state" "vpc-resources" { 12 | backend = "s3" 13 | config = { 14 | role_arn = var.role_arn 15 | bucket = var.tfstate_bucket 16 | key = "${var.account_name}/${var.vpc_name}/vpc-resources/vpc.tfstate" 17 | region = var.tfstate_region 18 | } 19 | } 20 | 21 | module "opscenter" { 22 | source = "../../modules/opscenter" 23 | 24 | account_id = var.account_id 25 | account_name = var.account_name 26 | vpc_id = data.terraform_remote_state.vpc-resources.outputs.vpc_id 27 | vpc_name = var.vpc_name 28 | region = var.region 29 | tfstate_bucket = var.tfstate_bucket 30 | availability_zones = var.availability_zones 31 | 32 | ssl_certificate_id = var.ssl_certificate_id 33 | 34 | subnet_id = element(data.terraform_remote_state.vpc-resources.outputs.data_subnet_ids,0) 35 | public_subnet_ids = data.terraform_remote_state.vpc-resources.outputs.ingress_subnet_ids 36 | ami_owner_id = var.ami_owner_id 37 | ami_prefix = var.ami_opscenter_prefix 38 | instance_type = var.instance_type 39 | opscenter_profile_arn = data.terraform_remote_state.account-resources.outputs.opscenter_profile_arn 40 | studio_enabled = var.studio_enabled 41 | 42 | # security groups 43 | sg_ops_nodes_to_cas = data.terraform_remote_state.vpc-resources.outputs.sg_ops_nodes_to_cas 44 | sg_bas_nodes_to_all = data.terraform_remote_state.vpc-resources.outputs.bastion_sg_id 45 | ops_additional_sg_ids = var.ops_additional_sg_ids 46 | opscenter_ingress_cidrs = var.ingress_cidrs 47 | 48 | # opscenter alert configuration 49 | alert_email_enabled = var.alert_email_enabled 50 | alert_levels = var.alert_levels 51 | alert_clusters = var.alert_clusters 52 | alert_email_smtp_host = var.alert_email_smtp_host 53 | alert_email_smtp_user = var.alert_email_smtp_user 54 | alert_email_smtp_pass = var.alert_email_smtp_pass 55 | alert_email_from_addr = var.alert_email_from_addr 56 | alert_email_to_addr = var.alert_email_to_addr 57 | alert_email_env = var.alert_email_env 58 | 59 | # this order is important: duplicate tags will be overwritten in argument order 60 | ec2_tags = merge(var.account_tags, var.vpc_tags, var.opscenter_tags) 61 | 62 | hosted_zone_name = var.hosted_zone_name 63 | private_hosted_zone = var.private_hosted_zone 64 | hosted_zone_record_prefix = var.hosted_zone_record_prefix 65 | } 66 | 67 | ################### 68 | # vars required outside terraform -> Parameter Store 69 | ################### 70 | 71 | module "parameter-store" { 72 | source = "../../modules/parameter-store" 73 | 74 | account_name = var.account_name 75 | vpc_name = var.vpc_name 76 | cluster_name = var.cluster_name 77 | 78 | # remember to update this when adding/removing parameters from the list below! 
79 | # dynamic list sizes can screw with terraform (it's a known bug) and result in the error: 80 | # aws_ssm_parameter.parameter: value of 'count' cannot be computed 81 | parameter_count = 2 82 | 83 | # list of objects (key, value, and optional 'tier' to set a param as Advanced if it's > 4096 bytes) 84 | parameters = [ 85 | { 86 | key = "opscenter_primary_private_ip", 87 | value = module.opscenter.opscenter_primary_private_ip 88 | }, 89 | { 90 | key = "opscenter_storage_cluster", 91 | value = var.opscenter_storage_cluster 92 | } 93 | ] 94 | } 95 | -------------------------------------------------------------------------------- /core-components/terraform/modules/cassandra/instances.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | cas_ami_tags = { 3 | "AmiName" = "${data.aws_ami.cassandra.name}" 4 | } 5 | } 6 | 7 | data "aws_ami" "cassandra" { 8 | most_recent = true 9 | owners = [var.ami_owner_id] 10 | filter { 11 | name = "name" 12 | values = ["${var.ami_prefix}*"] 13 | } 14 | } 15 | 16 | resource "aws_launch_configuration" "cassandra-config" { 17 | name_prefix = "${var.cluster_name}-" 18 | placement_tenancy = "default" 19 | associate_public_ip_address = false 20 | image_id = data.aws_ami.cassandra.id 21 | instance_type = var.instance_type 22 | iam_instance_profile = var.cassandra_profile_arn 23 | ebs_optimized = true 24 | user_data = data.template_cloudinit_config.cassandra.rendered 25 | 26 | root_block_device { 27 | volume_type = var.root_volume_type 28 | volume_size = var.root_volume_size 29 | iops = var.root_volume_iops 30 | delete_on_termination = true 31 | encrypted = false 32 | } 33 | 34 | security_groups = [ 35 | aws_security_group.cas-bastion-access.id, 36 | aws_security_group.cas-client-access.id, 37 | aws_security_group.cas-internode.id, 38 | var.sg_ops_nodes_to_cas, 39 | ] 40 | 41 | lifecycle { 42 | create_before_destroy = true 43 | } 44 | } 45 | 46 | resource "aws_autoscaling_group" "cassandra-seed-node" { 47 | depends_on = [aws_launch_configuration.cassandra-config] 48 | count = length(var.availability_zones) 49 | name = "asg-${var.cluster_name}-seed-${count.index}" 50 | max_size = 2 51 | min_size = 1 52 | health_check_grace_period = 600 53 | health_check_type = "EC2" 54 | desired_capacity = "1" 55 | launch_configuration = aws_launch_configuration.cassandra-config.name 56 | vpc_zone_identifier = [element(var.cluster_subnet_ids, count.index)] 57 | 58 | lifecycle { 59 | create_before_destroy = true 60 | } 61 | 62 | tag { 63 | key = "Name" 64 | value = "${var.cluster_name}-seed-${count.index}" 65 | propagate_at_launch = true 66 | } 67 | 68 | dynamic "tag" { 69 | for_each = merge(var.ec2_tags, local.required_ec2_tags, local.cas_ami_tags) 70 | 71 | content { 72 | key = tag.key 73 | value = tag.value 74 | propagate_at_launch = true 75 | } 76 | } 77 | } 78 | 79 | resource "aws_autoscaling_group" "cassandra-non-seed-node" { 80 | depends_on = [aws_launch_configuration.cassandra-config] 81 | count = (var.dse_nodes_per_az - 1) * length(var.availability_zones) 82 | name = "asg-${var.cluster_name}-non-seed-${count.index}" 83 | max_size = 2 84 | min_size = 1 85 | health_check_grace_period = 600 86 | health_check_type = "EC2" 87 | desired_capacity = "1" 88 | launch_configuration = aws_launch_configuration.cassandra-config.name 89 | vpc_zone_identifier = [element(var.cluster_subnet_ids, count.index)] 90 | 91 | lifecycle { 92 | create_before_destroy = true 93 | } 94 | 95 | tag { 96 | key = "Name" 97 | value =
"${var.cluster_name}-non-seed-${count.index}" 98 | propagate_at_launch = true 99 | } 100 | 101 | dynamic "tag" { 102 | for_each = merge(var.ec2_tags, local.required_ec2_tags, local.cas_ami_tags) 103 | 104 | content { 105 | key = tag.key 106 | value = tag.value 107 | propagate_at_launch = true 108 | } 109 | } 110 | } 111 | 112 | -------------------------------------------------------------------------------- /core-components/scripts/ssh/init-ansible-key.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ROOT=$(git rev-parse --show-toplevel) 4 | CONFIGS="$ROOT/configurations" 5 | 6 | usage() { 7 | echo "Usage:" 8 | echo " init-ansible-key.sh" 9 | echo " -a : [Required] account name" 10 | echo " -v : [Required] vpc name" 11 | echo " -c : [Required] cluster name" 12 | echo " -u : [Optional] path to existing user public key (.pub)" 13 | echo " -n : [Optional] path to existing ansible public key (.pub)" 14 | echo " -f : [Optional] force re-creation of user-keys file" 15 | } 16 | 17 | parse() { 18 | grep ^$1 ${variables_path} | awk {'print $NF'} | tr -d '"' 19 | } 20 | 21 | FORCE=false 22 | 23 | while getopts ":a:v:c:u:n:f" opt; do 24 | case "${opt}" in 25 | a) 26 | ACCOUNT_NAME=${OPTARG};; 27 | v) 28 | VPC_NAME=${OPTARG};; 29 | c) 30 | CLUSTER_NAME=${OPTARG};; 31 | u) 32 | USER_KEY=${OPTARG};; 33 | n) 34 | ANSIBLE_KEY=${OPTARG};; 35 | f) 36 | FORCE=true;; 37 | *) 38 | usage; exit 1 ;; 39 | esac 40 | done 41 | shift $((OPTIND -1)) 42 | 43 | if [[ -z "${ACCOUNT_NAME// }" ]]; then usage; exit 1; fi 44 | if [[ -z "${VPC_NAME// }" ]]; then usage; exit 1; fi 45 | if [[ -z "${CLUSTER_NAME// }" ]]; then usage; exit 1; fi 46 | 47 | case "${CLUSTER_NAME}" in 48 | "account-resources") 49 | config_path="${ACCOUNT_NAME}/account-resources" 50 | exit 0;; 51 | "vpc-resources") 52 | config_path="${ACCOUNT_NAME}/${VPC_NAME}/vpc-resources";; 53 | "opscenter-resources") 54 | config_path="${ACCOUNT_NAME}/${VPC_NAME}/opscenter-resources";; 55 | *) 56 | config_path="${ACCOUNT_NAME}/${VPC_NAME}/${CLUSTER_NAME}";; 57 | esac 58 | 59 | KEYFILE_TPL=${CONFIGS}/${ACCOUNT_NAME}/user-keys.yaml.tpl 60 | KEYFILE=${CONFIGS}/${config_path}/user-keys.yaml 61 | 62 | if [[ ${FORCE} = true ]]; then 63 | \rm -f ${KEYFILE} 64 | fi 65 | 66 | if [[ -f ${KEYFILE} ]]; then 67 | echo "SSH key file already present at: $(realpath --relative-to=${ROOT} ${KEYFILE})" 68 | exit 0 69 | fi 70 | 71 | variables_path=${CONFIGS}/${ACCOUNT_NAME}/variables.yaml 72 | 73 | if [[ -z "${USER_KEY// }" ]]; then 74 | USER_KEY=$(parse TERRAFORM_SSH_KEY_PATH) 75 | fi 76 | 77 | if [[ -z "${ANSIBLE_KEY// }" ]]; then 78 | ANSIBLE_KEY=$(parse TERRAFORM_ANSIBLE_KEY_PATH) 79 | fi 80 | 81 | eval ANSIBLE_KEY=${ANSIBLE_KEY} 82 | eval USER_KEY=${USER_KEY} 83 | 84 | mkdir -p $(dirname ${ANSIBLE_KEY}) 85 | mkdir -p $(dirname ${USER_KEY}) 86 | 87 | # verify ansible key exists, or create one 88 | if [[ -f "${ANSIBLE_KEY//.pub/}" ]]; then 89 | echo "Ansible SSH key exists at: ${ANSIBLE_KEY}" 90 | if [[ ! -f "${ANSIBLE_KEY}" ]] || ! 
ssh-keygen -l -f ${ANSIBLE_KEY} > /dev/null; then 91 | echo "No public key file found; generating one at ${ANSIBLE_KEY}" 92 | ssh-keygen -y -f ${ANSIBLE_KEY//.pub/} > ${ANSIBLE_KEY} 93 | fi 94 | else 95 | PK="${ANSIBLE_KEY//.pub/}" 96 | echo "Creating ansible SSH key at: ${PK}" 97 | ssh-keygen -t rsa -b 2048 -N "" -V "always:forever" -C ansible -f ${PK} 98 | fi 99 | 100 | # if user key exists, proceed with both user & ansible keys; otherwise, require the user to create a personal ssh key 101 | if [[ -f ${USER_KEY} ]] && ssh-keygen -l -f ${USER_KEY} > /dev/null; then 102 | echo "User SSH key exists at: ${USER_KEY}" 103 | 104 | # write out both keys 105 | sed -e "s?##ANSIBLE_PUB_KEY##?$(cat ${ANSIBLE_KEY})?g" \ 106 | -e "s?##PERSONAL_PUB_KEY##?$(cat ${USER_KEY})?g" \ 107 | ${KEYFILE_TPL} > ${KEYFILE} 108 | 109 | echo "New key file written at: $(realpath --relative-to=${ROOT} ${KEYFILE})" 110 | else 111 | echo "User SSH key does not exist at: ${USER_KEY}" 112 | echo "Please generate an SSH key and configure the 'TERRAFORM_SSH_KEY_PATH' param in your variables.yaml file!" 113 | exit 1 114 | fi 115 | -------------------------------------------------------------------------------- /core-components/terraform/modules/vpc-create/vpc.tf: -------------------------------------------------------------------------------- 1 | ############# 2 | # The resources in this module will be created if terraform_managed_vpc is 3 | # set to true and the required vpc/cidr variables are provided. 4 | ############# 5 | 6 | resource "aws_vpc" "dse-vpc" { 7 | cidr_block = var.vpc_cidr 8 | instance_tenancy = "default" 9 | tags = { 10 | Name = var.vpc_name 11 | } 12 | } 13 | 14 | resource "aws_internet_gateway" "dse-vpc" { 15 | vpc_id = aws_vpc.dse-vpc.id 16 | tags = { 17 | Name = "${var.vpc_name}-igw" 18 | } 19 | } 20 | 21 | ############# 22 | # subnets (should be 3 ingress, 3 data) 23 | ############# 24 | 25 | resource "aws_subnet" "dse-vpc-ingress" { 26 | count = length(var.ingress_subnets) 27 | vpc_id = aws_vpc.dse-vpc.id 28 | cidr_block = var.ingress_subnets[count.index] 29 | availability_zone = "${var.region}${var.azs[count.index]}" 30 | tags = { 31 | Name = "${var.ingress_subnet_tag_prefix}-subnet-${count.index}" 32 | } 33 | } 34 | 35 | resource "aws_subnet" "dse-vpc-data" { 36 | count = length(var.data_subnets) 37 | vpc_id = aws_vpc.dse-vpc.id 38 | cidr_block = var.data_subnets[count.index] 39 | availability_zone = "${var.region}${var.azs[count.index]}" 40 | tags = { 41 | Name = "${var.data_subnet_tag_prefix}-subnet-${count.index}" 42 | } 43 | } 44 | 45 | ############# 46 | # nat gateway (1 in each ingress subnet) 47 | ############# 48 | 49 | resource "aws_eip" "dse-vpc" { 50 | count = length(var.ingress_subnets) 51 | vpc = true 52 | tags = { 53 | Name = "${var.vpc_name}-natgw-eip-${count.index}" 54 | } 55 | } 56 | 57 | resource "aws_nat_gateway" "dse-vpc" { 58 | count = length(var.ingress_subnets) 59 | allocation_id = aws_eip.dse-vpc.*.id[count.index] 60 | subnet_id = aws_subnet.dse-vpc-ingress.*.id[count.index] 61 | tags = { 62 | Name = "${var.vpc_name}-natgw-${count.index}" 63 | } 64 | } 65 | 66 | ############# 67 | # route tables for ingress subnets 68 | ############# 69 | 70 | resource "aws_route_table" "ingress-rtb" { 71 | count = length(var.ingress_subnets) 72 | vpc_id = aws_vpc.dse-vpc.id 73 | tags = { 74 | Name = "${var.ingress_subnet_tag_prefix}-rtb-${count.index}" 75 | } 76 | } 77 | 78 | # route to IGW for each ingress subnet 79 | resource "aws_route" "ingress-igw" { 80 | count =
length(var.ingress_subnets) 81 | route_table_id = aws_route_table.ingress-rtb.*.id[count.index] 82 | destination_cidr_block = "0.0.0.0/0" 83 | gateway_id = aws_internet_gateway.dse-vpc.id 84 | timeouts { 85 | create = "5m" 86 | } 87 | } 88 | 89 | resource "aws_route_table_association" "ingress-rtb-assoc" { 90 | count = length(var.ingress_subnets) 91 | subnet_id = aws_subnet.dse-vpc-ingress.*.id[count.index] 92 | route_table_id = aws_route_table.ingress-rtb.*.id[count.index] 93 | } 94 | 95 | ############# 96 | # route tables for data subnets 97 | ############# 98 | 99 | resource "aws_route_table" "data-rtb" { 100 | count = length(var.data_subnets) 101 | vpc_id = aws_vpc.dse-vpc.id 102 | tags = { 103 | Name = "${var.data_subnet_tag_prefix}-rtb-${count.index}" 104 | } 105 | } 106 | 107 | # route to NATGW for each data subnet 108 | resource "aws_route" "data-natgw" { 109 | count = length(var.data_subnets) 110 | route_table_id = aws_route_table.data-rtb.*.id[count.index] 111 | destination_cidr_block = "0.0.0.0/0" 112 | nat_gateway_id = aws_nat_gateway.dse-vpc.*.id[count.index] 113 | timeouts { 114 | create = "5m" 115 | } 116 | } 117 | 118 | resource "aws_route_table_association" "data-rtb-assoc" { 119 | count = length(var.data_subnets) 120 | subnet_id = aws_subnet.dse-vpc-data.*.id[count.index] 121 | route_table_id = aws_route_table.data-rtb.*.id[count.index] 122 | } 123 | -------------------------------------------------------------------------------- /core-components/terraform/modules/opscenter/instances.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | opscenter_eni_tags = { 3 | "Name" = "opscenter-primary" 4 | } 5 | } 6 | 7 | data "template_file" "opscenter-init" { 8 | template = file("${path.module}/scripts/opscenter-init.tpl") 9 | 10 | vars = { 11 | bucket = var.tfstate_bucket 12 | account = var.account_id 13 | region = var.region 14 | studio_enabled = var.studio_enabled 15 | account_name = var.account_name 16 | vpc_name = var.vpc_name 17 | 18 | # opscenter alert configuration 19 | alert_email_enabled = var.alert_email_enabled 20 | alert_levels = var.alert_levels 21 | alert_clusters = var.alert_clusters 22 | alert_email_smtp_host = var.alert_email_smtp_host 23 | alert_email_smtp_user = var.alert_email_smtp_user 24 | alert_email_smtp_pass = var.alert_email_smtp_pass 25 | alert_email_from_addr = var.alert_email_from_addr 26 | alert_email_to_addr = var.alert_email_to_addr 27 | alert_email_env = var.alert_email_env 28 | } 29 | } 30 | 31 | data "template_cloudinit_config" "opscenter" { 32 | gzip = false 33 | base64_encode = false 34 | 35 | part { 36 | content_type = "text/cloud-config" 37 | content = file("${path.module}/../../../../configurations/${var.account_name}/${var.vpc_name}/opscenter-resources/user-keys.yaml") 38 | } 39 | 40 | part { 41 | filename = "opscenter-init.sh" 42 | content_type = "text/x-shellscript" 43 | content = data.template_file.opscenter-init.rendered 44 | } 45 | } 46 | 47 | resource "aws_network_interface" "opscenter-eni" { 48 | subnet_id = var.subnet_id 49 | 50 | security_groups = [ 51 | aws_security_group.opscenter-bastion-access.id, # SSH 52 | aws_security_group.ops_elb_to_nodes.id, 53 | var.sg_ops_nodes_to_cas, 54 | aws_security_group.ops_addl_inbound.id 55 | ] 56 | 57 | tags = merge(var.ec2_tags, local.required_ec2_tags, local.opscenter_eni_tags) 58 | } 59 | 60 | data "aws_subnet" "selected" { 61 | id = "${var.subnet_id}" 62 | } 63 | 64 | resource "aws_launch_configuration" "opscenter-config" { 65 | name_prefix = 
"opscenter-" 66 | placement_tenancy = "default" 67 | associate_public_ip_address = false 68 | image_id = data.aws_ami.opscenter.id 69 | instance_type = var.instance_type 70 | 71 | security_groups = [ 72 | aws_security_group.opscenter-bastion-access.id, # SSH 73 | aws_security_group.ops_elb_to_nodes.id, 74 | var.sg_ops_nodes_to_cas, 75 | ] 76 | 77 | ebs_optimized = true 78 | iam_instance_profile = var.opscenter_profile_arn 79 | user_data = data.template_cloudinit_config.opscenter.rendered 80 | 81 | lifecycle { 82 | create_before_destroy = true 83 | } 84 | } 85 | 86 | resource "aws_autoscaling_group" "opscenter-asg" { 87 | depends_on = [aws_launch_configuration.opscenter-config] 88 | name = "asg-opscenter" 89 | max_size = 2 90 | min_size = 1 91 | health_check_grace_period = 600 92 | health_check_type = "EC2" 93 | desired_capacity = "1" 94 | launch_configuration = aws_launch_configuration.opscenter-config.name 95 | vpc_zone_identifier = [var.subnet_id] 96 | target_group_arns = [aws_lb_target_group.opscenter-targets.id, aws_lb_target_group.studio-targets.id] 97 | 98 | lifecycle { 99 | create_before_destroy = true 100 | } 101 | 102 | tag { 103 | key = "Name" 104 | value = "opscenter-primary" 105 | propagate_at_launch = true 106 | } 107 | 108 | tag { 109 | key = "AmiName" 110 | value = data.aws_ami.opscenter.name 111 | propagate_at_launch = true 112 | } 113 | 114 | dynamic "tag" { 115 | for_each = merge(local.required_ec2_tags, var.ec2_tags) 116 | 117 | content { 118 | key = tag.key 119 | value = tag.value 120 | propagate_at_launch = true 121 | } 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /create-configuration.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ROOT=$(git rev-parse --show-toplevel) 4 | CORE="$ROOT/core-components" 5 | CONFIGS="$ROOT/configurations" 6 | 7 | usage() { 8 | echo "Usage:" 9 | echo " create-configuration.sh" 10 | echo " -a : [Required] account name (e.g. 'my-dse-account')" 11 | echo " -v : [Required] vpc name (e.g. 'primary-vpc' or 'west-vpc')" 12 | echo " -c : [Required] cluster name (e.g. 'dse-cluster' or 'storage-cluster')" 13 | echo " -x : if specified, allow terraform to manage my vpc (default: false)" 14 | } 15 | 16 | TF_MANAGED_VPC=false 17 | 18 | while getopts ":a:c:v:x" opt; do 19 | case "${opt}" in 20 | a) 21 | ACCOUNT_NAME=${OPTARG};; 22 | c) 23 | CLUSTER_NAME=${OPTARG};; 24 | v) 25 | VPC_NAME=${OPTARG};; 26 | x) 27 | TF_MANAGED_VPC=true;; 28 | *) 29 | usage; exit 1;; 30 | esac 31 | done 32 | shift $((OPTIND -1)) 33 | 34 | if [[ -z "${ACCOUNT_NAME// }" ]]; then usage; exit 1; fi 35 | if [[ -z "${VPC_NAME// }" ]]; then usage; exit 1; fi 36 | if [[ -z "${CLUSTER_NAME// }" ]]; then usage; exit 1; fi 37 | 38 | # validate name formats 39 | if [[ ! "${ACCOUNT_NAME}${VPC_NAME}${CLUSTER_NAME}" =~ ^[a-zA-Z]{1}[a-zA-Z0-9\-]+$ ]]; then 40 | echo "ERROR: Account name (-a), VPC name (-v), and Cluster name (-c) must start with a letter, and contain only alphanumeric characters (and dashes)." 
41 | exit 1 42 | fi 43 | 44 | # default dir paths 45 | DEFAULT_ACCOUNT_DIR=${CONFIGS}/default-account 46 | DEFAULT_VPC_DIR=${DEFAULT_ACCOUNT_DIR}/default-vpc 47 | DEFAULT_CLUSTER_DIR=${DEFAULT_VPC_DIR}/default-cluster 48 | 49 | # new dir paths 50 | NEW_ACCOUNT_DIR=${CONFIGS}/${ACCOUNT_NAME} 51 | NEW_VPC_DIR=${NEW_ACCOUNT_DIR}/${VPC_NAME} 52 | NEW_CLUSTER_DIR=${NEW_VPC_DIR}/${CLUSTER_NAME} 53 | 54 | # first, make sure this doesn't already exist 55 | if ls -d ${NEW_CLUSTER_DIR} 2>/dev/null; then 56 | echo "ERROR: Cluster config '${ACCOUNT_NAME}/${VPC_NAME}/${CLUSTER_NAME}' already exists." 57 | exit 1 58 | fi 59 | 60 | # create full account dir if needed 61 | if ! ls -d ${NEW_ACCOUNT_DIR} 2>/dev/null; then 62 | echo "Creating new account configurations..." 63 | mkdir ${NEW_ACCOUNT_DIR} 64 | cp -R ${DEFAULT_ACCOUNT_DIR}/ ${NEW_ACCOUNT_DIR} 65 | \rm -rf ${NEW_ACCOUNT_DIR}/default-vpc 66 | fi 67 | 68 | # create vpc dir if needed 69 | if ! ls -d ${NEW_VPC_DIR} 2>/dev/null; then 70 | echo "Creating new vpc configurations..." 71 | mkdir ${NEW_VPC_DIR} 72 | cp -R ${DEFAULT_VPC_DIR}/ ${NEW_VPC_DIR} 73 | \rm -rf ${NEW_VPC_DIR}/default-cluster 74 | 75 | # copied both "vpc-existing" and "vpc-new" tfvars; get rid of the one we don't need 76 | if [[ ${TF_MANAGED_VPC} = false ]]; then 77 | \rm -f ${NEW_VPC_DIR}/vpc-resources/vpc-new.tfvars 78 | mv ${NEW_VPC_DIR}/vpc-resources/vpc-existing.tfvars ${NEW_VPC_DIR}/vpc-resources/vpc.tfvars 79 | echo "Including terraform variables suitable for an existing VPC." 80 | echo " - Copied from: $(realpath --relative-to=. ${DEFAULT_VPC_DIR}/vpc-resources/vpc-existing.tfvars)" 81 | echo " - Make sure you've run the validate-vpc.sh script on this VPC!" 82 | echo " - If this isn't what you wanted, delete the cluster config dir and run this script again with the -x option." 83 | else 84 | \rm -f ${NEW_VPC_DIR}/vpc-resources/vpc-existing.tfvars 85 | mv ${NEW_VPC_DIR}/vpc-resources/vpc-new.tfvars ${NEW_VPC_DIR}/vpc-resources/vpc.tfvars 86 | echo "Including terraform variables suitable for a new (terraform-managed) VPC." 87 | echo " - Copied from: $(realpath --relative-to=. ${DEFAULT_VPC_DIR}/vpc-resources/vpc-new.tfvars)" 88 | echo " - If this isn't what you wanted, delete the cluster config dir and run this script again without the -x option." 89 | fi 90 | fi 91 | 92 | # create cluster dir 93 | echo "Creating new cluster configurations..." 94 | mkdir ${NEW_CLUSTER_DIR} 95 | cp -R ${DEFAULT_CLUSTER_DIR}/ ${NEW_CLUSTER_DIR} 96 | 97 | # info log 98 | echo "Created new configuration profile: cluster '${CLUSTER_NAME}' in account '${ACCOUNT_NAME}' and vpc '${VPC_NAME}'" 99 | echo " - Profile is in dir: $(realpath --relative-to=. ${NEW_CLUSTER_DIR})" 100 | echo " - Customize your configs as needed; fill in any variables left blank (look for \"<<< XYZ_HERE >>>\")." 101 | echo " $ find $(realpath --relative-to=. ${NEW_ACCOUNT_DIR}) -type f | xargs grep \"<<< .* >>>\"" 102 | echo " - Remember to commit to a git branch somewhere!" 103 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DSE Pronto 2 | 3 | An automation suite for deploying and managing [DataStax Cassandra](https://docs.datastax.com/en/landing_page/doc/landing_page/current.html) 4 | clusters in AWS. 5 | 6 | [![pronto](./docs/images/pronto-logo.png)](https://github.intuit.com/pages/open-source/logo-generator/) 7 | 8 | This repository collects Intuit's DSE automation.
We've taken everything we've learned about managing Cassandra in AWS and 9 | condensed it into a single package for others to leverage. It uses standard tools 10 | ([Packer](https://packer.io/docs/index.html), [Terraform](https://www.terraform.io/docs/index.html), and 11 | [Ansible](https://docs.ansible.com/ansible/latest/index.html)) and can be run from a laptop. That said, we have a hard 12 | preference for automated deployments using a CI/CD orchestrator along the lines of [Jenkins](https://jenkins.io/), 13 | [CodeBuild](https://aws.amazon.com/codebuild/)/[CodeDeploy](https://aws.amazon.com/codedeploy/), 14 | [Bamboo](https://www.atlassian.com/software/bamboo), [GitLab](https://about.gitlab.com/), or [Spinnaker](https://www.spinnaker.io/). 15 | 16 | The tools in this repo can take you from an empty AWS account to a fully-functional DSE cluster, but you should have an 17 | understanding of AWS resources, Cassandra cluster management, and at least a passing familiarity with Packer, Terraform, 18 | and Ansible. 19 | 20 | **This is not a "managed" Cassandra solution.** If you need one of those, [AWS has you covered](https://aws.amazon.com/keyspaces/). 21 | If you need a fully managed _DataStax_ solution including OpsCenter and other DSE features, 22 | [DataStax Astra](https://www.datastax.com/products/datastax-astra) is now officially a thing. 23 | 24 | On the other hand, if what you're looking for is an open source framework to help you _manage your own_ DSE cluster... 25 | then welcome to DSE Pronto! 26 | 27 | ## Notes and Features 28 | 29 | * Support for every phase of deployment, from an empty account to production: 30 | * Baking an AMI 31 | * Deploying a new VPC 32 | * Creating account-wide resources (like IAM roles) and VPC-wide resources (like a bastion host for SSH) 33 | * Launching a cluster 34 | * Runtime operations 35 | * Restacking and resizing a cluster 36 | * Bringing nodes up and down 37 | * Configuring OpsCenter 38 | * Including a number of predefined alerts and best practices 39 | * Transparent restacking operations, to stay in compliance with the latest baseline images 40 | * Data stored on persistent EBS volumes, with a static EIP for a predictable address; both are located (using EC2 tags) and reattached 41 | during restack 42 | * DSE 5 & DSE 6 both supported, along with DSE OpsCenter & DSE Studio 43 | * Latest Amazon Linux 2.0 & Python 3 in use 44 | * [More FAQs and details here](docs/MORE_DETAILS.md) 45 | 46 | ## Tools Required 47 | 48 | * **On macOS:** `brew install awscli coreutils packer ansible tfenv jq && tfenv install 0.12.24` 49 | * The scripts in this repo require a minimum of `aws-cli/1.16.280` and `botocore/1.13.16`. Type `aws --version` to verify. 50 | * Everything has also been tested with `aws-cli/2.0.0` and associated prerequisites. 51 | * Some scripts also require Python 3 ([installation](https://docs.python-guide.org/starting/install3/osx/)). 52 | * **In Docker:** the included [Dockerfile](./Dockerfile) will produce a suitable Docker image, including all tools needed. 53 | * Elsewhere: 54 | * Install Packer: https://www.packer.io/intro/getting-started/install.html 55 | * Install Terraform (0.12.24): https://www.terraform.io/intro/getting-started/install.html 56 | * Install Ansible: https://docs.ansible.com/ansible/latest/installation_guide/intro_installation.html 57 | * **Why Terraform 0.12.24?** Go [here](docs/MORE_DETAILS.md) to find out! 58 | 59 | ## 1.
Initial Setup 60 | 61 | There's a bunch of **one-time** setup you'll need to do before you start baking AMIs or deploying clusters. 62 | 63 | Please follow [all of the steps here](docs/1.INITIAL_SETUP.md) before proceeding. 64 | 65 | ## 2. Baking AMIs 66 | 67 | Instructions for baking AWS images with Packer are [here](docs/2.PACKER.md). 68 | 69 | ## 3. Deploying 70 | 71 | Instructions for deploying AWS resources with Terraform are [here](docs/3.TERRAFORM.md). 72 | 73 | ## 4. Runtime Operations 74 | 75 | Instructions for running playbooks with Ansible are [here](docs/4.ANSIBLE.md). 76 | 77 | ## 5. OpsCenter 78 | 79 | Instructions for deploying and managing an OpsCenter node are [here](docs/OPSCENTER.md). 80 | 81 | ## 6. Debugging 82 | 83 | If you're having trouble getting anything to work, go [here](docs/MORE_DETAILS.md) for tips on debugging! 84 | 85 | ## 7. Cleaning Up 86 | 87 | Instructions for deleting everything deployed by this repo are [here](docs/CLEANUP.md). 88 | 89 | ### Links 90 | 91 | * [Contributing](.github/CONTRIBUTING.md) 92 | * [License](LICENSE) 93 | 94 | --- 95 | Copyright 2020 Intuit Inc. 96 | -------------------------------------------------------------------------------- /core-components/scripts/ssh/build-ssh-config.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ROOT=$(git rev-parse --show-toplevel) 4 | CORE="$ROOT/core-components" 5 | CONFIGS="$ROOT/configurations" 6 | 7 | usage() { 8 | echo "Usage:" 9 | echo " build-ssh-config.sh" 10 | echo " -a : [Required] account name" 11 | echo " -v : [Required] vpc name" 12 | echo " -c : [Required] cluster name" 13 | } 14 | 15 | parse() { 16 | grep ^$1 ${variables_path} | awk {'print $NF'} | tr -d '"' 17 | } 18 | 19 | get_tfvar() { 20 | grep "^${1}" ${terraform_var_file} | tr -d '" ' | awk -F'=' {'print $NF'} 21 | } 22 | 23 | while getopts ":a:v:c:" opt; do 24 | case "${opt}" in 25 | a) 26 | account_name=${OPTARG};; 27 | v) 28 | vpc_name=${OPTARG};; 29 | c) 30 | cluster_name=${OPTARG};; 31 | *) 32 | usage; exit 1;; 33 | esac 34 | done 35 | shift "$((OPTIND-1))" 36 | 37 | if [[ -z "${account_name// }" ]]; then usage; exit 1; fi 38 | if [[ -z "${vpc_name// }" ]]; then usage; exit 1; fi 39 | if [[ -z "${cluster_name// }" ]]; then usage; exit 1; fi 40 | 41 | variables_path=${CONFIGS}/${account_name}/variables.yaml 42 | 43 | terraform_var_file="${CONFIGS}/${account_name}/${vpc_name}/vpc-resources/vpc.tfvars" 44 | PROFILE="$(parse TERRAFORM_AWS_PROFILE)" 45 | REGION="$(get_tfvar region)" 46 | if [ -z ${REGION} ]; then 47 | REGION="$(parse TERRAFORM_AWS_REGION)" 48 | fi 49 | AWS_CMD="aws --profile ${PROFILE} --region ${REGION}" 50 | 51 | TARGET_ACCOUNT="$(parse TERRAFORM_ACCOUNT_ID)" 52 | ANSIBLE_KEY_PATH="$(parse TERRAFORM_ANSIBLE_KEY_PATH)" 53 | 54 | if [[ -z "${ANSIBLE_KEY_PATH// }" ]]; then 55 | echo "ANSIBLE_KEY_PATH must be specified in variables.yaml in order to run this script!" 56 | exit 1 57 | fi 58 | 59 | eval ANSIBLE_KEY_PATH=${ANSIBLE_KEY_PATH} 60 | 61 | echo "-------------------" 62 | echo "Ansible public key:" 63 | cat ${ANSIBLE_KEY_PATH} 64 | echo "-------------------" 65 | 66 | # Check credentials before starting 67 | ${AWS_CMD} sts get-caller-identity > /dev/null 68 | if [[ $? -ne 0 ]]; then 69 | echo "Local AWS credentials are not valid (profile: ${PROFILE})" 70 | exit 1 71 | fi 72 | 73 | echo "Generating ssh_config for cluster '${cluster_name}'..." 
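# Note: the lookups below assume the vpc-resources layer has already been
# applied, so the vpc_id parameter exists in Parameter Store. A quick manual
# spot-check (a sketch; the account/vpc names here are placeholders):
#
#   aws ssm get-parameters \
#     --names "/dse/my-account/my-vpc/vpc-resources/vpc_id" \
#     --query "Parameters[0].Value" --output text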
74 | 75 | # get the VPC ID for the cluster from Parameter Store 76 | VPC_ID=$(${AWS_CMD} ssm get-parameters \ 77 | --names "/dse/${account_name}/${vpc_name}/vpc-resources/vpc_id" \ 78 | --query "Parameters[0].Value" --output text) 79 | 80 | echo " - VPC: ${VPC_ID}" 81 | 82 | # get the ENI IP for each seed node 83 | SEED_IP=$(${AWS_CMD} ec2 describe-network-interfaces \ 84 | --filters "Name=tag:Name,Values=${cluster_name}-seed*" \ 85 | --query 'NetworkInterfaces[].PrivateIpAddress' \ 86 | --output text | awk '$1=$1') 87 | 88 | echo " - Seeds: ${SEED_IP}" 89 | 90 | # get the ENI IP for each non-seed node 91 | NON_SEED_IP=$(${AWS_CMD} ec2 describe-network-interfaces \ 92 | --filters "Name=tag:Name,Values=${cluster_name}-non-seed*" \ 93 | --query 'NetworkInterfaces[].PrivateIpAddress' \ 94 | --output text | awk '$1=$1') 95 | 96 | echo " - Non-seeds: ${NON_SEED_IP}" 97 | 98 | if [[ "${SEED_IP}" == "" ]]; then 99 | if [[ ${cluster_name} != "opscenter-resources" ]]; then 100 | # opscenter won't have a seed IP; otherwise, require it 101 | echo "No seed node IPs found, exiting." 102 | exit 1 103 | fi 104 | fi 105 | 106 | sc=$(echo ${SEED_IP} | wc -w | tr -d ' ') 107 | nsc=$(echo ${NON_SEED_IP} | wc -w | tr -d ' ') 108 | tc=$(echo ${SEED_IP} ${NON_SEED_IP} | wc -w | tr -d ' ') 109 | echo " - Found ${sc} seed nodes, ${nsc} non-seed nodes (${tc} total)" 110 | 111 | # get bastion LB dns name 112 | BASTION_DNS=$(${AWS_CMD} elbv2 describe-load-balancers \ 113 | --query "LoadBalancers[?VpcId=='${VPC_ID}' && starts_with(LoadBalancerName, 'bast-')]|[0].DNSName" \ 114 | --output text) 115 | 116 | if [[ "${BASTION_DNS}" == "" ]]; then 117 | echo "No bastion LB found, exiting." 118 | exit 1 119 | fi 120 | 121 | echo " - Found bastion LB at: ${BASTION_DNS}" 122 | 123 | # output ansible ssh_config 124 | CFG_PATH=${CORE}/ansible/ssh_config 125 | 126 | sed -e "s?##SSH_KEY_PATH##?${ANSIBLE_KEY_PATH//.pub/}?g" \ 127 | -e "s?##BASTION_DNS##?${BASTION_DNS}?g" \ 128 | -e "s?##SEED_IP##?10.* ${SEED_IP} ${NON_SEED_IP}?g" \ 129 | -e "s?##USER##?ansible?g" \ 130 | ${CORE}/scripts/ssh/ssh_config.tpl > ${CFG_PATH} 131 | 132 | echo " - Ansible SSH config output at: $(realpath --relative-to=. ${CFG_PATH})" 133 | 134 | # output local user's ssh_config 135 | USER_KEY_PATH="$(parse TERRAFORM_SSH_KEY_PATH)" 136 | CFG_PATH=${ROOT}/ssh_config 137 | 138 | sed -e "s?##SSH_KEY_PATH##?${USER_KEY_PATH//.pub/}?g" \ 139 | -e "s?##BASTION_DNS##?${BASTION_DNS}?g" \ 140 | -e "s?##SEED_IP##?10.* ${SEED_IP} ${NON_SEED_IP}?g" \ 141 | -e "s?##USER##?ec2-user?g" \ 142 | ${CORE}/scripts/ssh/ssh_config.tpl > ${CFG_PATH} 143 | 144 | echo " - Personal (your key) SSH config output at: $(realpath --relative-to=. 
${CFG_PATH})" 145 | -------------------------------------------------------------------------------- /core-components/packer/opscenter/scripts/ops_ec2_mgr.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | ## 4 | # Objective: Helper class for EBS, ENI, and ASG managers 5 | 6 | import botocore 7 | import pprint 8 | from awsretry import AWSRetry 9 | 10 | pp = pprint.PrettyPrinter(indent=4) 11 | 12 | 13 | class OpsEC2Manager: 14 | _session = None 15 | _client = None 16 | _resource = None 17 | _pp = None 18 | 19 | def __init__(self, session, client, resource): 20 | self._session = session 21 | self._client = client 22 | self._resource = resource 23 | self._pp = pprint.PrettyPrinter(indent=4) 24 | 25 | @AWSRetry.backoff(tries=20, delay=2, backoff=1.5) 26 | def get_instances(self, filters): 27 | try: 28 | pp.pprint(filters) 29 | response = self._client.describe_instances(Filters=filters) 30 | return response 31 | except botocore.exceptions.ClientError as e: 32 | raise e 33 | 34 | @AWSRetry.backoff(tries=20, delay=2, backoff=1.5) 35 | def get_volumes_by_id(self, vol_id): 36 | try: 37 | response = self._client.describe_volumes(VolumeIds=[vol_id]) 38 | if len(response["Volumes"]) == 1: 39 | return response["Volumes"][0] 40 | return response 41 | except botocore.exceptions.ClientError as e: 42 | raise e 43 | 44 | def get_host_names_from_ips(self, node_list): 45 | node_names = [] 46 | filters = [{'Name': "network-interface.addresses.private-ip-address", "Values": node_list}] 47 | response = self.get_instances(filters) 48 | 49 | for reservation in (response["Reservations"]): 50 | for instance in reservation["Instances"]: 51 | inst = self._resource.Instance(instance["InstanceId"]) 52 | for tag in inst.tags: 53 | if tag["Key"] == "Name": 54 | if not tag["Value"] in node_names: 55 | node_names.append(tag["Value"]) 56 | return node_names 57 | 58 | def get_hosts(self, node_list, match_by_name): 59 | """ 60 | Inputs 61 | :param node_list : list of private IPs to filter on (an empty list 62 | matches every running opscenter node) 63 | :param match_by_name : if True, resolve the IPs to instance Name tags 64 | and filter on those instead of matching the IPs directly 65 | :return: list of EC2s which are in the running state 66 | """ 67 | hosts = [] 68 | filters = [ 69 | {'Name': "tag:pool", "Values": ["opscenter"]}, 70 | {'Name': "instance-state-name", "Values": ["running"]} 71 | ] 72 | 73 | if match_by_name: 74 | node_names = self.get_host_names_from_ips(node_list) 75 | 76 | print("---- Node names: ----------") 77 | self._pp.pprint(node_names) 78 | 79 | if len(node_names): 80 | filters.append({'Name': "tag:Name", "Values": node_names}) 81 | else: 82 | # match by ip 83 | if len(node_list): 84 | filters.append({'Name': "network-interface.addresses.private-ip-address", "Values": node_list}) 85 | 86 | response = self.get_instances(filters) 87 | for reservation in (response["Reservations"]): 88 | for instance in reservation["Instances"]: 89 | inst = self._resource.Instance(instance["InstanceId"]) 90 | inst_details = {"id": instance["InstanceId"], "eth1": None, "az": inst.placement["AvailabilityZone"]} 91 | 92 | for t in inst.tags: 93 | if t["Key"] == "Name": 94 | inst_details["name"] = t["Value"] 95 | if t["Key"] == "active_interface": 96 | inst_details["active_interface"] = t["Value"] 97 | if t["Key"] == "aws:autoscaling:groupName": 98 | inst_details["asg"] = t["Value"] 99 | 100 | for a in inst.network_interfaces_attribute: 101 | if a["Attachment"]["DeviceIndex"] == 0: 102 | inst_details["eth0"] = a["PrivateIpAddress"] 103 | if a["Attachment"]["DeviceIndex"] ==
1: 104 | inst_details["eth1"] = a["PrivateIpAddress"] 105 | 106 | block_devices = [] 107 | for block_device in inst.block_device_mappings: 108 | volume_id = block_device["Ebs"]["VolumeId"] 109 | volume = self.get_volumes_by_id(volume_id) 110 | device_details = {"name": block_device["DeviceName"], "volume_id": volume_id, 111 | "volume_type": volume["VolumeType"], "volume_size": volume["Size"]} 112 | block_devices.append(device_details) 113 | 114 | inst_details["block_devices"] = block_devices 115 | hosts.append(inst_details) 116 | 117 | return hosts 118 | -------------------------------------------------------------------------------- /core-components/ansible/scripts/refresh_inventory.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from optparse import OptionParser 4 | import sys 5 | import os 6 | from os.path import * 7 | import boto3 8 | import botocore 9 | import pprint 10 | 11 | import yaml 12 | from yaml import load, dump 13 | 14 | try: 15 | from yaml import CLoader as Loader, CDumper as Dumper 16 | except ImportError: 17 | from yaml import Loader, Dumper 18 | 19 | pp = pprint.PrettyPrinter(indent=4) 20 | 21 | 22 | class casHostManager: 23 | """Class that manages the ansible-hosts for Cassandra nodes""" 24 | _session = None 25 | _client = None 26 | _resource = None 27 | 28 | def __init__(self, profile,region): 29 | self._session = boto3.Session(profile_name=profile,region_name=region) 30 | self._client = self._session.client('ec2') 31 | self._resource = self._session.resource('ec2') 32 | self._pp = pprint.PrettyPrinter(indent=4) 33 | 34 | def get_Hosts(self, account, cluster): 35 | """ 36 | Inputs 37 | :param account : used to query the ENIs with account tag 38 | :param cluster : used to query the ENIs with cluster tag 39 | :return: list of ENIs which are in available state 40 | """ 41 | hosts = [] 42 | 43 | response = self._client.describe_instances(Filters=[ 44 | { 'Name': "tag:ClusterName", "Values": [cluster]}, 45 | { 'Name': "tag:Account", "Values": [account]}, 46 | { 'Name': "instance-state-name", "Values": ["running"]} 47 | ]) 48 | for reservation in (response["Reservations"]): 49 | for instance in reservation["Instances"]: 50 | inst_details = {} 51 | inst_details["id"] = instance["InstanceId"] 52 | inst = self._resource.Instance(instance["InstanceId"]) 53 | 54 | for t in inst.tags: 55 | if t["Key"] == "Name": 56 | inst_details["name"] = t["Value"] 57 | if t["Key"] == "active_interface": 58 | inst_details["active_interface"] = t["Value"] 59 | if t["Key"] == "aws:autoscaling:groupName": 60 | inst_details["asg"] = t["Value"] 61 | for a in inst.network_interfaces_attribute: 62 | if a["Attachment"]["DeviceIndex"] == 0: 63 | inst_details["eth0"] = a["PrivateIpAddress"] 64 | if a["Attachment"]["DeviceIndex"] == 1: 65 | inst_details["eth1"] = a["PrivateIpAddress"] 66 | inst_details["zone"] = inst.placement["AvailabilityZone"] 67 | 68 | hosts.append(inst_details) 69 | return hosts 70 | 71 | def generateAnsibleHost(self, hosts, host_file): 72 | ansible_data = {} 73 | seed_nodes = {} 74 | non_seed_nodes = {} 75 | inactive_nodes = {} 76 | 77 | for host in hosts: 78 | 79 | pp.pprint(host) 80 | 81 | active_interface = host["active_interface"] 82 | active_ip = host[active_interface] 83 | name = "%s:%s" %(host["name"], active_interface) 84 | if active_interface == "eth0": 85 | inactive_nodes[active_ip] = name 86 | else: 87 | if "non-seed" not in host["name"]: 88 | seed_nodes[active_ip] = name 89 | else: 90 | 
non_seed_nodes[active_ip] = name 91 | 92 | ansible_data["seeds"] = {} 93 | ansible_data["seeds"]["hosts"] = seed_nodes 94 | 95 | ansible_data["non-seeds"] = {} 96 | ansible_data["non-seeds"]["hosts"] = non_seed_nodes 97 | 98 | ansible_data["inactive"] = {} 99 | ansible_data["inactive"]["hosts"] = inactive_nodes 100 | 101 | #pp.pprint(ansible_data) 102 | 103 | with open(host_file, 'w') as stream: 104 | for group in ansible_data.keys(): 105 | stream.write("[%s]\n" %(group)) 106 | for host in ansible_data[group]["hosts"].keys(): 107 | stream.write("%s\n" %(host)) 108 | stream.close() 109 | 110 | 111 | if __name__ == "__main__": 112 | parser = OptionParser() 113 | parser.add_option("-f", "--file", dest="file", help="host file") 114 | parser.add_option("-p", "--aws_profile", dest="aws_profile", help="AWS profile") 115 | parser.add_option("-a", "--account", dest="account", help="account") 116 | parser.add_option("-c", "--cluster", dest="cluster", help="cluster") 117 | parser.add_option("-r", "--region", dest="region", help="region", default="us-west-2") 118 | 119 | (options, args) = parser.parse_args(sys.argv) 120 | 121 | casHostMgr = casHostManager(options.aws_profile, options.region) 122 | 123 | nodes = casHostMgr.get_Hosts(options.account, options.cluster) 124 | casHostMgr.generateAnsibleHost(nodes, options.file) 125 | 126 | sys.exit(0) 127 | -------------------------------------------------------------------------------- /core-components/packer/cassandra/scripts/cas_ec2_mgr.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | ## 4 | # Objective: Helper class for EBS, ENI, and ASG managers 5 | 6 | import botocore 7 | import pprint 8 | from awsretry import AWSRetry 9 | 10 | pp = pprint.PrettyPrinter(indent=4) 11 | 12 | 13 | class CasEC2Manager: 14 | _session = None 15 | _client = None 16 | _resource = None 17 | _pp = None 18 | 19 | def __init__(self, session, client, resource): 20 | self._session = session 21 | self._client = client 22 | self._resource = resource 23 | self._pp = pprint.PrettyPrinter(indent=4) 24 | 25 | @AWSRetry.backoff(tries=20, delay=2, backoff=1.5) 26 | def get_instances(self, filters): 27 | try: 28 | pp.pprint(filters) 29 | response = self._client.describe_instances(Filters=filters) 30 | return response 31 | except botocore.exceptions.ClientError as e: 32 | raise e 33 | 34 | @AWSRetry.backoff(tries=20, delay=2, backoff=1.5) 35 | def get_volumes_by_id(self, vol_id): 36 | try: 37 | response = self._client.describe_volumes(VolumeIds=[vol_id]) 38 | if len(response["Volumes"]) == 1: 39 | return response["Volumes"][0] 40 | return response 41 | except botocore.exceptions.ClientError as e: 42 | raise e 43 | 44 | def get_host_names_from_ips(self, node_list): 45 | node_names = [] 46 | filters = [{'Name': "network-interface.addresses.private-ip-address", "Values": node_list}] 47 | response = self.get_instances(filters) 48 | 49 | for reservation in (response["Reservations"]): 50 | for instance in reservation["Instances"]: 51 | inst = self._resource.Instance(instance["InstanceId"]) 52 | for tag in inst.tags: 53 | if tag["Key"] == "Name": 54 | if not tag["Value"] in node_names: 55 | node_names.append(tag["Value"]) 56 | return node_names 57 | 58 | def get_hosts(self, account, cluster, node_list, match_by_name): 59 | """ 60 | Inputs 61 | :param account : 62 | :param cluster : used to query the EC2s with cluster tag 63 | :param node_list : 64 | :param match_by_name : 65 | :return: list of EC2s which are in available state 
66 | """ 67 | hosts = [] 68 | filters = [ 69 | {'Name': "tag:ClusterName", "Values": [cluster]}, 70 | {'Name': "tag:Account", "Values": [account]}, 71 | {'Name': "instance-state-name", "Values": ["running"]} 72 | ] 73 | 74 | if match_by_name: 75 | node_names = self.get_host_names_from_ips(node_list) 76 | 77 | print("---- Node names: ----------") 78 | self._pp.pprint(node_names) 79 | 80 | if len(node_names): 81 | filters.append({'Name': "tag:Name", "Values": node_names}) 82 | else: 83 | # match by ip 84 | if len(node_list): 85 | filters.append({'Name': "network-interface.addresses.private-ip-address", "Values": node_list}) 86 | 87 | response = self.get_instances(filters) 88 | for reservation in (response["Reservations"]): 89 | for instance in reservation["Instances"]: 90 | inst = self._resource.Instance(instance["InstanceId"]) 91 | inst_details = {"id": instance["InstanceId"], "eth1": None, "az": inst.placement["AvailabilityZone"]} 92 | 93 | for t in inst.tags: 94 | if t["Key"] == "Name": 95 | inst_details["name"] = t["Value"] 96 | if t["Key"] == "active_interface": 97 | inst_details["active_interface"] = t["Value"] 98 | if t["Key"] == "aws:autoscaling:groupName": 99 | inst_details["asg"] = t["Value"] 100 | 101 | for a in inst.network_interfaces_attribute: 102 | if a["Attachment"]["DeviceIndex"] == 0: 103 | inst_details["eth0"] = a["PrivateIpAddress"] 104 | if a["Attachment"]["DeviceIndex"] == 1: 105 | inst_details["eth1"] = a["PrivateIpAddress"] 106 | 107 | block_devices = [] 108 | for block_device in inst.block_device_mappings: 109 | volume_id = block_device["Ebs"]["VolumeId"] 110 | volume = self.get_volumes_by_id(volume_id) 111 | device_details = {"name": block_device["DeviceName"], "volume_id": volume_id, 112 | "volume_type": volume["VolumeType"], "volume_size": volume["Size"]} 113 | block_devices.append(device_details) 114 | 115 | inst_details["block_devices"] = block_devices 116 | hosts.append(inst_details) 117 | 118 | return hosts 119 | -------------------------------------------------------------------------------- /core-components/terraform/modules/opscenter/sg_opscenter.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | opscenter_bastion_access_tags = { 3 | "Name" = "opscenter-bastion-access" 4 | } 5 | 6 | ops_elb_443_tags = { 7 | "Name" = "sg-ops-https-to-elb" 8 | } 9 | 10 | ops_elb_9091_tags = { 11 | "Name" = "sg-ops-studio-to-elb" 12 | } 13 | 14 | ops_elb_to_nodes_tags = { 15 | "Name" = "sg-ops-elb-to-nodes" 16 | } 17 | 18 | ops_addl_inbound_tags = { 19 | "Name" = "sg-ops-additional" 20 | } 21 | } 22 | 23 | resource "aws_security_group" "opscenter-bastion-access" { 24 | name_prefix = "opscenter-bastion-access-" 25 | description = "Allows SSH access via bastion" 26 | vpc_id = var.vpc_id 27 | revoke_rules_on_delete = true 28 | 29 | ingress { 30 | from_port = 22 31 | to_port = 22 32 | protocol = "tcp" 33 | security_groups = [var.sg_bas_nodes_to_all] 34 | } 35 | 36 | egress { 37 | from_port = 0 38 | to_port = 0 39 | protocol = "-1" 40 | cidr_blocks = ["0.0.0.0/0"] 41 | } 42 | 43 | lifecycle { 44 | create_before_destroy = true 45 | } 46 | 47 | tags = merge(var.ec2_tags, local.required_ec2_tags, local.opscenter_bastion_access_tags) 48 | } 49 | 50 | resource "aws_security_group" "ops_elb_443" { 51 | name = "sg_ops-https-to-elb-${var.account_id}" 52 | description = "Allow inbound access from configured CIDRs on HTTPS port" 53 | vpc_id = var.vpc_id 54 | 55 | ingress { 56 | from_port = 443 57 | to_port = 443 58 | protocol = "tcp" 59 | 
cidr_blocks = var.opscenter_ingress_cidrs 60 | } 61 | 62 | egress { 63 | from_port = 0 64 | to_port = 0 65 | protocol = "-1" 66 | cidr_blocks = ["0.0.0.0/0"] 67 | } 68 | 69 | lifecycle { 70 | create_before_destroy = true 71 | } 72 | 73 | tags = merge(var.ec2_tags, local.required_ec2_tags, local.ops_elb_443_tags) 74 | } 75 | 76 | resource "aws_security_group" "ops_elb_9091" { 77 | name = "sg_ops-studio-to-elb-${var.account_id}" 78 | description = "Allow inbound access from configured CIDRs on DataStax Studio port" 79 | vpc_id = var.vpc_id 80 | 81 | ingress { 82 | from_port = 9091 83 | to_port = 9091 84 | protocol = "tcp" 85 | cidr_blocks = var.opscenter_ingress_cidrs 86 | } 87 | 88 | egress { 89 | from_port = 0 90 | to_port = 0 91 | protocol = "-1" 92 | cidr_blocks = ["0.0.0.0/0"] 93 | } 94 | 95 | lifecycle { 96 | create_before_destroy = true 97 | } 98 | 99 | tags = merge(var.ec2_tags, local.required_ec2_tags, local.ops_elb_9091_tags) 100 | } 101 | 102 | 103 | resource "aws_security_group" "ops_elb_to_nodes" { 104 | name = "sg_ops-elb-to-nodes-${var.account_id}" 105 | description = "Allows inbound HTTPS access from opscenter elb" 106 | vpc_id = var.vpc_id 107 | 108 | # TODO tighten 109 | ingress { 110 | from_port = 8443 111 | to_port = 8443 112 | protocol = "tcp" 113 | security_groups = [aws_security_group.ops_elb_443.id] 114 | } 115 | ingress { 116 | from_port = 9091 117 | to_port = 9091 118 | protocol = "tcp" 119 | security_groups = [aws_security_group.ops_elb_9091.id] 120 | } 121 | egress { 122 | from_port = 0 123 | to_port = 0 124 | protocol = "-1" 125 | cidr_blocks = ["0.0.0.0/0"] 126 | } 127 | 128 | lifecycle { 129 | create_before_destroy = true 130 | } 131 | 132 | tags = merge(var.ec2_tags, local.required_ec2_tags, local.ops_elb_to_nodes_tags) 133 | 134 | } 135 | 136 | ############################ 137 | # separate security group to allow any extra ingress from a list of other SGs 138 | ############################ 139 | 140 | resource "aws_security_group" "ops_addl_inbound" { 141 | name = "sg_ops-additional-${var.account_id}" 142 | description = "Allows inbound HTTPS access from provided SGs" 143 | vpc_id = var.vpc_id 144 | 145 | egress { 146 | from_port = 0 147 | to_port = 0 148 | protocol = "-1" 149 | cidr_blocks = ["0.0.0.0/0"] 150 | } 151 | 152 | lifecycle { 153 | create_before_destroy = true 154 | } 155 | 156 | tags = merge(var.ec2_tags, local.required_ec2_tags, local.ops_addl_inbound_tags) 157 | } 158 | 159 | # rule for port 8443 160 | resource "aws_security_group_rule" "ops_addl_8443" { 161 | count = length(var.ops_additional_sg_ids) 162 | security_group_id = aws_security_group.ops_addl_inbound.id 163 | source_security_group_id = element(var.ops_additional_sg_ids, count.index) 164 | 165 | type = "ingress" 166 | from_port = 8443 167 | to_port = 8443 168 | protocol = "tcp" 169 | } 170 | 171 | # rule for port 9091 172 | resource "aws_security_group_rule" "ops_addl_9091" { 173 | count = length(var.ops_additional_sg_ids) 174 | security_group_id = aws_security_group.ops_addl_inbound.id 175 | source_security_group_id = element(var.ops_additional_sg_ids, count.index) 176 | 177 | type = "ingress" 178 | from_port = 9091 179 | to_port = 9091 180 | protocol = "tcp" 181 | } 182 | -------------------------------------------------------------------------------- /init-roles.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ROOT=$(git rev-parse --show-toplevel) 4 | CORE="$ROOT/core-components" 5 | CONFIGS="$ROOT/configurations" 6 | 7 |
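# Example invocation (a sketch; the profile and account names are placeholders):
#
#   ./init-roles.sh -p my-admin-profile -a my-dse-account
#
# The admin profile is only needed for this one-time bootstrap; day-to-day
# packer/terraform runs assume the packer-role and terraform-role it creates.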
usage() { 8 | echo "Usage:" 9 | echo " init-roles.sh" 10 | echo " -p : [Required] AWS profile with Admin access to IAM in the target account" 11 | echo " -a : [Required] account name" 12 | } 13 | 14 | parse() { 15 | grep ^$1 ${variables_path} | awk {'print $NF'} | tr -d '"' 16 | } 17 | 18 | while getopts ":p:a:" opt; do 19 | case "${opt}" in 20 | p) 21 | PROFILE=${OPTARG};; 22 | a) 23 | account_name=${OPTARG};; 24 | *) 25 | usage; exit 1;; 26 | esac 27 | done 28 | shift "$((OPTIND-1))" 29 | 30 | if [[ -z "${PROFILE// }" ]]; then usage; exit 1; fi 31 | if [[ -z "${account_name// }" ]]; then usage; exit 1; fi 32 | 33 | AWS_CMD="aws --profile ${PROFILE}" 34 | 35 | ################################ 36 | # Check credentials before starting 37 | ################################ 38 | 39 | ${AWS_CMD} sts get-caller-identity > /dev/null 40 | if [[ $? -ne 0 ]]; then 41 | echo "Provided AWS credentials are not valid (profile: '${PROFILE}')" 42 | exit 1 43 | fi 44 | 45 | ################################ 46 | # Collect vars 47 | ################################ 48 | 49 | variables_path=${CONFIGS}/${account_name}/variables.yaml 50 | 51 | packer_profile=$(parse PACKER_AWS_PROFILE) 52 | terraform_profile=$(parse TERRAFORM_AWS_PROFILE) 53 | 54 | ACCOUNT_ID=$(${AWS_CMD} sts get-caller-identity --query "Account" --output text) 55 | IAM_PROFILE_ARN=$(${AWS_CMD} sts get-caller-identity --query "Arn" --output text) 56 | 57 | # check if current ARN is an assumed role 58 | if [[ "${IAM_PROFILE_ARN}" == *":assumed-role/"* ]]; then 59 | # get the base role ARN 60 | BASE_ROLE_NAME=$(echo ${IAM_PROFILE_ARN} | awk -F'/' {'print $2'}) 61 | IAM_PROFILE_ARN=$(${AWS_CMD} iam get-role --role-name ${BASE_ROLE_NAME} --query Role.Arn --output text) 62 | fi 63 | 64 | echo "Current IAM role: ${IAM_PROFILE_ARN}" 65 | 66 | PACKER_ROLE_NAME="packer-role" 67 | TERRAFORM_ROLE_NAME="terraform-role" 68 | 69 | ################################ 70 | # Assemble role templates 71 | ################################ 72 | 73 | TERRAFORM_JSON="${CORE}/roles/terraform.json" 74 | PACKER_JSON="${CORE}/roles/packer.json" 75 | 76 | # AssumeRole policy needs an IAM ARN to set as Principal 77 | ASSUME_ROLE_JSON="${CORE}/roles/assume-role.json" 78 | sed -e "s?##IAM_PROFILE_ARN##?${IAM_PROFILE_ARN}?g" \ 79 | ${ASSUME_ROLE_JSON}.tpl > ${ASSUME_ROLE_JSON} 80 | 81 | echo "---------------" 82 | echo "AssumeRole policy:" 83 | cat ${ASSUME_ROLE_JSON} 84 | echo "---------------" 85 | 86 | ################################ 87 | # Get to work 88 | ################################ 89 | 90 | ${AWS_CMD} iam get-role --role-name ${PACKER_ROLE_NAME} > /dev/null 2>&1 91 | if [[ $? -ne 0 ]]; then 92 | echo "Creating role: ${PACKER_ROLE_NAME}" 93 | ${AWS_CMD} iam create-role --role-name ${PACKER_ROLE_NAME} --assume-role-policy-document file://${ASSUME_ROLE_JSON} 94 | else 95 | echo "Updating trust policy on role: ${PACKER_ROLE_NAME}" 96 | ${AWS_CMD} iam update-assume-role-policy --role-name ${PACKER_ROLE_NAME} --policy-document file://${ASSUME_ROLE_JSON} 97 | fi 98 | 99 | ${AWS_CMD} iam get-role --role-name ${TERRAFORM_ROLE_NAME} > /dev/null 2>&1 100 | if [[ $? 
-ne 0 ]]; then 101 | echo "Creating role: ${TERRAFORM_ROLE_NAME}" 102 | ${AWS_CMD} iam create-role --role-name ${TERRAFORM_ROLE_NAME} --assume-role-policy-document file://${ASSUME_ROLE_JSON} 103 | else 104 | echo "Updating trust policy on role: ${TERRAFORM_ROLE_NAME}" 105 | ${AWS_CMD} iam update-assume-role-policy --role-name ${TERRAFORM_ROLE_NAME} --policy-document file://${ASSUME_ROLE_JSON} 106 | fi 107 | 108 | echo "Waiting for roles..." 109 | ${AWS_CMD} iam wait role-exists --role-name ${PACKER_ROLE_NAME} 110 | ${AWS_CMD} iam wait role-exists --role-name ${TERRAFORM_ROLE_NAME} 111 | 112 | echo "Attaching policies to Packer role..." 113 | ${AWS_CMD} iam put-role-policy \ 114 | --role-name ${PACKER_ROLE_NAME} \ 115 | --policy-name ${PACKER_ROLE_NAME}-policy \ 116 | --policy-document file://${PACKER_JSON} 117 | 118 | echo "Adding managed policies to Terraform role..." 119 | 120 | declare -a arr=( 121 | "AmazonEC2FullAccess" 122 | "AWSCertificateManagerReadOnly" 123 | "AmazonVPCReadOnlyAccess" 124 | "AWSCloudTrailReadOnlyAccess" 125 | "CloudWatchReadOnlyAccess" 126 | ) 127 | 128 | for POLICY in "${arr[@]}"; do 129 | echo " - $POLICY" 130 | ${AWS_CMD} iam attach-role-policy \ 131 | --role-name ${TERRAFORM_ROLE_NAME} --policy-arn "arn:aws:iam::aws:policy/${POLICY}" 132 | if [[ $? -ne 0 ]]; then exit 1; fi 133 | done 134 | 135 | echo "Adding inline policies to Terraform role..." 136 | 137 | ${AWS_CMD} iam put-role-policy \ 138 | --role-name ${TERRAFORM_ROLE_NAME} \ 139 | --policy-name ${TERRAFORM_ROLE_NAME}-policy \ 140 | --policy-document file://${TERRAFORM_JSON} 141 | 142 | echo "---------------" 143 | echo "Add the following profiles to your ~/.aws/credentials file:" 144 | echo "" 145 | cat <