├── .gitignore ├── LICENSE ├── README.md ├── aws-config.tf.template ├── certs └── .gitignore ├── emr ├── configuration.json ├── emr.tf ├── iam.tf ├── outputs.tf ├── security-groups.tf ├── variables.tf └── versions.tf ├── foxy-proxy.xml ├── main.tf ├── proxy ├── ec2.tf ├── outputs.tf ├── provisioner │ ├── apache-proxy-ap.conf.template │ ├── apache-proxy-hbase.conf.template │ ├── apache-proxy-hue.conf.template │ ├── apache-proxy-jupyter.conf.template │ ├── apache-proxy-nn.conf.template │ ├── apache-proxy-rm.conf.template │ ├── apache-proxy-top.conf.template │ ├── apache-proxy-zeppelin.conf.template │ ├── index.html.template │ ├── make-dummy-cert.sh │ ├── provision.sh │ └── setup-reverse-proxy.py ├── proxy.iml ├── security-groups.tf ├── variables.tf └── versions.tf ├── route53 ├── route53.tf ├── variables.tf └── versions.tf ├── scripts ├── install-jupyter-2020.11.sh ├── install-jupyter-2022.05.sh ├── install-jupyter-2023.07.sh ├── install-jupyter.sh ├── install-kafka.sh ├── pyspark-ml-crashcourse-setup.sh ├── setup-pyspark-advanced.sh ├── setup-pyspark-datascience.sh └── setup-training.sh └── versions.tf /.gitignore: -------------------------------------------------------------------------------- 1 | aws-config.tf 2 | .terraform 3 | terraform.tfstate 4 | terraform.tfstate.backup 5 | **/*.iml 6 | .idea/** 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Licensed under the Apache License, Version 2.0 (the "License"); 2 | you may not use this file except in compliance with the License. 3 | You may obtain a copy of the License at 4 | 5 | http://www.apache.org/licenses/LICENSE-2.0 6 | 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
10 | See the License for the specific language governing permissions and 11 | limitations under the License. 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 1. Preparations 2 | 3 | ## Prepare SSH key 4 | 5 | First of all you need to create an SSH key pair for securely accessing the cluster. 6 | 7 | ssh-keygen -t rsa -C "EMR Access Key" -f deployer-key 8 | puttygen deployer-key -o deployer-key.ppk 9 | 10 | ## Create AWS Route53 Zone 11 | 12 | The Terraform scripts will register all master nodes in the public DNS via 13 | Route53. Therefore you need to provide an AWS Route53 zone in advance which can 14 | be used for creating appropriate DNS records. 15 | 16 | ## Create AWS Configuration 17 | 18 | You need to copy the `aws-config.tf.template` file to `aws-config.tf` and modify 19 | it so that it contains your AWS credentials and the desired AWS region and 20 | availability zone. You can also specify the file name of the SSH key. 21 | 22 | ## Modify General Configuration 23 | 24 | Now that you have everything together, you also might want to adjust some 25 | settings in `main.tf`. By default four EMR clusters will be created, each 26 | having two nodes (one master and one worker). At least you need to specify 27 | the Route53 zone to use. 28 | 29 | The following properties can be set in the `emr` section in `main.tf`: 30 | 31 | * `proxy_domain` Specify the Route53 zone name for registering the 32 | web-interfaces to the public DNS. This setting has to match the corresponding 33 | domain name and will be used for setting up the reverse proxy 34 | * `proxy_user` Configure the user name used for basic http authentication. This 35 | provides a very basic level of security for the clusters. 36 | * `proxy_password` Configure the corresponding password for http basic auth to 37 | the web interfaces. 
38 | * `names` Configures the names of the clusters. For each name a separate EMR 39 | cluster will be created. 40 | * `release` Specify the desired EMR release 41 | * `applications` Specify the EMR components to be installed. 42 | * `master_type` Set the desired EC2 instance type for the master 43 | * `worker_type` Set the desired EC2 instance type for the worker 44 | 45 | In addition the following properties can be set in the `route53` section 46 | in `main.tf`: 47 | 48 | * `zone_name` Again this needs to contain the Route53 zone name where all 49 | DNS entries are created. The scripts will NOT create this zone, it has to 50 | be provided by you in advance. 51 | 52 | You also might want to change the network configuration, but if you change the 53 | subnets, you also should adjust `foxy-proxy.xml` with the corresponding settings. 54 | 55 | 56 | # 2. Starting and stopping the Clusters 57 | 58 | ## Start Cluster 59 | 60 | terraform init 61 | terraform apply 62 | 63 | ## Destroy Cluster 64 | 65 | terraform destroy 66 | 67 | Note that you probably need to destroy the security groups manually in the 68 | web-interface, since cyclic dependencies are not handled correctly in 69 | Terraform 70 | 71 | ## Manual Cleanup 72 | 73 | Sometimes it might be necessary to clean up some resources manually, where 74 | no AWS web frontend is available: 75 | 76 | aws iam remove-role-from-instance-profile --role-name training_ec2_role --instance-profile-name training_ec2_profile 77 | aws iam delete-instance-profile --instance-profile-name training_ec2_profile 78 | 79 | 80 | # 3. Connect to Cluster 81 | 82 | You can then connect to the cluster via SSH 83 | 84 | ssh -i deployer-key hadoop@. 85 | 86 | where `cluster_name` is one of the `names` configured in `main.tf` and 87 | `route_53_zone_name` is the Route53 zone where all computers will be registered. 
88 | 89 | ## Web Interface 90 | 91 | As part of the deployment a reverse proxy will be set up such that you can 92 | access most services via your web-browser. You can find an entry page at 93 | 94 | http://. 95 | 96 | where `cluster_name` is one of the `names` configured in `main.tf` and 97 | `route_53_zone_name` is the Route53 zone where all computers will be registered. 98 | 99 | ## Web Tunnel Connection 100 | 101 | You can create a proxy tunnel using SSH dynamic port forwarding, which again can 102 | be easily used with the FoxyProxy plugin. 103 | 104 | ssh -i deployer-key -ND 8157 hadoop@. 105 | 106 | The tunneled URLs for the relevant services in EMR are as follows: 107 | 108 | YARN - http://master:8088 109 | HDFS - http://master:9870 110 | Hue - http://master:8888 111 | Zeppelin - http://master:8890 112 | Spark History - http://master:18080 113 | Jupyter Notebook - http://master:8899 114 | 115 | -------------------------------------------------------------------------------- /aws-config.tf.template: -------------------------------------------------------------------------------- 1 | variable "aws_region" { 2 | description = "AWS region to launch servers." 3 | default = "eu-central-1" 4 | } 5 | 6 | variable "aws_availability_zone" { 7 | description = "AWS availability zone to launch servers." 
8 | default = "eu-central-1a" 9 | } 10 | 11 | provider "aws" { 12 | access_key = "AWS_ACCESS_KEY" 13 | secret_key = "AWS_SECRET_KEY" 14 | region = "${var.aws_region}" 15 | } 16 | 17 | resource "aws_key_pair" "deployer" { 18 | key_name = "deployer-key" 19 | public_key = "ssh-rsa SSH_PUBLIC_KEY" 20 | } 21 | 22 | -------------------------------------------------------------------------------- /certs/.gitignore: -------------------------------------------------------------------------------- 1 | *.pem 2 | -------------------------------------------------------------------------------- /emr/configuration.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "Classification": "hue-ini", 4 | "Properties": {}, 5 | "Configurations": [ 6 | { 7 | "Classification": "librdbms", 8 | "Properties": {}, 9 | "Configurations": [ 10 | { 11 | "Classification": "databases", 12 | "Properties": {}, 13 | "Configurations": [ 14 | { 15 | "Classification": "mysql", 16 | "Properties": { 17 | "nice_name": "MySQL Training DB", 18 | "name": "training", 19 | "engine": "mysql", 20 | "host": "localhost", 21 | "port": "3306", 22 | "user": "user", 23 | "password": "user" 24 | }, 25 | "Configurations": [] 26 | } 27 | ] 28 | } 29 | ] 30 | }, 31 | { 32 | "Classification": "notebook", 33 | "Properties": {}, 34 | "Configurations": [ 35 | { 36 | "Classification": "interpreters", 37 | "Properties": {}, 38 | "Configurations": [ 39 | { 40 | "Classification": "mysql", 41 | "Properties": { 42 | "name": "MySQL Training DB", 43 | "interface": "rdbms" 44 | }, 45 | "Configurations": [] 46 | } 47 | ] 48 | } 49 | ] 50 | } 51 | ] 52 | }, 53 | { 54 | "Classification": "spark-defaults", 55 | "Properties": { 56 | "spark.jars.packages":"mysql:mysql-connector-java:6.0.6,org.apache.spark:spark-sql-kafka-0-10_2.12:3.4.1" 57 | } 58 | } 59 | ] 60 | 61 | -------------------------------------------------------------------------------- /emr/emr.tf: 
-------------------------------------------------------------------------------- 1 | resource "aws_emr_cluster" "cluster" { 2 | count = length(var.names) 3 | name = var.names[count.index] 4 | release_label = var.release 5 | applications = concat(var.applications) 6 | log_uri = var.log_uri 7 | 8 | ec2_attributes { 9 | subnet_id = var.subnet_id 10 | key_name = element(var.ssh_key_ids, count.index) 11 | emr_managed_master_security_group = aws_security_group.master.id 12 | emr_managed_slave_security_group = aws_security_group.slave.id 13 | service_access_security_group = aws_security_group.service.id 14 | 15 | 16 | # additional_master_security_groups = aws_security_group.allow_ssh.id 17 | instance_profile = aws_iam_instance_profile.training_ec2_profile.arn 18 | } 19 | 20 | ebs_root_volume_size = "32" 21 | 22 | master_instance_group { 23 | instance_type = var.master_type 24 | bid_price = var.master_bid_price 25 | ebs_config { 26 | size = var.master_ebs_size 27 | type = "gp2" 28 | volumes_per_instance = 1 29 | } 30 | } 31 | 32 | core_instance_group { 33 | instance_type = var.worker_type 34 | instance_count = var.worker_count 35 | bid_price = var.worker_bid_price 36 | ebs_config { 37 | size = var.worker_ebs_size 38 | type = "gp2" 39 | volumes_per_instance = 1 40 | } 41 | } 42 | 43 | tags = merge( 44 | var.tags, 45 | { 46 | "name" = element(var.names, count.index) 47 | }, 48 | ) 49 | 50 | # configurations = "s3://dimajix-training/scripts/aws/emr-configurations.json" 51 | configurations = file("emr/configuration.json") 52 | 53 | service_role = aws_iam_role.training_emr_service_role.arn 54 | 55 | depends_on = [ 56 | aws_security_group.master, 57 | aws_security_group.slave, 58 | ] 59 | 60 | #bootstrap_action { 61 | # path = "s3://dimajix-training/scripts/aws/install-kafka.sh" 62 | # name = "install-kafka" 63 | #} 64 | bootstrap_action { 65 | path = "s3://dimajix-training/scripts/aws/install-jupyter-2023.07.sh" 66 | name = "install-jupyter" 67 | } 68 | #bootstrap_action { 69 | 
# path = "s3://dimajix-training/scripts/aws/setup-training.sh" 70 | # name = "setup-training" 71 | #} 72 | #bootstrap_action { 73 | # path = "s3://dimajix-training/scripts/aws/setup-pyspark-advanced.sh" 74 | # name = "setup-training" 75 | #} 76 | bootstrap_action { 77 | path = "s3://dimajix-training/scripts/aws/setup-pyspark-datascience.sh" 78 | name = "setup-training" 79 | } 80 | } 81 | 82 | -------------------------------------------------------------------------------- /emr/iam.tf: -------------------------------------------------------------------------------- 1 | # IAM role for EMR Service 2 | resource "aws_iam_role" "training_emr_service_role" { 3 | name = "training_emr_service_role" 4 | 5 | assume_role_policy = < 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /main.tf: -------------------------------------------------------------------------------- 1 | variable "common_tags" { 2 | type = map(string) 3 | default = { 4 | builtWith = "terraform" 5 | terraformGroup = "training-dmx" 6 | } 7 | } 8 | 9 | resource "aws_key_pair" "deployer" { 10 | key_name = "training-dmx" 11 | public_key = file("deployer-key.pub") 12 | } 13 | 14 | 15 | module "vpc" { 16 | source = "terraform-aws-modules/vpc/aws" 17 | version = "5.1.2" 18 | 19 | name = "training-vpc" 20 | tags = var.common_tags 21 | 22 | azs = var.aws_availability_zones 23 | cidr = "10.200.0.0/16" 24 | private_subnets = ["10.200.1.0/24"] 25 | public_subnets = ["10.200.101.0/24"] 26 | enable_nat_gateway = "true" 27 | single_nat_gateway = "false" 28 | enable_dns_hostnames = "true" 29 | enable_dns_support = "true" 30 | } 31 | 32 | 33 | module "vpc_endpoints" { 34 | source = "terraform-aws-modules/vpc/aws//modules/vpc-endpoints" 35 | version = "5.1.2" 36 | 37 | vpc_id = module.vpc.vpc_id 38 | 39 | create_security_group = true 40 | security_group_name_prefix = "training-vpc-endpoints-" 41 | 
security_group_description = "VPC endpoint security group" 42 | security_group_rules = { 43 | ingress_https = { 44 | description = "HTTPS from VPC" 45 | cidr_blocks = [module.vpc.vpc_cidr_block] 46 | } 47 | } 48 | 49 | endpoints = { 50 | s3 = { 51 | service = "s3" 52 | service_type = "Gateway" 53 | route_table_ids = module.vpc.private_route_table_ids 54 | tags = var.common_tags 55 | } 56 | } 57 | 58 | tags = var.common_tags 59 | } 60 | 61 | 62 | module "emr" { 63 | source = "./emr" 64 | tags = var.common_tags 65 | 66 | # Configuration: Set the cluster names 67 | names = ["kku"] 68 | #names = ["cl1","kku"] 69 | # Configuration: Set the desired EMR release 70 | release = "emr-6.13.0" 71 | # Configuration: Set the desired EMR components 72 | applications = ["Spark","Hadoop","Hue","Zeppelin","Hive","Zookeeper"] 73 | # Configuration: Set the desired EC2 instance type for the master 74 | # Refer to https://aws.amazon.com/de/ec2/spot/pricing/ for spot pricing 75 | master_type = "m5.xlarge" 76 | master_ebs_size = "60" 77 | master_bid_price = "" # 0.30 78 | # Configuration: Set the desired EC2 instance type for the workers 79 | worker_type = "m5.xlarge" 80 | worker_ebs_size = "120" 81 | worker_bid_price = "" # 0.60 82 | worker_count = 1 83 | # Setup logging 84 | log_uri = "s3://dimajix-logs/training/emr" 85 | 86 | vpc_id = module.vpc.vpc_id 87 | subnet_id = module.vpc.private_subnets[0] 88 | edge_security_group_id = module.proxy.security_group_id 89 | ssh_key_ids = [aws_key_pair.deployer.id] 90 | } 91 | 92 | 93 | module "proxy" { 94 | source = "./proxy" 95 | tags = var.common_tags 96 | names = module.emr.names 97 | public_masters = module.emr.master_public_dns 98 | private_masters = module.emr.master_private_dns 99 | 100 | # Configure the domain 101 | proxy_domain = "training.dimajix-aws.net" 102 | # Configuration: Set the user name for basic auth 103 | proxy_user = "destatis" 104 | # Configuration: Set the password for basic auth 105 | proxy_password = "dmx2023" 106 | 107 
| vpc_id = module.vpc.vpc_id 108 | subnet_id = module.vpc.public_subnets[0] 109 | ssh_key_id = aws_key_pair.deployer.id 110 | ssh_key = file("deployer-key") 111 | ssl_certs = "certs" 112 | } 113 | 114 | 115 | module "route53" { 116 | source = "./route53" 117 | tags = var.common_tags 118 | names = module.emr.names 119 | targets = [module.proxy.public_dns] 120 | 121 | # Configuration: Set the Route53 zone to use 122 | zone_name = "training.dimajix-aws.net" 123 | } 124 | 125 | -------------------------------------------------------------------------------- /proxy/ec2.tf: -------------------------------------------------------------------------------- 1 | resource "aws_instance" "proxy" { 2 | # ami = "ami-05c26ae4789875080" 3 | # ami = "ami-0bdbe51a2e8070ff2" 4 | ami = "ami-04e601abe3e1a910f" 5 | key_name = var.ssh_key_id 6 | instance_type = "c5.xlarge" 7 | 8 | root_block_device { 9 | volume_type = "gp2" 10 | volume_size = 80 11 | } 12 | 13 | iam_instance_profile = aws_iam_instance_profile.proxy.name 14 | 15 | vpc_security_group_ids = [ aws_security_group.proxy.id ] 16 | subnet_id = var.subnet_id 17 | associate_public_ip_address = true 18 | 19 | connection { 20 | host = self.public_ip 21 | type = "ssh" 22 | user = "ubuntu" 23 | private_key = var.ssh_key 24 | } 25 | provisioner "file" { 26 | source = "proxy/provisioner" 27 | destination = "/home/ubuntu" 28 | } 29 | provisioner "file" { 30 | source = var.ssl_certs 31 | destination = "/home/ubuntu/certs/" 32 | } 33 | 34 | provisioner "remote-exec" { 35 | inline = [ 36 | "sleep 15", 37 | "cloud-init status --wait" 38 | ] 39 | } 40 | provisioner "remote-exec" { 41 | inline = [ 42 | "chmod +x /home/ubuntu/provisioner", 43 | "sh /home/ubuntu/provisioner/provision.sh -d ${var.proxy_domain} -u ${var.proxy_user} -p ${var.proxy_password} -C /home/ubuntu/certs --pubic-masters ${join(",",var.public_masters)} --private-masters ${join(",",var.private_masters)} --names ${join(",",var.names)}" 44 | ] 45 | } 46 | 47 | tags = merge( { 
"Name" = "training-emr-proxy" }, var.tags ) 48 | } 49 | 50 | 51 | # IAM Role for EC2 Instance Profile 52 | resource "aws_iam_role" "proxy" { 53 | name = "training-emr-proxy" 54 | 55 | assume_role_policy = < 2 | ServerName ap.{{aliasHostName}} 3 | ServerAdmin webmaster@localhost 4 | DocumentRoot /var/www/html 5 | 6 | RemoteIPHeader X-Forwarded-For 7 | RequestHeader set X-Forwarded-Proto 'http' 8 | RequestHeader set X-Forwarded-Host 'ap.{{aliasHostName}}:80' 9 | RequestHeader set X-Forwarded-Server 'ap.{{aliasHostName}}' 10 | RequestHeader set X-Forwarded-Port "80" 11 | 12 | RewriteEngine On 13 | ProxyVia Off 14 | ProxyRequests off 15 | ProxyPreserveHost On 16 | ProxyHTMLEnable Off 17 | ProxyHTMLMeta Off 18 | ProxyHTMLExtended Off 19 | AllowEncodedSlashes NoDecode 20 | 21 | ProxyHTMLURLMap http://{{public_master}}:20888/ / 22 | ProxyHTMLURLMap http://{{private_master}}:20888/ / 23 | 24 | AuthType Basic 25 | AuthName "Dimajix Training Environment" 26 | AuthBasicProvider file 27 | AuthUserFile "{{htpasswd}}" 28 | Require user {{username}} 29 | 30 | ProxyPass http://{{private_master}}:20888/ 31 | ProxyPassReverse / 32 | Header unset X-Content-Type-Options 33 | 34 | 35 | 36 | 37 | 38 | ServerName ap.{{aliasHostName}} 39 | ServerAdmin webmaster@localhost 40 | DocumentRoot /var/www/html 41 | 42 | SSLEngine On 43 | SSLCompression off 44 | SSLProtocol all -SSLv2 -SSLv3 45 | SSLCipherSuite EECDH+AESGCM:EDH+AESGCM:AES256+EECDH:AES256+EDH:ECDHE-RSA-AES128-SHA 46 | SSLCertificateFile /etc/apache2/ssl/ap.{{aliasHostName}}.cert 47 | SSLCertificateKeyFile /etc/apache2/ssl/ap.{{aliasHostName}}.key 48 | 49 | RemoteIPHeader X-Forwarded-For 50 | RequestHeader set X-Forwarded-Proto 'https' 51 | RequestHeader set X-Forwarded-Host 'ap.{{aliasHostName}}:443' 52 | RequestHeader set X-Forwarded-Server 'ap.{{aliasHostName}}' 53 | RequestHeader set X-Forwarded-Port "443" 54 | 55 | RewriteEngine On 56 | ProxyVia Off 57 | ProxyRequests off 58 | ProxyPreserveHost On 59 | ProxyHTMLEnable Off 60 | 
ProxyHTMLMeta Off 61 | ProxyHTMLExtended Off 62 | AllowEncodedSlashes NoDecode 63 | 64 | ProxyHTMLURLMap http://{{public_master}}:20888/ / 65 | ProxyHTMLURLMap http://{{private_master}}:20888/ / 66 | 67 | AuthType Basic 68 | AuthName "Dimajix Training Environment" 69 | AuthBasicProvider file 70 | AuthUserFile "{{htpasswd}}" 71 | Require user {{username}} 72 | 73 | ProxyPass http://{{private_master}}:20888/ 74 | ProxyPassReverse / 75 | Header unset X-Content-Type-Options 76 | 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /proxy/provisioner/apache-proxy-hbase.conf.template: -------------------------------------------------------------------------------- 1 | 2 | ServerName hbase.{{aliasHostName}} 3 | ServerAdmin webmaster@localhost 4 | DocumentRoot /var/www/html 5 | 6 | RemoteIPHeader X-Forwarded-For 7 | RequestHeader set X-Forwarded-Proto 'http' 8 | RequestHeader set X-Forwarded-Host 'hbase.{{aliasHostName}}:80' 9 | RequestHeader set X-Forwarded-Server 'hbase.{{aliasHostName}}' 10 | RequestHeader set X-Forwarded-Port "80" 11 | 12 | RewriteEngine On 13 | 14 | ProxyVia Off 15 | ProxyRequests off 16 | ProxyPreserveHost On 17 | ProxyHTMLEnable Off 18 | ProxyHTMLMeta Off 19 | ProxyHTMLExtended Off 20 | AllowEncodedSlashes NoDecode 21 | 22 | ProxyHTMLURLMap http://{{public_master}}:16010/ / 23 | ProxyHTMLURLMap http://{{private_master}}:16010/ / 24 | 25 | AuthType Basic 26 | AuthName "Dimajix Training Environment" 27 | AuthBasicProvider file 28 | AuthUserFile "{{htpasswd}}" 29 | Require user {{username}} 30 | 31 | ProxyPass http://{{private_master}}:16010/ 32 | ProxyPassReverse / 33 | # RequestHeader unset Accept-Encoding 34 | # SetEnv proxy-nokeepalive 1 35 | 36 | 37 | 38 | 39 | 40 | ServerName hbase.{{aliasHostName}} 41 | ServerAdmin webmaster@localhost 42 | DocumentRoot /var/www/html 43 | 44 | SSLEngine On 45 | SSLCompression off 46 | SSLProtocol all -SSLv2 -SSLv3 47 | SSLCipherSuite 
EECDH+AESGCM:EDH+AESGCM:AES256+EECDH:AES256+EDH:ECDHE-RSA-AES128-SHA 48 | SSLCertificateFile /etc/apache2/ssl/hbase.{{aliasHostName}}.cert 49 | SSLCertificateKeyFile /etc/apache2/ssl/hbase.{{aliasHostName}}.key 50 | 51 | RemoteIPHeader X-Forwarded-For 52 | RequestHeader set X-Forwarded-Proto 'https' 53 | RequestHeader set X-Forwarded-Host 'hbase.{{aliasHostName}}:443' 54 | RequestHeader set X-Forwarded-Server 'hbase.{{aliasHostName}}' 55 | RequestHeader set X-Forwarded-Port '443' 56 | 57 | RewriteEngine On 58 | 59 | ProxyVia Off 60 | ProxyRequests off 61 | ProxyPreserveHost On 62 | ProxyHTMLEnable Off 63 | ProxyHTMLMeta Off 64 | ProxyHTMLExtended Off 65 | AllowEncodedSlashes NoDecode 66 | 67 | ProxyHTMLURLMap http://{{public_master}}:16010/ / 68 | ProxyHTMLURLMap http://{{private_master}}:16010/ / 69 | 70 | AuthType Basic 71 | AuthName "Dimajix Training Environment" 72 | AuthBasicProvider file 73 | AuthUserFile "{{htpasswd}}" 74 | Require user {{username}} 75 | 76 | ProxyPass http://{{private_master}}:16010/ 77 | ProxyPassReverse / 78 | # RequestHeader unset Accept-Encoding 79 | # SetEnv proxy-nokeepalive 1 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /proxy/provisioner/apache-proxy-hue.conf.template: -------------------------------------------------------------------------------- 1 | 2 | ServerName hue.{{aliasHostName}} 3 | ServerAdmin webmaster@localhost 4 | DocumentRoot /var/www/html 5 | 6 | RemoteIPHeader X-Forwarded-For 7 | RequestHeader set X-Forwarded-Proto 'http' 8 | RequestHeader set X-Forwarded-Host 'hue.{{aliasHostName}}:80' 9 | RequestHeader set X-Forwarded-Server 'hue.{{aliasHostName}}' 10 | RequestHeader set X-Forwarded-Port "80" 11 | 12 | RewriteEngine On 13 | 14 | ProxyVia Off 15 | ProxyRequests off 16 | ProxyPreserveHost On 17 | ProxyHTMLEnable Off 18 | ProxyHTMLMeta Off 19 | ProxyHTMLExtended Off 20 | AllowEncodedSlashes NoDecode 21 | 22 | ProxyHTMLURLMap http://{{public_master}}:8888/ / 23 | 
ProxyHTMLURLMap http://{{private_master}}:8888/ / 24 | 25 | AuthType Basic 26 | AuthName "Dimajix Training Environment" 27 | AuthBasicProvider file 28 | AuthUserFile "{{htpasswd}}" 29 | Require user {{username}} 30 | 31 | ProxyPass http://{{private_master}}:8888/ 32 | ProxyPassReverse / 33 | # RequestHeader unset Accept-Encoding 34 | # SetEnv proxy-nokeepalive 1 35 | 36 | 37 | 38 | 39 | 40 | ServerName hue.{{aliasHostName}} 41 | ServerAdmin webmaster@localhost 42 | DocumentRoot /var/www/html 43 | 44 | SSLEngine On 45 | SSLCompression off 46 | SSLProtocol all -SSLv2 -SSLv3 47 | SSLCipherSuite EECDH+AESGCM:EDH+AESGCM:AES256+EECDH:AES256+EDH:ECDHE-RSA-AES128-SHA 48 | SSLCertificateFile /etc/apache2/ssl/hue.{{aliasHostName}}.cert 49 | SSLCertificateKeyFile /etc/apache2/ssl/hue.{{aliasHostName}}.key 50 | 51 | RemoteIPHeader X-Forwarded-For 52 | RequestHeader set X-Forwarded-Proto 'https' 53 | RequestHeader set X-Forwarded-Host 'hue.{{aliasHostName}}:443' 54 | RequestHeader set X-Forwarded-Server 'hue.{{aliasHostName}}' 55 | RequestHeader set X-Forwarded-Port "443" 56 | 57 | RewriteEngine On 58 | ProxyVia Off 59 | ProxyRequests off 60 | ProxyPreserveHost On 61 | ProxyHTMLEnable Off 62 | ProxyHTMLMeta Off 63 | ProxyHTMLExtended Off 64 | AllowEncodedSlashes NoDecode 65 | 66 | ProxyHTMLURLMap http://{{public_master}}:8888/ / 67 | ProxyHTMLURLMap http://{{private_master}}:8888/ / 68 | 69 | AuthType Basic 70 | AuthName "Dimajix Training Environment" 71 | AuthBasicProvider file 72 | AuthUserFile "{{htpasswd}}" 73 | Require user {{username}} 74 | 75 | ProxyPass http://{{private_master}}:8888/ 76 | ProxyPassReverse / 77 | # RequestHeader unset Accept-Encoding 78 | # SetEnv proxy-nokeepalive 1 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /proxy/provisioner/apache-proxy-jupyter.conf.template: -------------------------------------------------------------------------------- 1 | 2 | ServerName jupyter.{{aliasHostName}} 3 | 
ServerAdmin webmaster@localhost 4 | DocumentRoot /var/www/html 5 | 6 | RemoteIPHeader X-Forwarded-For 7 | RequestHeader set X-Forwarded-Proto 'http' 8 | RequestHeader set X-Forwarded-Host 'jupyter.{{aliasHostName}}:80' 9 | RequestHeader set X-Forwarded-Server 'jupyter.{{aliasHostName}}' 10 | RequestHeader set X-Forwarded-Port "80" 11 | 12 | RewriteEngine On 13 | RewriteCond %{REQUEST_URI} ^/api/kernels/ [NC,OR] 14 | RewriteCond %{REQUEST_URI} ^/terminals/websocket/ [NC,OR] 15 | RewriteCond %{HTTP:Upgrade} ^WebSocket$ [NC,OR] 16 | RewriteCond %{HTTP:CONNECTION} ^Upgrade$ [NC] 17 | RewriteRule .* "ws://{{private_master}}:8899%{REQUEST_URI}" [P,L,END] 18 | 19 | ProxyVia Off 20 | ProxyRequests off 21 | ProxyPreserveHost On 22 | ProxyHTMLEnable Off 23 | ProxyHTMLMeta Off 24 | ProxyHTMLExtended Off 25 | AllowEncodedSlashes NoDecode 26 | 27 | ProxyHTMLURLMap http://{{public_master}}:8899/ / 28 | ProxyHTMLURLMap http://{{private_master}}:8899/ / 29 | 30 | AuthType Basic 31 | AuthName "Dimajix Training Environment" 32 | AuthBasicProvider file 33 | AuthUserFile "{{htpasswd}}" 34 | Require user {{username}} 35 | 36 | ProxyPass http://{{private_master}}:8899/ 37 | ProxyPassReverse / 38 | # RequestHeader unset Accept-Encoding 39 | # SetEnv proxy-nokeepalive 1 40 | 41 | 42 | AuthType None 43 | Require all granted 44 | ProxyPass ws://{{private_master}}:8899/api/kernels/ upgrade=websocket 45 | ProxyPassReverse ws://{{private_master}}:8899/api/kernels/ 46 | 47 | 48 | AuthType None 49 | Require all granted 50 | ProxyPass ws://{{private_master}}:8899/terminals/websocket/ upgrade=websocket 51 | ProxyPassReverse ws://{{private_master}}:8899/terminals/websocket/ 52 | 53 | 54 | 55 | 56 | 57 | ServerName jupyter.{{aliasHostName}} 58 | ServerAdmin webmaster@localhost 59 | DocumentRoot /var/www/html 60 | 61 | SSLEngine On 62 | SSLCompression off 63 | SSLProtocol all -SSLv2 -SSLv3 64 | SSLCipherSuite EECDH+AESGCM:EDH+AESGCM:AES256+EECDH:AES256+EDH:ECDHE-RSA-AES128-SHA 65 | SSLCertificateFile 
/etc/apache2/ssl/jupyter.{{aliasHostName}}.cert 66 | SSLCertificateKeyFile /etc/apache2/ssl/jupyter.{{aliasHostName}}.key 67 | 68 | RemoteIPHeader X-Forwarded-For 69 | RequestHeader set X-Forwarded-Proto 'https' 70 | RequestHeader set X-Forwarded-Host 'jupyter.{{aliasHostName}}:443' 71 | RequestHeader set X-Forwarded-Server 'jupyter.{{aliasHostName}}' 72 | RequestHeader set X-Forwarded-Port "443" 73 | 74 | RewriteEngine On 75 | RewriteCond %{REQUEST_URI} ^/api/kernels/ [NC,OR] 76 | RewriteCond %{REQUEST_URI} ^/terminals/websocket/ [NC,OR] 77 | RewriteCond %{HTTP:Upgrade} ^WebSocket$ [NC,OR] 78 | RewriteCond %{HTTP:CONNECTION} ^Upgrade$ [NC] 79 | RewriteRule .* "ws://{{private_master}}:8899%{REQUEST_URI}" [P,L,END] 80 | 81 | ProxyVia Off 82 | ProxyRequests off 83 | ProxyPreserveHost On 84 | ProxyHTMLEnable Off 85 | ProxyHTMLMeta Off 86 | ProxyHTMLExtended Off 87 | AllowEncodedSlashes NoDecode 88 | 89 | ProxyHTMLURLMap http://{{public_master}}:8899/ / 90 | ProxyHTMLURLMap http://{{private_master}}:8899/ / 91 | 92 | AuthType Basic 93 | AuthName "Dimajix Training Environment" 94 | AuthBasicProvider file 95 | AuthUserFile "{{htpasswd}}" 96 | Require user {{username}} 97 | 98 | ProxyPass http://{{private_master}}:8899/ 99 | ProxyPassReverse / 100 | # RequestHeader unset Accept-Encoding 101 | # SetEnv proxy-nokeepalive 1 102 | 103 | 104 | AuthType None 105 | Require all granted 106 | ProxyPass ws://{{private_master}}:8899/api/kernels/ upgrade=websocket 107 | ProxyPassReverse ws://{{private_master}}:8899/api/kernels/ 108 | 109 | 110 | AuthType None 111 | Require all granted 112 | ProxyPass ws://{{private_master}}:8899/terminals/websocket/ upgrade=websocket 113 | ProxyPassReverse ws://{{private_master}}:8899/terminals/websocket/ 114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /proxy/provisioner/apache-proxy-nn.conf.template: -------------------------------------------------------------------------------- 1 | 2 | 
ServerName nn.{{aliasHostName}} 3 | ServerAdmin webmaster@localhost 4 | DocumentRoot /var/www/html 5 | 6 | RemoteIPHeader X-Forwarded-For 7 | RequestHeader set X-Forwarded-Proto 'http' 8 | RequestHeader set X-Forwarded-Host 'nn.{{aliasHostName}}:80' 9 | RequestHeader set X-Forwarded-Server 'nn.{{aliasHostName}}' 10 | RequestHeader set X-Forwarded-Port "80" 11 | 12 | RewriteEngine On 13 | RewriteCond %{HTTP:Upgrade} =WebSocket [NC,NV] 14 | # RewriteRule ^/(.*) ws://azerty01:31321/$1 [P] 15 | 16 | ProxyVia Off 17 | ProxyRequests off 18 | ProxyPreserveHost On 19 | ProxyHTMLEnable Off 20 | ProxyHTMLMeta Off 21 | ProxyHTMLExtended Off 22 | AllowEncodedSlashes NoDecode 23 | 24 | ProxyHTMLURLMap http://{{public_master}}:9870/ / 25 | ProxyHTMLURLMap http://{{private_master}}:9870/ / 26 | 27 | AuthType Basic 28 | AuthName "Dimajix Training Environment" 29 | AuthBasicProvider file 30 | AuthUserFile "{{htpasswd}}" 31 | Require user {{username}} 32 | 33 | ProxyPass http://{{private_master}}:9870/ 34 | ProxyPassReverse / 35 | RequestHeader unset Accept-Encoding 36 | 37 | 38 | 39 | 40 | 41 | ServerName nn.{{aliasHostName}} 42 | ServerAdmin webmaster@localhost 43 | DocumentRoot /var/www/html 44 | 45 | SSLEngine On 46 | SSLCompression off 47 | SSLProtocol all -SSLv2 -SSLv3 48 | SSLCipherSuite EECDH+AESGCM:EDH+AESGCM:AES256+EECDH:AES256+EDH:ECDHE-RSA-AES128-SHA 49 | SSLCertificateFile /etc/apache2/ssl/nn.{{aliasHostName}}.cert 50 | SSLCertificateKeyFile /etc/apache2/ssl/nn.{{aliasHostName}}.key 51 | 52 | RemoteIPHeader X-Forwarded-For 53 | RequestHeader set X-Forwarded-Proto 'https' 54 | RequestHeader set X-Forwarded-Host 'nn.{{aliasHostName}}:443' 55 | RequestHeader set X-Forwarded-Server 'nn.{{aliasHostName}}' 56 | RequestHeader set X-Forwarded-Port "443" 57 | 58 | RewriteEngine On 59 | RewriteCond %{HTTP:Upgrade} =WebSocket [NC,NV] 60 | # RewriteRule ^/(.*) ws://azerty01:31321/$1 [P] 61 | ProxyVia Off 62 | ProxyRequests off 63 | ProxyPreserveHost On 64 | ProxyHTMLEnable Off 65 | 
ProxyHTMLMeta Off 66 | ProxyHTMLExtended Off 67 | AllowEncodedSlashes NoDecode 68 | 69 | ProxyHTMLURLMap http://{{public_master}}:9870/ / 70 | ProxyHTMLURLMap http://{{private_master}}:9870/ / 71 | 72 | AuthType Basic 73 | AuthName "Dimajix Training Environment" 74 | AuthBasicProvider file 75 | AuthUserFile "{{htpasswd}}" 76 | Require user {{username}} 77 | 78 | ProxyPass http://{{private_master}}:9870/ 79 | ProxyPassReverse / 80 | RequestHeader unset Accept-Encoding 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /proxy/provisioner/apache-proxy-rm.conf.template: -------------------------------------------------------------------------------- 1 | 2 | ServerName rm.{{aliasHostName}} 3 | ServerAdmin webmaster@localhost 4 | DocumentRoot /var/www/html 5 | 6 | RemoteIPHeader X-Forwarded-For 7 | RequestHeader set X-Forwarded-Proto 'http' 8 | RequestHeader set X-Forwarded-Host 'rm.{{aliasHostName}}:80' 9 | RequestHeader set X-Forwarded-Server 'rm.{{aliasHostName}}' 10 | RequestHeader set X-Forwarded-Port "80" 11 | 12 | RewriteEngine On 13 | 14 | ProxyVia Off 15 | ProxyRequests off 16 | ProxyPreserveHost On 17 | ProxyHTMLEnable On 18 | ProxyHTMLMeta Off 19 | ProxyHTMLExtended Off 20 | AllowEncodedSlashes NoDecode 21 | 22 | ProxyHTMLURLMap http://{{public_master}}:20888 http://ap.{{aliasHostName}} 23 | ProxyHTMLURLMap http://{{public_master}}:8088/ / 24 | ProxyHTMLURLMap http://{{private_master}}:20888 http://ap.{{aliasHostName}} 25 | ProxyHTMLURLMap http://{{private_master}}:8088/ / 26 | 27 | AuthType Basic 28 | AuthName "Dimajix Training Environment" 29 | AuthBasicProvider file 30 | AuthUserFile "{{htpasswd}}" 31 | Require user {{username}} 32 | 33 | ProxyPass http://{{private_master}}:8088/ 34 | ProxyPassReverse / 35 | Header unset X-Content-Type-Options 36 | RequestHeader unset Accept-Encoding 37 | 38 | 39 | 40 | 41 | 42 | ServerName rm.{{aliasHostName}} 43 | ServerAdmin webmaster@localhost 44 | DocumentRoot 
/var/www/html 45 | 46 | SSLEngine On 47 | SSLCompression off 48 | SSLProtocol all -SSLv2 -SSLv3 49 | SSLCipherSuite EECDH+AESGCM:EDH+AESGCM:AES256+EECDH:AES256+EDH:ECDHE-RSA-AES128-SHA 50 | SSLCertificateFile /etc/apache2/ssl/rm.{{aliasHostName}}.cert 51 | SSLCertificateKeyFile /etc/apache2/ssl/rm.{{aliasHostName}}.key 52 | 53 | RemoteIPHeader X-Forwarded-For 54 | RequestHeader set X-Forwarded-Proto 'https' 55 | RequestHeader set X-Forwarded-Host 'rm.{{aliasHostName}}:443' 56 | RequestHeader set X-Forwarded-Server 'rm.{{aliasHostName}}' 57 | RequestHeader set X-Forwarded-Port "443" 58 | 59 | RewriteEngine On 60 | 61 | ProxyVia Off 62 | ProxyRequests off 63 | ProxyPreserveHost On 64 | ProxyHTMLEnable On 65 | ProxyHTMLMeta Off 66 | ProxyHTMLExtended Off 67 | AllowEncodedSlashes NoDecode 68 | 69 | ProxyHTMLURLMap http://{{public_master}}:20888 https://ap.{{aliasHostName}} 70 | ProxyHTMLURLMap http://{{public_master}}:8088/ / 71 | ProxyHTMLURLMap http://{{private_master}}:20888 https://ap.{{aliasHostName}} 72 | ProxyHTMLURLMap http://{{private_master}}:8088/ / 73 | 74 | AuthType Basic 75 | AuthName "Dimajix Training Environment" 76 | AuthBasicProvider file 77 | AuthUserFile "{{htpasswd}}" 78 | Require user {{username}} 79 | 80 | ProxyPass http://{{private_master}}:8088/ 81 | ProxyPassReverse / 82 | Header unset X-Content-Type-Options 83 | RequestHeader unset Accept-Encoding 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /proxy/provisioner/apache-proxy-top.conf.template: -------------------------------------------------------------------------------- 1 | ProxyHTMLLinks a href 2 | ProxyHTMLLinks area href 3 | ProxyHTMLLinks link href 4 | ProxyHTMLLinks img src longdesc usemap 5 | ProxyHTMLLinks object classid codebase data usemap 6 | ProxyHTMLLinks q cite 7 | ProxyHTMLLinks blockquote cite 8 | ProxyHTMLLinks ins cite 9 | ProxyHTMLLinks del cite 10 | ProxyHTMLLinks form action 11 | ProxyHTMLLinks input src usemap 12 | 
ProxyHTMLLinks head profile 13 | ProxyHTMLLinks base href 14 | ProxyHTMLLinks script src for 15 | 16 | ProxyHTMLLinks frame src longdesc 17 | ProxyHTMLLinks iframe src longdesc 18 | ProxyHTMLLinks body background 19 | ProxyHTMLLinks applet codebase 20 | 21 | 22 | ProxyHTMLEvents onclick ondblclick onmousedown onmouseup \ 23 | onmouseover onmousemove onmouseout onkeypress \ 24 | onkeydown onkeyup onfocus onblur onload \ 25 | onunload onsubmit onreset onselect onchange 26 | 27 | 28 | 29 | ServerName {{aliasHostName}} 30 | ServerAdmin webmaster@localhost 31 | DocumentRoot /var/www/html/{{aliasHostName}} 32 | 33 | 34 | AuthType Basic 35 | AuthName "Dimajix Training Environment" 36 | AuthBasicProvider file 37 | AuthUserFile "{{htpasswd}}" 38 | Require user {{username}} 39 | 40 | 41 | 42 | 43 | 44 | ServerName {{aliasHostName}} 45 | ServerAdmin webmaster@localhost 46 | DocumentRoot /var/www/html/{{aliasHostName}} 47 | 48 | SSLEngine On 49 | SSLCompression off 50 | SSLProtocol all -SSLv2 -SSLv3 51 | SSLCipherSuite EECDH+AESGCM:EDH+AESGCM:AES256+EECDH:AES256+EDH:ECDHE-RSA-AES128-SHA 52 | SSLCertificateFile /etc/apache2/ssl/{{aliasHostName}}.cert 53 | SSLCertificateKeyFile /etc/apache2/ssl/{{aliasHostName}}.key 54 | 55 | 56 | AuthType Basic 57 | AuthName "Dimajix Training Environment" 58 | AuthBasicProvider file 59 | AuthUserFile "{{htpasswd}}" 60 | Require user {{username}} 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /proxy/provisioner/apache-proxy-zeppelin.conf.template: -------------------------------------------------------------------------------- 1 | 2 | ServerName zeppelin.{{aliasHostName}} 3 | ServerAdmin webmaster@localhost 4 | DocumentRoot /var/www/html 5 | 6 | RemoteIPHeader X-Forwarded-For 7 | RequestHeader set X-Forwarded-Proto 'http' 8 | RequestHeader set X-Forwarded-Host 'zeppelin.{{aliasHostName}}:80' 9 | RequestHeader set X-Forwarded-Server 'zeppelin.{{aliasHostName}}' 10 | RequestHeader set 
X-Forwarded-Port "80" 11 | 12 | RewriteEngine On 13 | RewriteCond %{REQUEST_URI} ^/ws [NC,OR] 14 | RewriteCond %{HTTP:Upgrade} ^WebSocket$ [NC,OR] 15 | RewriteCond %{HTTP:CONNECTION} ^Upgrade$ [NC] 16 | # RewriteCond %{HTTP:Upgrade} =WebSocket [NC,NV] 17 | # RewriteRule ^/(.*) ws://azerty01:31321/$1 [P] 18 | 19 | ProxyVia Off 20 | ProxyRequests off 21 | ProxyPreserveHost On 22 | ProxyHTMLEnable Off 23 | ProxyHTMLMeta Off 24 | ProxyHTMLExtended Off 25 | AllowEncodedSlashes NoDecode 26 | 27 | ProxyHTMLURLMap http://{{public_master}}:8890/ / 28 | ProxyHTMLURLMap http://{{private_master}}:8890/ / 29 | 30 | AuthType Basic 31 | AuthName "Dimajix Training Environment" 32 | AuthBasicProvider file 33 | AuthUserFile "{{htpasswd}}" 34 | Require user {{username}} 35 | 36 | ProxyPass http://{{private_master}}:8890/ 37 | ProxyPassReverse / 38 | # RequestHeader unset Accept-Encoding 39 | # SetEnv proxy-nokeepalive 1 40 | 41 | 42 | ProxyPass ws://{{private_master}}:8890/ws 43 | ProxyPassReverse ws://{{private_master}}:8890/ws 44 | 45 | 46 | 47 | 48 | 49 | ServerName zeppelin.{{aliasHostName}} 50 | ServerAdmin webmaster@localhost 51 | DocumentRoot /var/www/html 52 | 53 | SSLEngine On 54 | SSLCompression off 55 | SSLProtocol all -SSLv2 -SSLv3 56 | SSLCipherSuite EECDH+AESGCM:EDH+AESGCM:AES256+EECDH:AES256+EDH:ECDHE-RSA-AES128-SHA 57 | SSLCertificateFile /etc/apache2/ssl/zeppelin.{{aliasHostName}}.cert 58 | SSLCertificateKeyFile /etc/apache2/ssl/zeppelin.{{aliasHostName}}.key 59 | 60 | RemoteIPHeader X-Forwarded-For 61 | RequestHeader set X-Forwarded-Proto 'https' 62 | RequestHeader set X-Forwarded-Host 'zeppelin.{{aliasHostName}}:443' 63 | RequestHeader set X-Forwarded-Server 'zeppelin.{{aliasHostName}}' 64 | RequestHeader set X-Forwarded-Port "443" 65 | 66 | RewriteEngine On 67 | RewriteCond %{REQUEST_URI} ^/ws [NC,OR] 68 | RewriteCond %{HTTP:Upgrade} ^WebSocket$ [NC,OR] 69 | RewriteCond %{HTTP:CONNECTION} ^Upgrade$ [NC] 70 | # RewriteCond %{HTTP:Upgrade} =WebSocket [NC,NV] 71 | # 
RewriteRule ^/(.*) ws://azerty01:31321/$1 [P] 72 | 73 | ProxyVia Off 74 | ProxyRequests off 75 | ProxyPreserveHost On 76 | ProxyHTMLEnable Off 77 | ProxyHTMLMeta Off 78 | ProxyHTMLExtended Off 79 | AllowEncodedSlashes NoDecode 80 | 81 | ProxyHTMLURLMap http://{{public_master}}:8890/ / 82 | ProxyHTMLURLMap http://{{private_master}}:8890/ / 83 | 84 | AuthType Basic 85 | AuthName "Dimajix Training Environment" 86 | AuthBasicProvider file 87 | AuthUserFile "{{htpasswd}}" 88 | Require user {{username}} 89 | 90 | ProxyPass http://{{private_master}}:8890/ 91 | ProxyPassReverse / 92 | # RequestHeader unset Accept-Encoding 93 | # SetEnv proxy-nokeepalive 1 94 | 95 | 96 | ProxyPass ws://{{private_master}}:8890/ws 97 | ProxyPassReverse ws://{{private_master}}:8890/ws 98 | 99 | 100 | 101 | 102 | -------------------------------------------------------------------------------- /proxy/provisioner/index.html.template: -------------------------------------------------------------------------------- 1 | 2 |

Amazon EMR Environment

3 | 4 |

Services

5 | 12 | 13 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /proxy/provisioner/make-dummy-cert.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | umask 077 3 | 4 | setup_apache() { 5 | sudo yum install mod24_ssl.x86_64 6 | 7 | sudo mkdir /etc/httpd/ssl 8 | } 9 | 10 | answers() { 11 | echo -- 12 | echo SomeState 13 | echo SomeCity 14 | echo SomeOrganization 15 | echo SomeOrganizationalUnit 16 | echo jupyter.kku.training.dimajix-aws.net 17 | echo root@jupyter.kku.training.dimajix-aws.net 18 | } 19 | 20 | if [ $# -eq 0 ] ; then 21 | echo $"Usage: `basename $0` filename [...]" 22 | exit 0 23 | fi 24 | 25 | for target in $@ ; do 26 | PEM1=`/bin/mktemp /tmp/openssl.XXXXXX` 27 | PEM2=`/bin/mktemp /tmp/openssl.XXXXXX` 28 | trap "rm -f $PEM1 $PEM2" SIGINT 29 | answers | /usr/bin/openssl req -newkey rsa:2048 -keyout $PEM1 -nodes -x509 -days 365 -out $PEM2 2> /dev/null 30 | cat $PEM1 > ${target} 31 | echo "" >> ${target} 32 | cat $PEM2 >> ${target} 33 | rm -f $PEM1 $PEM2 34 | done 35 | 36 | -------------------------------------------------------------------------------- /proxy/provisioner/provision.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | basedir=$(dirname $0) 3 | 4 | 5 | install_reverse_proxy() { 6 | sudo apt-get update 7 | sudo NEEDRESTART_MODE=a apt-get install --yes apache2 python3-pip python3-openssl python3-requests python3-urllib3 8 | sudo pip3 install htpasswd pystache 9 | 10 | sudo python3 $basedir/setup-reverse-proxy.py "$@" 11 | sudo a2enmod ssl headers request remoteip rewrite proxy proxy_html proxy_http proxy_wstunnel xml2enc 12 | sudo systemctl restart apache2 13 | } 14 | 15 | 16 | install_reverse_proxy "$@" 17 | 18 | -------------------------------------------------------------------------------- /proxy/provisioner/setup-reverse-proxy.py: 
#!/usr/bin/python3
"""Render Apache reverse-proxy configurations for one or more EMR clusters.

For every cluster alias this script renders the mustache vhost templates
located next to this file into /etc/apache2/sites-available, enables them
via symlinks in sites-enabled, creates a per-cluster htpasswd file and
index page under /var/www/html, and links the SSL key/certificate pairs
into /etc/apache2/ssl.
"""

import pystache
import os
import os.path
import sys
import argparse
import htpasswd
import time
from OpenSSL import crypto, SSL

curdir = os.path.abspath(os.path.dirname(__file__))

HTTPD_CONF_DIR = '/etc/apache2'
CERT_DIR = '/etc/apache2/ssl'


def ensure_directory(dirname):
    """Create `dirname` (including parents) if it does not exist yet."""
    if not os.path.exists(dirname):
        # Mode must be octal: the previous decimal 755 equals 0o1363 and
        # produced unintended permissions.
        os.makedirs(dirname, 0o755)


def ensure_parentdir(filename):
    """Create the parent directory of `filename` if it does not exist yet."""
    dirname = os.path.dirname(filename)
    if not os.path.exists(dirname):
        os.makedirs(dirname, 0o755)


def _force_symlink(source, linkname):
    """Create symlink `linkname` -> `source`, replacing any existing link.

    Replacing first makes re-provisioning idempotent; a bare os.symlink
    raises FileExistsError on the second run.
    """
    if os.path.lexists(linkname):
        os.remove(linkname)
    os.symlink(source, linkname)


def render_template(template, target, env):
    """Render mustache `template` (relative to this script) into `target`.

    The target file is created world-readable (0o644); parent directories
    are created as needed.
    """
    index_template = open(os.path.join(curdir, template)).read()
    index_html = pystache.render(index_template, env)
    print(f"Generating {template} -> {target}")
    ensure_parentdir(target)
    with open(target, 'wt') as f:
        f.write(index_html)
    # Octal mode (was decimal 644 before, i.e. 0o1204).
    os.chmod(target, 0o644)


def render_httpd_template(template, env):
    """Render an Apache vhost template into sites-available and enable it."""
    aliasName = env['aliasHostName']
    # e.g. 'apache-proxy-nn.conf.template' -> '<alias>-nn.conf'
    target = aliasName + template.replace(".template", "").replace("apache-proxy", "")
    filename = os.path.join(HTTPD_CONF_DIR, "sites-available", target)
    linkname = os.path.join(HTTPD_CONF_DIR, "sites-enabled", target)
    render_template(template, filename, env)
    _force_symlink(filename, linkname)


def create_certificate(hostname):
    """Create a self-signed key/certificate pair for `hostname` in CERT_DIR.

    Currently unused (see commented-out calls in setup_single_cluster); kept
    for environments without pre-provisioned certificates.
    """
    certfile = os.path.join(CERT_DIR, hostname + '.cert')
    keyfile = os.path.join(CERT_DIR, hostname + '.key')
    # Check before doing the expensive key generation.
    if os.path.exists(certfile):
        return

    # create a key pair
    k = crypto.PKey()
    k.generate_key(crypto.TYPE_RSA, 2048)

    serial = int(time.time())

    # create a self-signed cert
    cert = crypto.X509()
    subject = cert.get_subject()
    subject.C = "DE"
    subject.ST = "Hessen"
    subject.L = "Frankfurt"
    subject.O = "dimajix"
    subject.OU = "dimajix Training"
    subject.CN = hostname
    cert.set_serial_number(serial)
    cert.gmtime_adj_notBefore(0)
    cert.gmtime_adj_notAfter(10 * 24 * 60 * 60)  # valid for 10 days
    cert.set_issuer(subject)
    cert.set_pubkey(k)
    cert.sign(k, 'sha256')

    ensure_directory(CERT_DIR)

    # dump_* return bytes, so decode before writing to text-mode files (the
    # previous code passed bytes to a 'wt' file, which raises TypeError, and
    # never produced the '.key' file that the vhost templates reference).
    with open(keyfile, 'wt') as f:
        f.write(crypto.dump_privatekey(crypto.FILETYPE_PEM, k).decode('ascii'))
    with open(certfile, 'wt') as f:
        f.write(crypto.dump_certificate(crypto.FILETYPE_PEM, cert).decode('ascii'))


def link_certificate(hostname, keyfile, certfile):
    """Link an existing key/certificate pair into CERT_DIR for `hostname`."""
    print(f"Linking {certfile} -> {os.path.join(CERT_DIR, hostname + '.cert')}")
    ensure_directory(CERT_DIR)
    _force_symlink(certfile, os.path.join(CERT_DIR, hostname + ".cert"))
    _force_symlink(keyfile, os.path.join(CERT_DIR, hostname + ".key"))


def setup_single_cluster(env):
    """Provision the proxy configuration for one cluster described by `env`.

    Creates the htpasswd file, renders the index page and all service vhosts,
    and links the root and per-cluster certificates for every sub-domain.
    """
    aliasName = env['aliasHostName']

    htpasswd = f"/var/www/html/{aliasName}/htpasswd"
    setup_htpasswd(htpasswd, env)
    env["htpasswd"] = htpasswd

    render_template('index.html.template', f'/var/www/html/{aliasName}/index.html', env)
    render_httpd_template('apache-proxy-top.conf.template', env)
    render_httpd_template('apache-proxy-nn.conf.template', env)
    render_httpd_template('apache-proxy-ap.conf.template', env)
    render_httpd_template('apache-proxy-rm.conf.template', env)
    render_httpd_template('apache-proxy-hue.conf.template', env)
    render_httpd_template('apache-proxy-hbase.conf.template', env)
    render_httpd_template('apache-proxy-zeppelin.conf.template', env)
    render_httpd_template('apache-proxy-jupyter.conf.template', env)

    # The wildcard-style root certificate serves the top-level alias; the
    # per-cluster certificate serves all service sub-domains.
    certdir = env['ssl_certdir']
    keyfile = os.path.join(certdir, "root-privkey.pem")
    certfile = os.path.join(certdir, "root-cert.pem")
    link_certificate(aliasName, keyfile, certfile)

    keyfile = os.path.join(certdir, env['name'] + "-privkey.pem")
    certfile = os.path.join(certdir, env['name'] + "-cert.pem")
    link_certificate('nn.' + aliasName, keyfile, certfile)
    link_certificate('ap.' + aliasName, keyfile, certfile)
    link_certificate('rm.' + aliasName, keyfile, certfile)
    link_certificate('hue.' + aliasName, keyfile, certfile)
    link_certificate('hbase.' + aliasName, keyfile, certfile)
    link_certificate('zeppelin.' + aliasName, keyfile, certfile)
    link_certificate('jupyter.' + aliasName, keyfile, certfile)

    #create_certificate(hostname)
    #create_certificate('nn.' + hostname)
    #create_certificate('ap.' + hostname)
    #create_certificate('rm.' + hostname)
    #create_certificate('hue.' + hostname)
    #create_certificate('hbase.' + hostname)
    #create_certificate('zeppelin.' + hostname)
    #create_certificate('jupyter.' + hostname)


def setup_htpasswd(filename, env):
    """Create an htpasswd file containing exactly the configured user."""
    ensure_parentdir(filename)
    # Truncate/create the file first -- htpasswd.Basic requires an existing
    # file to open.
    with open(filename, 'wt'):
        pass
    os.chmod(filename, 0o644)
    with htpasswd.Basic(filename) as userdb:
        userdb.add(env['username'], env['password'])


def parse_args(raw_args):
    """Parse the command line arguments and return the argparse namespace."""
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-d', '--domain', dest='domain', help='Domain for registering proxy host', default='training.dimajix-aws.net')
    parser.add_argument('-u', '--username', dest='username', help='Username for authentication', default='dimajix-training')
    parser.add_argument('-p', '--password', dest='password', help='Password for authentication', default='dmx2018')
    parser.add_argument('-N', '--names', dest='names', help='Nice names to create proxies for', default='kku')
    # '--pubic-masters' is kept as a deprecated alias of the (previously
    # misspelled) option so existing provisioning callers keep working.
    parser.add_argument('--public-masters', '--pubic-masters', dest='public_masters', help='Target machines to proxy', default='')
    parser.add_argument('--private-masters', dest='private_masters', help='Target machines to proxy', default='')
    parser.add_argument('-C', '--ssl-certdir', dest='certdir', help='SSL certificate directory', default='')

    return parser.parse_args(args=raw_args)


if __name__ == "__main__":
    args = parse_args(sys.argv[1:])

    public_masters = args.public_masters.split(",")
    private_masters = args.private_masters.split(",")
    alias_names = args.names.split(",")

    # zip() stops at the shortest list, so a trailing surplus in any of the
    # three comma-separated arguments is silently ignored.
    for public_master, private_master, alias in zip(public_masters, private_masters, alias_names):
        alias_domain = alias + "." + args.domain
        env = {
            'public_master': public_master,
            'private_master': private_master,
            'name': alias,
            'aliasHostName': alias_domain,
            'username': args.username,
            'password': args.password,
            'ssl_certdir': args.certdir
        }

        setup_single_cluster(env)
# DNS records for the reverse proxy: one CNAME per cluster alias, plus one
# per proxied service sub-domain. The proxy module renders vhosts for
# rm, nn, ap, hue, hbase, zeppelin and jupyter -- each needs a record here.
data "aws_route53_zone" "emr" {
  name         = var.zone_name
  private_zone = false
}

resource "aws_route53_record" "top" {
  count   = length(var.names)
  zone_id = data.aws_route53_zone.emr.zone_id
  name    = element(var.names, count.index)
  type    = "CNAME"
  ttl     = "300"
  records = [element(var.targets, count.index)]
}

resource "aws_route53_record" "rm" {
  count   = length(var.names)
  zone_id = data.aws_route53_zone.emr.zone_id
  name    = "rm.${element(var.names, count.index)}"
  type    = "CNAME"
  ttl     = "300"
  records = [element(var.targets, count.index)]
}

resource "aws_route53_record" "nn" {
  count   = length(var.names)
  zone_id = data.aws_route53_zone.emr.zone_id
  name    = "nn.${element(var.names, count.index)}"
  type    = "CNAME"
  ttl     = "300"
  records = [element(var.targets, count.index)]
}

resource "aws_route53_record" "ap" {
  count   = length(var.names)
  zone_id = data.aws_route53_zone.emr.zone_id
  name    = "ap.${element(var.names, count.index)}"
  type    = "CNAME"
  ttl     = "300"
  records = [element(var.targets, count.index)]
}

resource "aws_route53_record" "zeppelin" {
  count   = length(var.names)
  zone_id = data.aws_route53_zone.emr.zone_id
  name    = "zeppelin.${element(var.names, count.index)}"
  type    = "CNAME"
  ttl     = "300"
  records = [element(var.targets, count.index)]
}

resource "aws_route53_record" "jupyter" {
  count   = length(var.names)
  zone_id = data.aws_route53_zone.emr.zone_id
  name    = "jupyter.${element(var.names, count.index)}"
  type    = "CNAME"
  ttl     = "300"
  records = [element(var.targets, count.index)]
}

resource "aws_route53_record" "hue" {
  count   = length(var.names)
  zone_id = data.aws_route53_zone.emr.zone_id
  name    = "hue.${element(var.names, count.index)}"
  type    = "CNAME"
  ttl     = "300"
  records = [element(var.targets, count.index)]
}

# Previously missing: the proxy serves hbase.<alias> and links an hbase
# certificate, but without this record the HBase UI was unreachable.
resource "aws_route53_record" "hbase" {
  count   = length(var.names)
  zone_id = data.aws_route53_zone.emr.zone_id
  name    = "hbase.${element(var.names, count.index)}"
  type    = "CNAME"
  ttl     = "300"
  records = [element(var.targets, count.index)]
}
variable "names" { 2 | type = list(string) 3 | default = ["emr"] 4 | } 5 | 6 | variable "targets" { 7 | type = list(string) 8 | default = [] 9 | } 10 | 11 | variable "zone_name" { 12 | default = "aws.dimajix.net" 13 | } 14 | 15 | variable "tags" { 16 | type = map(string) 17 | default = {} 18 | } 19 | 20 | -------------------------------------------------------------------------------- /route53/versions.tf: -------------------------------------------------------------------------------- 1 | 2 | terraform { 3 | required_version = ">= 0.14" 4 | } 5 | -------------------------------------------------------------------------------- /scripts/install-jupyter-2020.11.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | AWS_INFO_DIR="/mnt/var/lib/info/" 4 | 5 | ANACONDA_PREFIX=/opt/anaconda3 6 | ANACONDA_VERSION=2020.11 7 | ANACONDA_INSTALLER=Anaconda3-${ANACONDA_VERSION}-Linux-x86_64.sh 8 | ANACONDA_USER=hadoop 9 | ANACONDA_USER_HOME=$(eval echo ~${ANACONDA_USER}) 10 | 11 | SPARK_HOME=/usr/lib/spark 12 | SPARK_MASTER=yarn 13 | 14 | is_master() { 15 | grep -q "\"isMaster\": true" ${AWS_INFO_DIR}/instance.json 16 | return $? 17 | } 18 | 19 | 20 | install_anaconda() { 21 | # Download Anaconda3 if it is not already present 22 | if [ ! -f ${ANACONDA_INSTALLER} ]; 23 | then 24 | sudo wget https://repo.continuum.io/archive/${ANACONDA_INSTALLER} 25 | sudo chmod a+rx ${ANACONDA_INSTALLER} 26 | fi 27 | 28 | # Start automatic installation into /opt/anaconda3. 
The parameters 29 | # -f force the installation, even if the directory already exists 30 | # -b silently accepts the license 31 | # -p specifies the installation location 32 | sudo sh ${ANACONDA_INSTALLER} -f -b -p ${ANACONDA_PREFIX} 33 | sudo rm -f ${ANACONDA_INSTALLER} 34 | 35 | # Update some components, otherwise PyArrow cannot be installed 36 | sudo ${ANACONDA_PREFIX}/bin/conda update --yes --freeze anaconda lz4-c openssl 37 | # Install as much as possible via Anaconda 38 | sudo ${ANACONDA_PREFIX}/bin/conda install --yes --freeze python=3.8 pyarrow=2.0.0 s3fs=0.5.2 cartopy=0.18.0 39 | sudo ${ANACONDA_PREFIX}/bin/pip install contextily geopandas 40 | } 41 | 42 | 43 | configure_jupyter_notebook() { 44 | sudo -u ${ANACONDA_USER} mkdir -p ${ANACONDA_USER_HOME}/.jupyter 45 | sudo -u ${ANACONDA_USER} tee ${ANACONDA_USER_HOME}/.jupyter/jupyter_notebook_config.py >/dev/null </dev/null < /dev/null </dev/null </dev/null < /dev/null </dev/null </dev/null < /dev/null </dev/null < /dev/null < /dev/null < /dev/null </dev/null 2>&1 || : 116 | install -d -m 0755 -o \$SVC_USER -g \$SVC_USER \$(dirname \$LOGFILE) 1>/dev/null 2>&1 || : 117 | 118 | if [ ! -x \$EXEC_PATH ]; then 119 | echo "\$EXEC_PATH is not an executable" 120 | exit 1 121 | fi 122 | 123 | run_prestart() { 124 | cd \${WORKING_DIR} 125 | su -s /bin/bash \$SVC_USER -c "nohup nice -n 0 \ 126 | \${EXEC_PATH} \$DAEMON_FLAGS \ 127 | > \$LOGFILE 2>&1 & "'echo \$!' > "\$PIDFILE" 128 | } 129 | 130 | export -f run_prestart 131 | $EXEC_LAUNCHER run_prestart 132 | end script 133 | 134 | script 135 | 136 | # sleep for sometime for the daemon to start running 137 | sleep \$SLEEP_TIME 138 | if [ ! -f \$PIDFILE ]; then 139 | echo "\$PIDFILE not found" 140 | exit 1 141 | fi 142 | pid=\$(<"\$PIDFILE") 143 | while ps -p \$pid > /dev/null; do 144 | sleep \$SLEEP_TIME 145 | done 146 | echo "\$pid stopped running..." 
147 | 148 | end script 149 | 150 | pre-stop script 151 | 152 | # do nothing 153 | 154 | end script 155 | 156 | post-stop script 157 | if [ ! -f \$PIDFILE ]; then 158 | echo "\$PIDFILE not found" 159 | exit 160 | fi 161 | pid=\$(<"\$PIDFILE") 162 | if kill \$pid > /dev/null 2>&1; then 163 | echo "process \$pid is killed" 164 | fi 165 | rm -rf \$PIDFILE 166 | end script 167 | EOL 168 | sudo initctl start jupyter-notebook-server 169 | } 170 | 171 | 172 | install_anaconda 173 | 174 | if is_master; 175 | then 176 | install_pyspark_kernel 177 | install_python_kernel 178 | install_startup 179 | fi 180 | 181 | -------------------------------------------------------------------------------- /scripts/install-kafka.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Script to setup a Kafka server 4 | 5 | AWS_INFO_DIR="/mnt/var/lib/info/" 6 | 7 | az="aws" 8 | broker_id="0" 9 | 10 | repo="https://downloads.apache.org/kafka" 11 | scala_version="2.12" 12 | kafka_version="2.8.1" 13 | num_partitions="16" 14 | repl_factor="1" 15 | log_retention="168" 16 | zookeeper_connect="localhost:2181" 17 | mount_point="/mnt" 18 | 19 | 20 | is_master() { 21 | grep -q "\"isMaster\": true" ${AWS_INFO_DIR}/instance.json 22 | return $? 
23 | } 24 | 25 | 26 | install_kafka() { 27 | # Add Kafka user 28 | sudo useradd kafka 29 | 30 | # add directories that support kafka 31 | sudo mkdir -p /var/run/kafka 32 | sudo mkdir -p /var/log/kafka 33 | sudo mkdir -p ${mount_point}/kafka-logs 34 | 35 | # download kafka 36 | base_name=kafka_${scala_version}-${kafka_version} 37 | cd /tmp 38 | sudo curl -O ${repo}/${kafka_version}/$base_name.tgz 39 | 40 | # unpack the tarball 41 | sudo rm -rf /opt/kafka* 42 | sudo tar xzf /tmp/$base_name.tgz -C /opt 43 | sudo rm -f /tmp/$base_name.tgz 44 | 45 | sudo ln -sf /opt/kafka_${scala_version}-${kafka_version} /opt/kafka 46 | } 47 | 48 | configure_kafka() { 49 | cd /opt/kafka 50 | 51 | # configure the server 52 | cat config/server.properties \ 53 | | sed "s|broker.id=0|broker.id=${broker_id}|" \ 54 | | sed "s|log.dirs=/tmp/kafka-logs|log.dirs=${mount_point}/kafka-logs|" \ 55 | | sed "s|num.partitions=1|num.partitions=${num_partitions}|" \ 56 | | sed "s|log.retention.hours=168|log.retention.hours=${log_retention}|" \ 57 | | sed "s|zookeeper.connect=localhost:2181|zookeeper.connect=${zookeeper_connect}|" \ 58 | > /tmp/server.properties 59 | echo >> /tmp/server.properties 60 | echo "# rack ID" >> /tmp/server.properties 61 | echo "broker.rack=$az" >> /tmp/server.properties 62 | echo " " >> /tmp/server.properties 63 | echo "# replication factor" >> /tmp/server.properties 64 | echo "default.replication.factor=${repl_factor}" >> /tmp/server.properties 65 | echo "# enable topic delete" >> /tmp/server.properties 66 | echo "delete.topic.enable=true" >> /tmp/server.properties 67 | 68 | sudo mv -f /tmp/server.properties config/server.properties 69 | 70 | sudo chown -R kafka:kafka /opt/kafka 71 | sudo chown kafka:kafka /var/run/kafka 72 | sudo chown kafka:kafka /var/log/kafka 73 | sudo chown kafka:kafka ${mount_point}/kafka-logs 74 | } 75 | 76 | 77 | install_startup() { 78 | sudo tee /etc/init.d/kafka > /dev/null < /dev/null | grep -c \$ppid 2> /dev/null\` -eq '1' ]; then 113 | echo -n 
"\$prog is already running" 114 | failure 115 | echo 116 | return 1 117 | else 118 | rm -f \$KAFKA_PIDFILE 119 | fi 120 | fi 121 | 122 | rm -f \$KAFKA_CONSOLE_LOG 123 | mkdir -p \$(dirname \$KAFKA_PIDFILE) 124 | chown \$KAFKA_USER \$(dirname \$KAFKA_PIDFILE) || true 125 | 126 | # Run daemon 127 | mkdir -p \$(dirname \$KAFKA_CONSOLE_LOG) 128 | KAFKA_GC_LOG_OPTS=" " nohup sh \$KAFKA_SCRIPT \$KAFKA_CONFIG 2>&1 >> \$KAFKA_CONSOLE_LOG 2>&1 & 129 | PID=\$! 130 | echo \$PID > \$KAFKA_PIDFILE 131 | 132 | sleep 10 133 | if [ \`ps --pid \$PID 2> /dev/null | grep -c \$PID 2> /dev/null\` -eq '1' ]; then 134 | success 135 | echo 136 | else 137 | rm -f \$KAFKA_PIDFILE 138 | failure 139 | echo 140 | return 1 141 | fi 142 | return 0 143 | } 144 | 145 | 146 | stop() { 147 | echo -n \$"Stopping \$prog: " 148 | count=0; 149 | 150 | if [ -f \$KAFKA_PIDFILE ]; then 151 | read kpid < \$KAFKA_PIDFILE 152 | let kwait=\$SHUTDOWN_WAIT 153 | 154 | # Try issuing SIGTERM 155 | kill -15 \$kpid 156 | until [ \`ps --pid \$kpid 2> /dev/null | grep -c \$kpid 2> /dev/null\` -eq '0' ] || [ \$count -gt \$kwait ] 157 | do 158 | sleep 1 159 | let count=\$count+1; 160 | done 161 | 162 | if [ \$count -gt \$kwait ]; then 163 | kill -9 \$kpid 164 | fi 165 | fi 166 | 167 | rm -f \$KAFKA_PIDFILE 168 | rm -f \$KAFKA_CONSOLE_LOG 169 | success 170 | echo 171 | } 172 | 173 | reload() { 174 | stop 175 | start 176 | } 177 | 178 | restart() { 179 | stop 180 | start 181 | } 182 | 183 | status() { 184 | if [ -f \$KAFKA_PIDFILE ]; then 185 | read ppid < \$KAFKA_PIDFILE 186 | if [ \`ps --pid $ppid 2> /dev/null | grep -c \$ppid 2> /dev/null\` -eq '1' ]; then 187 | echo "\$prog is running (pid \$ppid)" 188 | return 0 189 | else 190 | echo "\$prog dead but pid file exists" 191 | return 1 192 | fi 193 | fi 194 | echo "\$prog is not running" 195 | return 3 196 | } 197 | 198 | case "\$1" in 199 | start) 200 | start 201 | ;; 202 | 203 | stop) 204 | stop 205 | ;; 206 | 207 | reload) 208 | reload 209 | ;; 210 | 211 | restart) 
212 | restart 213 | ;; 214 | 215 | status) 216 | status 217 | ;; 218 | *) 219 | 220 | echo \$"Usage: \$0 {start|stop|reload|restart|status}" 221 | exit 1 222 | esac 223 | 224 | exit \$? 225 | EOL 226 | 227 | sudo chmod a+rx /etc/init.d/kafka 228 | } 229 | 230 | 231 | if is_master; 232 | then 233 | install_kafka 234 | configure_kafka 235 | install_startup 236 | 237 | # Start Kafka 238 | #sudo /etc/init.d/kafka start 239 | fi 240 | 241 | -------------------------------------------------------------------------------- /scripts/pyspark-ml-crashcourse-setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | AWS_INFO_DIR="/mnt/var/lib/info/" 4 | 5 | ANACONDA_PREFIX=/emr/anaconda3 6 | ANACONDA_VERSION=4.2.0 7 | ANACONDA_INSTALLER=Anaconda3-${ANACONDA_VERSION}-Linux-x86_64.sh 8 | 9 | SPARK_HOME=/usr/lib/spark 10 | SPARK_MASTER=yarn 11 | 12 | is_master() { 13 | grep -q "\"isMaster\": true" ${AWS_INFO_DIR}/instance.json 14 | return $? 15 | } 16 | 17 | 18 | install_anaconda() { 19 | # Download Anaconda3 if it is not already present 20 | if [ ! -f ${ANACONDA_INSTALLER} ]; 21 | then 22 | sudo wget https://repo.continuum.io/archive/${ANACONDA_INSTALLER} 23 | sudo chmod a+rx ${ANACONDA_INSTALLER} 24 | fi 25 | 26 | # Start automatic installation into /opt/anaconda3. 
The parameters 27 | # -f force the installation, even if the directory already exists 28 | # -b silently accepts the license 29 | # -p specifies the installation location 30 | sudo sh ${ANACONDA_INSTALLER} -f -b -p ${ANACONDA_PREFIX} 31 | sudo rm -f ${ANACONDA_INSTALLER} 32 | } 33 | 34 | 35 | install_notebooks() { 36 | sudo yum -y install git 37 | git clone https://github.com/dimajix/pyspark-ml-crashcourse.git /home/hadoop/pyspark-ml-crashcourse 38 | chown hadoop:hadoop /home/hadoop/pyspark-ml-crashcourse 39 | } 40 | 41 | 42 | install_pyspark_kernel() { 43 | sudo mkdir -p ${ANACONDA_PREFIX}/share/jupyter/kernels/PySpark3 44 | sudo tee ${ANACONDA_PREFIX}/share/jupyter/kernels/PySpark3/kernel.json >/dev/null < /dev/null < /dev/null < /dev/null </dev/null 2>&1 || : 123 | install -d -m 0755 -o \$SVC_USER -g \$SVC_USER \$(dirname \$LOGFILE) 1>/dev/null 2>&1 || : 124 | 125 | if [ ! -x \$EXEC_PATH ]; then 126 | echo "\$EXEC_PATH is not an executable" 127 | exit 1 128 | fi 129 | 130 | run_prestart() { 131 | cd \${WORKING_DIR} 132 | su -s /bin/bash \$SVC_USER -c "nohup nice -n 0 \ 133 | \${EXEC_PATH} \$DAEMON_FLAGS \ 134 | > \$LOGFILE 2>&1 & "'echo \$!' > "\$PIDFILE" 135 | } 136 | 137 | export -f run_prestart 138 | $EXEC_LAUNCHER run_prestart 139 | end script 140 | 141 | script 142 | 143 | # sleep for sometime for the daemon to start running 144 | sleep \$SLEEP_TIME 145 | if [ ! -f \$PIDFILE ]; then 146 | echo "\$PIDFILE not found" 147 | exit 1 148 | fi 149 | pid=\$(<"\$PIDFILE") 150 | while ps -p \$pid > /dev/null; do 151 | sleep \$SLEEP_TIME 152 | done 153 | echo "\$pid stopped running..." 154 | 155 | end script 156 | 157 | pre-stop script 158 | 159 | # do nothing 160 | 161 | end script 162 | 163 | post-stop script 164 | if [ ! 
-f \$PIDFILE ]; then 165 | echo "\$PIDFILE not found" 166 | exit 167 | fi 168 | pid=\$(<"\$PIDFILE") 169 | if kill \$pid > /dev/null 2>&1; then 170 | echo "process \$pid is killed" 171 | fi 172 | rm -rf \$PIDFILE 173 | end script 174 | EOL 175 | sudo initctl start jupyter-notebook-server 176 | } 177 | 178 | 179 | install_anaconda 180 | 181 | if is_master; 182 | then 183 | install_pyspark_kernel 184 | install_python_kernel 185 | install_startup 186 | install_notebooks 187 | fi 188 | 189 | -------------------------------------------------------------------------------- /scripts/setup-pyspark-advanced.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | AWS_INFO_DIR="/mnt/var/lib/info/" 4 | 5 | 6 | is_master() { 7 | grep -q "\"isMaster\": true" ${AWS_INFO_DIR}/instance.json 8 | return $? 9 | } 10 | 11 | 12 | install_training_repo() { 13 | sudo yum -y install git 14 | 15 | git clone https://github.com/dimajix/pyspark-advanced.git /home/hadoop/pyspark-advanced 16 | chown hadoop:hadoop /home/hadoop/pyspark-advanced 17 | } 18 | 19 | 20 | 21 | if is_master; 22 | then 23 | install_training_repo 24 | fi 25 | 26 | -------------------------------------------------------------------------------- /scripts/setup-pyspark-datascience.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | AWS_INFO_DIR="/mnt/var/lib/info/" 4 | 5 | 6 | is_master() { 7 | grep -q "\"isMaster\": true" ${AWS_INFO_DIR}/instance.json 8 | return $? 
9 | } 10 | 11 | 12 | install_training_repo() { 13 | sudo yum -y install git 14 | 15 | git clone https://github.com/dimajix/pyspark-datascience.git /home/hadoop/pyspark-datascience 16 | chown hadoop:hadoop /home/hadoop/pyspark-datascience 17 | } 18 | 19 | 20 | 21 | if is_master; 22 | then 23 | install_training_repo 24 | fi 25 | 26 | -------------------------------------------------------------------------------- /scripts/setup-training.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | AWS_INFO_DIR="/mnt/var/lib/info/" 4 | 5 | 6 | is_master() { 7 | grep -q "\"isMaster\": true" ${AWS_INFO_DIR}/instance.json 8 | return $? 9 | } 10 | 11 | 12 | exec_mysql() { 13 | sudo mysql --user=root -e "$@" 14 | } 15 | 16 | 17 | install_training_repo() { 18 | sudo yum -y install git python2-boto3 19 | 20 | git clone https://github.com/dimajix/spark-training.git /home/hadoop/spark-training 21 | chown hadoop:hadoop /home/hadoop/spark-training 22 | 23 | # Copy Zeppelin Notebooks into Zeppelin 24 | #srcdir=/home/hadoop/spark-training 25 | #tgtdir=/var/lib/zeppelin 26 | #files=$(find $srcdir -type d -name "zeppelin-*") 27 | 28 | #for file in $files; do 29 | # reldir=${file#$srcdir/} 30 | # src=$file 31 | # dst=$tgtdir/$reldir 32 | # mkdir -p $(dirname $dst) 33 | # echo $src "=>" $dst 34 | # cp -a $src $dst 35 | #done 36 | } 37 | 38 | 39 | create_mysql_database() { 40 | sudo yum -y install mariadb-server 41 | sudo systemctl start mariadb 42 | 43 | exec_mysql "CREATE DATABASE IF NOT EXISTS training;" 44 | exec_mysql "GRANT ALL ON TABLE training.* TO 'user'@'%' IDENTIFIED BY 'user'; FLUSH PRIVILEGES;" 45 | exec_mysql "GRANT ALL ON TABLE training.* TO 'user'@'localhost' IDENTIFIED BY 'user'; FLUSH PRIVILEGES;" 46 | } 47 | 48 | 49 | 50 | if is_master; 51 | then 52 | install_training_repo 53 | create_mysql_database 54 | fi 55 | 56 | -------------------------------------------------------------------------------- /versions.tf: 
-------------------------------------------------------------------------------- 1 | 2 | terraform { 3 | required_version = ">= 0.14" 4 | } 5 | --------------------------------------------------------------------------------