├── packer ├── region_ami_mapping.yaml ├── base_ami.json ├── base_ami.ubuntu14.json ├── base_ami.ubuntu16.json └── copy_base_ami_to_other_regions.sh ├── Pegasus_small.jpg ├── defaults.yml ├── run_tests.sh ├── examples ├── spark │ ├── master.yml │ ├── workers.yml │ └── spark_hadoop.sh ├── hadoop │ ├── namenode.yml │ ├── datanodes.yml │ └── hadoop.sh ├── flink │ └── flink_hadoop.sh └── eventsim │ └── spark_hadoop.sh ├── config ├── hive │ ├── setup_hive.sh │ └── setup_cluster.sh ├── alluxio │ ├── format_fs.sh │ ├── setup_single.sh │ └── setup_cluster.sh ├── hadoop │ ├── format_hdfs.sh │ ├── config_datanode.sh │ ├── config_hosts.sh │ ├── config_namenode.sh │ ├── setup_cluster.sh │ └── setup_single.sh ├── presto │ ├── config_worker.sh │ ├── config_coordinator.sh │ ├── setup_cli.sh │ ├── setup_cluster.sh │ └── setup_single.sh ├── spark │ ├── config_workers.sh │ ├── setup_cluster.sh │ └── setup_single.sh ├── opscenter │ └── setup_cluster.sh ├── kafka-manager │ ├── setup_kafka_manager.sh │ └── setup_cluster.sh ├── riak │ ├── create_cluster.sh │ ├── setup_single.sh │ └── setup_cluster.sh ├── pig │ ├── setup_pig.sh │ └── setup_cluster.sh ├── kibana │ ├── setup_single.sh │ └── setup_cluster.sh ├── flink │ ├── config_master.sh │ ├── setup_cluster.sh │ └── setup_single.sh ├── zeppelin │ ├── setup_cluster.sh │ └── setup_zeppelin.sh ├── redis │ ├── setup_single.sh │ └── setup_cluster.sh ├── ssh │ ├── setup_ssh.sh │ ├── add_to_known_hosts.sh │ └── setup_passwordless_ssh.sh ├── pass_aws_cred ├── kafka │ ├── setup_cluster.sh │ └── setup_single.sh ├── hbase │ ├── setup_cluster.sh │ └── setup_single.sh ├── zookeeper │ ├── setup_cluster.sh │ └── setup_single.sh ├── cassandra │ ├── setup_single.sh │ └── setup_cluster.sh ├── storm │ ├── setup_cluster.sh │ └── setup_single.sh ├── secor │ ├── setup_cluster.sh │ └── setup_secor.sh └── elasticsearch │ ├── setup_cluster.sh │ └── setup_single.sh ├── test ├── templates │ ├── valid_ondemand.yml │ ├── valid_spot.yml │ └── valid_ws.yml └── 
test_utils.bats ├── dependencies.txt ├── install ├── kibana │ ├── install_kibana.sh │ └── install_kibana_cluster.sh ├── riak │ ├── install_riak.sh │ └── install_riak_cluster.sh ├── secor │ ├── install_secor.sh │ └── install_secor_cluster.sh ├── zeppelin │ ├── install_zeppelin_cluster.sh │ └── install_zeppelin.sh ├── environment │ ├── install_env_cluster.sh │ └── install_env.sh ├── memsql │ ├── install_memsql.sh │ └── install_memsql_cluster.sh ├── kafka-manager │ ├── install_kafka_manager.sh │ └── install_kafka_manager_cluster.sh ├── cluster_download └── download_tech ├── service ├── storm │ ├── start_slave.sh │ ├── start_master.sh │ ├── stop_service.sh │ └── start_service.sh ├── kibana │ ├── stop_service.sh │ └── start_service.sh ├── opscenter │ ├── stop_service.sh │ └── start_service.sh ├── zeppelin │ ├── start_service.sh │ └── stop_service.sh ├── flink │ ├── stop_service.sh │ └── start_service.sh ├── hbase │ ├── start_service.sh │ └── stop_service.sh ├── memsql │ ├── stop_service.sh │ └── start_service.sh ├── alluxio │ ├── stop_service.sh │ └── start_service.sh ├── secor │ ├── stop_service.sh │ └── start_service.sh ├── redis │ ├── stop_service.sh │ ├── join_redis_cluster.sh │ └── start_service.sh ├── riak │ ├── start_service.sh │ └── stop_service.sh ├── cassandra │ ├── start_service.sh │ └── stop_service.sh ├── elasticsearch │ ├── start_service.sh │ └── stop_service.sh ├── kafka-manager │ ├── stop_service.sh │ └── start_service.sh ├── kafka │ ├── stop_service.sh │ └── start_service.sh ├── spark │ ├── stop_service.sh │ ├── setup_ipython.sh │ └── start_service.sh ├── zookeeper │ ├── start_service.sh │ └── stop_service.sh ├── presto │ ├── start_service.sh │ └── stop_service.sh └── hadoop │ ├── stop_service.sh │ └── start_service.sh ├── Dockerfile ├── colors.sh ├── .gitignore ├── run_peg_docker.sh ├── pegasus-completion.sh ├── aws-queries.sh ├── LICENSE └── peg /packer/region_ami_mapping.yaml: 
-------------------------------------------------------------------------------- 1 | us-east-1: ami-3508e54f 2 | us-west-2: ami-0d50a275 3 | -------------------------------------------------------------------------------- /Pegasus_small.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InsightDataScience/pegasus/master/Pegasus_small.jpg -------------------------------------------------------------------------------- /defaults.yml: -------------------------------------------------------------------------------- 1 | vol_type: standard 2 | purchase_type: spot 3 | price: 0.08 4 | use_eips: true 5 | -------------------------------------------------------------------------------- /run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export PEG_ROOT=$(dirname "${BASH_SOURCE}") 4 | bats --tap ${PEG_ROOT}/test/test_utils.bats 5 | -------------------------------------------------------------------------------- /examples/spark/master.yml: -------------------------------------------------------------------------------- 1 | purchase_type: on_demand 2 | subnet_id: 3 | num_instances: 1 4 | key_name: 5 | security_group_ids: 6 | instance_type: m4.large 7 | tag_name: 8 | vol_size: 100 9 | role: master 10 | use_eips: true 11 | -------------------------------------------------------------------------------- /examples/spark/workers.yml: -------------------------------------------------------------------------------- 1 | purchase_type: on_demand 2 | subnet_id: 3 | num_instances: 3 4 | key_name: 5 | security_group_ids: 6 | instance_type: m4.large 7 | tag_name: 8 | vol_size: 100 9 | role: worker 10 | use_eips: true 11 | -------------------------------------------------------------------------------- /config/hive/setup_hive.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | . 
~/.profile 4 | 5 | hdfs dfs -mkdir /tmp 6 | hdfs dfs -mkdir -p /user/hive/warehouse 7 | hdfs dfs -chmod g+w /tmp 8 | hdfs dfs -chmod g+w /user/hive/warehouse 9 | hdfs dfs -chmod 1777 /user/hive/warehouse 10 | -------------------------------------------------------------------------------- /examples/hadoop/namenode.yml: -------------------------------------------------------------------------------- 1 | purchase_type: on_demand 2 | subnet_id: subnet-3a78835f 3 | num_instances: 1 4 | key_name: insight-cluster 5 | security_group_ids: sg-9206aaf7 6 | instance_type: m4.large 7 | tag_name: hadoop-cluster 8 | vol_size: 100 9 | role: master 10 | use_eips: true 11 | -------------------------------------------------------------------------------- /test/templates/valid_ondemand.yml: -------------------------------------------------------------------------------- 1 | purchase_type: on_demand 2 | subnet_id: subnet-3a78835f 3 | num_instances: 1 4 | key_name: insight-cluster 5 | security_group_ids: sg-9206aaf7 6 | instance_type: m4.large 7 | tag_name: test-cluster 8 | vol_size: 100 9 | role: master 10 | use_eips: true 11 | -------------------------------------------------------------------------------- /test/templates/valid_spot.yml: -------------------------------------------------------------------------------- 1 | purchase_type: spot 2 | price: 0.25 3 | subnet_id: subnet-3a78835f 4 | num_instances: 1 5 | key_name: insight-cluster 6 | security_group_ids: sg-9206aaf7 7 | instance_type: m4.large 8 | tag_name: test-cluster 9 | vol_size: 100 10 | role: master 11 | use_eips: true 12 | -------------------------------------------------------------------------------- /examples/hadoop/datanodes.yml: -------------------------------------------------------------------------------- 1 | purchase_type: on_demand 2 | subnet_id: subnet-3a78835f 3 | price: 0.13 4 | num_instances: 3 5 | key_name: insight-cluster 6 | security_group_ids: sg-9206aaf7 7 | instance_type: m4.large 8 | tag_name: 
hadoop-cluster 9 | vol_size: 100 10 | role: worker 11 | use_eips: true 12 | -------------------------------------------------------------------------------- /test/templates/valid_ws.yml: -------------------------------------------------------------------------------- 1 | purchase_type: on_demand 2 | subnet_id: subnet-3a78 835f 3 | num_instances: 1 4 | key_name: insig ht-cluster 5 | security_group_ids: sg-9206aaf7 6 | instance_type: m4.l arge 7 | tag_name: test-cluster 8 | vol_size: 100 9 | 10 | role:master 11 | use_eips: true 12 | 13 | -------------------------------------------------------------------------------- /dependencies.txt: -------------------------------------------------------------------------------- 1 | aws: 2 | cassandra: 3 | elasticsearch: 4 | environment: 5 | flink: hadoop 6 | hadoop: 7 | hbase: zookeeper,hadoop 8 | hive: hadoop 9 | kafka: zookeeper 10 | kafka-manager: zookeeper,kafka 11 | kibana: elasticsearch 12 | opscenter: cassandra 13 | pig: hadoop 14 | presto: 15 | redis: 16 | riak: 17 | secor: zookeeper,kafka,hadoop 18 | spark: hadoop 19 | ssh: 20 | storm: zookeeper 21 | tachyon: 22 | zookeeper: 23 | zeppelin: 24 | -------------------------------------------------------------------------------- /config/hive/setup_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # check input arguments 4 | if [ "$#" -ne 1 ]; then 5 | echo "Please specify the cluster name" && exit 1 6 | fi 7 | 8 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 
9 | source ${PEG_ROOT}/util.sh 10 | 11 | CLUSTER_NAME=$1 12 | 13 | MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME}) 14 | 15 | single_script="${PEG_ROOT}/config/hive/setup_hive.sh" 16 | run_script_on_node ${MASTER_DNS} ${single_script} 17 | 18 | -------------------------------------------------------------------------------- /install/kibana/install_kibana.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source ~/.profile 4 | 5 | if [ ! -d /usr/local/secor ]; then 6 | cd /usr/local 7 | sudo git clone https://github.com/pinterest/secor.git 8 | sudo mkdir /usr/local/secor/bin 9 | fi 10 | 11 | if ! grep "export SECOR_HOME" ~/.profile; then 12 | echo -e "\nexport SECOR_HOME=/usr/local/secor\nexport PATH=\$PATH:\$SECOR_HOME/bin" | cat >> ~/.profile 13 | fi 14 | . ~/.profile 15 | 16 | sudo chown -R ubuntu $SECOR_HOME 17 | 18 | cd $SECOR_HOME 19 | sudo mvn clean package & 20 | wait 21 | sudo tar -zxvf ./target/secor-*-SNAPSHOT-bin.tar.gz -C ./bin/ 22 | -------------------------------------------------------------------------------- /config/alluxio/format_fs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | /usr/local/alluxio/bin/alluxio format 18 | -------------------------------------------------------------------------------- /config/hadoop/format_hdfs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | . ~/.profile 18 | 19 | hdfs namenode -format 20 | -------------------------------------------------------------------------------- /config/presto/config_worker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | . 
~/.profile 18 | 19 | # config.properties 20 | cat >> $PRESTO_HOME/etc/config.properties << EOL 21 | coordinator=false 22 | EOL 23 | 24 | -------------------------------------------------------------------------------- /service/storm/start_slave.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | . ~/.profile 18 | 19 | tmux new-session -s supervisor -n bash -d 20 | tmux send-keys -t supervisor '$STORM_HOME/bin/storm supervisor' C-m 21 | 22 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ########################################## 2 | # Dockerfile to run Pegasus 3 | # Based on Debian 4 | ########################################### 5 | FROM debian:jessie 6 | 7 | MAINTAINER Austin Ouyang 8 | 9 | RUN apt-get update \ 10 | && apt-get install -y vim \ 11 | && apt-get install -y openssh-client \ 12 | && apt-get install -y python \ 13 | && apt-get install -y python-dev \ 14 | && apt-get install -y python-pip \ 15 | && apt-get install -y git 16 | 17 | RUN pip install awscli 18 | 19 | RUN git clone https://github.com/sstephenson/bats.git /root/bats 20 | 21 | RUN /root/bats/install.sh /usr/local 22 | 23 | ENV PEGASUS_HOME /root/pegasus 24 | ENV PATH 
$PEGASUS_HOME:$PATH 25 | ENV REM_USER ubuntu 26 | 27 | COPY . /root/pegasus 28 | 29 | RUN echo "source pegasus-completion.sh" >> /root/.bashrc 30 | 31 | WORKDIR /root 32 | 33 | -------------------------------------------------------------------------------- /config/spark/config_workers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | . 
. ~/.profile

# Usage: config_workers.sh <worker-dns> [<worker-dns> ...]
# Records each worker's DNS name in $SPARK_HOME/conf/slaves so the Spark
# master knows which hosts to start workers on.

# append_worker_dns FILE DNS...
# Creates FILE if needed and appends one DNS name per line.
append_worker_dns() {
  local slaves_file=$1
  shift
  touch "$slaves_file"
  local dns
  for dns in "$@"; do
    # printf replaces the original useless `echo $dns | cat >>` pipeline and
    # is safe for names that look like echo flags; quoting protects paths
    # and names containing whitespace.
    printf '%s\n' "$dns" >> "$slaves_file"
  done
}

append_worker_dns "$SPARK_HOME/conf/slaves" "$@"
16 | 17 | # check input arguments 18 | if [ "$#" -ne 1 ]; then 19 | echo "Please specify cluster name!" && exit 1 20 | fi 21 | 22 | CLUSTER_NAME=$1 23 | 24 | echo "Opscenter has no configurations!" 25 | -------------------------------------------------------------------------------- /packer/base_ami.ubuntu14.json: -------------------------------------------------------------------------------- 1 | { 2 | "variables": { 3 | "aws_access_key": "{{env `AWS_ACCESS_KEY_ID`}}", 4 | "aws_secret_key": "{{env `AWS_SECRET_ACCESS_KEY`}}" 5 | }, 6 | "builders": [{ 7 | "type": "amazon-ebs", 8 | "access_key": "{{user `aws_access_key`}}", 9 | "secret_key": "{{user `aws_secret_key`}}", 10 | "region": "us-west-2", 11 | "source_ami": "ami-5189a661", 12 | "instance_type": "m4.large", 13 | "ssh_username": "ubuntu", 14 | "ami_name": "pegasus-java8-{{timestamp}}", 15 | "ami_groups": "all", 16 | "tags": { 17 | "Name": "pegasus-base" 18 | } 19 | }], 20 | "provisioners": [{ 21 | "type": "shell", 22 | "scripts": [ 23 | "../install/environment/install_env.sh" 24 | ], 25 | "pause_before": "30s" 26 | }] 27 | } 28 | -------------------------------------------------------------------------------- /packer/base_ami.ubuntu16.json: -------------------------------------------------------------------------------- 1 | { 2 | "variables": { 3 | "aws_access_key": "{{env `AWS_ACCESS_KEY_ID`}}", 4 | "aws_secret_key": "{{env `AWS_SECRET_ACCESS_KEY`}}" 5 | }, 6 | "builders": [{ 7 | "type": "amazon-ebs", 8 | "access_key": "{{user `aws_access_key`}}", 9 | "secret_key": "{{user `aws_secret_key`}}", 10 | "region": "us-west-2", 11 | "source_ami": "ami-6e1a0117", 12 | "instance_type": "m4.large", 13 | "ssh_username": "ubuntu", 14 | "ami_name": "pegasus-ubuntu16-java8-{{timestamp}}", 15 | "ami_groups": "all", 16 | "tags": { 17 | "Name": "pegasus-base" 18 | } 19 | }], 20 | "provisioners": [{ 21 | "type": "shell", 22 | "scripts": [ 23 | "../install/environment/install_env.sh" 24 | ], 25 | "pause_before": "30s" 26 | }] 
27 | } 28 | -------------------------------------------------------------------------------- /config/presto/config_coordinator.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | . ~/.profile 18 | 19 | # config.properties 20 | cat >> $PRESTO_HOME/etc/config.properties << EOL 21 | coordinator=true 22 | node-scheduler.include-coordinator=false 23 | discovery-server.enabled=true 24 | EOL 25 | 26 | 27 | -------------------------------------------------------------------------------- /service/storm/start_master.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | . 
~/.profile 18 | 19 | tmux new-session -s nimbus -n bash -d 20 | tmux send-keys -t nimbus '$STORM_HOME/bin/storm nimbus' C-m 21 | 22 | tmux new-session -s stormui -n bash -d 23 | tmux send-keys -t stormui '$STORM_HOME/bin/storm ui' C-m 24 | -------------------------------------------------------------------------------- /examples/hadoop/hadoop.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | 19 | CLUSTER_NAME=hadoop-cluster 20 | 21 | peg up ${PEG_ROOT}/examples/hadoop/namenode.yml & 22 | peg up ${PEG_ROOT}/examples/hadoop/datanodes.yml & 23 | 24 | wait 25 | 26 | peg fetch ${CLUSTER_NAME} 27 | 28 | peg install ${CLUSTER_NAME} ssh 29 | peg install ${CLUSTER_NAME} hadoop 30 | -------------------------------------------------------------------------------- /config/kafka-manager/setup_kafka_manager.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Usage: setup_kafka_manager.sh <zookeeper-dns> [<zookeeper-dns> ...]
# Points Kafka Manager at the given ZooKeeper ensemble by rewriting
# kafka-manager.zkhosts in its application.conf.

DNS=( "$@" )

source ~/.profile

# join_zk_hosts DNS...
# Prints "host1:2181,host2:2181,..." — comma separated, no trailing comma.
# Using ${servers%,} instead of the original ${ZK_SERVERS:0:-1} avoids a
# "substring expression < 0" bash error when no hosts are given.
join_zk_hosts() {
  local servers="" dns
  for dns in "$@"; do
    servers="${servers}${dns}:2181,"
  done
  printf '%s' "${servers%,}"
}

ZK_SERVERS=$(join_zk_hosts "${DNS[@]}")

# Only rewrite the config when at least one ZooKeeper host was supplied;
# the original would have crashed (or written an empty zkhosts) here.
if [ "$#" -gt 0 ]; then
  sudo sed -i 's@kafka-manager.zkhosts="kafka-manager-zookeeper:2181"@kafka-manager.zkhosts="'"${ZK_SERVERS}"'"@g' "$KAFKA_MANAGER_HOME/conf/application.conf"
fi
# Usage: create_cluster.sh <node-hostname>
# Joins this Riak node to the cluster member <node-hostname> (an EC2-style
# internal name such as ip-10-0-0-5) and commits the new ring plan.

# ec2_hostname_to_ip NAME
# Converts "ip-10-0-0-5" to "10.0.0.5": dashes become dots, then the
# leading "ip." (3 chars) is stripped.
ec2_hostname_to_ip() {
  local name=$1
  name=${name//-/.}
  printf '%s' "${name:3}"
}

if [ -n "${1:-}" ]; then
  node=$(ec2_hostname_to_ip "$1")
  HOSTNAME=$(ec2_hostname_to_ip "$(hostname)")
  echo "adding $node to $HOSTNAME"
  sudo riak-admin cluster join "riak@${node}"
  sudo riak-admin cluster plan
  sudo riak-admin cluster commit
  sudo riak-admin status | grep ring_members
else
  # Guard against a missing argument: the original would have tried to
  # join the nonsense member "riak@".
  echo "Usage: $0 <node-hostname>" >&2
fi
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | . ~/.profile 18 | 19 | PRESTO_VER=$(head -n 1 $PRESTO_HOME/tech_ver.txt) 20 | 21 | wget https://repo1.maven.org/maven2/com/facebook/presto/presto-cli/$PRESTO_VER/presto-cli-$PRESTO_VER-executable.jar -P ~/Downloads 22 | 23 | mv ~/Downloads/presto-cli* ~/Downloads/presto 24 | chmod +x ~/Downloads/presto 25 | sudo mv ~/Downloads/presto /usr/bin/ 26 | -------------------------------------------------------------------------------- /config/kibana/setup_single.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | ELASTICSEARCH_DNS=$1 18 | 19 | . 
#!/bin/bash

# Copy the Pegasus base AMI from SOURCE_REGION to every region listed in
# DEST_REGIONS, make each copy publicly launchable, and record the new AMI
# ids in region_ami_mapping.yaml.

SOURCE_REGION=us-west-2
BASE_AMI_ID=ami-62e01e02
DEST_REGIONS=(
  us-east-1
)

# copy_ami SOURCE_REGION DEST_REGION AMI_ID
# Prints the id of the newly copied AMI in DEST_REGION.
function copy_ami {
  local source_region=$1
  local dest_region=$2
  local base_ami_id=$3

  aws ec2 copy-image \
    --source-region "$source_region" \
    --source-image-id "$base_ami_id" \
    --name "pegasus-ubuntu16-java8-$(date +%s)" \
    --region "$dest_region" \
    --output text
}

# make_ami_public AMI_ID REGION
# Grants launch permission to everyone on the given AMI.
# BUG FIX: the original omitted --region, so the attribute change was sent
# to the default region, where the freshly copied AMI does not exist.
function make_ami_public {
  local base_ami_id=$1
  local region=$2

  aws ec2 modify-image-attribute \
    --image-id "$base_ami_id" \
    --region "$region" \
    --launch-permission "{\"Add\": [{\"Group\":\"all\"}]}"
}

for DEST_REGION in "${DEST_REGIONS[@]}"; do
  OUTPUT_AMI_ID=$(copy_ami "${SOURCE_REGION}" "${DEST_REGION}" "${BASE_AMI_ID}")
  make_ami_public "${OUTPUT_AMI_ID}" "${DEST_REGION}"
  # BUG FIX: the original used `cat "<string>"`, which treats the string as
  # a filename and appends nothing; echo writes the mapping line as intended.
  echo "${DEST_REGION}: ${OUTPUT_AMI_ID}" >> region_ami_mapping.yaml
  sleep 3
done
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # check if Riak is installed 18 | # if not, install it from the package manager 19 | 20 | if [ -f /usr/sbin/riak ]; then 21 | echo "Riak installed." 22 | else 23 | echo "Riak missing." 24 | echo "installing Riak ..." 25 | curl -s https://packagecloud.io/install/repositories/basho/riak/script.deb.sh | sudo bash 26 | sudo apt-get install riak=2.0.7-1 27 | fi 28 | -------------------------------------------------------------------------------- /service/kibana/stop_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | source ${PEG_ROOT}/util.sh 19 | 20 | if [ "$#" -ne 1 ]; then 21 | echo "Please specify cluster name!" 
&& exit 1 22 | fi 23 | 24 | CLUSTER_NAME=$1 25 | 26 | MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME}) 27 | 28 | run_cmd_on_node ${MASTER_DNS} 'sudo pkill -f kibana' 29 | 30 | echo "Kibana Stopped!" 31 | -------------------------------------------------------------------------------- /service/opscenter/stop_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | source ${PEG_ROOT}/util.sh 19 | 20 | if [ "$#" -ne 1 ]; then 21 | echo "Please specify cluster name!" && exit 1 22 | fi 23 | 24 | CLUSTER_NAME=$1 25 | 26 | MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME}) 27 | run_cmd_on_node ${MASTER_DNS} 'sudo pkill -f opscenter' 28 | 29 | echo "Opscenter Stopped!" 30 | -------------------------------------------------------------------------------- /service/zeppelin/start_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | source ${PEG_ROOT}/util.sh 19 | 20 | # check input arguments 21 | if [ "$#" -ne 1 ]; then 22 | echo "Please specify cluster name!" && exit 1 23 | fi 24 | 25 | CLUSTER_NAME=$1 26 | 27 | MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME}) 28 | run_cmd_on_node ${MASTER_DNS} '. ~/.profile; zeppelin-daemon.sh start' 29 | -------------------------------------------------------------------------------- /service/zeppelin/stop_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | source ${PEG_ROOT}/util.sh 19 | 20 | # check input arguments 21 | if [ "$#" -ne 1 ]; then 22 | echo "Please specify cluster name!" 
&& exit 1 23 | fi 24 | 25 | CLUSTER_NAME=$1 26 | 27 | MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME}) 28 | run_cmd_on_node ${MASTER_DNS} '. ~/.profile; zeppelin-daemon.sh stop' 29 | -------------------------------------------------------------------------------- /service/flink/stop_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | source ${PEG_ROOT}/util.sh 19 | 20 | if [ "$#" -ne 1 ]; then 21 | echo "Please specify cluster name!" && exit 1 22 | fi 23 | 24 | CLUSTER_NAME=$1 25 | 26 | MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME}) 27 | 28 | run_cmd_on_node ${MASTER_DNS} '/usr/local/flink/bin/stop-cluster.sh' 29 | 30 | echo "Flink Stopped!" 31 | -------------------------------------------------------------------------------- /service/hbase/start_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | source ${PEG_ROOT}/util.sh 19 | 20 | if [ "$#" -ne 1 ]; then 21 | echo "Please specify cluster name!" && exit 1 22 | fi 23 | 24 | CLUSTER_NAME=$1 25 | 26 | MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME}) 27 | 28 | run_cmd_on_node ${MASTER_DNS} '. ~/.profile; $HBASE_HOME/bin/start-hbase.sh' 29 | 30 | echo "HBase Started!" 31 | -------------------------------------------------------------------------------- /service/hbase/stop_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | source ${PEG_ROOT}/util.sh 19 | 20 | if [ "$#" -ne 1 ]; then 21 | echo "Please specify cluster name!" 
&& exit 1 22 | fi 23 | 24 | CLUSTER_NAME=$1 25 | 26 | MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME}) 27 | 28 | run_cmd_on_node ${MASTER_DNS} '. ~/.profile; $HBASE_HOME/bin/stop-hbase.sh' 29 | 30 | echo "HBase Stopped!" 31 | -------------------------------------------------------------------------------- /examples/flink/flink_hadoop.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | 19 | CLUSTER_NAME=flink-cluster 20 | 21 | peg up ${PEG_ROOT}/examples/flink/master.yml & 22 | peg up ${PEG_ROOT}/examples/flink/workers.yml & 23 | 24 | wait 25 | 26 | peg fetch ${CLUSTER_NAME} 27 | 28 | peg install ${CLUSTER_NAME} ssh 29 | peg install ${CLUSTER_NAME} aws 30 | peg install ${CLUSTER_NAME} hadoop 31 | peg install ${CLUSTER_NAME} flink 32 | -------------------------------------------------------------------------------- /config/flink/config_master.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | . ~/.profile; 18 | 19 | MASTER_DNS=$1; shift 20 | SLAVE_DNS=( "$@" ) 21 | 22 | mv $FLINK_HOME/conf/masters $FLINK_HOME/conf/masters.backup 23 | echo $MASTER_DNS:8081 > $FLINK_HOME/conf/masters 24 | 25 | mv $FLINK_HOME/conf/slaves $FLINK_HOME/conf/slaves.backup 26 | touch $FLINK_HOME/conf/slaves; 27 | for dns in ${SLAVE_DNS[@]} 28 | do 29 | echo $dns | cat >> $FLINK_HOME/conf/slaves; 30 | done 31 | 32 | -------------------------------------------------------------------------------- /config/zeppelin/setup_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | if [ "$#" -ne 1 ]; then 18 | echo "Please specify cluster name!" && exit 1 19 | fi 20 | 21 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 
22 | source ${PEG_ROOT}/util.sh 23 | 24 | CLUSTER_NAME=$1 25 | 26 | MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME}) 27 | 28 | single_script="${PEG_ROOT}/config/zeppelin/setup_zeppelin.sh" 29 | run_script_on_node ${MASTER_DNS} ${single_script} 30 | 31 | -------------------------------------------------------------------------------- /config/redis/setup_single.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | . ~/.profile 18 | 19 | sed -i "s@# cluster-enabled yes@cluster-enabled yes@g" $REDIS_HOME/redis.conf 20 | sed -i 's@# cluster-config-file nodes-6379.conf@cluster-config-file nodes-6379.conf@g' $REDIS_HOME/redis.conf 21 | sed -i 's@# cluster-node-timeout 15000@cluster-node-timeout 5000@g' $REDIS_HOME/redis.conf 22 | sed -i 's@appendonly no@appendonly yes@g' $REDIS_HOME/redis.conf 23 | 24 | cd $REDIS_HOME 25 | make 26 | cd ~ 27 | 28 | -------------------------------------------------------------------------------- /service/memsql/stop_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | source ${PEG_ROOT}/util.sh 19 | 20 | # check input arguments 21 | if [ "$#" -ne 1 ]; then 22 | echo "Please specify cluster name!" && exit 1 23 | fi 24 | 25 | CLUSTER_NAME=$1 26 | 27 | MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME}) 28 | 29 | cmd='. ~/.profile; sudo memsql-ops agent-stop --all' 30 | run_cmd_on_node ${MASTER_DNS} ${cmd} 31 | 32 | echo "Memsql Stopped!" 33 | -------------------------------------------------------------------------------- /examples/spark/spark_hadoop.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 
18 | 19 | CLUSTER_NAME=spark-cluster 20 | 21 | peg up ${PEG_ROOT}/examples/spark/master.yml & 22 | peg up ${PEG_ROOT}/examples/spark/workers.yml & 23 | 24 | wait 25 | 26 | peg fetch ${CLUSTER_NAME} 27 | 28 | peg install ${CLUSTER_NAME} ssh 29 | peg install ${CLUSTER_NAME} aws 30 | peg install ${CLUSTER_NAME} environment 31 | peg install ${CLUSTER_NAME} hadoop 32 | peg install ${CLUSTER_NAME} spark 33 | -------------------------------------------------------------------------------- /service/alluxio/stop_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | source ${PEG_ROOT}/util.sh 19 | 20 | # check input arguments 21 | if [ "$#" -ne 1 ]; then 22 | echo "Please specify cluster name!" && exit 1 23 | fi 24 | 25 | CLUSTER_NAME=$1 26 | 27 | MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME}) 28 | 29 | cmd='. ~/.profile; /usr/local/alluxio/bin/alluxio-stop.sh all' 30 | run_cmd_on_node ${MASTER_DNS} ${cmd} 31 | 32 | echo "Alluxio Stopped!" 
33 | -------------------------------------------------------------------------------- /config/pig/setup_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # check input arguments 18 | if [ "$#" -ne 1 ]; then 19 | echo "Please specify the cluster name" && exit 1 20 | fi 21 | 22 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 23 | source ${PEG_ROOT}/util.sh 24 | 25 | CLUSTER_NAME=$1 26 | 27 | MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME}) 28 | 29 | single_script="${PEG_ROOT}/config/pig/setup_pig.sh" 30 | run_script_on_node ${MASTER_DNS} ${single_script} 31 | 32 | echo "Pig configuration complete!" 33 | -------------------------------------------------------------------------------- /config/alluxio/setup_single.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | MASTER_HOSTNAME=$1; shift 18 | WORKER_HOSTNAMES=( "$@" ) 19 | 20 | . ~/.profile 21 | 22 | cp $ALLUXIO_HOME/conf/alluxio-env.sh.template $ALLUXIO_HOME/conf/alluxio-env.sh 23 | 24 | echo "export ALLUXIO_MASTER_ADDRESS=$MASTER_HOSTNAME" | cat >> ~/.profile 25 | . ~/.profile 26 | 27 | mv $ALLUXIO_HOME/conf/workers $ALLUXIO_HOME/conf/workers.backup 28 | for worker in ${WORKER_HOSTNAMES[@]} 29 | do 30 | echo $worker >> $ALLUXIO_HOME/conf/workers 31 | done 32 | 33 | -------------------------------------------------------------------------------- /config/hadoop/config_datanode.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | . 
~/.profile 18 | 19 | # configure hdfs-site.xml 20 | sed -i '20i \n dfs.replication\n 3\n' $HADOOP_HOME/etc/hadoop/hdfs-site.xml 21 | sed -i '24i \n dfs.datanode.data.dir\n file:///usr/local/hadoop/hadoop_data/hdfs/datanode\n' $HADOOP_HOME/etc/hadoop/hdfs-site.xml 22 | 23 | sudo mkdir -p $HADOOP_HOME/hadoop_data/hdfs/datanode 24 | 25 | sudo chown -R ubuntu $HADOOP_HOME 26 | -------------------------------------------------------------------------------- /service/secor/stop_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | source ${PEG_ROOT}/util.sh 19 | 20 | if [ "$#" -ne 1 ]; then 21 | echo "Please specify cluster name!" && exit 1 22 | fi 23 | 24 | CLUSTER_NAME=$1 25 | 26 | MASTER_PUBLIC_DNS=$(fetch_public_dns_of_node_in_cluster ${CLUSTER_NAME} 1) 27 | 28 | cmd="sudo kill -9 \$(ps aux | grep '[s]ecor' | awk '{print \$2}')" 29 | 30 | echo ${MASTER_PUBLIC_DNS} 31 | run_cmd_on_node ${MASTER_PUBLIC_DNS} ${cmd} 32 | 33 | wait 34 | 35 | echo "Secor Stopped!" 
36 | -------------------------------------------------------------------------------- /colors.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Some useful colors. 18 | if [[ -z "${color_start-}" ]]; then 19 | declare -r color_start="\033[" 20 | declare -r color_red="${color_start}0;31m" 21 | declare -r color_green="${color_start}0;32m" 22 | declare -r color_yellow="${color_start}0;33m" 23 | declare -r color_blue="${color_start}0;34m" 24 | declare -r color_magenta="${color_start}0;35m" 25 | declare -r color_teal="${color_start}0;36m" 26 | declare -r color_white="${color_start}0;37m" 27 | declare -r color_norm="${color_start}0m" 28 | fi 29 | -------------------------------------------------------------------------------- /service/redis/stop_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | source ${PEG_ROOT}/util.sh 19 | 20 | if [ "$#" -ne 1 ]; then 21 | echo "Please specify cluster name!" && exit 1 22 | fi 23 | 24 | CLUSTER_NAME=$1 25 | 26 | PUBLIC_DNS=$(fetch_cluster_public_dns ${CLUSTER_NAME}) 27 | 28 | # Install and configure nodes for redis 29 | cmd='/usr/local/redis/src/redis-cli shutdown' 30 | for dns in ${PUBLIC_DNS}; do 31 | run_cmd_on_node ${dns} ${cmd} & 32 | done 33 | 34 | wait 35 | 36 | echo "Redis Stopped!" 37 | -------------------------------------------------------------------------------- /service/riak/start_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | source ${PEG_ROOT}/util.sh 19 | 20 | # check input arguments 21 | if [ "$#" -ne 1 ]; then 22 | echo "Please specify cluster name!" 
&& exit 1 23 | fi 24 | 25 | CLUSTER_NAME=$1 26 | 27 | PUBLIC_DNS=$(fetch_cluster_public_dns ${CLUSTER_NAME}) 28 | 29 | cmd='sudo /etc/init.d/riak start' 30 | 31 | for dns in ${PUBLIC_DNS}; do 32 | run_cmd_on_node ${dns} ${cmd} & 33 | done 34 | 35 | wait 36 | 37 | echo -e "${color_green}Riak Started!${color_norm}" -------------------------------------------------------------------------------- /service/cassandra/start_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | source ${PEG_ROOT}/util.sh 19 | 20 | # check input arguments 21 | if [ "$#" -ne 1 ]; then 22 | echo "Please specify cluster name!" && exit 1 23 | fi 24 | 25 | CLUSTER_NAME=$1 26 | 27 | PUBLIC_DNS=$(fetch_cluster_public_dns ${CLUSTER_NAME}) 28 | 29 | cmd='/usr/local/cassandra/bin/cassandra' 30 | # Start each cassandra node 31 | for dns in ${PUBLIC_DNS}; do 32 | run_cmd_on_node ${dns} ${cmd} 33 | done 34 | 35 | echo "Cassandra started!" 
36 | -------------------------------------------------------------------------------- /service/riak/stop_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | source ${PEG_ROOT}/util.sh 19 | 20 | # check input arguments 21 | if [ "$#" -ne 1 ]; then 22 | echo "Please specify cluster name!" && exit 1 23 | fi 24 | 25 | CLUSTER_NAME=$1 26 | 27 | PUBLIC_DNS=$(fetch_cluster_public_dns ${CLUSTER_NAME}) 28 | 29 | cmd='sudo /etc/init.d/riak stop' 30 | 31 | for dns in ${PUBLIC_DNS}; do 32 | run_cmd_on_node ${dns} ${cmd} & 33 | done 34 | 35 | wait 36 | 37 | echo -e "${color_green}Riak Stopped!${color_norm}" 38 | -------------------------------------------------------------------------------- /service/cassandra/stop_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | source ${PEG_ROOT}/util.sh 19 | 20 | # check input arguments 21 | if [ "$#" -ne 1 ]; then 22 | echo "Please specify cluster name!" && exit 1 23 | fi 24 | 25 | CLUSTER_NAME=$1 26 | 27 | PUBLIC_DNS=$(fetch_cluster_public_dns ${CLUSTER_NAME}) 28 | 29 | cmd='/usr/local/cassandra/bin/nodetool stopdaemon' 30 | # Start each cassandra node 31 | for dns in ${PUBLIC_DNS}; do 32 | run_cmd_on_node ${dns} ${cmd} 33 | done 34 | 35 | echo "Cassandra stopped!" 36 | -------------------------------------------------------------------------------- /service/elasticsearch/start_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 
18 | source ${PEG_ROOT}/util.sh 19 | 20 | # check input arguments 21 | if [ "$#" -ne 1 ]; then 22 | echo "Please specify cluster name!" && exit 1 23 | fi 24 | 25 | CLUSTER_NAME=$1 26 | 27 | PUBLIC_DNS=$(fetch_cluster_public_dns ${CLUSTER_NAME}) 28 | 29 | cmd='/usr/local/elasticsearch/bin/elasticsearch -d' 30 | for dns in ${PUBLIC_DNS}; do 31 | echo $dns 32 | run_cmd_on_node ${dns} ${cmd} 33 | done 34 | 35 | echo "Elasticsearch started!" 36 | 37 | -------------------------------------------------------------------------------- /service/flink/start_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | source ${PEG_ROOT}/util.sh 19 | 20 | if [ "$#" -ne 1 ]; then 21 | echo "Please specify cluster name!" && exit 1 22 | fi 23 | 24 | CLUSTER_NAME=$1 25 | 26 | MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME}) 27 | 28 | run_cmd_on_node ${MASTER_DNS} '/usr/local/flink/bin/start-cluster.sh' 29 | 30 | echo "Flink Started!" 
31 | echo -e "${color_green}Flink WebUI${color_norm} is running at ${color_yellow}http://${MASTER_DNS}:8081${color_norm}" 32 | -------------------------------------------------------------------------------- /service/kafka-manager/stop_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | source ${PEG_ROOT}/util.sh 19 | 20 | if [ "$#" -ne 1 ]; then 21 | echo "Please specify cluster name!" && exit 1 22 | fi 23 | 24 | CLUSTER_NAME=$1 25 | 26 | MASTER_PUBLIC_DNS=$(fetch_public_dns_of_node_in_cluster ${CLUSTER_NAME} 1) 27 | 28 | cmd="sudo kill -9 \$(ps aux | grep '[k]afka-manager' | awk '{print \$2}')" 29 | 30 | echo ${MASTER_PUBLIC_DNS} 31 | run_cmd_on_node ${MASTER_PUBLIC_DNS} ${cmd} 32 | 33 | wait 34 | 35 | echo "Kafka-manager Stopped!" 36 | -------------------------------------------------------------------------------- /service/kafka/stop_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | source ${PEG_ROOT}/util.sh 19 | 20 | if [ "$#" -ne 1 ]; then 21 | echo "Please specify cluster name!" && exit 1 22 | fi 23 | 24 | CLUSTER_NAME=$1 25 | 26 | PUBLIC_DNS=$(fetch_cluster_public_dns ${CLUSTER_NAME}) 27 | 28 | cmd='sudo /usr/local/kafka/bin/kafka-server-stop.sh &' 29 | # Stop kafka broker on all nodes 30 | for dns in ${PUBLIC_DNS}; do 31 | echo $dns 32 | run_cmd_on_node ${dns} ${cmd} 33 | done 34 | 35 | wait 36 | 37 | echo "Kafka Stopped!" 38 | 39 | -------------------------------------------------------------------------------- /service/kibana/start_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | source ${PEG_ROOT}/util.sh 19 | 20 | if [ "$#" -ne 1 ]; then 21 | echo "Please specify cluster name!"
&& exit 1 22 | fi 23 | 24 | CLUSTER_NAME=$1 25 | 26 | MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME}) 27 | 28 | run_cmd_on_node ${MASTER_DNS} '. ~/.profile; sudo $KIBANA_HOME/bin/kibana &' & 29 | 30 | echo "Kibana Started!" 31 | echo -e "${color_green}Kibana WebUI${color_norm} is running at ${color_yellow}http://${MASTER_DNS}:5601${color_norm}" 32 | -------------------------------------------------------------------------------- /service/spark/stop_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | source ${PEG_ROOT}/util.sh 19 | 20 | # check input arguments 21 | if [ "$#" -ne 1 ]; then 22 | echo "Please specify cluster name!" && exit 1 23 | fi 24 | 25 | CLUSTER_NAME=$1 26 | 27 | MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME}) 28 | 29 | cmd='tmux kill-session -t ipython_notebook' 30 | run_cmd_on_node ${MASTER_DNS} ${cmd} 31 | 32 | cmd='/usr/local/spark/sbin/stop-all.sh' 33 | run_cmd_on_node ${MASTER_DNS} ${cmd} 34 | 35 | echo "Spark Stopped!" 
36 | -------------------------------------------------------------------------------- /service/zookeeper/start_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | source ${PEG_ROOT}/util.sh 19 | 20 | if [ "$#" -ne 1 ]; then 21 | echo "Please specify cluster name!" && exit 1 22 | fi 23 | 24 | CLUSTER_NAME=$1 25 | 26 | PUBLIC_DNS=$(fetch_cluster_public_dns ${CLUSTER_NAME}) 27 | 28 | # Start zookeeper on all nodes 29 | for dns in ${PUBLIC_DNS}; do 30 | echo $dns 31 | cmd=". ~/.profile; zkServer.sh start" 32 | run_cmd_on_node ${dns} ${cmd} & 33 | done 34 | 35 | wait 36 | 37 | echo "Zookeeper Started!" 38 | -------------------------------------------------------------------------------- /config/ssh/setup_ssh.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | SLAVE_DNS=( "$@" ) 18 | 19 | if ! [ -f ~/.ssh/id_rsa ]; then 20 | ssh-keygen -f ~/.ssh/id_rsa -t rsa -P "" 21 | fi 22 | sudo cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys 23 | 24 | # copy id_rsa.pub in master to all slaves authorized_keys for passwordless ssh 25 | # add additional for multiple slaves 26 | for dns in "${SLAVE_DNS[@]}" 27 | do 28 | echo "Adding $dns to authorized keys..." 29 | cat ~/.ssh/id_rsa.pub | ssh -o "StrictHostKeyChecking no" ${USER}@$dns 'cat >> ~/.ssh/authorized_keys' & 30 | done 31 | 32 | wait 33 | -------------------------------------------------------------------------------- /examples/eventsim/spark_hadoop.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../..
18 | 19 | CLUSTER_NAME=davids-eventsim-cluster 20 | 21 | peg up ${PEG_ROOT}/examples/eventsim/master.yml & 22 | peg up ${PEG_ROOT}/examples/eventsim/workers.yml & 23 | 24 | wait 25 | 26 | peg fetch ${CLUSTER_NAME} 27 | 28 | peg install ${CLUSTER_NAME} ssh 29 | peg install ${CLUSTER_NAME} aws 30 | peg install ${CLUSTER_NAME} hadoop 31 | peg install ${CLUSTER_NAME} spark 32 | 33 | wait 34 | 35 | peg service ${CLUSTER_NAME} hadoop start 36 | peg service ${CLUSTER_NAME} spark start -------------------------------------------------------------------------------- /config/kibana/setup_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # check input arguments 18 | if [ "$#" -ne 1 ]; then 19 | echo "Please specify cluster name!" && exit 1 20 | fi 21 | 22 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 23 | source ${PEG_ROOT}/util.sh 24 | 25 | CLUSTER_NAME=$1 26 | 27 | PUBLIC_DNS=$(fetch_public_dns_of_node_in_cluster ${CLUSTER_NAME} 1) 28 | 29 | # Install Kibana on master 30 | single_script="${PEG_ROOT}/config/kibana/setup_single.sh" 31 | args="$PUBLIC_DNS" 32 | run_script_on_node ${PUBLIC_DNS} ${single_script} ${args} 33 | 34 | echo "Kibana configuration complete!" 
35 | -------------------------------------------------------------------------------- /service/zookeeper/stop_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | source ${PEG_ROOT}/util.sh 19 | 20 | if [ "$#" -ne 1 ]; then 21 | echo "Please specify cluster name!" && exit 1 22 | fi 23 | 24 | CLUSTER_NAME=$1 25 | 26 | PUBLIC_DNS=$(fetch_cluster_public_dns ${CLUSTER_NAME}) 27 | 28 | # Stop zookeeper on all nodes 29 | SERVER_NUM=1 30 | for dns in ${PUBLIC_DNS}; do 31 | echo $dns 32 | cmd=". ~/.profile; zkServer.sh stop" 33 | run_cmd_on_node ${dns} ${cmd} & 34 | done 35 | 36 | wait 37 | 38 | echo "Zookeeper Stopped!" 39 | -------------------------------------------------------------------------------- /service/elasticsearch/stop_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | source ${PEG_ROOT}/util.sh 19 | 20 | # check input arguments 21 | if [ "$#" -ne 1 ]; then 22 | echo "Please specify cluster name!" && exit 1 23 | fi 24 | 25 | CLUSTER_NAME=$1 26 | 27 | PUBLIC_DNS=$(fetch_cluster_public_dns ${CLUSTER_NAME}) 28 | 29 | cmd='pkill -f elasticsearch' 30 | for dns in ${PUBLIC_DNS}; do 31 | echo -e "${color_yellow}Stopping Elasticsearch on node $dns${color_norm}" 32 | run_cmd_on_node ${dns} ${cmd} 33 | done 34 | 35 | echo "Elasticsearch stopped!" 36 | 37 | -------------------------------------------------------------------------------- /service/opscenter/start_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 
18 | source ${PEG_ROOT}/util.sh 19 | 20 | # check input arguments 21 | if [ "$#" -ne 1 ]; then 22 | echo "Please specify cluster name!" && exit 1 23 | fi 24 | 25 | CLUSTER_NAME=$1 26 | 27 | MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME}) 28 | run_cmd_on_node ${MASTER_DNS} '. ~/.profile; $OPSCENTER_HOME/bin/opscenter' 29 | 30 | echo "Opscenter Started complete!" 31 | echo -e "${color_green}Opscenter WebUI${color_norm} is running at ${color_yellow}http://${MASTER_DNS}:8888${color_norm}" 32 | -------------------------------------------------------------------------------- /config/redis/setup_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # check input arguments 18 | if [ "$#" -ne 1 ]; then 19 | echo "Please specify cluster name!" && exit 1 20 | fi 21 | 22 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 23 | source ${PEG_ROOT}/util.sh 24 | 25 | CLUSTER_NAME=$1 26 | 27 | PUBLIC_DNS=$(fetch_cluster_public_dns ${CLUSTER_NAME}) 28 | 29 | # Install and configure nodes for redis 30 | single_script="${PEG_ROOT}/config/redis/setup_single.sh" 31 | for dns in ${PUBLIC_DNS}; do 32 | run_script_on_node ${dns} ${single_script} & 33 | done 34 | 35 | wait 36 | 37 | echo "Redis configuration complete!" 
38 | -------------------------------------------------------------------------------- /config/hadoop/config_hosts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | MASTER_DNS=$1; shift 18 | MASTER_NAME=$1; shift 19 | SLAVE_DNS_NAME=( "$@" ) 20 | LEN=${#SLAVE_DNS_NAME[@]} 21 | HALF=$(echo "$LEN / 2" | awk '{print $1 / $3}') 22 | SLAVE_DNS=( "${SLAVE_DNS_NAME[@]:0:$HALF}" ) 23 | SLAVE_NAME=( "${SLAVE_DNS_NAME[@]:$HALF:$HALF}" ) 24 | 25 | # add for additional datanodes 26 | sudo sed -i '2i '"$MASTER_DNS"' '"$MASTER_NAME"'' /etc/hosts 27 | 28 | for (( i=0; i<$HALF; i++)) 29 | do 30 | echo $i ${SLAVE_DNS[$i]} ${SLAVE_NAME[$i]} 31 | sudo sed -i '3i '"${SLAVE_DNS[$i]}"' '"${SLAVE_NAME[$i]}"'' /etc/hosts 32 | done 33 | 34 | -------------------------------------------------------------------------------- /install/secor/install_secor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | source ~/.profile 18 | 19 | if [ ! -d /usr/local/secor ]; then 20 | cd /usr/local 21 | sudo git clone https://github.com/pinterest/secor.git 22 | sudo mkdir /usr/local/secor/bin 23 | fi 24 | 25 | if ! grep "export SECOR_HOME" ~/.profile; then 26 | echo -e "\nexport SECOR_HOME=/usr/local/secor\nexport PATH=\$PATH:\$SECOR_HOME/bin" | cat >> ~/.profile 27 | fi 28 | . ~/.profile 29 | 30 | sudo chown -R ubuntu $SECOR_HOME 31 | 32 | cd $SECOR_HOME 33 | sudo mvn clean package & 34 | wait 35 | sudo tar -zxvf ./target/secor-*-SNAPSHOT-bin.tar.gz -C ./bin/ 36 | -------------------------------------------------------------------------------- /service/kafka/start_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 
18 | source ${PEG_ROOT}/util.sh 19 | 20 | if [ "$#" -ne 1 ]; then 21 | echo "Please specify cluster name!" && exit 1 22 | fi 23 | 24 | CLUSTER_NAME=$1 25 | 26 | PUBLIC_DNS=$(fetch_cluster_public_dns ${CLUSTER_NAME}) 27 | 28 | cmd='sudo /usr/local/kafka/bin/kafka-server-start.sh /usr/local/kafka/config/server.properties &' 29 | # Start kafka broker on all nodes 30 | for dns in ${PUBLIC_DNS}; do 31 | echo $dns 32 | run_cmd_on_node ${dns} ${cmd} & 33 | done 34 | 35 | wait 36 | 37 | echo "Kafka Started!" 38 | 39 | -------------------------------------------------------------------------------- /service/presto/start_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | source ${PEG_ROOT}/util.sh 19 | 20 | if [ "$#" -ne 1 ]; then 21 | echo "Please specify cluster name!" && exit 1 22 | fi 23 | 24 | CLUSTER_NAME=$1 25 | 26 | MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME}) 27 | WORKER_DNS=$(fetch_cluster_worker_public_dns ${CLUSTER_NAME}) 28 | 29 | cmd='. ~/.profile; $PRESTO_HOME/bin/launcher start' 30 | for dns in ${WORKER_DNS}; do 31 | run_cmd_on_node ${dns} ${cmd} & 32 | done 33 | run_cmd_on_node ${MASTER_DNS} ${cmd} 34 | 35 | echo "Presto Started!" 
36 | 37 | -------------------------------------------------------------------------------- /service/presto/stop_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | source ${PEG_ROOT}/util.sh 19 | 20 | if [ "$#" -ne 1 ]; then 21 | echo "Please specify cluster name!" && exit 1 22 | fi 23 | 24 | CLUSTER_NAME=$1 25 | 26 | MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME}) 27 | WORKER_DNS=$(fetch_cluster_worker_public_dns ${CLUSTER_NAME}) 28 | 29 | cmd='. ~/.profile; $PRESTO_HOME/bin/launcher stop' 30 | for dns in ${WORKER_DNS}; do 31 | run_cmd_on_node ${dns} ${cmd} & 32 | done 33 | run_cmd_on_node ${MASTER_DNS} ${cmd} 34 | 35 | echo "Presto Stopped!" 36 | 37 | -------------------------------------------------------------------------------- /service/redis/join_redis_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PUBLIC_DNS=( "$@" ) 18 | PORT=6379 19 | 20 | . ~/.profile 21 | 22 | sudo gem install redis 23 | 24 | extract_ip_from_dns () { 25 | SPLIT_ARR=(${1//./ }) 26 | DNS_PART_0=${SPLIT_ARR[0]} 27 | DNS_PART_0_ARR=(${DNS_PART_0//-/ }) 28 | IP_SPLIT=${DNS_PART_0_ARR[@]:1} 29 | IP=${IP_SPLIT// /.} 30 | } 31 | 32 | REDIS_NODES="" 33 | for DNS in ${PUBLIC_DNS[@]}; do 34 | extract_ip_from_dns $DNS 35 | REDIS_NODES+=$IP:$PORT\ 36 | done 37 | 38 | echo $REDIS_NODES 39 | 40 | echo "yes" | $REDIS_HOME/src/redis-trib.rb create --replicas 0 $REDIS_NODES & 41 | -------------------------------------------------------------------------------- /install/zeppelin/install_zeppelin_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # check input arguments 18 | if [ "$#" -ne 1 ]; then 19 | echo "Please specify cluster name!" 
&& exit 1 20 | fi 21 | 22 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 23 | source ${PEG_ROOT}/util.sh 24 | 25 | # get input arguments [aws region, pem-key location] 26 | CLUSTER_NAME=$1 27 | 28 | MASTER_PUBLIC_DNS=$(get_public_dns_with_name_and_role ${CLUSTER_NAME} master) 29 | 30 | # Install Zeppelin 31 | script=${PEG_ROOT}/install/zeppelin/install_zeppelin.sh 32 | run_script_on_node ${MASTER_PUBLIC_DNS} ${script} 33 | 34 | echo "Zeppelin installed!" 35 | -------------------------------------------------------------------------------- /service/memsql/start_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | source ${PEG_ROOT}/util.sh 19 | 20 | # check input arguments 21 | if [ "$#" -ne 1 ]; then 22 | echo "Please specify cluster name!" && exit 1 23 | fi 24 | 25 | CLUSTER_NAME=$1 26 | 27 | MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME}) 28 | 29 | cmd='. ~/.profile; sudo memsql-ops agent-start --all' 30 | run_cmd_on_node ${MASTER_DNS} ${cmd} 31 | 32 | echo "Memsql Started!" 
33 | echo -e "${color_green}Memsql WebUI${color_norm} is running at ${color_yellow}http://${MASTER_DNS}:9000${color_norm}" 34 | 35 | -------------------------------------------------------------------------------- /install/environment/install_env_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # check input arguments 18 | if [ "$#" -ne 1 ]; then 19 | echo "Please specify cluster name!" && exit 1 20 | fi 21 | 22 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 23 | source ${PEG_ROOT}/util.sh 24 | 25 | CLUSTER_NAME=$1 26 | 27 | PUBLIC_DNS=($(fetch_cluster_public_dns ${CLUSTER_NAME})) 28 | 29 | script=${PEG_ROOT}/install/environment/install_env.sh 30 | 31 | # Install environment packages to master and slaves 32 | for dns in "${PUBLIC_DNS[@]}"; do 33 | run_script_on_node ${dns} ${script} & 34 | done 35 | 36 | wait 37 | 38 | echo "Environment installed!" 39 | -------------------------------------------------------------------------------- /service/hadoop/stop_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | source ${PEG_ROOT}/util.sh 19 | 20 | if [ "$#" -ne 1 ]; then 21 | echo "Please specify cluster name!" && exit 1 22 | fi 23 | 24 | CLUSTER_NAME=$1 25 | 26 | MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME}) 27 | 28 | run_cmd_on_node ${MASTER_DNS} '. ~/.profile; $HADOOP_HOME/sbin/mr-jobhistory-daemon.sh stop historyserver' 29 | run_cmd_on_node ${MASTER_DNS} '. ~/.profile; $HADOOP_HOME/sbin/stop-yarn.sh' 30 | run_cmd_on_node ${MASTER_DNS} '. ~/.profile; $HADOOP_HOME/sbin/stop-dfs.sh' 31 | 32 | echo "Hadoop stopped!" 33 | -------------------------------------------------------------------------------- /config/pass_aws_cred: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | # check input arguments 18 | if [ "$#" -ne 1 ]; then 19 | echo "Please specify the cluster name" && exit 1 20 | fi 21 | 22 | PEG_ROOT=$(dirname ${BASH_SOURCE})/.. 23 | source ${PEG_ROOT}/util.sh 24 | 25 | CLUSTER_NAME=$1 26 | 27 | PUBLIC_DNS=$(fetch_cluster_public_dns ${CLUSTER_NAME}) 28 | 29 | cmd='echo -e "export AWS_ACCESS_KEY_ID='$AWS_ACCESS_KEY_ID'\nexport AWS_SECRET_ACCESS_KEY='$AWS_SECRET_ACCESS_KEY'\nexport AWS_DEFAULT_REGION='$AWS_DEFAULT_REGION'" >> ~/.profile' 30 | for dns in ${PUBLIC_DNS}; do 31 | echo ${dns} 32 | run_cmd_on_node ${dns} ${cmd} & 33 | done 34 | 35 | wait 36 | 37 | -------------------------------------------------------------------------------- /service/secor/start_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | source ${PEG_ROOT}/util.sh 19 | 20 | if [ "$#" -ne 1 ]; then 21 | echo "Please specify cluster name!" 
#!/bin/bash
#
# Copyright 2015 Insight Data Science
# Licensed under the Apache License, Version 2.0 (the "License");
# see http://www.apache.org/licenses/LICENSE-2.0
#
# Start the Kafka Manager web UI (port 9001) on the first node of a cluster.
# Usage: start_service.sh <cluster-name>

PEG_ROOT=$(dirname ${BASH_SOURCE})/../..
source ${PEG_ROOT}/util.sh
source ${PEG_ROOT}/colors.sh

if [ "$#" -ne 1 ]; then
  echo "Please specify cluster name!"
  exit 1
fi

CLUSTER_NAME=$1

MASTER_PUBLIC_DNS=$(fetch_public_dns_of_node_in_cluster ${CLUSTER_NAME} 1)

# Launch Kafka Manager under the remote user's profile; the trailing '&'
# inside the remote command keeps it running after the session returns.
launch_cmd='source ~/.profile; $KAFKA_MANAGER_HOME/bin/kafka-manager -Dhttp.port=9001 &'
run_cmd_on_node ${MASTER_PUBLIC_DNS} ${launch_cmd} &

echo "kafka-manager started!"
echo -e "${color_yellow}Kafka Manager UI running on: http://${MASTER_PUBLIC_DNS}:9001${color_norm}"
#!/bin/bash
#
# Copyright 2015 Insight Data Science
# Licensed under the Apache License, Version 2.0 (the "License");
# see http://www.apache.org/licenses/LICENSE-2.0
#
# Launch an IPython notebook backed by pyspark inside a detached tmux
# session, sizing executor/driver memory from the machine's total RAM.

# Total system memory in MB: second line of `free -m`, second column.
MEMINFO=($(free -m | sed -n '2p' | sed -e "s/[[:space:]]\+/ /g"))
TOTMEM=${MEMINFO[1]}

# Executor/driver memory in MB: 90% of what remains after reserving
# 1000 MB of headroom for the OS. (Replaces the old `echo | awk` trick
# that did arithmetic on positional fields of the echoed expression.)
calc_exec_mem() {
  awk -v tot="$1" 'BEGIN { print 0.90 * (tot - 1000) }'
}

EXECMEM=$(calc_exec_mem "$TOTMEM")

sudo chown -R ubuntu ~/

# Run the notebook in a named, detached tmux session so it survives logout.
tmux new-session -s ipython_notebook -n bash -d

# ${EXECMEM%.*} truncates any fractional megabytes before the 'M' suffix.
tmux send-keys -t ipython_notebook 'PYSPARK_DRIVER_PYTHON=ipython PYSPARK_DRIVER_PYTHON_OPTS="notebook --no-browser --ip="*" --port=8888" pyspark --packages com.databricks:spark-csv_2.10:1.1.0 --master spark://'$(hostname)':7077 --executor-memory '${EXECMEM%.*}'M --driver-memory '${EXECMEM%.*}'M --conf spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem' C-m
#!/bin/bash
#
# Copyright 2015 Insight Data Science
# Licensed under the Apache License, Version 2.0 (the "License");
# see http://www.apache.org/licenses/LICENSE-2.0
#
# Install Apache Zeppelin from source into /usr/local/zeppelin and register
# ZEPPELIN_HOME in ~/.profile. Idempotent: the clone is skipped when the
# target directory exists, and the build only runs the first time the
# profile entry is added.

# Fetch the source tree once.
if [ ! -d /usr/local/zeppelin ]; then
  git clone https://github.com/apache/incubator-zeppelin.git
  sudo mv incubator-zeppelin /usr/local
  sudo mv /usr/local/incubator-zeppelin /usr/local/zeppelin
fi

# First run only: register ZEPPELIN_HOME, then build Zeppelin.
# grep -q: only the exit status matters, don't echo the matching line.
if ! grep -q "export ZEPPELIN_HOME" ~/.profile; then
  echo -e "\nexport ZEPPELIN_HOME=/usr/local/zeppelin\nexport PATH=\$PATH:\$ZEPPELIN_HOME/bin" >> ~/.profile

  . ~/.profile

  sudo chown -R ubuntu "$ZEPPELIN_HOME"

  # Bail out rather than building in the wrong directory if cd fails.
  cd "$ZEPPELIN_HOME" || exit 1

  # Build synchronously; the old '& wait' backgrounding added nothing.
  sudo mvn clean package -Pspark-1.4 -Dhadoop.version=2.2.0 -Phadoop-2.2 -DskipTests

  echo "Zeppelin installed"
fi
#!/bin/bash
#
# Copyright 2015 Insight Data Science
# Licensed under the Apache License, Version 2.0 (the "License");
# see http://www.apache.org/licenses/LICENSE-2.0
#
# Configure every node of a cluster as a Kafka broker. Each node receives a
# unique zero-based broker id plus the full node DNS list, from which the
# per-node script builds the shared Zookeeper connect string.
# Usage: setup_cluster.sh <cluster-name>

# check input arguments
if [ "$#" -ne 1 ]; then
  echo "Please specify cluster name!" && exit 1
fi

PEG_ROOT=$(dirname ${BASH_SOURCE})/../..
source ${PEG_ROOT}/util.sh

CLUSTER_NAME=$1

PUBLIC_DNS=$(fetch_cluster_public_dns ${CLUSTER_NAME})

single_script="${PEG_ROOT}/config/kafka/setup_single.sh"

# Configure all brokers in parallel, assigning ids 0, 1, 2, ...
broker_id=0
for node_dns in ${PUBLIC_DNS}; do
  run_script_on_node ${node_dns} ${single_script} ${broker_id} ${node_dns} ${PUBLIC_DNS} &
  broker_id=$((broker_id + 1))
done

wait

echo "Kafka configuration complete!"
#!/bin/bash
#
# Copyright 2015 Insight Data Science
# Licensed under the Apache License, Version 2.0 (the "License");
# see http://www.apache.org/licenses/LICENSE-2.0
#
# Configure kafka-manager on the first (master) node of a cluster, handing
# it the DNS names of every cluster node.
# Usage: setup_cluster.sh <cluster-name>

if [ "$#" -ne 1 ]; then
  echo "Please specify cluster name!"
  exit 1
fi

PEG_ROOT=$(dirname ${BASH_SOURCE})/../..
source ${PEG_ROOT}/util.sh
source ${PEG_ROOT}/colors.sh

CLUSTER_NAME=$1

PUBLIC_DNS=$(fetch_cluster_public_dns ${CLUSTER_NAME})
MASTER_PUBLIC_DNS=$(fetch_public_dns_of_node_in_cluster ${CLUSTER_NAME} 1)

# Run the per-node setup on the master only, passing all node DNS names.
setup_script="${PEG_ROOT}/config/kafka-manager/setup_kafka_manager.sh"
run_script_on_node ${MASTER_PUBLIC_DNS} ${setup_script} ${PUBLIC_DNS} &

wait

echo -e "${color_green}Kafka-manager configuration complete on ${MASTER_PUBLIC_DNS}!${color_norm}"
#!/bin/bash
#
# Copyright 2015 Insight Data Science
# Licensed under the Apache License, Version 2.0 (the "License");
# see http://www.apache.org/licenses/LICENSE-2.0
#
# Install MemSQL Ops on the master node of a cluster, then point the user at
# the WebUI to add leaf nodes and finish the cluster deployment.
# Usage: install_memsql_cluster.sh <cluster-name>

# check input arguments
if [ "$#" -ne 1 ]; then
  echo "Please specify cluster name!" && exit 1
fi

PEG_ROOT=$(dirname ${BASH_SOURCE})/../..
source ${PEG_ROOT}/util.sh
# Source colors.sh explicitly so the color_* variables used in the messages
# below are defined, matching config/kafka-manager/setup_cluster.sh.
# NOTE(review): the original never sourced it — confirm util.sh does not
# already provide these; sourcing twice is harmless either way.
source ${PEG_ROOT}/colors.sh

CLUSTER_NAME=$1

MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME})

script="${PEG_ROOT}/install/memsql/install_memsql.sh"
run_script_on_node ${MASTER_DNS} ${script}

echo "Memsql installed!"
echo "Memsql Started!"
echo -e "${color_green}Memsql Cluster WebUI${color_norm} is running at ${color_yellow}http://${MASTER_DNS}:9000${color_norm}"
echo -e "Go to the WebUI to add the Leaf nodes and deploy the Memsql cluster"
#!/bin/bash
#
# Copyright 2015 Insight Data Science
# Licensed under the Apache License, Version 2.0 (the "License");
# see http://www.apache.org/licenses/LICENSE-2.0
#
# Configure HBase on every node of a cluster. Each node is told the
# master's DNS plus the hostnames of all cluster members.
# Usage: setup_cluster.sh <cluster-name>

# check input arguments
if [ "$#" -ne 1 ]; then
  echo "Please specify cluster name!"
  exit 1
fi

PEG_ROOT=$(dirname ${BASH_SOURCE})/../..
source ${PEG_ROOT}/util.sh

CLUSTER_NAME=$1

HOSTNAMES=$(fetch_cluster_hostnames ${CLUSTER_NAME})
PUBLIC_DNS=$(fetch_cluster_public_dns ${CLUSTER_NAME})
MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME})

# Fan the per-node setup out in parallel, then wait for all of them.
node_script="${PEG_ROOT}/config/hbase/setup_single.sh"
for node in ${PUBLIC_DNS}; do
  run_script_on_node ${node} ${node_script} ${MASTER_DNS} ${HOSTNAMES} &
done

wait

echo "HBase configuration complete!"
#!/bin/bash
#
# Copyright 2015 Insight Data Science
# Licensed under the Apache License, Version 2.0 (the "License");
# see http://www.apache.org/licenses/LICENSE-2.0
#
# Point a single Cassandra node's cassandra.yaml at the cluster.
# Arguments:
#   $1 - cluster name
#   $2 - seed node private IP
#   $3 - this node's private IP

CLUSTER=$1
SEED_PRIVATE_IP=$2
NODE_PRIVATE_IP=$3

. ~/.profile

# Rewrite the stock localhost settings in the given cassandra.yaml.
# All expansions are quoted so cluster names with spaces (like the stock
# 'Test Cluster') and paths with spaces cannot word-split the sed args.
patch_cassandra_config() {
  local cluster=$1 seed_ip=$2 node_ip=$3 conf=$4

  sed -i "s@cluster_name: 'Test Cluster'@cluster_name: '${cluster}'@g" "$conf"
  sed -i 's@- seeds: "127.0.0.1"@- seeds: "'"${seed_ip}"'"@g' "$conf"
  sed -i 's@listen_address: localhost@listen_address: '"${node_ip}"'@g' "$conf"
  # Accept client connections on all interfaces, but advertise the
  # node's private IP to drivers.
  sed -i 's@rpc_address: localhost@rpc_address: 0.0.0.0@g' "$conf"
  sed -i 's@\# broadcast_rpc_address: 1.2.3.4@broadcast_rpc_address: '"${node_ip}"'@g' "$conf"
  # Ec2Snitch derives datacenter/rack topology from the EC2 region/AZ.
  sed -i 's@endpoint_snitch: SimpleSnitch@endpoint_snitch: Ec2Snitch@g' "$conf"
}

patch_cassandra_config "$CLUSTER" "$SEED_PRIVATE_IP" "$NODE_PRIVATE_IP" "$CASSANDRA_HOME/conf/cassandra.yaml"
#!/bin/bash
#
# Copyright 2015 Insight Data Science
# Licensed under the Apache License, Version 2.0 (the "License");
# see http://www.apache.org/licenses/LICENSE-2.0
#
# Push the Storm nimbus/supervisor configuration to every node of a cluster.
# Usage: setup_cluster.sh <cluster-name>

if [ "$#" -ne 1 ]; then
  echo "Please specify cluster name!"
  exit 1
fi

PEG_ROOT=$(dirname ${BASH_SOURCE})/../..
source ${PEG_ROOT}/util.sh

CLUSTER_NAME=$1
WORKERS_PER_NODE=4   # supervisor worker slots configured per node

PUBLIC_DNS=$(fetch_cluster_public_dns ${CLUSTER_NAME})

MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME})
WORKER_DNS=$(fetch_cluster_worker_public_dns ${CLUSTER_NAME})

# Every node gets the same stanza; the master's DNS comes first in the
# argument list so the per-node script can use it as the nimbus host.
node_script="${PEG_ROOT}/config/storm/setup_single.sh"
for node in ${PUBLIC_DNS}; do
  run_script_on_node ${node} ${node_script} ${WORKERS_PER_NODE} ${MASTER_DNS} ${WORKER_DNS} &
done

wait

echo "Storm configuration complete!"
#!/bin/bash
#
# Copyright 2015 Insight Data Science
# Licensed under the Apache License, Version 2.0 (the "License");
# see http://www.apache.org/licenses/LICENSE-2.0
#
# Configure Storm on a single node: create the local state directory and
# append the zookeeper/nimbus/supervisor stanza to storm.yaml.
# Arguments:
#   $1   - number of supervisor worker slots on this node
#   $2.. - cluster DNS names; the first entry is the nimbus (master) node

. ~/.profile

NUM_WORKERS=$1; shift
CLUSTER_DNS=( "$@" )

WORKER_PORT=6700   # base port; slots use base, base+1, ...

STORM_LOCAL_DIR=/var/storm
sudo mkdir -p "$STORM_LOCAL_DIR"
sudo chown -R ubuntu "$STORM_LOCAL_DIR"

# Port assigned to a 1-based slot number: $1 = base port, $2 = slot.
# (Replaces the old per-iteration `echo | awk` positional-field trick.)
worker_port_for_slot() {
  echo $(( $1 + $2 - 1 ))
}

# One "- host" YAML list entry per cluster node.
ZK_SERVERS=""
for DNS in "${CLUSTER_DNS[@]}"; do
  ZK_SERVERS+=" - \"$DNS\""$'\n'
done

# One "- port" YAML list entry per worker slot.
SUPERVISOR_PORTS=""
for (( SLOT_NUM=1; SLOT_NUM<=NUM_WORKERS; SLOT_NUM++ )); do
  SUPERVISOR_PORTS+=" - $(worker_port_for_slot "$WORKER_PORT" "$SLOT_NUM")"$'\n'
done

# storm.yaml
cat >> "$STORM_HOME/conf/storm.yaml" << EOL
storm.zookeeper.servers:
$ZK_SERVERS
nimbus.host: "${CLUSTER_DNS[0]}"
storm.local.dir: "$STORM_LOCAL_DIR"
supervisor.slots.ports:
$SUPERVISOR_PORTS
EOL
~/.profile 23 | 24 | cp $ZOOKEEPER_HOME/conf/zoo_sample.cfg $ZOOKEEPER_HOME/conf/zoo.cfg 25 | sed -i 's@/tmp/zookeeper@/var/lib/zookeeper@g' $ZOOKEEPER_HOME/conf/zoo.cfg 26 | 27 | for i in `seq $LEN`; do 28 | SERVER_NUM=$(echo "$LEN - $i + 1" | awk '{print $1 - $3 + $5}') 29 | CURRENT_DNS=${DNS[$(echo "$SERVER_NUM - 1" | awk '{print $1 - $3}' )]} 30 | sed -i '15i server.'"$SERVER_NUM"'='"$CURRENT_DNS"':2888:3888' $ZOOKEEPER_HOME/conf/zoo.cfg 31 | done 32 | 33 | sudo mkdir /var/lib/zookeeper 34 | sudo chown -R ubuntu /var/lib/zookeeper 35 | sudo touch /var/lib/zookeeper/myid 36 | echo 'echo '"$ID"' >> /var/lib/zookeeper/myid' | sudo -s 37 | 38 | -------------------------------------------------------------------------------- /service/storm/start_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | source ${PEG_ROOT}/util.sh 19 | 20 | if [ "$#" -ne 1 ]; then 21 | echo "Please specify cluster name!" 
&& exit 1
fi

CLUSTER_NAME=$1

MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME})
WORKER_DNS=$(fetch_cluster_worker_public_dns ${CLUSTER_NAME})


# Bring up the nimbus daemon on the master node first.
echo $MASTER_DNS
master_script=${PEG_ROOT}/service/storm/start_master.sh
run_script_on_node ${MASTER_DNS} ${master_script}

# Then start a supervisor daemon on every worker node.
slave_script=${PEG_ROOT}/service/storm/start_slave.sh
for worker in ${WORKER_DNS}; do
  echo $worker
  run_script_on_node ${worker} ${slave_script}
done


echo "Storm Started!"
echo -e "${color_green}Storm Cluster WebUI${color_norm} is running at ${color_yellow}http://${MASTER_DNS}:8080${color_norm}"
--------------------------------------------------------------------------------
/config/spark/setup_cluster.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015 Insight Data Science
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

if [ "$#" -ne 1 ]; then
  echo "Please specify cluster name!" && exit 1
fi

PEG_ROOT=$(dirname ${BASH_SOURCE})/../..
22 | source ${PEG_ROOT}/util.sh 23 | 24 | CLUSTER_NAME=$1 25 | 26 | PUBLIC_DNS=$(fetch_cluster_public_dns ${CLUSTER_NAME}) 27 | 28 | MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME}) 29 | WORKER_DNS=$(fetch_cluster_worker_public_dns ${CLUSTER_NAME}) 30 | 31 | # Install and configure Spark on all nodes 32 | for dns in ${PUBLIC_DNS}; do 33 | single_script="${PEG_ROOT}/config/spark/setup_single.sh" 34 | args="${dns}" 35 | run_script_on_node ${dns} ${single_script} ${args} & 36 | done 37 | 38 | wait 39 | 40 | worker_script="${PEG_ROOT}/config/spark/config_workers.sh" 41 | args="${WORKER_DNS}" 42 | run_script_on_node ${MASTER_DNS} ${worker_script} ${args} 43 | -------------------------------------------------------------------------------- /service/alluxio/start_service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 18 | source ${PEG_ROOT}/util.sh 19 | 20 | # check input arguments 21 | if [ "$#" -ne 1 ]; then 22 | echo "Please specify cluster name!" && exit 1 23 | fi 24 | 25 | CLUSTER_NAME=$1 26 | 27 | MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME}) 28 | WORKER_DNS=$(fetch_cluster_worker_public_dns ${CLUSTER_NAME}) 29 | 30 | cmd='. 
~/.profile; /usr/local/alluxio/bin/alluxio-start.sh master' 31 | run_cmd_on_node ${MASTER_DNS} ${cmd} 32 | 33 | cmd='. ~/.profile; /usr/local/alluxio/bin/alluxio-start.sh worker SudoMount' 34 | for dns in ${WORKER_DNS}; do 35 | run_cmd_on_node ${dns} ${cmd} 36 | done 37 | 38 | echo "Alluxio Started!" 39 | echo -e "${color_green}Alluxio WebUI${color_norm} is running at ${color_yellow}http://${MASTER_DNS}:19999${color_norm}" 40 | -------------------------------------------------------------------------------- /config/cassandra/setup_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # check input arguments 18 | if [ "$#" -ne 1 ]; then 19 | echo "Please specify cluster name!" && exit 1 20 | fi 21 | 22 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 
23 | source ${PEG_ROOT}/util.sh 24 | 25 | CLUSTER_NAME=$1 26 | 27 | PUBLIC_DNS=$(fetch_cluster_public_dns ${CLUSTER_NAME}) 28 | PRIVATE_IP_ARR=($(fetch_cluster_private_ips ${CLUSTER_NAME})) 29 | 30 | SEED_IP=$(fetch_cluster_master_private_ip ${CLUSTER_NAME}) 31 | SEED_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME}) 32 | 33 | single_script="${PEG_ROOT}/config/cassandra/setup_single.sh" 34 | 35 | IDX=0 36 | for dns in ${PUBLIC_DNS}; 37 | do 38 | args="${CLUSTER_NAME} ${SEED_IP} ${PRIVATE_IP_ARR[$IDX]}" 39 | run_script_on_node ${dns} ${single_script} ${args} & 40 | IDX=$(($IDX+1)) 41 | done 42 | 43 | wait 44 | 45 | echo "Cassandra configuration complete!" 46 | -------------------------------------------------------------------------------- /config/secor/setup_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | if [ "$#" -ne 1 ]; then 18 | echo "Please specify cluster name!" && exit 1 19 | fi 20 | 21 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 
22 | source ${PEG_ROOT}/util.sh 23 | source ${PEG_ROOT}/colors.sh 24 | 25 | CLUSTER_NAME=$1 26 | 27 | PUBLIC_DNS=$(fetch_cluster_public_dns ${CLUSTER_NAME}) 28 | MASTER_PUBLIC_DNS=$(fetch_public_dns_of_node_in_cluster ${CLUSTER_NAME} 1) 29 | 30 | echo -e "${color_magenta}" 31 | 32 | while [ -z ${s3_bucket} ]; do 33 | read -p "Which S3 bucket do you want to use? " s3_bucket 34 | done 35 | 36 | echo -e "${color_norm}" 37 | 38 | single_script="${PEG_ROOT}/config/secor/setup_secor.sh" 39 | args="${PUBLIC_DNS} ${s3_bucket}" 40 | run_script_on_node ${MASTER_PUBLIC_DNS} ${single_script} ${args} & 41 | 42 | wait 43 | 44 | echo -e "${color_green}Secor configuration complete on ${MASTER_PUBLIC_DNS}!${color_norm}" 45 | -------------------------------------------------------------------------------- /config/elasticsearch/setup_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # check input arguments 18 | if [ "$#" -ne 1 ]; then 19 | echo "Please specify cluster name!" && exit 1 20 | fi 21 | 22 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 
23 | source ${PEG_ROOT}/util.sh 24 | 25 | CLUSTER_NAME=$1 26 | 27 | PUBLIC_DNS=$(fetch_cluster_public_dns ${CLUSTER_NAME}) 28 | HOSTNAMES=$(fetch_cluster_hostnames ${CLUSTER_NAME}) 29 | NUMBER_OF_NODES=$(wc -l < ${PEG_ROOT}/tmp/${CLUSTER_NAME}/public_dns) 30 | QUORUM=$((${NUMBER_OF_NODES}/2 + 1)) 31 | 32 | single_script="${PEG_ROOT}/config/elasticsearch/setup_single.sh" 33 | args="$CLUSTER_NAME $AWS_DEFAULT_REGION $AWS_SECRET_ACCESS_KEY $AWS_ACCESS_KEY_ID $QUORUM ${HOSTNAMES}" 34 | # Install and configure nodes for elasticsearch 35 | for dns in ${PUBLIC_DNS}; do 36 | run_script_on_node ${dns} ${single_script} ${args} & 37 | done 38 | 39 | wait 40 | 41 | echo "Elasticsearch configuration complete!" 42 | 43 | -------------------------------------------------------------------------------- /config/alluxio/setup_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # must be called from the top level 18 | 19 | # check input arguments 20 | if [ "$#" -ne 1 ]; then 21 | echo "Please specify cluster name!" && exit 1 22 | fi 23 | 24 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 
25 | source ${PEG_ROOT}/util.sh 26 | 27 | CLUSTER_NAME=$1 28 | 29 | MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME}) 30 | PUBLIC_DNS=$(fetch_cluster_public_dns ${CLUSTER_NAME}) 31 | HOSTNAMES=$(fetch_cluster_hostnames ${CLUSTER_NAME}) 32 | 33 | single_script="${PEG_ROOT}/config/alluxio/setup_single.sh" 34 | args="${HOSTNAMES}" 35 | # Install Alluxio on master and slaves 36 | for dns in ${PUBLIC_DNS} 37 | do 38 | run_script_on_node ${dns} ${single_script} ${args} & 39 | done 40 | 41 | wait 42 | 43 | format_script="${PEG_ROOT}/config/alluxio/format_fs.sh" 44 | run_script_on_node ${MASTER_DNS} ${format_script} 45 | 46 | echo "Alluxio configuration complete!" 47 | -------------------------------------------------------------------------------- /install/cluster_download: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # check input arguments 18 | if [ "$#" -ne 3 ]; then 19 | echo "Please specify cluster name, technology and mode (cluster or single)!" && exit 1 20 | fi 21 | 22 | PEG_ROOT=$(dirname ${BASH_SOURCE})/.. 

source ${PEG_ROOT}/util.sh

# get input arguments [aws region, pem-key location]
CLUSTER_NAME=$1
TECHNOLOGY=$2
MODE=$3

# Resolve which nodes receive the download: only the head node in single
# mode, every node in cluster mode.
case ${MODE} in
  single)
    PUBLIC_DNS=$(fetch_public_dns_of_node_in_cluster ${CLUSTER_NAME} 1)
    ;;

  cluster)
    PUBLIC_DNS=$(fetch_cluster_public_dns ${CLUSTER_NAME})
    ;;

  *)
    # Previously an unrecognized mode fell through with PUBLIC_DNS unset and
    # the script silently did nothing; fail loudly instead.
    echo "Unknown mode '${MODE}': expected 'single' or 'cluster'" && exit 1
    ;;
esac

script="${PEG_ROOT}/install/download_tech"
args="${TECHNOLOGY}"

for dns in ${PUBLIC_DNS}; do
  echo -e "${color_yellow}Checking node ${dns}${color_norm}"
  run_script_on_node ${dns} ${script} ${args} &
done

wait

echo "${TECHNOLOGY} installed!"
--------------------------------------------------------------------------------
/config/zeppelin/setup_zeppelin.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015 Insight Data Science
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

. 
~/.profile 18 | 19 | cp $ZEPPELIN_HOME/conf/zeppelin-env.sh.template $ZEPPELIN_HOME/conf/zeppelin-env.sh 20 | cp $ZEPPELIN_HOME/conf/zeppelin-site.xml.template $ZEPPELIN_HOME/conf/zeppelin-site.xml 21 | 22 | sed -i '18i export JAVA_HOME=/usr' $ZEPPELIN_HOME/conf/zeppelin-env.sh 23 | sed -i '18i export MASTER=spark://'$(hostname)':7077' $ZEPPELIN_HOME/conf/zeppelin-env.sh 24 | sed -i '18i export SPARK_HOME='$SPARK_HOME'' $ZEPPELIN_HOME/conf/zeppelin-env.sh 25 | 26 | MEMINFO=($(free -m | sed -n '2p' | sed -e "s/[[:space:]]\+/ /g")) 27 | TOTMEM=${MEMINFO[1]} 28 | EXECMEM=$(echo "0.90 * ( $TOTMEM - 1000 )" | awk '{print $1 * ($4-$6)}') 29 | #sed -i '18i export SPARK_SUBMIT_OPTIONS="--driver-memory '${EXECMEM%.*}'M --executor-memory '${EXECMEM%.*}'M"' $ZEPPELIN_HOME/conf/zeppelin-env.sh 30 | 31 | sed -i 's@8080@7888@g' $ZEPPELIN_HOME/conf/zeppelin-site.xml 32 | 33 | -------------------------------------------------------------------------------- /config/flink/setup_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # check input arguments 18 | if [ "$#" -ne 1 ]; then 19 | echo "Please specify cluster name!" && exit 1 20 | fi 21 | 22 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 
23 | source ${PEG_ROOT}/util.sh 24 | 25 | CLUSTER_NAME=$1 26 | 27 | PUBLIC_DNS=$(fetch_cluster_public_dns ${CLUSTER_NAME}) 28 | MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME}) 29 | WORKER_DNS=$(fetch_cluster_worker_public_dns ${CLUSTER_NAME}) 30 | NUM_WORKERS=$(echo ${WORKER_DNS} | wc -w) 31 | 32 | single_script="${PEG_ROOT}/config/flink/setup_single.sh" 33 | args="$MASTER_DNS $NUM_WORKERS" 34 | 35 | # Install and configure Flink on all nodes 36 | for dns in ${PUBLIC_DNS}; do 37 | run_script_on_node ${dns} ${single_script} ${args} & 38 | done 39 | 40 | wait 41 | 42 | master_script="${PEG_ROOT}/config/flink/config_master.sh" 43 | args="${PUBLIC_DNS}" 44 | run_script_on_node ${MASTER_DNS} ${master_script} ${args} 45 | 46 | -------------------------------------------------------------------------------- /config/hadoop/config_namenode.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | . 
~/.profile

# args: <master hostname> <slave hostnames ...>
MASTER_NAME=$1; shift
SLAVE_NAME=( "$@" )

# configure hdfs-site.xml
# NOTE(review): the text inserted by these sed commands appears to have lost
# its XML tags (<property>/<name>/<value>) somewhere in transit -- confirm
# against the upstream file before relying on them.
sed -i '20i \n dfs.replication\n 3\n' $HADOOP_HOME/etc/hadoop/hdfs-site.xml
sed -i '24i \n dfs.namenode.name.dir\n file:///usr/local/hadoop/hadoop_data/hdfs/namenode\n' $HADOOP_HOME/etc/hadoop/hdfs-site.xml
sudo mkdir -p $HADOOP_HOME/hadoop_data/hdfs/namenode

# Record the master hostname.  echo can append directly; no need to pipe
# through cat.
touch $HADOOP_HOME/etc/hadoop/masters
echo "$MASTER_NAME" >> $HADOOP_HOME/etc/hadoop/masters

# add for additional datanodes: build the slaves file in slaves.new, then
# rename it into place so a partially written file is never read.
touch $HADOOP_HOME/etc/hadoop/slaves.new
for name in ${SLAVE_NAME[@]}
do
  echo "$name" >> $HADOOP_HOME/etc/hadoop/slaves.new
done
mv $HADOOP_HOME/etc/hadoop/slaves.new $HADOOP_HOME/etc/hadoop/slaves

sudo chown -R ubuntu $HADOOP_HOME
--------------------------------------------------------------------------------
/service/spark/start_service.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015 Insight Data Science
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

PEG_ROOT=$(dirname ${BASH_SOURCE})/../..
source ${PEG_ROOT}/util.sh

# check input arguments
if [ "$#" -ne 1 ]; then
  echo "Please specify cluster name!" 
&& exit 1
fi

CLUSTER_NAME=$1

MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME})

# Launch the Spark master and all registered workers from the master node.
start_all_cmd='/usr/local/spark/sbin/start-all.sh'
run_cmd_on_node ${MASTER_DNS} ${start_all_cmd}

# Bring up the Jupyter/IPython front-end on the master as well.
ipython_script=${PEG_ROOT}/service/spark/setup_ipython.sh
run_script_on_node ${MASTER_DNS} ${ipython_script}

echo "Spark Started!"
echo -e "${color_green}Spark Cluster WebUI${color_norm} is running at ${color_yellow}http://${MASTER_DNS}:8080${color_norm}"
echo -e "${color_green}Spark Job WebUI${color_norm} is running at ${color_yellow}http://${MASTER_DNS}:4040${color_norm}"
echo -e "${color_green}Spark Jupyter Notebook${color_norm} is running at ${color_yellow}http://${MASTER_DNS}:8888${color_norm}"
--------------------------------------------------------------------------------
/service/hadoop/start_service.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015 Insight Data Science
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

PEG_ROOT=$(dirname ${BASH_SOURCE})/../..
source ${PEG_ROOT}/util.sh

if [ "$#" -ne 1 ]; then
  echo "Please specify cluster name!" && exit 1
fi

CLUSTER_NAME=$1

MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME})

run_cmd_on_node ${MASTER_DNS} '. ~/.profile; $HADOOP_HOME/sbin/start-dfs.sh'
run_cmd_on_node ${MASTER_DNS} '. 
~/.profile; $HADOOP_HOME/sbin/start-yarn.sh' 30 | run_cmd_on_node ${MASTER_DNS} '. ~/.profile; $HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver' 31 | 32 | echo "Hadoop started!" 33 | echo -e "${color_green}HDFS WebUI${color_norm} is running at ${color_yellow}http://${MASTER_DNS}:50070${color_norm}" 34 | echo -e "${color_green}Hadoop Job Tracker WebUI${color_norm} is running at ${color_yellow}http://${MASTER_DNS}:8088${color_norm}" 35 | echo -e "${color_green}Hadoop Job History WebUI${color_norm} is running at ${color_yellow}http://${MASTER_DNS}:19888${color_norm}" 36 | 37 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | tmp 3 | archive 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | env/ 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *,cover 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | 57 | # Sphinx documentation 58 | docs/_build/ 59 | 60 | # PyBuilder 61 | target/ 62 | 63 | 64 | # Logs 65 | logs 66 | *.log 67 | 68 | # Runtime data 69 | pids 70 | *.pid 71 | *.seed 72 | 73 | # Directory for instrumented libs generated by jscoverage/JSCover 74 | lib-cov 75 | 76 | # Coverage directory used by tools like istanbul 77 | coverage 78 | 79 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 80 | .grunt 81 | 82 | # node-waf configuration 83 | .lock-wscript 84 | 85 | # Compiled binary addons (http://nodejs.org/api/addons.html) 86 | build/Release 87 | 88 | # Dependency directory 89 | # https://docs.npmjs.com/misc/faq#should-i-check-my-node-modules-folder-into-git 90 | node_modules 91 | 92 | # config templates 93 | *.yml -------------------------------------------------------------------------------- /test/test_utils.bats: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bats 2 | 3 | source ${PEG_ROOT}/util.sh 4 | 5 | setup() { 6 | mkdir ${PEG_ROOT}/test/tmp 7 | } 8 | 9 | teardown() { 10 | rm -rf ${PEG_ROOT}/test/tmp 11 | } 12 | 13 | @test "parse templates valid on-demand" { 14 | eval $(parse_yaml ${PEG_ROOT}/test/templates/valid_ondemand.yml) 15 | 16 | [ "$purchase_type" = "on_demand" ] 17 | [ "$subnet_id" = "subnet-3a78835f" ] 18 | [ "$num_instances" = "1" ] 19 | [ "$key_name" = "insight-cluster" ] 20 | [ "$security_group_ids" = "sg-9206aaf7" ] 21 | [ "$instance_type" = "m4.large" ] 22 | [ "$tag_name" = "test-cluster" ] 23 | [ "$vol_size" = "100" ] 24 | [ "$role" = "master" ] 25 | [ "$use_eips" = "true" ] 26 | } 27 | 28 | 
@test "parse templates valid spot" { 29 | eval $(parse_yaml ${PEG_ROOT}/test/templates/valid_spot.yml) 30 | 31 | [ "$purchase_type" = "spot" ] 32 | [ "$price" = "0.25" ] 33 | [ "$subnet_id" = "subnet-3a78835f" ] 34 | [ "$num_instances" = "1" ] 35 | [ "$key_name" = "insight-cluster" ] 36 | [ "$security_group_ids" = "sg-9206aaf7" ] 37 | [ "$instance_type" = "m4.large" ] 38 | [ "$tag_name" = "test-cluster" ] 39 | [ "$vol_size" = "100" ] 40 | [ "$role" = "master" ] 41 | [ "$use_eips" = "true" ] 42 | } 43 | 44 | @test "parse templates valid with whitespaces and newlines" { 45 | eval $(parse_yaml ${PEG_ROOT}/test/templates/valid_ws.yml) 46 | [ "$purchase_type" = "on_demand" ] 47 | [ "$subnet_id" = "subnet-3a78835f" ] 48 | [ "$num_instances" = "1" ] 49 | [ "$key_name" = "insight-cluster" ] 50 | [ "$security_group_ids" = "sg-9206aaf7" ] 51 | [ "$instance_type" = "m4.large" ] 52 | [ "$tag_name" = "test-cluster" ] 53 | [ "$vol_size" = "100" ] 54 | [ "$role" = "master" ] 55 | [ "$use_eips" = "true" ] 56 | } 57 | -------------------------------------------------------------------------------- /config/ssh/setup_passwordless_ssh.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | # must be called from top level 18 | 19 | # check input arguments 20 | if [ "$#" -ne 1 ]; then 21 | echo "Please specify cluster name!" && exit 1 22 | fi 23 | 24 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 25 | source ${PEG_ROOT}/util.sh 26 | 27 | CLUSTER_NAME=$1 28 | 29 | MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME}) 30 | WORKER_DNS=$(fetch_cluster_worker_public_dns ${CLUSTER_NAME}) 31 | 32 | HOSTNAMES=$(fetch_cluster_hostnames ${CLUSTER_NAME}) 33 | 34 | restart_sshagent_if_needed ${CLUSTER_NAME} 35 | 36 | # Enable passwordless SSH from local to master 37 | if ! [ -f ~/.ssh/id_rsa ]; then 38 | ssh-keygen -f ~/.ssh/id_rsa -t rsa -P "" 39 | fi 40 | cat ~/.ssh/id_rsa.pub | run_cmd_on_node ${MASTER_DNS} 'cat >> ~/.ssh/authorized_keys' 41 | 42 | # Enable passwordless SSH from master to slaves 43 | SCRIPT=${PEG_ROOT}/config/ssh/setup_ssh.sh 44 | ARGS="${WORKER_DNS}" 45 | run_script_on_node ${MASTER_DNS} ${SCRIPT} ${ARGS} 46 | 47 | # Add NameNode, DataNodes, and Secondary NameNode to known hosts 48 | SCRIPT=${PEG_ROOT}/config/ssh/add_to_known_hosts.sh 49 | ARGS="${MASTER_DNS} ${HOSTNAMES}" 50 | run_script_on_node ${MASTER_DNS} ${SCRIPT} ${ARGS} 51 | 52 | -------------------------------------------------------------------------------- /run_peg_docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | tag=0.1.3 18 | 19 | nargs="$#" 20 | 21 | if [ "${nargs}" -ne 2 ]; then 22 | echo "incorrect number of arguments" 23 | echo "./run_peg_docker.sh " 24 | exit 1 25 | fi 26 | 27 | pem_key_name=$1 28 | instance_template_folder=$2 29 | 30 | if [ ! -f ~/.ssh/${pem_key_name}.pem ]; then 31 | echo "${pem_key_name} does not exist in your ~/.ssh folder" 32 | exit 1 33 | fi 34 | 35 | if [ ! -d ${instance_template_folder} ]; then 36 | echo "${instance_template_folder} directory does not exist" 37 | exit 1 38 | fi 39 | 40 | folder_split=($(echo ${instance_template_folder} | tr "/" " ")) 41 | folder_name=${folder_split[${#folder_split[@]}-1]} 42 | 43 | docker run -it --rm --name peg \ 44 | -e AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:?"set AWS_ACCESS_KEY_ID before proceeding"} \ 45 | -e AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:?"set AWS_SECRET_ACCESS_KEY before proceeding"} \ 46 | -e AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION:?"set AWS_DEFAULT_REGION before proceeding"} \ 47 | -e USER=${USER:=pegasus} \ 48 | -v ~/.ssh/${pem_key_name}.pem:/root/.ssh/${pem_key_name}.pem \ 49 | -v ${instance_template_folder}:/root/${folder_name} \ 50 | insightdatascience/pegasus:${tag} 51 | -------------------------------------------------------------------------------- /config/presto/setup_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

if [ "$#" -ne 1 ]; then
  echo "Please specify cluster name!" && exit 1
fi

PEG_ROOT=$(dirname ${BASH_SOURCE})/../..
source ${PEG_ROOT}/util.sh

CLUSTER_NAME=$1

# Node addresses: everyone, the coordinator, and the workers.
PUBLIC_DNS=$(fetch_cluster_public_dns ${CLUSTER_NAME})

MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME})
WORKER_DNS=$(fetch_cluster_worker_public_dns ${CLUSTER_NAME})
NUM_WORKERS=$(echo ${WORKER_DNS} | wc -w)

# Configure base Presto coordinator and workers
single_script="${PEG_ROOT}/config/presto/setup_single.sh"
args="$MASTER_DNS $NUM_WORKERS"
for node in ${PUBLIC_DNS}; do
  run_script_on_node ${node} ${single_script} ${args} &
done

wait

# Configure Presto coordinator and workers
coordinator_script="${PEG_ROOT}/config/presto/config_coordinator.sh"
run_script_on_node ${MASTER_DNS} ${coordinator_script}

# Workers are configured concurrently; the barrier below waits for all.
worker_script="${PEG_ROOT}/config/presto/config_worker.sh"
for node in ${WORKER_DNS}; do
  run_script_on_node ${node} ${worker_script} &
done

wait

# Finally install the command-line client on the coordinator.
cli_script="${PEG_ROOT}/config/presto/setup_cli.sh"
run_script_on_node ${MASTER_DNS} ${cli_script}

echo "Presto configuration complete!"
56 | 57 | -------------------------------------------------------------------------------- /install/secor/install_secor_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # check input arguments 18 | if [ "$#" -ne 1 ]; then 19 | echo "Please specify cluster name!" && exit 1 20 | fi 21 | 22 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 
23 | source ${PEG_ROOT}/util.sh 24 | source ${PEG_ROOT}/colors.sh 25 | 26 | CLUSTER_NAME=$1 27 | MASTER_PUBLIC_DNS=$(fetch_public_dns_of_node_in_cluster ${CLUSTER_NAME} 1) 28 | TECHNOLOGY="secor" 29 | DEP_ROOT_FOLDER=/usr/local/ 30 | 31 | function check_dependencies_and_install_secor { 32 | if [ -z "${DEP}" ]; then 33 | echo -e "${color_yellow}Installing Secor on Node - ${MASTER_PUBLIC_DNS} - in ${CLUSTER_NAME}${color_norm}" 34 | script=${PEG_ROOT}/install/secor/install_secor.sh 35 | run_script_on_node ${MASTER_PUBLIC_DNS} ${script} 36 | else 37 | INSTALLED=$(check_remote_folder ${MASTER_PUBLIC_DNS} ${DEP_ROOT_FOLDER}${DEP[0]}) 38 | if [ "${INSTALLED}" = "installed" ]; then 39 | DEP=(${DEP[@]:1}) 40 | check_dependencies_and_install_secor 41 | else 42 | echo "${DEP} is not installed in ${DEP_ROOT_FOLDER}" 43 | echo "Please install ${DEP} and then proceed with ${TECHNOLOGY}" 44 | echo "peg install ${CLUSTER_NAME} ${TECHNOLOGY}" 45 | exit 1 46 | fi 47 | fi 48 | } 49 | 50 | # Check if dependencies are installed 51 | # If yes, then install secor 52 | DEP=($(get_dependencies)) 53 | check_dependencies_and_install_secor 54 | 55 | echo "Secor installed!" 56 | -------------------------------------------------------------------------------- /config/secor/setup_secor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | args=( "$@" ) 18 | s3_bucket=${args[-1]} 19 | length=$(($#-1)) 20 | DNS=${@:1:$length} 21 | DNS=(${DNS}) 22 | 23 | . ~/.profile 24 | 25 | sudo sed -i 's@aws.access.key=@aws.access.key='"$AWS_ACCESS_KEY_ID"'@g' $SECOR_HOME/bin/secor.common.properties 26 | sudo sed -i 's@aws.secret.key=@aws.secret.key='"$AWS_SECRET_ACCESS_KEY"'@g' $SECOR_HOME/bin/secor.common.properties 27 | sudo sed -i 's@secor.compression.codec=@secor.compression.codec=org.apache.hadoop.io.compress.GzipCodec@g' $SECOR_HOME/bin/secor.common.properties 28 | sudo sed -i 's@secor.file.extension=@secor.file.extension='".gz"'@g' $SECOR_HOME/bin/secor.common.properties 29 | sudo sed -i 's@secor.file.reader.writer.factory=com.pinterest.secor.io.impl.SequenceFileReaderWriterFactory@secor.file.reader.writer.factory=com.pinterest.secor.io.impl.DelimitedTextFileReaderWriterFactory@g' $SECOR_HOME/bin/secor.common.properties 30 | 31 | sudo sed -i 's@kafka.seed.broker.host=@kafka.seed.broker.host='"${DNS[0]}"'@g' $SECOR_HOME/bin/secor.prod.properties 32 | 33 | ZK_SERVERS="" 34 | for dns in ${DNS[@]} 35 | do 36 | ZK_SERVERS=$ZK_SERVERS$dns:2181, 37 | done 38 | 39 | sudo sed -i 's@zookeeper.quorum=@zookeeper.quorum='"${ZK_SERVERS:0:-1}"'@g' $SECOR_HOME/bin/secor.prod.properties 40 | sudo sed -i 's@secor.s3.bucket=@secor.s3.bucket='"${s3_bucket}"'@g' $SECOR_HOME/bin/secor.prod.properties 41 | sudo sed -i 's@ostrich.port=9999@ostrich.port=9997@g' $SECOR_HOME/bin/secor.prod.backup.properties 42 | -------------------------------------------------------------------------------- /install/kafka-manager/install_kafka_manager_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in 
compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # check input arguments 18 | if [ "$#" -ne 1 ]; then 19 | echo "Please specify cluster name!" && exit 1 20 | fi 21 | 22 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 23 | source ${PEG_ROOT}/util.sh 24 | source ${PEG_ROOT}/colors.sh 25 | 26 | CLUSTER_NAME=$1 27 | MASTER_PUBLIC_DNS=$(fetch_public_dns_of_node_in_cluster ${CLUSTER_NAME} 1) 28 | TECHNOLOGY="kafka-manager" 29 | DEP_ROOT_FOLDER=/usr/local/ 30 | 31 | function check_dependencies_and_install_kafka_manager { 32 | if [ -z "${DEP}" ]; then 33 | echo -e "${color_yellow}Installing kafka-manager on Node - ${MASTER_PUBLIC_DNS} - in ${CLUSTER_NAME}${color_norm}" 34 | script=${PEG_ROOT}/install/kafka-manager/install_kafka_manager.sh 35 | run_script_on_node ${MASTER_PUBLIC_DNS} ${script} 36 | else 37 | INSTALLED=$(check_remote_folder ${MASTER_PUBLIC_DNS} ${DEP_ROOT_FOLDER}${DEP[0]}) 38 | if [ "${INSTALLED}" = "installed" ]; then 39 | DEP=(${DEP[@]:1}) 40 | check_dependencies_and_install_kafka_manager 41 | else 42 | echo "${DEP} is not installed in ${DEP_ROOT_FOLDER}" 43 | echo "Please install ${DEP} and then proceed with ${TECHNOLOGY}" 44 | echo "peg install ${CLUSTER_NAME} ${TECHNOLOGY}" 45 | exit 1 46 | fi 47 | fi 48 | } 49 | 50 | # Check if dependencies are installed 51 | # If yes, then install kafka-manager 52 | DEP=($(get_dependencies)) 53 | check_dependencies_and_install_kafka_manager 54 | 55 | echo "kafka-manager installed!" 
56 | -------------------------------------------------------------------------------- /install/riak/install_riak_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # check input arguments 18 | if [ "$#" -ne 1 ]; then 19 | echo "Please specify cluster name!" && exit 1 20 | fi 21 | 22 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 
23 | source ${PEG_ROOT}/util.sh 24 | source ${PEG_ROOT}/colors.sh 25 | 26 | CLUSTER_NAME=$1 27 | MASTER_PUBLIC_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME}) 28 | PUBLIC_DNS=$(fetch_cluster_public_dns ${CLUSTER_NAME}) 29 | TECHNOLOGY="riak" 30 | DEP_ROOT_FOLDER=/usr/local/ 31 | 32 | function check_dependencies_and_install_riak { 33 | if [ -z "${DEP}" ]; then 34 | script=${PEG_ROOT}/install/riak/install_riak.sh 35 | for dns in ${PUBLIC_DNS}; do 36 | echo -e "${color_yellow}Installing Riak on Node - ${dns} - in ${CLUSTER_NAME}${color_norm}" 37 | run_script_on_node ${dns} ${script} & 38 | done 39 | else 40 | INSTALLED=$(check_remote_folder ${MASTER_PUBLIC_DNS} ${DEP_ROOT_FOLDER}${DEP[0]}) 41 | if [ "${INSTALLED}" = "installed" ]; then 42 | DEP=(${DEP[@]:1}) 43 | check_dependencies_and_install_riak 44 | else 45 | echo "${DEP} is not installed in ${DEP_ROOT_FOLDER}" 46 | echo "Please install ${DEP} and then proceed with ${TECHNOLOGY}" 47 | echo "peg install ${CLUSTER_NAME} ${TECHNOLOGY}" 48 | exit 1 49 | fi 50 | fi 51 | } 52 | 53 | # Check if dependencies are installed 54 | # If yes, then install riak 55 | 56 | DEP=($(get_dependencies)) 57 | check_dependencies_and_install_riak 58 | 59 | wait 60 | 61 | echo "Riak installed!" 62 | -------------------------------------------------------------------------------- /config/spark/setup_single.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | . ~/.profile 18 | 19 | spark_lib=${SPARK_HOME}/lib/ 20 | 21 | if [ ! -d ${spark_lib} ]; then 22 | mkdir ${spark_lib} 23 | fi 24 | 25 | cp ${HADOOP_HOME}/share/hadoop/tools/lib/aws-java-sdk-*.jar ${spark_lib} 26 | cp ${HADOOP_HOME}/share/hadoop/tools/lib/hadoop-aws-*.jar ${spark_lib} 27 | 28 | cp ${SPARK_HOME}/conf/spark-env.sh.template ${SPARK_HOME}/conf/spark-env.sh 29 | cp ${SPARK_HOME}/conf/spark-defaults.conf.template ${SPARK_HOME}/conf/spark-defaults.conf 30 | 31 | # configure spark-env.sh 32 | OVERSUBSCRIPTION_FACTOR=3 33 | WORKER_CORES=$(echo "$(nproc) * ${OVERSUBSCRIPTION_FACTOR}" | awk '{print $1 * $3}') 34 | spark_env="${SPARK_HOME}/conf/spark-env.sh" 35 | sed -i '22i export PYSPARK_PYTHON=python3' ${spark_env} 36 | sed -i '23i export JAVA_HOME=/usr' ${spark_env} 37 | sed -i '24i export SPARK_PUBLIC_DNS="'$1'"' ${spark_env} 38 | sed -i '25i export SPARK_WORKER_CORES='${WORKER_CORES}'' ${spark_env} 39 | sed -i '26i export DEFAULT_HADOOP_HOME='${HADOOP_HOME}'' ${spark_env} 40 | 41 | 42 | # configure spark-defaults.conf 43 | hadoop_aws_jar=$(find ${spark_lib} -type f | grep hadoop-aws) 44 | aws_java_sdk_jar=$(find ${spark_lib} -type f | grep aws-java-sdk) 45 | sed -i '21i spark.hadoop.fs.s3a.impl org.apache.hadoop.fs.s3a.S3AFileSystem' ${SPARK_HOME}/conf/spark-defaults.conf 46 | sed -i '22i spark.executor.extraClassPath '"${aws_java_sdk_jar}"':'"${hadoop_aws_jar}"'' ${SPARK_HOME}/conf/spark-defaults.conf 47 | sed -i '23i spark.driver.extraClassPath '"${aws_java_sdk_jar}"':'"${hadoop_aws_jar}"'' ${SPARK_HOME}/conf/spark-defaults.conf 48 | -------------------------------------------------------------------------------- /config/flink/setup_single.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, 
Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | . ~/.profile 18 | 19 | MASTER_IP=$1 20 | NUM_WORKERS=$2 21 | 22 | MEMINFO=($(free -m | sed -n '2p' | sed -e "s/[[:space:]]\+/ /g")) 23 | TOTMEM=${MEMINFO[1]} 24 | TASKMANAGER_HEAP=$(printf "%.0f" $(echo "0.90 * ( $TOTMEM - 1000 )" | awk '{print $1*($4-$6)}')) 25 | TASK_SLOTS=$(nproc) 26 | PARALLELISM=$(echo "$TASK_SLOTS * $NUM_WORKERS" | awk '{print $1*$3}') 27 | TMP_DIRS=/var/flink/tmp 28 | 29 | sudo mkdir -p $TMP_DIRS 30 | sudo chown -R ubuntu $TMP_DIRS 31 | 32 | cp ${HADOOP_HOME}/share/hadoop/tools/lib/aws-java-sdk-*.jar ${FLINK_HOME}/lib 33 | cp ${HADOOP_HOME}/share/hadoop/tools/lib/hadoop-aws-*.jar ${FLINK_HOME}/lib 34 | cp ${HADOOP_HOME}/share/hadoop/tools/lib/httpclient-*.jar ${FLINK_HOME}/lib 35 | cp ${HADOOP_HOME}/share/hadoop/tools/lib/httpcore-*.jar ${FLINK_HOME}/lib 36 | 37 | sed -i "s@jobmanager.rpc.address: localhost@jobmanager.rpc.address: $MASTER_IP@g" $FLINK_HOME/conf/flink-conf.yaml 38 | sed -i "s@jobmanager.heap.mb: 256@jobmanager.heap.mb: 1024@g" $FLINK_HOME/conf/flink-conf.yaml 39 | sed -i "s@taskmanager.heap.mb: 512@taskmanager.heap.mb: $TASKMANAGER_HEAP@g" $FLINK_HOME/conf/flink-conf.yaml 40 | sed -i "s@taskmanager.numberOfTaskSlots: 1@taskmanager.numberOfTaskSlots: $TASK_SLOTS@g" $FLINK_HOME/conf/flink-conf.yaml 41 | sed -i "s@parallelism.default: 1@parallelism.default: $PARALLELISM@g" $FLINK_HOME/conf/flink-conf.yaml 42 | sed -i "s@# taskmanager.tmp.dirs: /tmp@taskmanager.tmp.dirs: 
$TMP_DIRS@g" $FLINK_HOME/conf/flink-conf.yaml 43 | sed -i "s@# fs.hdfs.hadoopconf: /path/to/hadoop/conf/@fs.hdfs.hadoopconf: $HADOOP_HOME/etc/hadoop@g" $FLINK_HOME/conf/flink-conf.yaml 44 | -------------------------------------------------------------------------------- /config/hbase/setup_single.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | MASTER_NAME=$1; shift 18 | ZK_HOSTNAME=( "$@" ) 19 | 20 | . 
~/.profile 21 | 22 | # configure hbase-site.xml 23 | sudo sed -i '24i \n hbase.rootdir\n hdfs://'"$MASTER_NAME"':9000/hbase\n' $HBASE_HOME/conf/hbase-site.xml 24 | 25 | sudo sed -i '24i \n hbase.zookeeper.property.dataDir\n /var/lib/zookeeper\n' $HBASE_HOME/conf/hbase-site.xml 26 | 27 | sudo sed -i '24i \n hbase.cluster.distributed\n true\n' $HBASE_HOME/conf/hbase-site.xml 28 | 29 | ZK_QUORUM="" 30 | for ZK in ${ZK_HOSTNAME[@]}; do 31 | ZK_QUORUM+=$ZK, 32 | done 33 | ZK_QUORUM=${ZK_QUORUM%?} 34 | 35 | sudo sed -i '24i \n hbase.zookeeper.quorum\n '"$ZK_QUORUM"'\n' $HBASE_HOME/conf/hbase-site.xml 36 | 37 | # configure hbase-env.sh 38 | sudo sed -i 's@# export HBASE_MANAGES_ZK=true@export HBASE_MANAGES_ZK=false@g' $HBASE_HOME/conf/hbase-env.sh 39 | 40 | sudo sed -i 's@# export JAVA_HOME=/usr/java/jdk1.6.0/@export JAVA_HOME=/usr@g' $HBASE_HOME/conf/hbase-env.sh 41 | 42 | # setup RegionServers on all nodes except the first one 43 | REGIONSERVERS=( ${ZK_HOSTNAME[@]:1} ) 44 | sudo mv $HBASE_HOME/conf/regionservers $HBASE_HOME/conf/regionservers.backup 45 | 46 | for RS in ${REGIONSERVERS[@]}; do 47 | sudo bash -c 'echo '"$RS"' >> '"$HBASE_HOME"'/conf/regionservers' 48 | done 49 | 50 | # setup BackupMasters to the second node in the node list 51 | sudo bash -c 'echo '"${ZK_HOSTNAME[1]}"' >> '"$HBASE_HOME"'/conf/backup-masters' 52 | 53 | -------------------------------------------------------------------------------- /config/riak/setup_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # check input arguments 18 | 19 | if [ "$#" -ne 1 ]; then 20 | echo "Please specify cluster name!" && exit 1 21 | fi 22 | 23 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 24 | source ${PEG_ROOT}/util.sh 25 | 26 | CLUSTER_NAME=$1 27 | 28 | MASTER_PUBLIC_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME}) 29 | PUBLIC_DNS=$(fetch_cluster_public_dns ${CLUSTER_NAME}) 30 | 31 | single_script="${PEG_ROOT}/config/riak/setup_single.sh" 32 | 33 | # Install and configure nodes for Riak 34 | for dns in ${PUBLIC_DNS}; do 35 | run_script_on_node ${dns} ${single_script} & 36 | done 37 | 38 | wait 39 | 40 | # wait for riak to start on individual nodes 41 | for i in {0..20}; do echo "."; sleep 0.5; done 42 | 43 | hostnames=($(fetch_cluster_hostnames ${CLUSTER_NAME})) 44 | args=(${hostnames[0]}) 45 | 46 | # script to form a riak cluster from individual riak nodes 47 | single_script="${PEG_ROOT}/config/riak/create_cluster.sh" 48 | PUBLIC_DNS=$(fetch_cluster_public_dns ${CLUSTER_NAME}) 49 | PUBLIC_DNS=($PUBLIC_DNS) 50 | PUBLIC_DNS=(${PUBLIC_DNS[@]:1}) 51 | 52 | echo -e "Configuring nodes to form a cluster" 53 | 54 | for dns in ${PUBLIC_DNS[@]}; do 55 | run_script_on_node ${dns} ${single_script} ${args} & 56 | wait 57 | done 58 | 59 | # wait for riak cluster formation to complete 60 | for i in {0..10}; do echo "."; sleep 0.5; done 61 | 62 | # stop riak after setup 63 | PUBLIC_DNS=$(fetch_cluster_public_dns ${CLUSTER_NAME}) 64 | stop_cmd="sudo /etc/init.d/riak stop" 65 | for dns in ${PUBLIC_DNS}; do 66 | run_cmd_on_node ${dns} 
${stop_cmd} & 67 | done 68 | 69 | wait 70 | 71 | echo "Riak configuration complete!" 72 | 73 | -------------------------------------------------------------------------------- /config/elasticsearch/setup_single.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # first argument is the region, second is the ec2 security group, and last is the name of the elasticsearch cluster 18 | ES_NAME=$1; shift 19 | AWS_REGION=$1; shift 20 | AWS_SECRET=$1; shift 21 | AWS_ACCESS=$1; shift 22 | QUORUM=$1; shift 23 | HOSTNAMES=( "$@" ) 24 | 25 | . 
~/.profile 26 | 27 | mkdir $ELASTICSEARCH_HOME/logs 28 | mkdir $ELASTICSEARCH_HOME/plugins 29 | 30 | sudo $ELASTICSEARCH_HOME/bin/elasticsearch-plugin install discovery-ec2 31 | sudo $ELASTICSEARCH_HOME/bin/elasticsearch-plugin install repository-s3 32 | sudo $ELASTICSEARCH_HOME/bin/elasticsearch-plugin install x-pack 33 | 34 | sudo sed -i '1i discovery.type: ec2' $ELASTICSEARCH_HOME/config/elasticsearch.yml 35 | sudo sed -i '1i cluster.name: '"$ES_NAME"'' $ELASTICSEARCH_HOME/config/elasticsearch.yml 36 | sudo sed -i '1i cloud.aws.region: '"$AWS_REGION"'' $ELASTICSEARCH_HOME/config/elasticsearch.yml 37 | sudo sed -i '1i cloud.aws.secret_key: '"$AWS_SECRET"'' $ELASTICSEARCH_HOME/config/elasticsearch.yml 38 | sudo sed -i '1i cloud.aws.access_key: '"$AWS_ACCESS"'' $ELASTICSEARCH_HOME/config/elasticsearch.yml 39 | sudo sed -i '1i network.host: 0.0.0.0' $ELASTICSEARCH_HOME/config/elasticsearch.yml 40 | sudo sed -i '1i discovery.zen.minimum_master_nodes: '"$QUORUM"'' $ELASTICSEARCH_HOME/config/elasticsearch.yml 41 | 42 | ES_HOSTS="" 43 | for host in ${HOSTNAMES[@]} 44 | do 45 | ES_HOSTS=$ES_HOSTS\"$host\", 46 | done 47 | 48 | sudo sed -i '1i discovery.zen.ping.unicast.hosts: '\["${ES_HOSTS:0:-1}"\]'' $ELASTICSEARCH_HOME/config/elasticsearch.yml 49 | 50 | sudo sysctl -w vm.max_map_count=262144 51 | sudo bash -c "echo '* hard nofile 65536' >> /etc/security/limits.conf" 52 | 53 | sudo chown -R ubuntu $ELASTICSEARCH_HOME 54 | -------------------------------------------------------------------------------- /config/hadoop/setup_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # must be called from the top level 18 | 19 | # check input arguments 20 | if [ "$#" -ne 1 ]; then 21 | echo "Please specify cluster name!" && exit 1 22 | fi 23 | 24 | PEG_ROOT=$(dirname ${BASH_SOURCE})/../.. 25 | source ${PEG_ROOT}/util.sh 26 | 27 | CLUSTER_NAME=$1 28 | 29 | PUBLIC_DNS=$(fetch_cluster_public_dns ${CLUSTER_NAME}) 30 | 31 | MASTER_DNS=$(fetch_cluster_master_public_dns ${CLUSTER_NAME}) 32 | MASTER_HOSTNAME=$(fetch_cluster_master_hostname ${CLUSTER_NAME}) 33 | 34 | WORKER_DNS=$(fetch_cluster_worker_public_dns ${CLUSTER_NAME}) 35 | WORKER_HOSTNAMES=$(fetch_cluster_worker_hostnames ${CLUSTER_NAME}) 36 | 37 | # Configure base Hadoop master and slaves 38 | single_script="${PEG_ROOT}/config/hadoop/setup_single.sh" 39 | args="${MASTER_DNS} ${AWS_ACCESS_KEY_ID} ${AWS_SECRET_ACCESS_KEY}" 40 | for dns in ${PUBLIC_DNS}; do 41 | run_script_on_node ${dns} ${single_script} ${args} & 42 | done 43 | 44 | wait 45 | 46 | # Configure Hadoop master and slaves 47 | hosts_script="${PEG_ROOT}/config/hadoop/config_hosts.sh" 48 | args="${MASTER_DNS} ${MASTER_HOSTNAME} ${WORKER_DNS} ${WORKER_HOSTNAMES}" 49 | run_script_on_node ${MASTER_DNS} ${hosts_script} ${args} 50 | 51 | namenode_script="${PEG_ROOT}/config/hadoop/config_namenode.sh" 52 | args="${MASTER_HOSTNAME} ${WORKER_HOSTNAMES}" 53 | run_script_on_node ${MASTER_DNS} ${namenode_script} ${args} & 54 | 55 | datanode_script="${PEG_ROOT}/config/hadoop/config_datanode.sh" 56 | for dns in ${WORKER_DNS}; do 57 | run_script_on_node ${dns} ${datanode_script} & 58 | done 59 | 60 
| wait 61 | 62 | format_script="${PEG_ROOT}/config/hadoop/format_hdfs.sh" 63 | run_script_on_node ${MASTER_DNS} ${format_script} 64 | 65 | echo "Hadoop configuration complete!" 66 | -------------------------------------------------------------------------------- /pegasus-completion.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Bash completion for peg commands 18 | # Usage: 19 | # - Export PEGASUS_HOME as an environment variable as follows - in your .bash_profile (for mac) and .bashrc (for linux) 20 | # export PEGASUS_HOME=[Path to Pegasus] 21 | # - Put "source pegasus-completion.sh" into your .bash_profile (on mac) or .bashrc (on linux) 22 | # 23 | # OR 24 | # 25 | # If you have bash_completion installed, 26 | # [for linux] 27 | # - place this script under /etc/bash_completion.d folder (for linux) 28 | # and add the following lines to your .bashrc 29 | # if [ -f /etc/bash_completion ]; then 30 | # . /etc/bash_completion 31 | # fi 32 | # 33 | # [for mac] 34 | # - place this script under `brew --prefix`/etc/bash_completion.d (for mac) 35 | # and add the following lines to your .bash_profile 36 | # if [ -f `brew --prefix`/etc/bash_completion ]; then 37 | # . 
`brew --prefix`/etc/bash_completion 38 | # fi 39 | 40 | _peg() 41 | { 42 | local cur prev opts 43 | COMPREPLY=() 44 | cur="${COMP_WORDS[COMP_CWORD]}" 45 | prev="${COMP_WORDS[COMP_CWORD-1]}" 46 | clusters=$(ls ${PEGASUS_HOME}/tmp) 47 | services=$(ls ${PEGASUS_HOME}/service) 48 | 49 | opts=" 50 | config 51 | aws 52 | validate 53 | fetch 54 | describe 55 | up 56 | down 57 | install 58 | uninstall 59 | service 60 | ssh 61 | sshcmd-node 62 | sshcmd-cluster 63 | scp 64 | retag 65 | start 66 | stop 67 | port-forward 68 | ${clusters} 69 | ${services}" 70 | 71 | COMPREPLY=($(compgen -W "${opts}" -- ${cur})) 72 | return 0 73 | 74 | } 75 | complete -F _peg peg 76 | 77 | # END peg completion 78 | -------------------------------------------------------------------------------- /config/presto/setup_single.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | . 
~/.profile 18 | 19 | MASTER_DNS=$1 20 | NUM_WORKERS=$2 21 | 22 | MEMINFO=($(free -m | sed -n '2p' | sed -e "s/[[:space:]]\+/ /g")) 23 | TOTMEM=${MEMINFO[1]} 24 | MAX_MEMORY_PER_NODE=$(printf "%.0f" $(echo "0.90 * ( $TOTMEM - 6000 ) * 0.001" | awk '{print $1 * ($4-$6) * $9}')) 25 | 26 | MAX_MEMORY=$(echo "$MAX_MEMORY_PER_NODE * $NUM_WORKERS" | awk '{print $1 * $3}') 27 | 28 | PORT=8080 29 | 30 | mkdir $PRESTO_HOME/etc 31 | 32 | DATA_PATH=/var/presto/data 33 | sudo mkdir -p $DATA_PATH 34 | sudo chown -R ubuntu $DATA_PATH 35 | 36 | NODE_PROPERTIES_PATH=$PRESTO_HOME/etc/node.properties 37 | JVM_CONFIG_PATH=$PRESTO_HOME/etc/jvm.config 38 | CONFIG_PROPERTIES_PATH=$PRESTO_HOME/etc/config.properties 39 | LOG_PROPERTIES_PATH=$PRESTO_HOME/etc/log.properties 40 | 41 | mkdir $PRESTO_HOME/etc/catalog 42 | 43 | touch $NODE_PROPERTIES_PATH 44 | touch $JVM_CONFIG_PATH 45 | touch $CONFIG_PROPERTIES_PATH 46 | touch $LOG_PROPERTIES_PATH 47 | 48 | # node.properties 49 | cat >> $NODE_PROPERTIES_PATH << EOL 50 | node.environment=production 51 | node.id=$(uuidgen) 52 | node.data-dir=${DATA_PATH} 53 | EOL 54 | 55 | # jvm.config 56 | cat >> $JVM_CONFIG_PATH << EOL 57 | -server 58 | -Xmx16G 59 | -XX:+UseConcMarkSweepGC 60 | -XX:+ExplicitGCInvokesConcurrent 61 | -XX:+CMSClassUnloadingEnabled 62 | -XX:+AggressiveOpts 63 | -XX:+HeapDumpOnOutOfMemoryError 64 | -XX:OnOutOfMemoryError=kill -9 %p 65 | -XX:PermSize=150M 66 | -XX:MaxPermSize=150M 67 | -XX:ReservedCodeCacheSize=150M 68 | -Xbootclasspath/p:/var/presto/installation/lib/floatingdecimal-0.2.jar 69 | EOL 70 | 71 | # config.properties 72 | cat >> $CONFIG_PROPERTIES_PATH << EOL 73 | http-server.http.port=${PORT} 74 | task.max-memory=${MAX_MEMORY}GB 75 | discovery.uri=http://${MASTER_DNS}:${PORT} 76 | EOL 77 | 78 | # log.properties 79 | cat >> $LOG_PROPERTIES_PATH << EOL 80 | com.facebook.presto=WARN 81 | EOL 82 | 83 | echo "connector.name=jmx" > $PRESTO_HOME/etc/catalog/jmx.properties 84 | 
-------------------------------------------------------------------------------- /config/hadoop/setup_single.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | . ~/.profile 18 | 19 | MASTER_NAME=$1 20 | AWS_ACCESS_KEY_ID=$2 21 | AWS_SECRET_ACCESS_KEY=$3 22 | 23 | sed -i 's@${JAVA_HOME}@/usr@g' $HADOOP_HOME/etc/hadoop/hadoop-env.sh 24 | 25 | sed -i '$a # Update Hadoop classpath to include share folder \nif [ \"$HADOOP_CLASSPATH\" ]; then \n export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$HADOOP_HOME/share/hadoop/tools/lib/* \nelse \n export HADOOP_CLASSPATH=$HADOOP_HOME/share/hadoop/tools/lib/* \nfi' $HADOOP_HOME/etc/hadoop/hadoop-env.sh 26 | 27 | # configure core-site.xml 28 | sed -i '20i \n fs.defaultFS\n hdfs://'"$MASTER_NAME"':9000\n' $HADOOP_HOME/etc/hadoop/core-site.xml 29 | sed -i '24i \n fs.s3.impl\n org.apache.hadoop.fs.s3a.S3AFileSystem\n' $HADOOP_HOME/etc/hadoop/core-site.xml 30 | sed -i '28i \n fs.s3a.access.key\n '"${AWS_ACCESS_KEY_ID}"'\n' $HADOOP_HOME/etc/hadoop/core-site.xml 31 | sed -i '32i \n fs.s3a.secret.key\n '"${AWS_SECRET_ACCESS_KEY}"'\n' $HADOOP_HOME/etc/hadoop/core-site.xml 32 | 33 | # configure yarn-site.xml 34 | sed -i '18i \n yarn.nodemanager.aux-services\n mapreduce_shuffle\n' $HADOOP_HOME/etc/hadoop/yarn-site.xml 35 | sed -i '22i \n 
yarn.nodemanager.aux-services.mapreduce.shuffle.class\n org.apache.hadoop.mapred.ShuffleHandler\n' $HADOOP_HOME/etc/hadoop/yarn-site.xml 36 | sed -i '26i \n yarn.resourcemanager.resource-tracker.address\n '"$MASTER_NAME"':8025\n' $HADOOP_HOME/etc/hadoop/yarn-site.xml 37 | sed -i '30i \n yarn.resourcemanager.scheduler.address\n '"$MASTER_NAME"':8030\n' $HADOOP_HOME/etc/hadoop/yarn-site.xml 38 | sed -i '34i \n yarn.resourcemanager.address\n '"$MASTER_NAME"':8050\n' $HADOOP_HOME/etc/hadoop/yarn-site.xml 39 | 40 | # configure mapred-site.xml 41 | cp $HADOOP_HOME/etc/hadoop/mapred-site.xml.template $HADOOP_HOME/etc/hadoop/mapred-site.xml 42 | sed -i '20i \n mapreduce.jobtracker.address\n '"$MASTER_NAME"':54311\n' $HADOOP_HOME/etc/hadoop/mapred-site.xml 43 | sed -i '24i \n mapreduce.framework.name\n yarn\n' $HADOOP_HOME/etc/hadoop/mapred-site.xml 44 | sed -i '28i \n mapreduce.application.classpath\n '"$HADOOP_HOME"'/share/hadoop/mapreduce/*,'"$HADOOP_HOME"'/share/hadoop/mapreduce/lib/*,'"$HADOOP_HOME"'/share/hadoop/common/*,'"$HADOOP_HOME"'/share/hadoop/common/lib/*,'"$HADOOP_HOME"'/share/hadoop/tools/lib/* \n ' $HADOOP_HOME/etc/hadoop/mapred-site.xml 45 | -------------------------------------------------------------------------------- /install/download_tech: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.

# Download one technology tarball into /usr/local and register it in
# ~/.profile.  Usage: download_tech <technology>   (e.g. "spark", "hadoop")

# Replace this with where you would like to pull your binaries
S3_BUCKET=https://s3-us-west-2.amazonaws.com/insight-tech


CASSANDRA_VER=3.11.3
# FIX: this line previously read "ELASTICSEARCH_URL=6.2.4", which left
# ELASTICSEARCH_VER unset and made the ELASTICSEARCH_URL assignment below
# expand to the malformed ".../elasticsearch/elasticsearch-.tar.gz".
ELASTICSEARCH_VER=6.2.4
FLINK_VER=1.4.2
FLINK_HADOOP_VER=27
FLINK_SCALA_VER=2.11
HADOOP_VER=2.7.6
HBASE_VER=1.2.6
HIVE_VER=2.3.3
KAFKA_VER=1.1.0
KAFKA_SCALA_VER=2.12
KIBANA_VER=6.2.4
OPSCENTER_VER=6.5.0
PIG_VER=0.17.0
PRESTO_VER=0.200
REDIS_VER=4.0.9
SPARK_VER=2.4.0
SPARK_HADOOP_VER=2.7
STORM_VER=1.2.1
ZOOKEEPER_VER=3.4.13


CASSANDRA_URL=${S3_BUCKET}/cassandra/apache-cassandra-${CASSANDRA_VER}-bin.tar.gz
ELASTICSEARCH_URL=${S3_BUCKET}/elasticsearch/elasticsearch-${ELASTICSEARCH_VER}.tar.gz
FLINK_URL=${S3_BUCKET}/flink/flink-${FLINK_VER}-bin-hadoop${FLINK_HADOOP_VER}-scala_${FLINK_SCALA_VER}.tgz
HADOOP_URL=${S3_BUCKET}/hadoop/hadoop-$HADOOP_VER.tar.gz
HBASE_URL=${S3_BUCKET}/hbase/hbase-$HBASE_VER-bin.tar.gz
HIVE_URL=${S3_BUCKET}/hive/apache-hive-$HIVE_VER-bin.tar.gz
KAFKA_URL=${S3_BUCKET}/kafka/kafka_$KAFKA_SCALA_VER-$KAFKA_VER.tgz
KIBANA_URL=${S3_BUCKET}/kibana/kibana-$KIBANA_VER-linux-x86_64.tar.gz
OPSCENTER_URL=${S3_BUCKET}/cassandra/opscenter-${OPSCENTER_VER}.tar.gz
PIG_URL=${S3_BUCKET}/pig/pig-$PIG_VER.tar.gz
PRESTO_URL=${S3_BUCKET}/presto-server-$PRESTO_VER.tar.gz
REDIS_URL=${S3_BUCKET}/redis/redis-$REDIS_VER.tar.gz
SPARK_URL=${S3_BUCKET}/spark/spark-$SPARK_VER-bin-hadoop$SPARK_HADOOP_VER.tgz
STORM_URL=${S3_BUCKET}/storm/apache-storm-$STORM_VER.tar.gz
ZOOKEEPER_URL=${S3_BUCKET}/zookeeper/zookeeper-$ZOOKEEPER_VER.tar.gz

# If the s3 bucket doesn't have the version you want to use, below are some sites where you can find binaries but be aware that because you don't have control over the machines where those binaries reside, you may lose access to them at any point
#
# CASSANDRA_URL=http://www.us.apache.org/dist/cassandra/$CASSANDRA_VER/apache-cassandra-$CASSANDRA_VER-bin.tar.gz
# ELASTICSEARCH_URL=http://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-$ELASTICSEARCH_VER.tar.gz
# HADOOP_URL=http://www.us.apache.org/dist/hadoop/core/hadoop-$HADOOP_VER/hadoop-$HADOOP_VER.tar.gz
# HBASE_URL=http://www.us.apache.org/dist/hbase/$HBASE_VER/hbase-$HBASE_VER-bin.tar.gz
# HIVE_URL=http://www.us.apache.org/dist/hive/hive-$HIVE_VER/apache-hive-$HIVE_VER-bin.tar.gz
# KAFKA_URL=http://www.us.apache.org/dist/kafka/$KAFKA_VER/kafka_$KAFKA_SCALA_VER-$KAFKA_VER.tgz
# KIBANA_URL=http://artifacts.elastic.co/downloads/kibana/kibana-$KIBANA_VER-linux-x86_64.tar.gz
# PIG_URL=http://www-us.apache.org/dist/pig/pig-$PIG_VER/pig-$PIG_VER.tar.gz
# PRESTO_URL=https://repo1.maven.org/maven2/com/facebook/presto/presto-server/$PRESTO_VER/presto-server-$PRESTO_VER.tar.gz
# REDIS_URL=http://download.redis.io/releases/redis-$REDIS_VER.tar.gz
# SPARK_URL=http://www.us.apache.org/dist/spark/spark-$SPARK_VER/spark-$SPARK_VER-bin-hadoop$SPARK_HADOOP_VER.tgz
# STORM_URL=http://www.us.apache.org/dist/storm/apache-storm-$STORM_VER/apache-storm-$STORM_VER.tar.gz
# ZOOKEEPER_URL=http://www.us.apache.org/dist/zookeeper/zookeeper-$ZOOKEEPER_VER/zookeeper-$ZOOKEEPER_VER.tar.gz



cd $(dirname "${BASH_SOURCE}")

if [ "$#" -ne 1 ]; then
  echo "Provide technology to download!"
  exit 1
fi

# Derive e.g. SPARK_HOME / SPARK_URL variable names from the lowercase
# technology argument.  NOTE(review): the unquoted [a-z]/[A-Z] tr classes can
# be clobbered by a matching file in the cwd — presumably never the case
# here, but quoting them would be safer; left as-is to preserve behavior.
TECHNOLOGY=$1
TECHNOLOGY_HOME=$(echo $(echo $TECHNOLOGY | tr [a-z] [A-Z])_HOME)
TECHNOLOGY_URL=$(eval "echo \$$(echo $TECHNOLOGY | tr [a-z] [A-Z])_URL")

# Install the technology only if /usr/local/<technology> is not yet present.
check_folder () {
  if [ -d /usr/local/$TECHNOLOGY ]; then
    echo "$TECHNOLOGY installed."
  else
    echo "$TECHNOLOGY missing."
    echo "Installing $TECHNOLOGY ..."
    curl_tech
  fi
}

# Fetch the tarball, unpack it under /usr/local, normalize the folder name,
# export <TECH>_HOME and PATH in ~/.profile, and record the version.
curl_tech () {
  curl -sL $TECHNOLOGY_URL | gunzip | sudo tar xv -C /usr/local >> ~/peg_log.txt
  if [ -d /usr/local/*${TECHNOLOGY}* ]; then
    sudo mv /usr/local/*$TECHNOLOGY* /usr/local/$TECHNOLOGY
    echo "export $TECHNOLOGY_HOME=/usr/local/$TECHNOLOGY" | cat >> ~/.profile
    echo -e "export PATH=\$PATH:\$$TECHNOLOGY_HOME/bin\n" | cat >> ~/.profile
    sudo chown -R $USER /usr/local/$TECHNOLOGY
    eval "echo \$$(echo $TECHNOLOGY | tr [a-z] [A-Z])_VER" >> /usr/local/$TECHNOLOGY/tech_ver.txt
  else
    echo "The software wasn't downloaded correctly from the URL ($TECHNOLOGY_URL). Check Pegasus GitHub README."
  fi
}

check_folder

-------------------------------------------------------------------------------- /aws-queries.sh: --------------------------------------------------------------------------------
#!/bin/bash

# Copyright 2015 Insight Data Science
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# ---------------------------------------------------------------------------
# aws-queries.sh (part 1): thin wrappers around `aws ec2` describe calls.
# Every function prints its result (text output) to stdout.
# Fixes: quote variable expansions in test/filter positions (ShellCheck
# SC2086) and avoid a pointless VPC-id lookup on the no-argument paths of
# show_all_subnets / show_all_security_groups.
# ---------------------------------------------------------------------------

PEG_ROOT=$(dirname "${BASH_SOURCE}")

# Side effect: ${AWS_DEFAULT_REGION:=us-west-2} assigns the default region if
# the variable was unset.  AWS_CMD must remain UNQUOTED at every call site so
# it word-splits back into the `aws ec2 ...` command line.
AWS_CMD="aws ec2 --region ${AWS_DEFAULT_REGION:=us-west-2} --output text"

# Public DNS names of running/stopped instances with the given Name/Role tags.
function get_public_dns_with_name_and_role {
  local cluster_name=$1
  local cluster_role=$2
  ${AWS_CMD} describe-instances \
    --filters "Name=tag:Name,Values=${cluster_name}" \
              "Name=tag:Role,Values=${cluster_role}" \
              "Name=instance-state-name,Values=running,stopped" \
    --query "Reservations[].Instances[].NetworkInterfaces[].Association.PublicDnsName"
}

# Private DNS names of running/stopped instances with the given Name/Role tags.
function get_private_dns_with_name_and_role {
  local cluster_name=$1
  local cluster_role=$2
  ${AWS_CMD} describe-instances \
    --filters "Name=tag:Name,Values=${cluster_name}" \
              "Name=tag:Role,Values=${cluster_role}" \
              "Name=instance-state-name,Values=running,stopped" \
    --query "Reservations[].Instances[].NetworkInterfaces[].PrivateDnsName"
}

# Pem key name(s) used by the instances of the given cluster.
function get_pemkey_with_name {
  local cluster_name=$1
  ${AWS_CMD} describe-instances \
    --filters "Name=tag:Name,Values=${cluster_name}" \
              "Name=instance-state-name,Values=running,stopped" \
    --query "Reservations[].Instances[].KeyName"
}

# Instance types of the instances of the given cluster.
function get_instance_types_with_name {
  local cluster_name=$1
  ${AWS_CMD} describe-instances \
    --filters "Name=tag:Name,Values=${cluster_name}" \
              "Name=instance-state-name,Values=running,stopped" \
    --query "Reservations[].Instances[].InstanceType"
}

# Instance ids for a cluster; the role filter is optional.
function get_instance_ids_with_name_and_role {
  local cluster_name=$1
  local cluster_role=$2

  if [ -z "${cluster_role}" ]; then
    ${AWS_CMD} describe-instances \
      --filters "Name=tag:Name,Values=${cluster_name}" \
                "Name=instance-state-name,Values=running,stopped" \
      --query "Reservations[].Instances[].InstanceId"

  else
    ${AWS_CMD} describe-instances \
      --filters "Name=tag:Name,Values=${cluster_name}" \
                "Name=tag:Role,Values=${cluster_role}" \
                "Name=instance-state-name,Values=running,stopped" \
      --query "Reservations[].Instances[].InstanceId"
  fi
}

# Instance ids for the given public DNS names (passed as separate arguments).
function get_instance_ids_with_public_dns {
  # Join all arguments into the comma-separated list the dns-name filter wants.
  local public_dns
  public_dns=$(echo "$@" | tr " " ",")
  if [ -z "${public_dns}" ]; then
    exit 1   # nothing to look up — abort, matching the original behavior
  fi
  ${AWS_CMD} describe-instances \
    --filters "Name=dns-name,Values=${public_dns}" \
    --query "Reservations[].Instances[].InstanceId"
}

# Table of all VPCs (id + first Name tag).
function show_all_vpcs {
  ${AWS_CMD} describe-vpcs \
    --output table \
    --query 'Vpcs[].{VPC_ID:VpcId,VPC_NAME:Tags[0].Value}'
}

# VPC id(s) whose Name tag matches the given name.
function get_vpcids_with_name {
  local vpc_name=$1
  ${AWS_CMD} describe-vpcs \
    --filters "Name=tag:Name,Values=${vpc_name}" \
    --query "Vpcs[].VpcId"
}

# Table of subnets — all of them, or only those in the named VPC.
function show_all_subnets {
  local vpc_name=$1

  if [ -z "${vpc_name}" ]; then
    ${AWS_CMD} describe-subnets \
      --output table \
      --query 'Subnets[].{VPC_ID:VpcId,AZ:AvailabilityZone,IPS:AvailableIpAddressCount,SUBNET_ID:SubnetId,SUBNET_NAME:Tags[0].Value}'
  else
    # Resolve the VPC id only when a name was given (the original resolved it
    # unconditionally, wasting an API call on the no-argument path).
    local vpc_id
    vpc_id=$(get_vpcids_with_name "${vpc_name}")
    ${AWS_CMD} describe-subnets \
      --output table \
      --filters "Name=vpc-id,Values=${vpc_id:?"no vpcid found for vpc ${vpc_name}"}" \
      --query 'Subnets[].{VPC_ID:VpcId,AZ:AvailabilityZone,IPS:AvailableIpAddressCount,SUBNET_ID:SubnetId,SUBNET_NAME:Tags[0].Value}'
  fi

}

# Table of security groups — all of them, or only those in the named VPC.
function show_all_security_groups {
  local vpc_name=$1

  if [ -z "${vpc_name}" ]; then
    ${AWS_CMD} describe-security-groups \
      --output table \
      --query 'SecurityGroups[].{VPC_ID:VpcId,SG_ID:GroupId,SG_NAME:GroupName}'
  else
    # Resolve the VPC id only when a name was given (see show_all_subnets).
    local vpc_id
    vpc_id=$(get_vpcids_with_name "${vpc_name}")
    ${AWS_CMD} describe-security-groups \
      --output table \
      --filters "Name=vpc-id,Values=${vpc_id:?"no vpcid found for vpc ${vpc_name}"}" \
      --query 'SecurityGroups[].{VPC_ID:VpcId,SG_ID:GroupId,SG_NAME:GroupName}'
  fi
}

132 | function run_spot_instances { 133 | local launch_specification="{\"ImageId\":\"${AWS_IMAGE}\",\"KeyName\":\"${key_name}\",\"InstanceType\":\"${instance_type}\",\"BlockDeviceMappings\":${block_device_mappings},\"SubnetId\":\"${subnet_id}\",\"Monitoring\":${monitoring},\"SecurityGroupIds\":[\"${security_group_ids}\"]}" 134 | 135 | ${AWS_CMD} request-spot-instances \ 136 | --spot-price "${price:?"specify spot price"}" \ 137 | --instance-count ${num_instances:?"specify number of instances"} \ 138 | --type "one-time" \ 139 | --launch-specification ${launch_specification} \ 140 | --query SpotInstanceRequests[].SpotInstanceRequestId 141 | 142 | } 143 | 144 | function run_on_demand_instances { 145 | ${AWS_CMD} run-instances \ 146 | --count ${num_instances:?"specify number of instances"} \ 147 | --image-id ${AWS_IMAGE} \ 148 | --key-name ${key_name:?"specify pem key to use"} \ 149 | --security-group-ids ${security_group_ids:?"specify security group ids"} \ 150 | --instance-type ${instance_type:?"specify instance type"} \ 151 | --subnet-id ${subnet_id:?"specify subnet id to launch"} \ 152 | --block-device-mappings ${block_device_mappings} \ 153 | --monitoring ${monitoring} \ 154 | --query Instances[].InstanceId 155 | } 156 | 157 | function get_image_id_from_instances { 158 | local instance_ids="$@" 159 | ${AWS_CMD} describe-instances \ 160 | --instance-ids ${instance_ids} \ 161 | --query Reservations[].Instances[].ImageId 162 | } 163 | 164 | function get_pem_key_from_instances { 165 | local instance_ids="$@" 166 | ${AWS_CMD} describe-instances \ 167 | --instance-ids ${instance_ids} \ 168 | --query Reservations[].Instances[].KeyName 169 | } 170 | 171 | function get_security_group_ids_from_instances { 172 | local instance_ids="$@" 173 | ${AWS_CMD} describe-instances \ 174 | --instance-ids ${instance_ids} \ 175 | --query Reservations[].Instances[].SecurityGroups[].GroupId 176 | } 177 | 178 | function get_instance_type_from_instances { 179 | local instance_ids="$@" 180 | 
${AWS_CMD} describe-instances \ 181 | --instance-ids ${instance_ids} \ 182 | --query Reservations[].Instances[].InstanceType 183 | } 184 | 185 | function get_subnet_id_from_instances { 186 | local instance_ids="$@" 187 | ${AWS_CMD} describe-instances \ 188 | --instance-ids ${instance_ids} \ 189 | --query Reservations[].Instances[].SubnetId 190 | } 191 | 192 | function get_public_dns_from_instances { 193 | local instance_ids="$@" 194 | ${AWS_CMD} describe-instances \ 195 | --instance-ids ${instance_ids} \ 196 | --query Reservations[].Instances[].PublicDnsName 197 | } 198 | 199 | function get_volume_size_from_instances { 200 | local instance_ids="$@" 201 | local volume_id=$(${AWS_CMD} describe-instances \ 202 | --instance-ids ${instance_ids} \ 203 | --query Reservations[].Instances[].BlockDeviceMappings[0].Ebs.VolumeId) 204 | ${AWS_CMD} describe-volumes \ 205 | --volume-ids ${volume_id} \ 206 | --query Volumes[0].Size 207 | } 208 | 209 | function tag_resources { 210 | local key=$1; shift 211 | local val=$1; shift 212 | local resource_ids="$@" 213 | ${AWS_CMD} create-tags \ 214 | --resources ${resource_ids} \ 215 | --tags Key=${key},Value=${val} 216 | } 217 | 218 | function wait_for_instances_status_ok { 219 | local instance_ids="$@" 220 | ${AWS_CMD} wait instance-status-ok \ 221 | --instance-ids ${instance_ids} 222 | } 223 | 224 | function wait_for_spot_requests { 225 | local spot_request_ids="$@" 226 | ${AWS_CMD} wait spot-instance-request-fulfilled \ 227 | --spot-instance-request-ids ${spot_request_ids} 228 | } 229 | 230 | function get_instance_ids_of_spot_request_ids { 231 | local spot_request_ids="$@" 232 | ${AWS_CMD} describe-spot-instance-requests \ 233 | --spot-instance-request-ids ${spot_request_ids} \ 234 | --query SpotInstanceRequests[].InstanceId 235 | } 236 | 237 | function get_price_of_spot_request_ids { 238 | local spot_request_ids="$@" 239 | ${AWS_CMD} describe-spot-instance-requests \ 240 | --spot-instance-request-ids ${spot_request_ids} \ 241 | 
--query SpotInstanceRequests[].SpotPrice 242 | } 243 | 244 | function get_spot_request_ids_of_instance_ids { 245 | local instance_ids="$@" 246 | ${AWS_CMD} describe-instances \ 247 | --instance-ids ${instance_ids} \ 248 | --query Reservations[].Instances[].SpotInstanceRequestId 249 | } 250 | 251 | function retag_instance_with_name { 252 | local cluster_name=$1 253 | local new_cluster_name=$2 254 | local instance_ids=$(get_instance_ids_with_name_and_role ${cluster_name}) 255 | 256 | ${AWS_CMD} create-tags \ 257 | --resources ${instance_ids} \ 258 | --tags Key=Name,Value=${new_cluster_name} 259 | } 260 | 261 | function start_instance { 262 | local cluster_name=$1 263 | local instance_ids=$(get_instance_ids_with_name_and_role ${cluster_name}) 264 | 265 | ${AWS_CMD} start-instances \ 266 | --instance-ids ${instance_ids} 267 | } 268 | 269 | function stop_instance { 270 | local cluster_name=$1 271 | local instance_ids=$(get_instance_ids_with_name_and_role ${cluster_name}) 272 | 273 | ${AWS_CMD} stop-instances \ 274 | --instance-ids ${instance_ids} 275 | } 276 | 277 | function terminate_instances_with_ids { 278 | local instance_ids="$@" 279 | ${AWS_CMD} terminate-instances \ 280 | --instance-ids ${instance_ids} 281 | } 282 | 283 | function cancel_spot_requests_with_ids { 284 | local spot_request_ids="$@" 285 | ${AWS_CMD} cancel-spot-instance-requests \ 286 | --spot-instance-request-ids ${spot_request_ids} 287 | } 288 | 289 | function allocate_eip { 290 | ${AWS_CMD} allocate-address \ 291 | --domain vpc \ 292 | --query AllocationId 293 | } 294 | 295 | function allocate_and_associate_eip { 296 | local instance_id=$1 297 | local allocation_id=$(allocate_eip) 298 | 299 | ${AWS_CMD} associate-address \ 300 | --allocation-id ${allocation_id} \ 301 | --instance-id ${instance_id} \ 302 | --query AssociationId 303 | } 304 | 305 | function describe_eip_with_instance_id { 306 | local instance_id=$1 307 | 308 | ${AWS_CMD} describe-addresses \ 309 | --filters 
Name=instance-id,Values=${instance_id} \ 310 | --query Addresses[0].[AssociationId,AllocationId] 311 | } 312 | 313 | function release_eip { 314 | local instance_id=$1 315 | local association_and_allocation_id=($(describe_eip_with_instance_id ${instance_id})) 316 | local association_id=${association_and_allocation_id[0]} 317 | local allocation_id=${association_and_allocation_id[1]} 318 | 319 | if [ ! -z ${allocation_id} ]; then 320 | ${AWS_CMD} disassociate-address --association-id ${association_id} 321 | ${AWS_CMD} release-address --allocation-id ${allocation_id} 322 | 323 | echo -e "${color_green}Released elastic IPs associated with the instance ${instance_id}${color_norm}" 324 | fi 325 | } 326 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 
26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. 
You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. 
(Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /peg: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2015 Insight Data Science 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
#
# peg — command-line driver for provisioning AWS clusters and managing the
# big-data technologies installed on them.  Dispatches on the first
# non-option argument and delegates the heavy lifting to helper functions
# sourced from util.sh and to the scripts under install/, config/ and
# service/.
#
# Required environment: AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY
# Optional (defaulted below): AWS_DEFAULT_REGION, REM_USER
#

set -o errexit

PEG_ROOT=$(dirname "${BASH_SOURCE[0]}")

# ':=' assigns the default into the variable as a side effect, so the value
# is both available to the sourced helpers and echoed back by `peg config`.
AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION:=us-west-2}
REM_USER=${REM_USER:=ubuntu}

# Brings in the cluster helpers used below (show_all_vpcs, run_instances,
# store_*, fetch_*, run_cmd_on_node, service_action, install_tech, ...) and
# presumably the color_* variables — defined outside this file.
source "${PEG_ROOT}/util.sh"

nargs="$#"

if [[ "${nargs}" -eq 0 ]]; then
  echo "Usage: peg [options] [parameters]"
  echo "peg: error: too few arguments"
  echo "use the -h option for available commands"
  exit 1
fi

# Only -h is recognized; any other flag is rejected.  Non-flag arguments
# (the subcommand and its parameters) are handled after this loop.
while getopts ":h" opt; do
  case ${opt} in
    h)
      echo "peg"
      echo " - config"
      echo " - aws"
      echo " - validate"
      echo " - fetch"
      echo " - describe"
      echo " - up"
      echo " - down"
      echo " - install"
      echo " - uninstall"
      echo " - service"
      echo " - ssh"
      echo " - sshcmd-node"
      echo " - sshcmd-cluster"
      echo " - scp"
      echo " - retag"
      echo " - start"
      echo " - stop"
      echo " - port-forward"
      exit 1
      ;;

    *)
      echo "Invalid option flag: -${OPTARG}"
      exit 1
      ;;
  esac
done

args=("$@")
command=${args[0]}
# parameters is intentionally a flat space-joined string; the word count and
# the unquoted array re-splits below depend on that (cluster names and
# technology names therefore must not contain whitespace).
parameters=${args[@]:1}
nfargs=$(echo ${parameters} | wc -w)

case ${command} in
  config)
    # Report the effective AWS/SSH configuration, flagging anything unset.
    if [[ -z "${AWS_ACCESS_KEY_ID}" ]]; then
      echo -e "${color_red}[MISSING] AWS_ACCESS_KEY_ID${color_norm}"
    else
      echo "access key: ${AWS_ACCESS_KEY_ID}"
    fi

    if [[ -z "${AWS_SECRET_ACCESS_KEY}" ]]; then
      echo -e "${color_red}[MISSING] AWS_SECRET_ACCESS_KEY${color_norm}"
    else
      echo "secret key: ${AWS_SECRET_ACCESS_KEY}"
    fi

    if [[ -z "${AWS_DEFAULT_REGION}" ]]; then
      echo -e "${color_red}MISSING AWS_DEFAULT_REGION${color_norm}"
    else
      echo "    region: ${AWS_DEFAULT_REGION}"
    fi

    if [[ -z "${REM_USER}" ]]; then
      echo -e "${color_red}MISSING REM_USER${color_norm}"
    else
      echo "  SSH User: ${REM_USER}"
    fi
    ;;

  aws)
    # peg aws <vpcs|subnets|security-groups> [vpc-name]
    PARAMETER_ARR=(${parameters[@]})
    PEG_AWS_CMD=${PARAMETER_ARR[0]}
    VPC_NAME=${PARAMETER_ARR[1]}
    case ${PEG_AWS_CMD} in
      vpcs)
        show_all_vpcs
        ;;

      subnets)
        show_all_subnets ${VPC_NAME}
        ;;

      security-groups)
        show_all_security_groups ${VPC_NAME}
        ;;

      *)
        echo "specify command for peg aws"
        echo "peg aws"
        echo " - vpcs"
        echo " - subnets"
        echo " - security-groups"
        exit 1
    esac
    ;;

  validate)
    # Syntax-check a launch template without starting any instances.
    if [[ "${nfargs}" -eq "1" ]]; then
      YAML_FILE=${parameters}
      if [[ ! -f "${YAML_FILE}" ]]; then
        echo "${YAML_FILE} not found"
        exit 1
      else
        set_launch_config ${YAML_FILE}
        validate_template
        echo "Template validated successfully."
      fi
    else
      echo "Invalid number of arguments"
      exit 1
    fi
    ;;

  fetch)
    # Refresh the local cache (tmp/<cluster>) of DNS names, hostnames,
    # roles and the pem key for an already-running cluster.
    if [[ "${nfargs}" -eq "1" ]]; then
      CLUSTER_NAME=${parameters}

      CLUSTER_INFO_PATH=${PEG_ROOT}/tmp/${CLUSTER_NAME}
      if [[ -d "${CLUSTER_INFO_PATH}" ]]; then
        rm -rf ${CLUSTER_INFO_PATH}
      fi

      mkdir -p ${CLUSTER_INFO_PATH}

      store_public_dns ${CLUSTER_NAME}
      store_hostnames ${CLUSTER_NAME}
      store_roles ${CLUSTER_NAME}
      store_pemkey ${CLUSTER_NAME}
      peg describe ${CLUSTER_NAME}
    else
      echo "Invalid number of arguments"
      echo "Usage: peg fetch <cluster-name>"
      exit 1
    fi
    ;;

  describe)
    if [[ "${nfargs}" -eq "1" ]]; then
      CLUSTER_NAME=${parameters}

      describe_cluster ${CLUSTER_NAME}
    else
      echo "Invalid number of arguments"
      echo "Usage: peg describe <cluster-name>"
      exit 1
    fi
    ;;

  up)
    # Validate the template first (recursive peg call), then launch.
    if [[ "${nfargs}" -eq "1" ]]; then
      TEMPLATE_PATH=${parameters}

      peg validate ${TEMPLATE_PATH}
      set_launch_config ${TEMPLATE_PATH}
      run_instances
    else
      echo "Invalid number of arguments"
      echo "Usage: peg up <template-path>"
      exit 1
    fi
    ;;

  install)
    if [[ "${nfargs}" -eq "2" ]]; then
      # Re-split the flat parameter string into <cluster-name> <technology>.
      PARAMETER_ARR=(${parameters})
      CLUSTER_NAME=${PARAMETER_ARR[0]}
      TECHNOLOGY=${PARAMETER_ARR[1]}

      check_cluster_exists ${CLUSTER_NAME}

      # MASTER_DNS / DEP_ROOT_FOLDER are read by the install helpers in
      # util.sh (install_tech, get_dependencies) — confirmed only by usage.
      MASTER_DNS=$(fetch_public_dns_of_node_in_cluster ${CLUSTER_NAME} 1)
      DEP_ROOT_FOLDER=/usr/local/

      case ${TECHNOLOGY} in
        aws)
          echo "Passing AWS credentials to ${CLUSTER_NAME}"
          ${PEG_ROOT}/config/pass_aws_cred ${CLUSTER_NAME}
          ;;

        environment)
          echo "Setting up base environment packages on ${CLUSTER_NAME}"
          ${PEG_ROOT}/install/environment/install_env_cluster.sh ${CLUSTER_NAME}
          ;;

        alluxio|cassandra|elasticsearch|flink|hadoop|hbase|hive|kafka|opscenter|pig|presto|redis|spark|storm|zookeeper)
          get_dependencies >> /dev/null
          install_tech "cluster"
          ;;

        kafka-manager)
          ${PEG_ROOT}/install/kafka-manager/install_kafka_manager_cluster.sh ${CLUSTER_NAME}
          ${PEG_ROOT}/config/kafka-manager/setup_cluster.sh ${CLUSTER_NAME}
          ;;

        kibana)
          get_dependencies >> /dev/null
          install_tech "single"
          ;;

        memsql)
          ${PEG_ROOT}/install/memsql/install_memsql_cluster.sh ${CLUSTER_NAME}
          ;;

        riak)
          ${PEG_ROOT}/install/riak/install_riak_cluster.sh ${CLUSTER_NAME}
          ${PEG_ROOT}/config/riak/setup_cluster.sh ${CLUSTER_NAME}
          ;;

        secor)
          ${PEG_ROOT}/install/secor/install_secor_cluster.sh ${CLUSTER_NAME}
          ${PEG_ROOT}/config/secor/setup_cluster.sh ${CLUSTER_NAME}
          ;;

        ssh)
          echo "Setting up passwordless SSH on ${CLUSTER_NAME}"
          ${PEG_ROOT}/config/ssh/setup_passwordless_ssh.sh ${CLUSTER_NAME}
          ;;

        zeppelin)
          echo "Installing Zeppelin on Master Node in ${CLUSTER_NAME}"
          ${PEG_ROOT}/install/zeppelin/install_zeppelin_cluster.sh ${CLUSTER_NAME}
          ${PEG_ROOT}/config/zeppelin/setup_cluster.sh ${CLUSTER_NAME}
          ;;

        *)
          echo "Invalid technology to install."
          exit 1
          ;;

      esac
    else
      echo "Invalid number of arguments"
      echo "Usage: peg install <cluster-name> <technology>"
      exit 1
    fi
    ;;

  uninstall)
    if [[ "${nfargs}" -eq "2" ]]; then
      PARAMETER_ARR=(${parameters})
      CLUSTER_NAME=${PARAMETER_ARR[0]}
      TECHNOLOGY=${PARAMETER_ARR[1]}

      check_cluster_exists ${CLUSTER_NAME}

      PUBLIC_DNS=($(fetch_cluster_public_dns ${CLUSTER_NAME}))
      MASTER_DNS=${PUBLIC_DNS[0]}

      # Left unquoted on purpose: the *.pem glob must expand at use sites.
      PEMLOC=${PEG_ROOT}/tmp/$CLUSTER_NAME/*.pem
      ROOT_FOLDER=/usr/local/

      case ${TECHNOLOGY} in
        alluxio|cassandra|elasticsearch|flink|hadoop|hbase|hive|kafka|opscenter|pig|presto|redis|spark|storm|zeppelin|zookeeper)
          INSTALLED=$(check_remote_folder ${MASTER_DNS} ${ROOT_FOLDER}${TECHNOLOGY})
          if [[ "$INSTALLED" = "installed" ]]; then
            peg service ${CLUSTER_NAME} ${TECHNOLOGY} stop
            uninstall_tech "cluster"
            echo -e "${color_green}${TECHNOLOGY} uninstalled!${color_norm}"
          else
            echo "${TECHNOLOGY} is not installed in ${ROOT_FOLDER}"
            exit 1
          fi
          ;;

        secor|kafka-manager|kibana)
          INSTALLED=$(check_remote_folder ${MASTER_DNS} ${ROOT_FOLDER}${TECHNOLOGY})
          if [[ "$INSTALLED" = "installed" ]]; then
            peg service ${CLUSTER_NAME} ${TECHNOLOGY} stop
            uninstall_tech "single"
            echo "${TECHNOLOGY} uninstalled!"
          else
            echo "${TECHNOLOGY} is not installed in ${ROOT_FOLDER}"
            exit 1
          fi
          ;;

        memsql)
          INSTALLED=$(check_remote_folder ${MASTER_DNS} ${ROOT_FOLDER}${TECHNOLOGY})
          if [[ "$INSTALLED" = "installed" ]]; then
            # The service is STARTED (not stopped) first — presumably
            # memsql-ops must be running for memsql-delete/agent-uninstall
            # to work.  NOTE(review): rmdir only removes empty directories;
            # confirm the uninstall leaves them empty.
            peg service ${CLUSTER_NAME} ${TECHNOLOGY} start
            peg sshcmd-node ${CLUSTER_NAME} 1 "
            . ~/.profile;
            yes DELETE | sudo memsql-ops memsql-delete --all;
            yes DELETE | sudo memsql-ops agent-uninstall --all;
            sudo rmdir /usr/local/memsql;
            sudo rmdir /usr/local/memsql-ops-data;
            sudo rmdir /var/lib/memsql-ops;
            sudo rm -r /home/ubuntu/memsql-ops*;
            "
          fi
          ;;

        riak)
          INSTALLED=$(check_remote_file ${MASTER_DNS} "/usr/sbin/riak")
          if [[ "$INSTALLED" = "installed" ]]; then
            for dns in ${PUBLIC_DNS[@]}; do
              run_cmd_on_node ${dns} sudo dpkg --purge riak
              run_cmd_on_node ${dns} sudo rm -rf /var/lib/riak
            done
          fi
          ;;

        *)
          echo "Invalid technology to uninstall."
          exit 1
          ;;

      esac
    else
      echo "Invalid number of arguments"
      echo "Usage: peg uninstall <cluster-name> <technology>"
      exit 1
    fi
    ;;

  service)
    if [[ "${nfargs}" -eq "3" ]]; then
      PARAMETER_ARR=(${parameters})
      CLUSTER_NAME=${PARAMETER_ARR[0]}
      TECHNOLOGY=${PARAMETER_ARR[1]}
      ACTION=${PARAMETER_ARR[2]}

      check_cluster_exists ${CLUSTER_NAME}

      MASTER_DNS=$(fetch_public_dns_of_node_in_cluster ${CLUSTER_NAME} 1)

      # ROOT_FOLDER / INSTALL_PATH tell service_action how to detect the
      # installation (directory under /usr/local vs a single binary).
      case $TECHNOLOGY in
        alluxio|cassandra|elasticsearch|flink|hadoop|hbase|kafka|kafka-manager|kibana|memsql|opscenter|presto|redis|secor|spark|storm|zeppelin|zookeeper)
          ROOT_FOLDER=/usr/local/
          INSTALL_PATH="folder"
          service_action
          ;;

        riak)
          ROOT_FOLDER=/usr/sbin/
          INSTALL_PATH="file"
          service_action
          ;;

        *)
          echo "Invalid service to ${ACTION}."
          exit 1
          ;;

      esac
    else
      echo "Invalid number of arguments"
      echo "Usage: peg service <cluster-name> <technology> <start|stop>"
      exit 1
    fi
    ;;

  down)
    # Terminate every instance in the cluster after interactive confirmation.
    if [[ "${nfargs}" -eq "1" ]]; then
      CLUSTER_NAME=${parameters}

      check_cluster_exists ${CLUSTER_NAME}

      INSTANCE_IDS=($(get_instance_ids_with_name_and_role ${CLUSTER_NAME}))
      echo -e "Are you sure you want to terminate ${color_blue}$CLUSTER_NAME${color_norm}?"
      for ids in ${INSTANCE_IDS[@]}; do
        echo ${ids}
      done
      echo -n "[y/n]: "
      read RESPONSE

      case $RESPONSE in
        n)
          exit 1
          ;;
        y)
          terminate_instances_with_name ${CLUSTER_NAME}
          ;;
        *)
          echo "respond with y or n."
          exit 1
      esac
    else
      echo "Invalid number of arguments"
      echo "Usage: peg down <cluster-name>"
      exit 1
    fi
    ;;

  ssh)
    # Interactive SSH into node N (1-based line in the cached public_dns).
    if [[ "${nfargs}" -eq "2" ]]; then
      PARAMETER_ARR=(${parameters})
      CLUSTER_NAME=${PARAMETER_ARR[0]}
      NODE_NUM=${PARAMETER_ARR[1]}

      check_cluster_exists ${CLUSTER_NAME}
      restart_sshagent_if_needed ${CLUSTER_NAME}

      ssh -A -o "StrictHostKeyChecking no" ${REM_USER}@$(sed -n ''"$NODE_NUM"'p' ${PEG_ROOT}/tmp/${CLUSTER_NAME}/public_dns)
    else
      echo "Invalid number of arguments"
      echo "Usage: peg ssh <cluster-name> <node-number>"
      exit 1
    fi
    ;;

  sshcmd-node)
    if [[ "${nfargs}" -ge "3" ]]; then
      PARAMETER_ARR=(${parameters})
      CLUSTER_NAME=${PARAMETER_ARR[0]}
      NODE_NUM=${PARAMETER_ARR[1]}
      # Everything after the node number is the remote command.
      CMD=${PARAMETER_ARR[@]:2}

      check_cluster_exists ${CLUSTER_NAME}

      PUBLIC_DNS=$(sed -n ''"$NODE_NUM"'p' ${PEG_ROOT}/tmp/${CLUSTER_NAME}/public_dns)
      # ${CMD} stays unquoted so the command splits into words remotely.
      run_cmd_on_node ${PUBLIC_DNS} ${CMD}
    else
      echo "Invalid number of arguments"
      echo "Usage: peg sshcmd-node <cluster-name> <node-number> \"<cmd>\""
      exit 1
    fi
    ;;

  sshcmd-cluster)
    if [[ "${nfargs}" -ge "2" ]]; then
      PARAMETER_ARR=(${parameters})
      CLUSTER_NAME=${PARAMETER_ARR[0]}
      CMD=${PARAMETER_ARR[@]:1}

      check_cluster_exists ${CLUSTER_NAME}

      run_cmd_on_cluster ${CLUSTER_NAME} ${CMD}
    else
      echo "Invalid number of arguments"
      echo "Usage: peg sshcmd-cluster <cluster-name> \"<cmd>\""
      exit 1
    fi
    ;;

  scp)
    if [[ "${nfargs}" -eq "5" ]]; then
      PARAMETER_ARR=(${parameters})
      SCP_OPTION=${PARAMETER_ARR[0]}
      CLUSTER_NAME=${PARAMETER_ARR[1]}
      NODE_NUM=${PARAMETER_ARR[2]}
      LOCAL_PATH=${PARAMETER_ARR[3]}
      REM_PATH=${PARAMETER_ARR[4]}

      check_cluster_exists ${CLUSTER_NAME}

      PUBLIC_DNS=$(sed -n ''"$NODE_NUM"'p' ${PEG_ROOT}/tmp/${CLUSTER_NAME}/public_dns)
      # The *.pem argument to -i must stay unquoted so the glob expands.
      case ${SCP_OPTION} in
        to-local|from-rem)
          scp -r -i ${PEG_ROOT}/tmp/${CLUSTER_NAME}/*.pem ${REM_USER}@${PUBLIC_DNS}:${REM_PATH} ${LOCAL_PATH}
          ;;

        to-rem|from-local)
          scp -r -i ${PEG_ROOT}/tmp/${CLUSTER_NAME}/*.pem ${LOCAL_PATH} ${REM_USER}@${PUBLIC_DNS}:${REM_PATH}
          ;;

        *)
          echo "Invalid peg scp option"
          echo "valid options:"
          echo "    to-local"
          echo "    from-local"
          echo "    to-rem"
          echo "    from-rem"
          exit 1

      esac
    else
      echo "Invalid number of arguments"
      echo "Usage: peg scp <to-local|to-rem|from-local|from-rem> <cluster-name> <node-number> <local-path> <remote-path>"
      exit 1
    fi
    ;;

  retag)
    # Rename the cluster's AWS tags, then re-fetch under the new name.
    if [[ "${nfargs}" -eq "2" ]]; then
      PARAMETER_ARR=(${parameters})
      CLUSTER_NAME=${PARAMETER_ARR[0]}
      NEW_CLUSTER_NAME=${PARAMETER_ARR[1]}

      retag_instance_with_name ${CLUSTER_NAME} ${NEW_CLUSTER_NAME}

      peg fetch ${NEW_CLUSTER_NAME}
    else
      echo "Invalid number of arguments"
      echo "Usage: peg retag <cluster-name> <new-cluster-name>"
      exit 1
    fi
    ;;

  port-forward)
    if [[ "${nfargs}" -eq "3" ]]; then
      PARAMETER_ARR=(${parameters})
      CLUSTER_NAME=${PARAMETER_ARR[0]}
      NODE_NUMBER=${PARAMETER_ARR[1]}
      PORT_CMD=${PARAMETER_ARR[2]}

      check_cluster_exists ${CLUSTER_NAME}

      port_forward ${CLUSTER_NAME} ${NODE_NUMBER} ${PORT_CMD}
    else
      echo "Invalid number of arguments"
      echo "Usage: peg port-forward <cluster-name> <node-number> <local-port>:<remote-port>"
      exit 1
    fi
    ;;

  start)
    if [[ "${nfargs}" -eq "1" ]]; then
      CLUSTER_NAME=${parameters}

      check_cluster_exists ${CLUSTER_NAME}

      start_instance ${CLUSTER_NAME}
    else
      echo "Invalid number of arguments"
      echo "Usage: peg start <cluster-name>"
      exit 1
    fi
    ;;

  stop)
    # Stop (not terminate) the EC2 instances after interactive confirmation.
    if [[ "${nfargs}" -eq "1" ]]; then
      CLUSTER_NAME=${parameters}

      check_cluster_exists ${CLUSTER_NAME}

      echo "All services should be stopped before stopping clusters."
      echo -e "Are you sure you want to stop ${color_blue}$CLUSTER_NAME${color_norm}?"
      echo -n "[y/n]: "
      read RESPONSE

      case $RESPONSE in
        n)
          exit 1
          ;;
        y)
          stop_instance ${CLUSTER_NAME}
          ;;
        *)
          echo "respond with y or n."
          exit 1
      esac
    else
      echo "Invalid number of arguments"
      echo "Usage: peg stop <cluster-name>"
      exit 1
    fi
    ;;

  *)
    echo "Invalid command. use -h to see available commands"
    exit 1
esac