├── .mvn └── wrapper │ ├── maven-wrapper.jar │ └── maven-wrapper.properties ├── .gitignore ├── presto-yarn-test ├── src │ ├── main │ │ ├── resources │ │ │ ├── conf │ │ │ │ ├── slider │ │ │ │ │ ├── slider-env.sh │ │ │ │ │ ├── slider-env-openstack.sh │ │ │ │ │ ├── log4j.properties │ │ │ │ │ ├── slider-client-openstack.xml │ │ │ │ │ └── slider-client.xml │ │ │ │ ├── yarn │ │ │ │ │ ├── container-executor.cfg │ │ │ │ │ ├── capacity-scheduler.xml │ │ │ │ │ └── yarn-site.xml │ │ │ │ └── cgroup │ │ │ │ │ ├── cgrules.conf │ │ │ │ │ └── cgconfig.conf │ │ │ ├── fix_hdp_mapreduce.sh │ │ │ ├── tempto-configuration.yaml │ │ │ └── log4j.properties │ │ ├── java │ │ │ └── com │ │ │ │ └── teradata │ │ │ │ └── presto │ │ │ │ └── yarn │ │ │ │ └── test │ │ │ │ ├── utils │ │ │ │ ├── Streams.java │ │ │ │ ├── FileDigesters.java │ │ │ │ ├── Closures.java │ │ │ │ ├── Resources.java │ │ │ │ ├── TimeUtils.java │ │ │ │ ├── SimpleJdbcQueryExecutor.java │ │ │ │ └── NodeSshUtils.java │ │ │ │ ├── fulfillment │ │ │ │ ├── ImmutableNationTable.java │ │ │ │ ├── SliderClusterFulfiller.java │ │ │ │ └── PrerequisitesClusterFulfiller.java │ │ │ │ ├── Main.java │ │ │ │ ├── slider │ │ │ │ ├── SliderStatus.java │ │ │ │ └── Slider.java │ │ │ │ └── PrestoCluster.java │ │ └── assembly │ │ │ ├── presto-app.xml │ │ │ └── presto-server.xml │ └── test │ │ ├── java │ │ └── com │ │ │ └── teradata │ │ │ └── presto │ │ │ └── yarn │ │ │ └── test │ │ │ └── slider │ │ │ └── SliderStatusTest.java │ │ └── resources │ │ └── status_file ├── etc │ └── docker │ │ ├── tempto-configuration-docker-local.yaml │ │ ├── get_docker_ips.sh │ │ ├── cdh5 │ │ └── docker-compose.yml │ │ ├── hdp2.3 │ │ └── docker-compose.yml │ │ └── key ├── README.md ├── bin │ └── run_on_docker.sh └── pom.xml ├── presto-yarn-package ├── src │ └── main │ │ ├── slider │ │ ├── package │ │ │ ├── templates │ │ │ │ ├── node.properties.j2 │ │ │ │ ├── resource-groups.properties.j2 │ │ │ │ ├── config.properties-WORKER.j2 │ │ │ │ ├── config.properties-COORDINATOR.j2 │ │ │ │ └── resource_groups.json.j2 │ │ │ ├── scripts │ │ │ │ ├── __init__.py │ │ │ │ ├── presto_worker.py │ │ │ │ ├── presto_coordinator.py │ │ │ │ ├── presto_server.py │ │ │ │ ├── params.py │ │ │ │ └── configure.py │ │ │ ├── plugins │ │ │ │ └── README.txt │ │ │ └── files │ │ │ │ └── README.txt │ │ └── metainfo.xml │ │ ├── resources │ │ ├── resources-singlenode.json │ │ ├── resources-singlenode-label.json │ │ ├── resources-single-coordinator@master.json │ │ ├── resources-multinode.json │ │ ├── resources-multinode-single-worker.json │ │ ├── appConfig-test-no-catalog.json │ │ ├── appConfig.json │ │ └── appConfig-test.json │ │ └── assembly │ │ └── presto.xml └── pom.xml ├── .travis.yml ├── presto-yarn-docs ├── src │ └── main │ │ └── sphinx │ │ ├── index.rst │ │ ├── installation-yarn-directory-structure.rst │ │ ├── conf.py │ │ ├── developers.rst │ │ ├── installation-yarn-debugging-logging.rst │ │ ├── installation-yarn-manual.rst │ │ ├── installation-yarn-configuration-options-advanced.rst │ │ ├── installation-yarn-automated.rst │ │ └── installation-yarn-configuration-options.rst └── pom.xml ├── README.md ├── pom.xml └── mvnw /.mvn/wrapper/maven-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/prestodb/presto-yarn/HEAD/.mvn/wrapper/maven-wrapper.jar -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | .idea/ 3 | out/ 4 | 
*.iml 5 | *.eml 6 | 7 | [._]*.s[a-w][a-z] 8 | [._]s[a-w][a-z] 9 | *~ 10 | -------------------------------------------------------------------------------- /.mvn/wrapper/maven-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionUrl=https://repo1.maven.org/maven2/org/apache/maven/apache-maven/3.3.9/apache-maven-3.3.9-bin.zip 2 | -------------------------------------------------------------------------------- /presto-yarn-test/src/main/resources/conf/slider/slider-env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | export JAVA_HOME=/usr/java/jdk1.8.0_92/ 4 | export HADOOP_CONF_DIR=/etc/hadoop/conf 5 | -------------------------------------------------------------------------------- /presto-yarn-test/src/main/resources/conf/slider/slider-env-openstack.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | export JAVA_HOME=/usr/lib/jvm/java 4 | export HADOOP_CONF_DIR=/etc/hadoop/conf 5 | 6 | -------------------------------------------------------------------------------- /presto-yarn-package/src/main/slider/package/templates/node.properties.j2: -------------------------------------------------------------------------------- 1 | node.environment=test 2 | node.id={{node_id}} 3 | node.data-dir={{data_dir}} 4 | plugin.config-dir={{catalog_dir}} 5 | -------------------------------------------------------------------------------- /presto-yarn-package/src/main/slider/package/templates/resource-groups.properties.j2: -------------------------------------------------------------------------------- 1 | resource-groups.configuration-manager=file 2 | resource-groups.config-file={{conf_dir}}/resource_groups.json 3 | -------------------------------------------------------------------------------- /presto-yarn-test/src/main/resources/conf/yarn/container-executor.cfg: -------------------------------------------------------------------------------- 1 | yarn.nodemanager.linux-container-executor.group=yarn 2 | allowed.system.users=yarn,hdfs 3 | banned.users=root 4 | min.user.id=0 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: java 2 | 3 | jdk: 4 | - oraclejdk8 5 | 6 | dist: trusty 7 | 8 | install: 9 | - ./mvnw package -v -B 10 | 11 | script: 12 | - ./mvnw package -DskipTests -B 13 | 14 | -------------------------------------------------------------------------------- /presto-yarn-test/src/main/resources/fix_hdp_mapreduce.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | HDP_VERSION=$(hadoop version | head -n 1 | sed 's/Hadoop 2.7.1.//g') 4 | su - hdfs -c "hdfs dfs -mkdir -p /hdp/apps/$HDP_VERSION/mapreduce" 5 | su - hdfs -c "hdfs dfs -put /usr/hdp/current/hadoop-client/mapreduce.tar.gz /hdp/apps/$HDP_VERSION/mapreduce/mapreduce.tar.gz" 6 | -------------------------------------------------------------------------------- /presto-yarn-package/src/main/slider/package/templates/config.properties-WORKER.j2: -------------------------------------------------------------------------------- 1 | coordinator=false 2 | http-server.http.port={{presto_server_port}} 3 | query.max-memory={{presto_query_max_memory}} 4 | query.max-memory-per-node={{presto_query_max_memory_per_node}} 5 | 
query.max-total-memory-per-node={{presto_query_max_total_memory_per_node}} 6 | discovery.uri=http://{{coordinator_host}}:{{presto_server_port}} 7 | -------------------------------------------------------------------------------- /presto-yarn-test/src/main/resources/conf/cgroup/cgrules.conf: -------------------------------------------------------------------------------- 1 | # /etc/cgrules.conf 2 | #The format of this file is described in cgrules.conf(5) 3 | #manual page. 4 | # 5 | # Example: 6 | # 7 | #@student cpu,memory usergroup/student/ 8 | #peter cpu test1/ 9 | #% memory test2/ 10 | # End of file 11 | @yarn cpu yarn 12 | -------------------------------------------------------------------------------- /presto-yarn-package/src/main/resources/resources-singlenode.json: -------------------------------------------------------------------------------- 1 | { 2 | "schema": "http://example.org/specification/v2.0.0", 3 | "metadata": { 4 | }, 5 | "global": { 6 | "yarn.vcores": "1" 7 | }, 8 | "components": { 9 | "slider-appmaster": { 10 | }, 11 | "COORDINATOR": { 12 | "yarn.role.priority": "1", 13 | "yarn.component.instances": "1", 14 | "yarn.memory": "256" 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /presto-yarn-test/etc/docker/tempto-configuration-docker-local.yaml: -------------------------------------------------------------------------------- 1 | ssh: 2 | identity: /workspace/etc/docker/key 3 | cluster: 4 | master: hadoop-master 5 | slaves: 6 | - hadoop-slave1 7 | - hadoop-slave2 8 | - hadoop-slave3 9 | 10 | tests: 11 | app_package: 12 | path: /workspace/target/package 13 | slider: 14 | binary: /workspace/target/package/slider-assembly-0.80.0-incubating-all.zip 15 | conf_dir: /workspace/target/package/ 16 | -------------------------------------------------------------------------------- /presto-yarn-package/src/main/resources/resources-singlenode-label.json: -------------------------------------------------------------------------------- 1 | { 2 | "schema": "http://example.org/specification/v2.0.0", 3 | "metadata": { 4 | }, 5 | "global": { 6 | "yarn.vcores": "1" 7 | }, 8 | "components": { 9 | "slider-appmaster": { 10 | }, 11 | "COORDINATOR": { 12 | "yarn.role.priority": "1", 13 | "yarn.component.instances": "1", 14 | "yarn.memory": "256", 15 | "yarn.label.expression": "coordinator" 16 | } 17 | } 18 | } -------------------------------------------------------------------------------- /presto-yarn-package/src/main/slider/package/templates/config.properties-COORDINATOR.j2: -------------------------------------------------------------------------------- 1 | coordinator=true 2 | node-scheduler.include-coordinator={{singlenode}} 3 | discovery-server.enabled=true 4 | http-server.http.port={{presto_server_port}} 5 | query.max-memory={{presto_query_max_memory}} 6 | query.max-memory-per-node={{presto_query_max_memory_per_node}} 7 | query.max-total-memory-per-node={{presto_query_max_total_memory_per_node}} 8 | discovery.uri=http://{{coordinator_host}}:{{presto_server_port}} 9 | -------------------------------------------------------------------------------- /presto-yarn-package/src/main/resources/resources-single-coordinator@master.json: -------------------------------------------------------------------------------- 1 | { 2 | "schema": "http://example.org/specification/v2.0.0", 3 | "metadata": { 4 | }, 5 | "global": { 6 | "yarn.vcores": "1" 7 | }, 8 | "components": { 9 | "slider-appmaster": { 10 | }, 11 | "COORDINATOR": { 12 | 
"yarn.role.priority": "1", 13 | "yarn.component.instances": "1", 14 | "yarn.component.placement.policy": "1", 15 | "yarn.memory": "1500", 16 | "yarn.label.expression": "coordinator" 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /presto-yarn-test/etc/docker/get_docker_ips.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | if test ! -f docker-compose.yml; then 4 | echo Please run this script from directory wiith docker-compose.yml file. 5 | exit 1 6 | fi 7 | 8 | echo "# This section is generated by ../$0" 9 | echo "# You may need to regenerate it per each 'docker-compose up' command" 10 | echo "# NOTE this is only supported on linux" 11 | echo 'hosts:' 12 | for container in hadoop-master hadoop-slave{1,2,3}; do 13 | container_id=$(docker-compose ps -q $container) 14 | container_ip=$(docker exec $container_id ifconfig eth0 | grep 'inet addr' | awk '{print $2 }' | cut -d : -f 2) 15 | echo " $container: $container_ip" 16 | done 17 | echo "# End of generated section" 18 | -------------------------------------------------------------------------------- /presto-yarn-package/src/main/slider/package/templates/resource_groups.json.j2: -------------------------------------------------------------------------------- 1 | { 2 | "rootGroups": [ 3 | { 4 | "name": "global", 5 | "softMemoryLimit": "80%", 6 | "hardConcurrencyLimit": 100, 7 | "maxQueued": 1000, 8 | "schedulingPolicy": "weighted", 9 | "jmxExport": true, 10 | "subGroups": [ 11 | { 12 | "name": "adhoc_${USER}", 13 | "softMemoryLimit": "10%", 14 | "hardConcurrencyLimit": 2, 15 | "maxQueued": 1, 16 | "schedulingWeight": 9, 17 | "schedulingPolicy": "query_priority" 18 | } 19 | ] 20 | } 21 | ], 22 | "selectors": [ 23 | { 24 | "group": "global.adhoc_${USER}" 25 | } 26 | ] 27 | } 28 | -------------------------------------------------------------------------------- /presto-yarn-package/src/main/resources/resources-multinode.json: -------------------------------------------------------------------------------- 1 | { 2 | "schema": "http://example.org/specification/v2.0.0", 3 | "metadata": { 4 | }, 5 | "global": { 6 | "yarn.vcores": "1" 7 | }, 8 | "components": { 9 | "slider-appmaster": { 10 | }, 11 | "COORDINATOR": { 12 | "yarn.role.priority": "1", 13 | "yarn.component.instances": "1", 14 | "yarn.component.placement.policy": "1", 15 | "yarn.memory": "1500", 16 | "yarn.label.expression": "coordinator" 17 | }, 18 | "WORKER": { 19 | "yarn.role.priority": "2", 20 | "yarn.component.instances": "3", 21 | "yarn.component.placement.policy": "1", 22 | "yarn.memory": "1500", 23 | "yarn.label.expression": "worker" 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /presto-yarn-test/etc/docker/cdh5/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | hadoop-master: 4 | hostname: hadoop-master 5 | image: 'teradatalabs/presto-yarn-cdh5-hive-master:3' 6 | privileged: true 7 | 8 | hadoop-slave1: 9 | hostname: 'hadoop-slave1' 10 | image: 'teradatalabs/presto-yarn-cdh5-hive-slave:3' 11 | privileged: true 12 | 13 | hadoop-slave2: 14 | hostname: 'hadoop-slave2' 15 | image: 'teradatalabs/presto-yarn-cdh5-hive-slave:3' 16 | privileged: true 17 | 18 | hadoop-slave3: 19 | hostname: 'hadoop-slave3' 20 | image: 'teradatalabs/presto-yarn-cdh5-hive-slave:3' 21 | privileged: true 22 | 23 | runner: 24 | image: 
'teradatalabs/centos6-java8-oracle' 25 | volumes: 26 | - '../../..:/workspace' 27 | 28 | -------------------------------------------------------------------------------- /presto-yarn-package/src/main/resources/resources-multinode-single-worker.json: -------------------------------------------------------------------------------- 1 | { 2 | "schema": "http://example.org/specification/v2.0.0", 3 | "metadata": { 4 | }, 5 | "global": { 6 | "yarn.vcores": "1", 7 | "yarn.container.failure.threshold": "0" 8 | }, 9 | "components": { 10 | "slider-appmaster": { 11 | }, 12 | "COORDINATOR": { 13 | "yarn.role.priority": "1", 14 | "yarn.component.instances": "1", 15 | "yarn.component.placement.policy": "1", 16 | "yarn.memory": "1500", 17 | "yarn.label.expression": "coordinator" 18 | }, 19 | "WORKER": { 20 | "yarn.role.priority": "2", 21 | "yarn.component.instances": "1", 22 | "yarn.component.placement.policy": "1", 23 | "yarn.memory": "1500", 24 | "yarn.label.expression": "worker" 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /presto-yarn-package/src/main/slider/package/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Licensed to the Apache Software Foundation (ASF) under one 4 | or more contributor license agreements. See the NOTICE file 5 | distributed with this work for additional information 6 | regarding copyright ownership. The ASF licenses this file 7 | to you under the Apache License, Version 2.0 (the 8 | "License"); you may not use this file except in compliance 9 | with the License. You may obtain a copy of the License at 10 | 11 | http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | Unless required by applicable law or agreed to in writing, software 14 | distributed under the License is distributed on an "AS IS" BASIS, 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | See the License for the specific language governing permissions and 17 | limitations under the License. 18 | 19 | """ 20 | -------------------------------------------------------------------------------- /presto-yarn-test/etc/docker/hdp2.3/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | hadoop-master: 4 | hostname: hadoop-master 5 | image: 'teradatalabs/presto-yarn-hdp2.3-master:4' 6 | privileged: true 7 | 8 | hadoop-slave1: 9 | hostname: 'hadoop-slave1' 10 | image: 'teradatalabs/presto-yarn-hdp2.3-slave:4' 11 | privileged: true 12 | 13 | hadoop-slave2: 14 | hostname: 'hadoop-slave2' 15 | image: 'teradatalabs/presto-yarn-hdp2.3-slave:4' 16 | privileged: true 17 | 18 | hadoop-slave3: 19 | hostname: 'hadoop-slave3' 20 | image: 'teradatalabs/presto-yarn-hdp2.3-slave:4' 21 | privileged: true 22 | 23 | hadoop-slave4: 24 | hostname: 'hadoop-slave4' 25 | image: 'teradatalabs/presto-yarn-hdp2.3-slave:4' 26 | privileged: true 27 | 28 | runner: 29 | image: 'teradatalabs/centos6-java8-oracle' 30 | volumes: 31 | - '../../..:/workspace' 32 | 33 | -------------------------------------------------------------------------------- /presto-yarn-package/src/main/slider/package/plugins/README.txt: -------------------------------------------------------------------------------- 1 | 17 | Place Presto plugin jars in this directory. 
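Jars placed here are bundled into the Slider app package. The appConfig files shipped with this package reference this directory through site.global.app_pkg_plugin, and appConfig-test.json shows how individual jars are then selected per plugin via site.global.plugin (for example {'ml': ['presto-ml-${presto.version}.jar']}).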
-------------------------------------------------------------------------------- /presto-yarn-test/src/main/resources/tempto-configuration.yaml: -------------------------------------------------------------------------------- 1 | cluster: 2 | master: master 3 | slaves: 4 | - slave1 5 | - slave2 6 | - slave3 7 | 8 | hdfs: 9 | username: hdfs 10 | webhdfs: 11 | host: ${cluster.master} 12 | port: 50070 13 | 14 | databases: 15 | hive: 16 | host: ${cluster.master} 17 | jdbc_driver_class: org.apache.hive.jdbc.HiveDriver 18 | jdbc_url: jdbc:hive2://${databases.hive.host}:10000 19 | jdbc_user: hdfs 20 | jdbc_password: na 21 | jdbc_pooling: false 22 | table_manager_type: hive 23 | 24 | tests: 25 | hdfs: 26 | path: /product-test/presto-yarn 27 | app_package: 28 | path: target/package 29 | slider: 30 | binary: target/package/slider-assembly-0.80.0-incubating-all.zip 31 | conf_dir: target/package/ 32 | 33 | ssh: 34 | identity: ${IDENTITY_FILE} 35 | roles: 36 | yarn: 37 | host: ${cluster.master} 38 | port: 22 39 | user: yarn 40 | password: yarntest 41 | -------------------------------------------------------------------------------- /presto-yarn-docs/src/main/sphinx/index.rst: -------------------------------------------------------------------------------- 1 | ####################### 2 | Presto YARN Integration 3 | ####################### 4 | 5 | `Issues `_ | 6 | `Github `_ 7 | 8 | Introduction 9 | ------------ 10 | 11 | This project contains the code and needed to integrate Presto 12 | `Presto `_ with Apache Hadoop YARN using 13 | `Apache Slider `_ 14 | 15 | Presto on YARN can be set up either manually using Apache Slider or via Ambari Slider Views if you are planning to use HDP distribution. 16 | 17 | Content 18 | ------- 19 | 20 | .. toctree:: 21 | :maxdepth: 2 22 | 23 | installation-yarn-manual 24 | installation-yarn-automated 25 | installation-yarn-directory-structure.rst 26 | installation-yarn-configuration-options.rst 27 | installation-yarn-configuration-options-advanced.rst 28 | installation-yarn-debugging-logging.rst 29 | developers 30 | 31 | -------------------------------------------------------------------------------- /presto-yarn-package/src/main/slider/package/scripts/presto_worker.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Licensed to the Apache Software Foundation (ASF) under one 4 | or more contributor license agreements. See the NOTICE file 5 | distributed with this work for additional information 6 | regarding copyright ownership. The ASF licenses this file 7 | to you under the Apache License, Version 2.0 (the 8 | "License"); you may not use this file except in compliance 9 | with the License. You may obtain a copy of the License at 10 | 11 | http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | Unless required by applicable law or agreed to in writing, software 14 | distributed under the License is distributed on an "AS IS" BASIS, 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | See the License for the specific language governing permissions and 17 | limitations under the License. 
18 | 19 | """ 20 | 21 | from presto_server import PrestoServer 22 | 23 | if __name__ == "__main__": 24 | PrestoServer('WORKER').execute() 25 | -------------------------------------------------------------------------------- /presto-yarn-package/src/main/slider/package/scripts/presto_coordinator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Licensed to the Apache Software Foundation (ASF) under one 4 | or more contributor license agreements. See the NOTICE file 5 | distributed with this work for additional information 6 | regarding copyright ownership. The ASF licenses this file 7 | to you under the Apache License, Version 2.0 (the 8 | "License"); you may not use this file except in compliance 9 | with the License. You may obtain a copy of the License at 10 | 11 | http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | Unless required by applicable law or agreed to in writing, software 14 | distributed under the License is distributed on an "AS IS" BASIS, 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | See the License for the specific language governing permissions and 17 | limitations under the License. 18 | 19 | """ 20 | 21 | from presto_server import PrestoServer 22 | 23 | if __name__ == "__main__": 24 | PrestoServer('COORDINATOR').execute() 25 | -------------------------------------------------------------------------------- /presto-yarn-package/src/main/slider/package/files/README.txt: -------------------------------------------------------------------------------- 1 | 17 | 18 | Place the presto-server-.tar.gz installation file here, after manually configuring etc/ and the config files under presto-server-/etc 19 | 20 | 21 | -------------------------------------------------------------------------------- /presto-yarn-test/src/main/java/com/teradata/presto/yarn/test/utils/Streams.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package com.teradata.presto.yarn.test.utils; 15 | 16 | import java.util.Iterator; 17 | import java.util.Spliterators; 18 | import java.util.stream.Stream; 19 | import java.util.stream.StreamSupport; 20 | 21 | public class Streams 22 | { 23 | public static Stream stream(Iterator iterator) 24 | { 25 | return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, 0), false); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /presto-yarn-test/src/main/resources/conf/cgroup/cgconfig.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright IBM Corporation. 2007 3 | # 4 | # Authors: Balbir Singh 5 | # This program is free software; you can redistribute it and/or modify it 6 | # under the terms of version 2.1 of the GNU Lesser General Public License 7 | # as published by the Free Software Foundation. 
8 | # 9 | # This program is distributed in the hope that it would be useful, but 10 | # WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 12 | # 13 | # See man cgconfig.conf for further details. 14 | # 15 | # By default, mount all controllers to /cgroup/ 16 | 17 | mount { 18 | cpuset = /sys/fs/cgroup/cpuset; 19 | cpu = /sys/fs/cgroup/cpu; 20 | cpuacct = /sys/fs/cgroup/cpuacct; 21 | memory = /sys/fs/cgroup/memory; 22 | devices = /sys/fs/cgroup/devices; 23 | freezer = /sys/fs/cgroup/freezer; 24 | blkio = /sys/fs/cgroup/blkio; 25 | } 26 | 27 | group yarn{ 28 | cpu{ 29 | cpu.shares = 100; 30 | cpu.cfs_quota_us=1000; 31 | cpu.cfs_period_us=1000; 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /presto-yarn-package/src/main/resources/appConfig-test-no-catalog.json: -------------------------------------------------------------------------------- 1 | { 2 | "schema": "http://example.org/specification/v2.0.0", 3 | "metadata": { 4 | }, 5 | "global": { 6 | "site.global.app_user": "yarn", 7 | "site.global.user_group": "hadoop", 8 | "site.global.data_dir": "/var/lib/presto/data", 9 | "site.global.app_name": "${dep.pkg.basename}", 10 | "site.global.app_pkg_plugin": "${AGENT_WORK_ROOT}/app/definition/package/plugins/", 11 | "site.global.singlenode": "true", 12 | "site.global.coordinator_host": "${COORDINATOR_HOST}", 13 | "site.global.presto_query_max_memory": "5GB", 14 | "site.global.presto_query_max_memory_per_node": "600MB", 15 | "site.global.presto_server_port": "8080", 16 | 17 | "site.global.jvm_args": "['-server', '-Xmx1024M', '-XX:+UseG1GC', '-XX:G1HeapRegionSize=32M', '-XX:+UseGCOverheadLimit', '-XX:+ExplicitGCInvokesConcurrent', '-XX:+HeapDumpOnOutOfMemoryError', '-XX:OnOutOfMemoryError=kill -9 %p']", 18 | "site.global.log_properties": "['com.facebook.presto=INFO']", 19 | "application.def": ".slider/package/PRESTO/${app.package.name}.zip", 20 | "java_home": "/usr/java/jdk1.8.0_92" 21 | }, 22 | "components": { 23 | "slider-appmaster": { 24 | "jvm.heapsize": "128M" 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /presto-yarn-test/src/main/java/com/teradata/presto/yarn/test/utils/FileDigesters.java: -------------------------------------------------------------------------------- 1 | package com.teradata.presto.yarn.test.utils; 2 | 3 | import java.io.FileInputStream; 4 | import java.io.IOException; 5 | import java.io.InputStream; 6 | import java.math.BigInteger; 7 | import java.nio.file.Path; 8 | import java.security.MessageDigest; 9 | import java.security.NoSuchAlgorithmException; 10 | 11 | /** 12 | * Utility class to help digest (calculate md5sum etc.) files data. 
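 * <p>Illustrative usage: {@code FileDigesters.md5sum(Paths.get("presto-server.tar.gz"))} returns the
 * zero-padded, lower-case hex representation of the file's MD5 digest.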
13 | */ 14 | public class FileDigesters 15 | { 16 | public static String md5sum(Path path) 17 | { 18 | MessageDigest messageDigest = getMd5(); 19 | byte[] buffer = new byte[4096]; 20 | try (InputStream is = new FileInputStream(path.toFile())) { 21 | int read; 22 | while ((read = is.read(buffer)) > 0) { 23 | messageDigest.update(buffer, 0, read); 24 | } 25 | } 26 | catch (IOException e) { 27 | throw new RuntimeException(e); 28 | } 29 | return String.format("%032x", new BigInteger(1, messageDigest.digest())); 30 | } 31 | 32 | private static MessageDigest getMd5() 33 | { 34 | try { 35 | return MessageDigest.getInstance("MD5"); 36 | } 37 | catch (NoSuchAlgorithmException e) { 38 | throw new RuntimeException(e); 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /presto-yarn-test/src/main/java/com/teradata/presto/yarn/test/fulfillment/ImmutableNationTable.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | package com.teradata.presto.yarn.test.fulfillment; 16 | 17 | import com.teradata.tempto.Requirement; 18 | import com.teradata.tempto.RequirementsProvider; 19 | import com.teradata.tempto.configuration.Configuration; 20 | import com.teradata.tempto.fulfillment.table.ImmutableTableRequirement; 21 | 22 | import static com.teradata.tempto.fulfillment.table.hive.tpch.TpchTableDefinitions.NATION; 23 | 24 | public class ImmutableNationTable 25 | implements RequirementsProvider 26 | { 27 | @Override 28 | public Requirement getRequirements(Configuration configuration) 29 | { 30 | return NATION_TABLE; 31 | } 32 | 33 | public static final Requirement NATION_TABLE = new ImmutableTableRequirement(NATION); 34 | } 35 | -------------------------------------------------------------------------------- /presto-yarn-test/src/main/java/com/teradata/presto/yarn/test/Main.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | 15 | package com.teradata.presto.yarn.test; 16 | 17 | import com.teradata.tempto.runner.TemptoRunner; 18 | import com.teradata.tempto.runner.TemptoRunnerCommandLineParser; 19 | 20 | import static com.teradata.tempto.internal.configuration.TestConfigurationFactory.DEFAULT_TEST_CONFIGURATION_LOCATION; 21 | 22 | public class Main 23 | { 24 | public static void main(String[] args) 25 | { 26 | TemptoRunnerCommandLineParser parser = TemptoRunnerCommandLineParser 27 | .builder("Presto-yarn product tests") 28 | .setTestsPackage("com.teradata.presto.yarn.test.*", false) 29 | .setConfigFile(DEFAULT_TEST_CONFIGURATION_LOCATION, false) 30 | .build(); 31 | TemptoRunner.runTempto(parser, args); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /presto-yarn-package/src/main/resources/appConfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "schema": "http://example.org/specification/v2.0.0", 3 | "metadata": { 4 | }, 5 | "global": { 6 | "site.global.app_user": "yarn", 7 | "site.global.user_group": "hadoop", 8 | "site.global.data_dir": "/var/lib/presto/data", 9 | "site.global.config_dir": "/var/lib/presto/etc", 10 | "site.global.app_name": "${dep.pkg.basename}", 11 | "site.global.app_pkg_plugin": "${AGENT_WORK_ROOT}/app/definition/package/plugins/", 12 | "site.global.singlenode": "true", 13 | "site.global.coordinator_host": "${COORDINATOR_HOST}", 14 | "site.global.presto_query_max_memory": "50GB", 15 | "site.global.presto_query_max_memory_per_node": "600MB", 16 | "site.global.presto_query_max_total_memory_per_node": "600MB", 17 | "site.global.presto_server_port": "8080", 18 | 19 | "site.global.catalog": "{'tpch': ['connector.name=tpch']}", 20 | "site.global.jvm_args": "['-server', '-Xmx1024M', '-XX:+UseG1GC', '-XX:G1HeapRegionSize=32M', '-XX:+UseGCOverheadLimit', '-XX:+ExplicitGCInvokesConcurrent', '-XX:+HeapDumpOnOutOfMemoryError', '-XX:OnOutOfMemoryError=kill -9 %p']", 21 | "site.global.log_properties": "['com.facebook.presto=INFO']", 22 | 23 | "application.def": ".slider/package/PRESTO/${app.package.name}.zip", 24 | "java_home": "/usr/lib/jvm/java" 25 | }, 26 | "components": { 27 | "slider-appmaster": { 28 | "jvm.heapsize": "128M" 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /presto-yarn-test/src/main/java/com/teradata/presto/yarn/test/utils/Closures.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | package com.teradata.presto.yarn.test.utils; 15 | 16 | import org.slf4j.Logger; 17 | import org.slf4j.LoggerFactory; 18 | 19 | public class Closures 20 | { 21 | private static final Logger log = LoggerFactory.getLogger(Closures.class); 22 | 23 | public static void withMethodHelper(Runnable setup, Runnable closure, Runnable cleanup) 24 | { 25 | setup.run(); 26 | boolean clousureThrownException = true; 27 | try { 28 | closure.run(); 29 | clousureThrownException = false; 30 | } 31 | finally { 32 | try { 33 | cleanup.run(); 34 | } 35 | catch (RuntimeException e) { 36 | if (clousureThrownException) { 37 | log.error("Caught exception during cleanup", e); 38 | } 39 | else { 40 | throw e; 41 | } 42 | } 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /presto-yarn-test/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Set root logger to INFO and add a custom appender. 19 | log4j.rootLogger=TRACE, CONSOLE, FILE 20 | 21 | log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender 22 | log4j.appender.CONSOLE.Target=System.out 23 | log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout 24 | log4j.appender.CONSOLE.layout.conversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p [%c{1}:%L] [%X{test_id}] - %m%n 25 | log4j.appender.CONSOLE.Threshold=INFO 26 | 27 | #File Appender 28 | log4j.appender.FILE=org.apache.log4j.FileAppender 29 | log4j.appender.FILE.File=target/product-tests.log 30 | log4j.appender.FILE.layout=org.apache.log4j.PatternLayout 31 | log4j.appender.FILE.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p [%c{1}:%L] [%X{test_id}] - %m%n 32 | 33 | log4j.category.org.reflections=WARN 34 | log4j.category.org.apache.http=ERROR 35 | -------------------------------------------------------------------------------- /presto-yarn-test/src/main/java/com/teradata/presto/yarn/test/utils/Resources.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | package com.teradata.presto.yarn.test.utils; 15 | 16 | import com.google.common.io.Files; 17 | import org.apache.commons.io.FileUtils; 18 | 19 | import java.io.File; 20 | import java.io.IOException; 21 | import java.net.URL; 22 | import java.nio.file.Path; 23 | import java.nio.file.Paths; 24 | 25 | public class Resources 26 | { 27 | private static final File tempDir = Files.createTempDir(); 28 | 29 | static { 30 | tempDir.deleteOnExit(); 31 | } 32 | 33 | public static Path extractResource(String resourcePath) 34 | { 35 | URL fixScriptResource = Resources.class.getResource(resourcePath); 36 | File temporaryFile = new File(tempDir, Paths.get(resourcePath).getFileName().toString()); 37 | try { 38 | FileUtils.copyURLToFile(fixScriptResource, temporaryFile); 39 | } 40 | catch (IOException e) { 41 | throw new RuntimeException(e); 42 | } 43 | return Paths.get(temporaryFile.toURI()); 44 | } 45 | 46 | private Resources() {} 47 | } 48 | -------------------------------------------------------------------------------- /presto-yarn-package/src/main/resources/appConfig-test.json: -------------------------------------------------------------------------------- 1 | { 2 | "schema": "http://example.org/specification/v2.0.0", 3 | "metadata": { 4 | }, 5 | "global": { 6 | "site.global.app_user": "yarn", 7 | "site.global.user_group": "hadoop", 8 | "site.global.data_dir": "/var/lib/presto/data", 9 | "site.global.config_dir": "/var/lib/presto/etc", 10 | "site.global.app_name": "${dep.pkg.basename}", 11 | "site.global.app_pkg_plugin": "${AGENT_WORK_ROOT}/app/definition/package/plugins/", 12 | "site.global.singlenode": "true", 13 | "site.global.coordinator_host": "${COORDINATOR_HOST}", 14 | "site.global.presto_query_max_memory": "5GB", 15 | "site.global.presto_query_max_memory_per_node": "600MB", 16 | "site.global.presto_server_port": "8080", 17 | 18 | "site.global.catalog": "{'hive': ['connector.name=hive-hadoop2', 'hive.metastore.uri=thrift://${NN_HOST}:9083'], 'tpch': ['connector.name=tpch'], 'jmx': ['connector.name=jmx']}", 19 | "site.global.jvm_args": "['-server', '-Xmx1024M', '-XX:+UseG1GC', '-XX:G1HeapRegionSize=32M', '-XX:+UseGCOverheadLimit', '-XX:+ExplicitGCInvokesConcurrent', '-XX:+HeapDumpOnOutOfMemoryError', '-XX:OnOutOfMemoryError=kill -9 %p', '-DHADOOP_USER_NAME=hdfs', '-Duser.timezone=UTC']", 20 | "site.global.log_properties": "['com.facebook.presto=INFO']", 21 | 22 | "site.global.additional_node_properties": "['plugin.dir=${AGENT_WORK_ROOT}/app/install/${dep.pkg.basename}/plugin']", 23 | 24 | "site.global.plugin": "{'ml': ['presto-ml-${presto.version}.jar']}", 25 | 26 | "application.def": ".slider/package/PRESTO/${app.package.name}.zip", 27 | "java_home": "/usr/java/jdk1.8.0_92" 28 | }, 29 | "components": { 30 | "slider-appmaster": { 31 | "jvm.heapsize": "128M" 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /presto-yarn-test/etc/docker/key: -------------------------------------------------------------------------------- 1 | -----BEGIN RSA PRIVATE KEY----- 2 | MIIEowIBAAKCAQEAnQ2UUCYAq5cqmO+hdKv2UoO6CSBQGhp+bp5B8P3ZArhz/NkQ 3 | ZMWp5haW82mBn5d7ioTGUHQWttqs5WQbUsOFbpQydYI5d2YUBHc6g9c6QFBDjx8C 4 | sJ0cpfkczeRsCoXu6a+VboXl2BPD7jGXSSGJQxFrKGzhywg1NOKaOklmljYKz3gS 5 | 3BLGiBeJ04oceUMTKLKWs7UEnNqBaV/7y0GqYnhWsvxNKYvzkCuZEavYyLU+H6JA 6 | l/03HbKmUesPUSaurfk67/VHEH1vXICFnNvQ/LB9mvZJpcmuOFTdG0mukdta7rad 7 | kVNCHp6xrq5h6RCHA3Ivx7yP3HIDjvRPdV694wIDAQABAoIBAFFTo9axJy2z9OIH 8 | 
TPNOzRpDjkWFjxVFXO1JVFpICWVIQP/RI5UbanR0hCx+GRqcZeQAB3XH968uo9OR 9 | uDhueR0e7tY83ic/G+SIHSwCeu6s4Z8ubsUoRpqH2muf+nakjmhCfT4jZjczXQVu 10 | dqnvDNZHIddoMrBhirPdGAJQ2iez+sm+R57rYxPYuf5HCUISxtdBGLBHuffz6Hok 11 | ydSqmt3gkS3aYvqtA+yKcDxDJsGCx/VX2dXZANOeBm0gcgTl92IW8jOfldWJl4ZU 12 | 7GW655Yc6pnMJNr6FsIKzUUOAnAGeN8xcLMyx3f8zKIJSgmOsvgMvOeMRfnX18TP 13 | O5mjXVkCgYEAy7pT9/1VaDpDsqlUl2d8YYVXykA2LLhgH/ozBjJBSDlQO1Njb7Bf 14 | SYb50hrVUAB1un73QUMZgw1UKdwzlQWdLSbrQdnAu27oTW++XScaB3qJJ4F1++hm 15 | kERGL3CUIcModd7pr6oVcljxsqvSM/P6FTAI7fz/bjVkVWLz8sZSt+UCgYEAxVl2 16 | BBCd1kekteng+hyhfZS4kikZdVNQYiC8yhOQLdYQHlekYpk90DdjLjhq1lLBecZm 17 | xB5Gy7kOM5ja0y8LvY6zevT8W9ukC5rCJw1PFmfUuxe5DiIN2yfYlRdv/wtTYZNP 18 | Qds8fnil8Do03dkFTVUyh7HC5AHRHg7cfXnSMicCgYA2vtRPoKDxyC5m9T/JC8MN 19 | xbJIpCaqr8UM8sQBV0HZsaUQvCDNY5zHemDph9JolCcOvY2d2PUjFVLXiNfHpOGO 20 | v5WadXRoKa59GJkUGSzSc6glmJFm5xWgkOg3WU5FIFdgDU3IqbDnsGUKuUdcNKSw 21 | St8rbMALEofqthON2qNhzQKBgQCmQhP2oLkYSQsuZnauv2gdkClemVllSmcyBDqI 22 | U6rnsquppFRM7KTywXbA/a6kGNv+Z3M0Tf1+q8yXE3Nm7v+JFquGLtZb3NJ7JEnk 23 | 9IVobtj9NaMlkPLzasI/+JrV1wjL73qaFMUGF0ZU9/SJ4cdhuyDyyINT62PtWr/O 24 | xc6biQKBgGT3rab4YTD/L3LKJfCYnojRLAfqJKYeQSjrlSHgoj61OGuH05tJ2fZG 25 | OOyScySWsqUUEqkd0IJ/3uVHPjGrff/CraerTW8xHSrQcImdm2d7p2IP2CMUUC0p 26 | H/jEYfrjCTz9TShBjtxbv1O/85tJRKl6Irtu9J+tLGkByeK/qQ/8 27 | -----END RSA PRIVATE KEY----- 28 | -------------------------------------------------------------------------------- /presto-yarn-test/README.md: -------------------------------------------------------------------------------- 1 | # Presto slider package - product tests 2 | 3 | 4 | ## Pre-requisites 5 | 6 | In order to run product tests you need to have: 7 | 8 | * docker (tested with 1.10) 9 | 10 | * docker-compose (tested with 1.6.2) 11 | 12 | ## Execution 13 | 14 | Execution will spin up a docker cluster for hadoop with yarn. Such cluster consists of 4 docker containers with several jvm processes in it, so it is strongly recommended to run these tests on a highly capable workstation. 15 | 16 | Before you run any product test you need to build test binaries: 17 | 18 | ``` 19 | mvn clean package 20 | ``` 21 | 22 | ### Execution profiles 23 | 24 | There are two profiles which can be used for testing: 25 | - cdh5 - Cloudera distribution of hadoop 26 | - hdp2.3 - Hortonworks distribution of hadoop 27 | 28 | > Note that there are two tests which fails for hdp2.3. They are marked with `hdp2.3_quarantine` test group. 
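Each profile corresponds to a docker-compose environment under `etc/docker/` (`etc/docker/cdh5/docker-compose.yml` and `etc/docker/hdp2.3/docker-compose.yml`). A minimal sketch of bringing one of them up by hand, assuming you run docker-compose from the chosen profile directory so it picks up the right file:

```
cd presto-yarn-test/etc/docker/cdh5    # or etc/docker/hdp2.3
docker-compose up -d
docker-compose ps                      # check that all containers are up
```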
29 | 30 | ### Execution with automation script 31 | 32 | ``` 33 | bin/run_on_docker.sh 34 | ``` 35 | 36 | ## Manual execution 37 | 38 | ``` 39 | cd presto-yarn-test/etc/docker/ 40 | docker-compose up -d 41 | # wait until everything get ready 42 | docker-compose run runner java -jar /workspace/target/presto-yarn-test-1.2-SNAPSHOT-executable.jar --config-local /workspace/etc/docker/tempto-configuration-docker-local.yaml 43 | ``` 44 | 45 | ## Debugging product tests 46 | 47 | ``` 48 | cd presto-yarn-test/etc/docker/ 49 | docker-compose up -d 50 | # wait until everything get ready 51 | docker-compose run -p 5005:5005 runner java -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5005 -jar /workspace/target/presto-yarn-test-1.2-SNAPSHOT-executable.jar --config-local /workspace/etc/docker/tempto-configuration-docker-local.yaml 52 | ``` 53 | -------------------------------------------------------------------------------- /presto-yarn-test/src/main/java/com/teradata/presto/yarn/test/utils/TimeUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package com.teradata.presto.yarn.test.utils; 15 | 16 | import java.util.concurrent.Callable; 17 | import java.util.concurrent.TimeUnit; 18 | 19 | public class TimeUtils 20 | { 21 | public static void retryUntil(Callable condition, long timeoutInMilliseconds) 22 | { 23 | long startTime = System.currentTimeMillis(); 24 | 25 | while (System.currentTimeMillis() - startTime < timeoutInMilliseconds) { 26 | if (call(condition)) { 27 | return; 28 | } 29 | sleep(); 30 | } 31 | 32 | throw new RuntimeException("exceeded timeout"); 33 | } 34 | 35 | private static Boolean call(Callable condition) 36 | { 37 | try { 38 | return condition.call(); 39 | } 40 | catch (Exception e) { 41 | throw new RuntimeException(e); 42 | } 43 | } 44 | 45 | private static void sleep() 46 | { 47 | try { 48 | Thread.sleep(TimeUnit.SECONDS.toMillis(4)); 49 | } 50 | catch (InterruptedException e) { 51 | throw new RuntimeException(e); 52 | } 53 | } 54 | 55 | private TimeUtils() {} 56 | } 57 | -------------------------------------------------------------------------------- /presto-yarn-test/src/main/assembly/presto-app.xml: -------------------------------------------------------------------------------- 1 | 18 | 19 | 20 | 24 | 25 | presto_app_v${project.version} 26 | 27 | zip 28 | 29 | false 30 | 31 | 32 | 33 | 34 | ${package.dir}/${app.package.name} 35 | / 36 | 0755 37 | 0755 38 | false 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /presto-yarn-test/src/test/java/com/teradata/presto/yarn/test/slider/SliderStatusTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 
4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package com.teradata.presto.yarn.test.slider; 15 | 16 | import org.apache.commons.io.IOUtils; 17 | import org.testng.annotations.Test; 18 | 19 | import java.io.IOException; 20 | 21 | import static org.assertj.core.api.Assertions.assertThat; 22 | 23 | public class SliderStatusTest 24 | { 25 | @Test 26 | public void testSliderStatus() 27 | throws IOException 28 | { 29 | String statusJson = IOUtils.toString(getClass().getResourceAsStream("/status_file")); 30 | 31 | SliderStatus sliderStatus = new SliderStatus(statusJson); 32 | 33 | assertThat(sliderStatus.getLiveComponentsHost("UNKNOWN")).isEmpty(); 34 | assertThat(sliderStatus.getLiveContainers("UNKNOWN")).isEqualTo(0); 35 | 36 | assertThat(sliderStatus.getLiveContainers("COORDINATOR")).isEqualTo(1); 37 | assertThat(sliderStatus.getLiveComponentsHost("COORDINATOR")).containsExactly("kogut-vsphere-default-master"); 38 | 39 | assertThat(sliderStatus.getLiveComponentsHost("WORKER")).containsExactly( 40 | "kogut-vsphere-default-slave2", 41 | "kogut-vsphere-default-slave1", 42 | "kogut-vsphere-default-slave3"); 43 | assertThat(sliderStatus.getLiveContainers("WORKER")).isEqualTo(3); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /presto-yarn-test/src/main/assembly/presto-server.xml: -------------------------------------------------------------------------------- 1 | 18 | 19 | 20 | 24 | 25 | presto_server_v${presto.version} 26 | 27 | tar.gz 28 | 29 | true 30 | 31 | 32 | 33 | 34 | ${package.dir}/${presto.server} 35 | / 36 | 0755 37 | 0755 38 | false 39 | 40 | **/ml/presto-ml-${presto.version}.jar 41 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /presto-yarn-docs/src/main/sphinx/installation-yarn-directory-structure.rst: -------------------------------------------------------------------------------- 1 | ============================================================== 2 | Presto Installation Directory Structure on YARN-Based Clusters 3 | ============================================================== 4 | 5 | If you use Slider scripts or use Ambari slider view to set up Presto on 6 | YARN, Presto is going to be installed using the Presto server tarball 7 | (and not the rpm). Installation happens when the YARN application is 8 | launched and you can find the Presto server installation directory under 9 | the ``yarn.nodemanager.local-dirs`` on your YARN nodemanager nodes. If 10 | for example, your ``yarn.nodemanager.local-dirs`` is 11 | ``/mnt/hadoop/nm-local-dirs`` and ``app_user`` is ``yarn``, you can find 12 | Presto is installated under 13 | ``/mnt/hadoop-hdfs/nm-local-dir/usercache/yarn/appcache/application_/container_/app/install/presto-server-``. 14 | The first part of this path (till the container\_id) is called the 15 | AGENT\_WORK\_ROOT in Slider and so in terms of that, Presto is available 16 | under ``AGENT_WORK_ROOT/app/install/presto-server-``. 
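For example, to locate the live installation directory on a NodeManager node
(a sketch, assuming the ``/mnt/hadoop-hdfs/nm-local-dir`` local-dir and the
``yarn`` application user used above)::

    find /mnt/hadoop-hdfs/nm-local-dir/usercache/yarn/appcache \
        -maxdepth 5 -type d -name 'presto-server-*'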
17 | 18 | Normally for a tarball installed Presto the catalog, plugin and lib 19 | directories will be subdirectories under the main presto-server 20 | installation directory. The same case here, the catalog directory is at 21 | ``AGENT_WORK_ROOT/app/install/presto-server-/etc/catalog``, 22 | plugin and lib directories are created under 23 | ``AGENT_WORK_ROOT/app/install/presto-server-/plugin`` and 24 | ``AGENT_WORK_ROOT/app/install/presto-server-/lib`` directories 25 | respectively. The launcher scripts used to start the Presto Server will 26 | be at ``AGENT_WORK_ROOT/app/install/presto-server-/bin`` 27 | directory. 28 | 29 | The Presto logs are available at locations based on your configuration 30 | for data directory. If you have it configured at 31 | ``/var/lib/presto/data`` in ``appConfig.json`` then you will have Presto 32 | logs at ``/var/lib/presto/data/var/log/``. 33 | -------------------------------------------------------------------------------- /presto-yarn-test/src/main/java/com/teradata/presto/yarn/test/slider/SliderStatus.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package com.teradata.presto.yarn.test.slider; 15 | 16 | import com.fasterxml.jackson.databind.JsonNode; 17 | import com.fasterxml.jackson.databind.ObjectMapper; 18 | 19 | import java.io.IOException; 20 | import java.util.Iterator; 21 | import java.util.List; 22 | import java.util.stream.Collectors; 23 | 24 | import static com.teradata.presto.yarn.test.utils.Streams.stream; 25 | 26 | public class SliderStatus 27 | { 28 | private final JsonNode status; 29 | 30 | public SliderStatus(String statusJson) 31 | { 32 | ObjectMapper objectMapper = new ObjectMapper(); 33 | try { 34 | this.status = objectMapper.readTree(statusJson); 35 | } 36 | catch (IOException e) { 37 | throw new RuntimeException(e); 38 | } 39 | } 40 | 41 | public List getLiveComponentsHost(String component) 42 | { 43 | return stream(getLiveComponents(component)). 44 | map(liveComponent -> liveComponent.path("host").asText()) 45 | .collect(Collectors.toList()); 46 | } 47 | 48 | private Iterator getLiveComponents(String component) 49 | { 50 | return getLiveStatus().path(component).elements(); 51 | } 52 | 53 | private JsonNode getLiveStatus() 54 | { 55 | return status.path("status").path("live"); 56 | } 57 | 58 | public int getLiveContainers(String component) 59 | { 60 | return status.path("statistics").path(component).path("containers.live").asInt(); 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /presto-yarn-docs/src/main/sphinx/conf.py: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 
4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | # 14 | 15 | # 16 | # Presto documentation build configuration file 17 | # 18 | # This file is execfile()d with the current directory set to its containing dir. 19 | # 20 | 21 | import os 22 | import sys 23 | import xml.dom.minidom 24 | 25 | try: 26 | sys.dont_write_bytecode = True 27 | except: 28 | pass 29 | 30 | sys.path.insert(0, os.path.abspath('ext')) 31 | 32 | 33 | def child_node(node, name): 34 | for i in node.childNodes: 35 | if (i.nodeType == i.ELEMENT_NODE) and (i.tagName == name): 36 | return i 37 | return None 38 | 39 | 40 | def node_text(node): 41 | return node.childNodes[0].data 42 | 43 | 44 | def maven_version(pom): 45 | dom = xml.dom.minidom.parse(pom) 46 | project = dom.childNodes[1] 47 | 48 | version = child_node(project, 'version') 49 | if version: 50 | return node_text(version) 51 | 52 | parent = child_node(project, 'parent') 53 | version = child_node(parent, 'version') 54 | return node_text(version) 55 | 56 | 57 | def get_version(): 58 | version = os.environ.get('PRESTO_VERSION', '').strip() 59 | return version or maven_version('../../../pom.xml') 60 | 61 | # -- General configuration ----------------------------------------------------- 62 | 63 | needs_sphinx = '1.1' 64 | 65 | templates_path = ['_templates'] 66 | 67 | source_suffix = '.rst' 68 | 69 | master_doc = 'index' 70 | 71 | project = 'Presto YARN' 72 | 73 | version = get_version() 74 | release = version 75 | 76 | pygments_style = 'sphinx' 77 | 78 | 79 | # -- Options for HTML output --------------------------------------------------- 80 | 81 | html_theme = "classic" 82 | -------------------------------------------------------------------------------- /presto-yarn-docs/src/main/sphinx/developers.rst: -------------------------------------------------------------------------------- 1 | Developers 2 | ========== 3 | 4 | Create Presto App Package 5 | ------------------------- 6 | 7 | 8 | First step is to build the ``presto-yarn-package--.zip`` package to deploy Presto on YARN. 9 | 10 | Run ```mvn clean package``` and the presto app package will be packaged at ``presto-yarn-package/target/presto-yarn-package--.zip``. 11 | To specify a specific version of Presto run ```mvn clean package -Dpresto.version=``` 12 | 13 | This .zip will have ``presto-server-.tar.gz`` from Presto under ``package/files/``. The Presto installed will use the configuration templates under ``package/templates``. 
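For example, building against a specific Presto release with the bundled Maven
wrapper and checking the produced package (a sketch; substitute the Presto
version you need)::

    ./mvnw clean package -Dpresto.version=<version>
    ls presto-yarn-package/target/presto-yarn-package-*.zip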
14 | 15 | The app package built should look something like: 16 | 17 | :: 18 | 19 | unzip -l "$@" ../presto-yarn-package-1.0.0-SNAPSHOT-0.130.zip 20 | 21 | Archive: ../presto-yarn-package-1.0.0-SNAPSHOT-0.130.zip 22 | Length Date Time Name 23 | --------- ---------- ----- ---- 24 | 0 2015-11-30 22:57 package/ 25 | 0 2015-11-30 22:57 package/files/ 26 | 411459833 2015-11-30 20:26 package/files/presto-server-0.130.tar.gz 27 | 1210 2015-11-30 22:57 appConfig-default.json 28 | 606 2015-11-30 22:57 resources-default.json 29 | 0 2015-11-30 20:26 package/scripts/ 30 | 0 2015-11-30 21:22 package/plugins/ 31 | 0 2015-11-30 20:26 package/templates/ 32 | 897 2015-11-30 22:57 package/scripts/presto_coordinator.py 33 | 892 2015-11-30 22:57 package/scripts/presto_worker.py 34 | 2801 2015-11-30 22:57 package/scripts/configure.py 35 | 787 2015-11-30 22:57 package/scripts/__init__.py 36 | 2285 2015-11-30 22:57 package/scripts/params.py 37 | 1944 2015-11-30 22:57 package/scripts/presto_server.py 38 | 35 2015-11-30 22:57 package/plugins/README.txt 39 | 948 2015-11-30 22:57 package/files/README.txt 40 | 236 2015-11-30 22:57 package/templates/config.properties-WORKER.j2 41 | 69 2015-11-30 22:57 package/templates/node.properties.j2 42 | 304 2015-11-30 22:57 package/templates/config.properties-COORDINATOR.j2 43 | 2020 2015-11-30 22:57 metainfo.xml 44 | --------- ------- 45 | 411474867 20 files 46 | -------------------------------------------------------------------------------- /presto-yarn-package/src/main/slider/package/scripts/presto_server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Licensed to the Apache Software Foundation (ASF) under one 4 | or more contributor license agreements. See the NOTICE file 5 | distributed with this work for additional information 6 | regarding copyright ownership. The ASF licenses this file 7 | to you under the Apache License, Version 2.0 (the 8 | "License"); you may not use this file except in compliance 9 | with the License. You may obtain a copy of the License at 10 | 11 | http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | Unless required by applicable law or agreed to in writing, software 14 | distributed under the License is distributed on an "AS IS" BASIS, 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | See the License for the specific language governing permissions and 17 | limitations under the License. 
18 | 19 | """ 20 | 21 | from resource_management import * 22 | from configure import set_configuration 23 | import os 24 | 25 | class PrestoServer(Script): 26 | def __init__(self, component): 27 | self.component = component 28 | 29 | def install(self, env): 30 | self.install_packages(env) 31 | pass 32 | 33 | def configure(self): 34 | set_configuration(self.component) 35 | 36 | def start(self, env): 37 | import params 38 | 39 | env.set_params(params) 40 | 41 | self.configure() 42 | os.symlink(format('{conf_dir}'), os.path.join(format('{presto_root}'), 'etc')) 43 | process_cmd = format("PATH={java8_home}/bin:$PATH {presto_root}/bin/launcher run --node-config {conf_dir}/node.properties --jvm-config {conf_dir}/jvm.config --config {conf_dir}/config.properties >> {log_file} 2>&1") 44 | 45 | Execute(process_cmd, 46 | logoutput=True, 47 | wait_for_finish=False, 48 | pid_file=params.pid_file, 49 | poll_after=3 50 | ) 51 | 52 | def stop(self, env): 53 | # Slider doesnt yet support stopping the actual app (PrestoServer) process 54 | # but only stopping the yarn application. Slider is not wired up yet to call this function. 55 | pass 56 | 57 | def status(self, env): 58 | import params 59 | 60 | env.set_params(params) 61 | check_process_status(params.pid_file) 62 | 63 | 64 | if __name__ == "__main__": 65 | self.fail_with_error('Component name missing') 66 | -------------------------------------------------------------------------------- /presto-yarn-test/src/main/resources/conf/slider/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | # log4j configuration used during build and unit tests 18 | 19 | log4j.rootLogger=INFO,stdout 20 | log4j.threshhold=ALL 21 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 22 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 23 | 24 | # log layout skips stack-trace creation operations by avoiding line numbers and method 25 | #log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} - %m%n 26 | 27 | # debug edition is much more expensive 28 | log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} (%F:%M(%L)) - %m%n 29 | 30 | 31 | log4j.appender.subprocess=org.apache.log4j.ConsoleAppender 32 | log4j.appender.subprocess.layout=org.apache.log4j.PatternLayout 33 | log4j.appender.subprocess.layout.ConversionPattern=[%c{1}]: %m%n 34 | 35 | log4j.logger.org.apache.slider=WARN 36 | 37 | # uncomment to debug service lifecycle issues 38 | #log4j.logger.org.apache.hadoop.yarn.service.launcher=DEBUG 39 | #log4j.logger.org.apache.hadoop.yarn.service=DEBUG 40 | 41 | # uncomment for YARN operations 42 | #log4j.logger.org.apache.hadoop.yarn.client=DEBUG 43 | 44 | # uncomment this to debug security problems 45 | #log4j.logger.org.apache.hadoop.security=DEBUG 46 | 47 | #crank back on some noise 48 | log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR 49 | log4j.logger.org.apache.hadoop.hdfs=WARN 50 | 51 | 52 | log4j.logger.org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor=WARN 53 | log4j.logger.org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl=WARN 54 | log4j.logger.org.apache.zookeeper=WARN 55 | 56 | 57 | -------------------------------------------------------------------------------- /presto-yarn-test/src/main/resources/conf/slider/slider-client-openstack.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 19 | 23 | 24 | 25 | 26 | yarn.resourcemanager.address 27 | master:8050 28 | 29 | 30 | 31 | yarn.resourcemanager.scheduler.address 32 | master:8030 33 | 34 | 35 | fs.defaultFS 36 | hdfs://master/ 37 | 38 | 39 | 40 | yarn.application.classpath 41 | 42 | /etc/hadoop/conf,/usr/lib/hadoop/*,/usr/lib/hadoop/lib/*,/usr/lib/hadoop-hdfs/*,/usr/lib/hadoop-hdfs/lib/*,/usr/lib/hadoop-yarn/*,/usr/lib/hadoop-yarn/lib/*,/usr/lib/hadoop-mapreduce/*,/usr/lib/hadoop-mapreduce/lib/* 43 | 44 | 45 | 46 | 47 | slider.test.agent.enabled 48 | true 49 | 50 | 51 | 52 | zk.home 53 | /usr/lib/zookeeper 54 | Zookeeper home dir on target systems 55 | 56 | 57 | 58 | slider.zookeeper.quorum 59 | master:5181 60 | 61 | 62 | 63 | hadoop.home 64 | /usr/lib/hadoop 65 | Hadoop home dir on target systems 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /presto-yarn-test/src/main/resources/conf/slider/slider-client.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 19 | 23 | 24 | 25 | 26 | yarn.resourcemanager.address 27 | hadoop-master:8032 28 | 29 | 30 | 31 | yarn.resourcemanager.scheduler.address 32 | hadoop-master:8030 33 | 34 | 35 | fs.defaultFS 36 | hdfs://hadoop-master/ 37 | 38 | 39 | 40 | yarn.application.classpath 41 | 42 | /etc/hadoop/conf,/usr/lib/hadoop/*,/usr/lib/hadoop/lib/*,/usr/lib/hadoop-hdfs/*,/usr/lib/hadoop-hdfs/lib/*,/usr/lib/hadoop-yarn/*,/usr/lib/hadoop-yarn/lib/*,/usr/lib/hadoop-mapreduce/*,/usr/lib/hadoop-mapreduce/lib/* 43 | 44 | 45 | 46 | 47 | slider.test.agent.enabled 48 | true 49 | 50 | 51 | 52 | zk.home 53 | /usr/lib/zookeeper 54 | Zookeeper home dir on target 
systems 55 | 56 | 57 | 58 | slider.zookeeper.quorum 59 | hadoop-master:2181 60 | 61 | 62 | 63 | hadoop.home 64 | /usr/lib/hadoop 65 | Hadoop home dir on target systems 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /presto-yarn-test/src/main/resources/conf/yarn/capacity-scheduler.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | yarn.scheduler.capacity.maximum-am-resource-percent 6 | 0.5 7 | 8 | 9 | yarn.scheduler.capacity.maximum-applications 10 | 10000 11 | 12 | 13 | yarn.scheduler.capacity.node-locality-delay 14 | -1 15 | 16 | 17 | 18 | 19 | yarn.scheduler.capacity.root.queues 20 | default 21 | 22 | 23 | yarn.scheduler.capacity.root.default.acl_administer_queue 24 | * 25 | 26 | 27 | yarn.scheduler.capacity.root.default.acl_submit_applications 28 | * 29 | 30 | 31 | yarn.scheduler.capacity.root.default.capacity 32 | 100 33 | 34 | 35 | yarn.scheduler.capacity.root.default.maximum-capacity 36 | 100 37 | 38 | 39 | yarn.scheduler.capacity.root.default.state 40 | RUNNING 41 | 42 | 43 | yarn.scheduler.capacity.root.default.user-limit-factor 44 | 1 45 | 46 | 47 | 48 | 49 | yarn.scheduler.capacity.root.default.accessible-node-labels 50 | coordinator,worker 51 | 52 | 53 | yarn.scheduler.capacity.root.default.default-node-label-expression 54 | worker 55 | 56 | 57 | yarn.scheduler.capacity.root.accessible-node-labels.coordinator.capacity 58 | 100 59 | 60 | 61 | yarn.scheduler.capacity.root.default.accessible-node-labels.coordinator.capacity 62 | 100 63 | 64 | 65 | yarn.scheduler.capacity.root.accessible-node-labels.worker.capacity 66 | 100 67 | 68 | 69 | yarn.scheduler.capacity.root.default.accessible-node-labels.worker.capacity 70 | 100 71 | 72 | 73 | -------------------------------------------------------------------------------- /presto-yarn-test/src/main/java/com/teradata/presto/yarn/test/utils/SimpleJdbcQueryExecutor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package com.teradata.presto.yarn.test.utils; 15 | 16 | import com.teradata.tempto.query.QueryExecutionException; 17 | import com.teradata.tempto.query.QueryExecutor; 18 | import com.teradata.tempto.query.QueryResult; 19 | import com.teradata.tempto.query.QueryType; 20 | 21 | import java.sql.Connection; 22 | import java.sql.SQLException; 23 | import java.sql.Statement; 24 | 25 | import static com.google.common.base.Preconditions.checkArgument; 26 | import static java.util.Objects.requireNonNull; 27 | 28 | public class SimpleJdbcQueryExecutor 29 | implements QueryExecutor 30 | { 31 | private final Connection connection; 32 | 33 | public SimpleJdbcQueryExecutor(Connection connection) 34 | { 35 | this.connection = requireNonNull(connection, "connection is null"); 36 | } 37 | 38 | public QueryResult executeQuery(String sql, QueryType ignored, QueryParam... 
params) 39 | throws QueryExecutionException 40 | { 41 | return executeQuery(sql, params); 42 | } 43 | 44 | public QueryResult executeQuery(String sql, QueryParam... params) 45 | throws QueryExecutionException 46 | { 47 | checkArgument(params.length == 0, "Query parameters are not supported."); 48 | try (Statement statement = connection.createStatement()) { 49 | return QueryResult.forResultSet(statement.executeQuery(sql)); 50 | } 51 | catch (SQLException e) { 52 | throw new QueryExecutionException(e); 53 | } 54 | } 55 | 56 | public Connection getConnection() 57 | { 58 | return connection; 59 | } 60 | 61 | public void close() 62 | { 63 | try { 64 | connection.close(); 65 | } 66 | catch (SQLException e) { 67 | throw new RuntimeException(e); 68 | } 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /presto-yarn-package/src/main/assembly/presto.xml: -------------------------------------------------------------------------------- 1 | 18 | 19 | 20 | 24 | 25 | presto_server_v${presto.version} 26 | 27 | zip 28 | dir 29 | 30 | false 31 | 32 | 33 | 34 | ${dep.pkg} 35 | package/files 36 | false 37 | 0755 38 | 39 | 40 | ${main.resources.dir}/appConfig.json 41 | / 42 | true 43 | appConfig-default.json 44 | 45 | 46 | ${main.resources.dir}/resources-multinode.json 47 | / 48 | true 49 | resources-default.json 50 | 51 | 52 | 53 | 54 | 55 | ${main.dir.filtered} 56 | / 57 | 0755 58 | 0755 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Presto-yarn [![Build Status](https://travis-ci.org/prestodb/presto-yarn.svg?branch=master)](https://travis-ci.org/prestodb/presto-yarn) 2 | 3 | This project contains the code needed to integrate 4 | [Presto](https://prestodb.io) with Apache Hadoop YARN using 5 | [Apache Slider](https://slider.incubator.apache.org). 6 | 7 | Presto on YARN can be set up either manually using Apache Slider or via Ambari Slider Views if you are planning to use the HDP distribution. 8 | 9 | The full documentation can be found [here](https://prestodb.io/presto-yarn/). 10 | 11 | ## Building the project 12 | 13 | Run `mvn clean package` and the Presto app package will be created at `presto-yarn-package/target/presto-yarn-package-<version>-<presto.version>.zip`. 14 | To build against a specific version of Presto, run `mvn clean package -Dpresto.version=<version>`. 15 | 16 | This .zip will contain `presto-server-<presto.version>.tar.gz` from Presto under `package/files/`. The installed Presto will use the configuration templates under `package/templates`.
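Once built, the package can be registered with Slider before launching the application. The command below is a sketch based on the Slider CLI calls used by the test harness in this repository: the application package name `PRESTO` comes from the tests, the zip path depends on the project and Presto versions you built with, and it assumes the `slider` script is on your `PATH`.

::

    slider package --install --name PRESTO \
        --package presto-yarn-package/target/presto-yarn-package-<version>-<presto.version>.zip \
        --replacepkg
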
17 | 18 | The app package built should look something like: 19 | 20 | :: 21 | 22 | unzip -l "$@" ../presto-yarn-package-1.0.0-SNAPSHOT-0.130.zip 23 | 24 | Archive: ../presto-yarn-package-1.0.0-SNAPSHOT-0.130.zip 25 | Length Date Time Name 26 | --------- ---------- ----- ---- 27 | 0 2015-11-30 22:57 package/ 28 | 0 2015-11-30 22:57 package/files/ 29 | 411459833 2015-11-30 20:26 package/files/presto-server-0.130.tar.gz 30 | 1210 2015-11-30 22:57 appConfig-default.json 31 | 606 2015-11-30 22:57 resources-default.json 32 | 0 2015-11-30 20:26 package/scripts/ 33 | 0 2015-11-30 21:22 package/plugins/ 34 | 0 2015-11-30 20:26 package/templates/ 35 | 897 2015-11-30 22:57 package/scripts/presto_coordinator.py 36 | 892 2015-11-30 22:57 package/scripts/presto_worker.py 37 | 2801 2015-11-30 22:57 package/scripts/configure.py 38 | 787 2015-11-30 22:57 package/scripts/__init__.py 39 | 2285 2015-11-30 22:57 package/scripts/params.py 40 | 1944 2015-11-30 22:57 package/scripts/presto_server.py 41 | 35 2015-11-30 22:57 package/plugins/README.txt 42 | 948 2015-11-30 22:57 package/files/README.txt 43 | 236 2015-11-30 22:57 package/templates/config.properties-WORKER.j2 44 | 69 2015-11-30 22:57 package/templates/node.properties.j2 45 | 304 2015-11-30 22:57 package/templates/config.properties-COORDINATOR.j2 46 | 2020 2015-11-30 22:57 metainfo.xml 47 | --------- ------- 48 | 411474867 20 files 49 | -------------------------------------------------------------------------------- /presto-yarn-package/src/main/slider/metainfo.xml: -------------------------------------------------------------------------------- 1 | 2 | 18 | 19 | 20 | 2.0 21 | 22 | PRESTO 23 | Presto DB 24 | ${presto.version} 25 | 26 | 27 | 28 | Presto 29 | 30 | 31 | coordinator_address 32 | ${COORDINATOR_HOST}:${site.global.presto_server_port} 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | COORDINATOR 41 | MASTER 42 | Presto-coordinator_address 43 | 1 44 | 1 45 | 46 | 47 | PYTHON 48 | 49 | 50 | 51 | WORKER 52 | SLAVE 53 | 54 | 55 | PYTHON 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | any 64 | 65 | 66 | tarball 67 | files/${dep.pkg.name} 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /presto-yarn-test/bin/run_on_docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | # http://stackoverflow.com/questions/3572030/bash-script-absolute-path-with-osx 4 | function absolutepath() { 5 | [[ $1 = /* ]] && echo "$1" || echo "$PWD/${1#./}" 6 | } 7 | 8 | function retry() { 9 | END=$(($(date +%s) + 600)) 10 | 11 | while (( $(date +%s) < $END )); do 12 | set +e 13 | "$@" 14 | EXIT_CODE=$? 
15 | set -e 16 | 17 | if [[ ${EXIT_CODE} == 0 ]]; then 18 | break 19 | fi 20 | sleep 5 21 | done 22 | 23 | return ${EXIT_CODE} 24 | } 25 | 26 | function hadoop_master_container(){ 27 | compose ps -q hadoop-master 28 | } 29 | 30 | function check_hive() { 31 | # TODO use docker-compose 32 | docker exec $(hadoop_master_container) hive -e 'show tables' 33 | } 34 | 35 | function run_product_tests() { 36 | PRESTO_YARN_VERSION=$(mvn -q -Dexec.executable="echo" -Dexec.args='${project.version}' --non-recursive org.codehaus.mojo:exec-maven-plugin:1.3.1:exec) 37 | compose run runner \ 38 | java -jar /workspace/target/presto-yarn-test-$PRESTO_YARN_VERSION-executable.jar \ 39 | --config-local /workspace/etc/docker/tempto-configuration-docker-local.yaml \ 40 | $* 41 | } 42 | 43 | # docker-compose down is not good enough because it's ignores services created with "run" command 44 | function stop_container() { 45 | SERVICE_NAME=$1 46 | CONTAINER_IDS=$(compose ps -q ${SERVICE_NAME}) 47 | for CONTAINER_ID in $CONTAINER_IDS; do 48 | echo "Stopping and removing ${SERVICE_NAME} with id ${CONTAINER_ID}" 49 | docker stop ${CONTAINER_ID} 50 | docker rm ${CONTAINER_ID} 51 | done 52 | } 53 | 54 | function cleanup() { 55 | OLD_ENVIRONMENT=$ENVIRONMENT 56 | for ENVIRONMENT in hdp2.3 cdh5; do 57 | # stop application runner containers started with "run" 58 | stop_container runner 59 | 60 | # stop containers started with "up" 61 | compose down || true 62 | done 63 | ENVIRONMENT=$OLD_ENVIRONMENT 64 | 65 | # wait for docker logs termination 66 | wait 67 | } 68 | 69 | function compose() { 70 | docker-compose -f ${SCRIPT_DIR}/../etc/docker/$ENVIRONMENT/docker-compose.yml $* 71 | } 72 | 73 | SCRIPT_DIR=$(dirname $(absolutepath "$0")) 74 | ENVIRONMENT=$1 75 | 76 | if [[ "$ENVIRONMENT" != "cdh5" && "$ENVIRONMENT" != "hdp2.3" ]]; then 77 | echo "Usage: run_on_docker.sh " 78 | exit 1 79 | fi 80 | 81 | shift 1 82 | 83 | # check docker and docker compose installation 84 | docker-compose version 85 | docker version 86 | 87 | cleanup 88 | 89 | compose pull 90 | compose build 91 | 92 | compose up -d 93 | compose logs --no-color hadoop-master hadoop-slave1 hadoop-slave2 hadoop-slave3 & 94 | 95 | retry check_hive 96 | 97 | # run product tests 98 | set +e 99 | run_product_tests "$*" 100 | EXIT_CODE=$? 101 | set -x 102 | 103 | cleanup 104 | 105 | exit ${EXIT_CODE} 106 | -------------------------------------------------------------------------------- /presto-yarn-docs/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 15 | 16 | 17 | 18 | 4.0.0 19 | 20 | 21 | com.teradata.presto-yarn 22 | presto-yarn 23 | 1.6-SNAPSHOT 24 | ../pom.xml 25 | 26 | 27 | presto-yarn-docs 28 | presto-yarn-docs 29 | pom 30 | 31 | 32 | 33 | 34 | 35 | com.mycila 36 | license-maven-plugin 37 | 38 | 39 | **/*.conf 40 | **/*.css_t 41 | **/*.css 42 | **/*.js 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | kr.motd.maven 52 | sphinx-maven-plugin 53 | 54 | true 55 | true 56 | true 57 | ${project.basedir}/src/main/sphinx 58 | ${project.build.directory}/html 59 | 60 | 61 | 62 | package 63 | 64 | generate 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /presto-yarn-package/src/main/slider/package/scripts/params.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Licensed to the Apache Software Foundation (ASF) under one 4 | or more contributor license agreements. 
See the NOTICE file 5 | distributed with this work for additional information 6 | regarding copyright ownership. The ASF licenses this file 7 | to you under the Apache License, Version 2.0 (the 8 | "License"); you may not use this file except in compliance 9 | with the License. You may obtain a copy of the License at 10 | 11 | http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | Unless required by applicable law or agreed to in writing, software 14 | distributed under the License is distributed on an "AS IS" BASIS, 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | See the License for the specific language governing permissions and 17 | limitations under the License. 18 | 19 | """ 20 | 21 | from resource_management import * 22 | import uuid 23 | 24 | # server configurations 25 | config = Script.get_config() 26 | 27 | java8_home = config['hostLevelParams']['java_home'] 28 | 29 | app_root = config['configurations']['global']['app_root'] 30 | app_name = config['configurations']['global']['app_name'] 31 | 32 | presto_root = format("{app_root}/{app_name}") 33 | conf_dir = default('/configurations/global/config_dir', format("{presto_root}/etc")) 34 | 35 | catalog_dir = format("{conf_dir}/catalog") 36 | presto_plugin_dir = format("{presto_root}/plugin") 37 | source_plugin_dir = config['configurations']['global']['app_pkg_plugin'] 38 | addon_plugins = default('/configurations/global/plugin', '') 39 | 40 | presto_user = config['configurations']['global']['app_user'] 41 | user_group = config['configurations']['global']['user_group'] 42 | 43 | data_dir = config['configurations']['global']['data_dir'] 44 | pid_dir = format("{data_dir}/var/run") 45 | pid_file = format("{pid_dir}/slider_launcher.pid") 46 | log_dir = format("{data_dir}/var/log") 47 | log_file = format("{log_dir}/server.log") 48 | 49 | singlenode = config['configurations']['global']['singlenode'] 50 | coordinator_host = config['configurations']['global']['coordinator_host'] 51 | presto_query_max_memory = config['configurations']['global']['presto_query_max_memory'] 52 | presto_query_max_memory_per_node = config['configurations']['global']['presto_query_max_memory_per_node'] 53 | presto_query_max_total_memory_per_node = config['configurations']['global']['presto_query_max_total_memory_per_node'] 54 | presto_server_port = config['configurations']['global']['presto_server_port'] 55 | jvm_args = default('/configurations/global/jvm_args', '') 56 | log_properties = default('/configurations/global/log_properties', '') 57 | event_listener_properties = default('/configurations/global/event_listener_properties', '') 58 | 59 | node_id = uuid.uuid1() 60 | 61 | catalog_properties = default('/configurations/global/catalog', '') 62 | additional_config_properties=default('/configurations/global/additional_config_properties', '') 63 | additional_node_properties=default('/configurations/global/additional_node_properties', '') 64 | -------------------------------------------------------------------------------- /presto-yarn-test/src/main/java/com/teradata/presto/yarn/test/fulfillment/SliderClusterFulfiller.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 
4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package com.teradata.presto.yarn.test.fulfillment; 15 | 16 | import com.google.common.collect.ImmutableSet; 17 | import com.google.inject.Inject; 18 | import com.teradata.presto.yarn.test.slider.Slider; 19 | import com.teradata.tempto.Requirement; 20 | import com.teradata.tempto.context.State; 21 | import com.teradata.tempto.fulfillment.RequirementFulfiller; 22 | import com.teradata.tempto.fulfillment.TestStatus; 23 | import com.teradata.tempto.ssh.SshClient; 24 | import org.apache.commons.io.filefilter.WildcardFileFilter; 25 | import org.slf4j.Logger; 26 | import org.slf4j.LoggerFactory; 27 | 28 | import javax.inject.Named; 29 | 30 | import java.io.File; 31 | import java.io.FileFilter; 32 | import java.nio.file.Path; 33 | import java.nio.file.Paths; 34 | import java.util.Set; 35 | 36 | @RequirementFulfiller.AutoSuiteLevelFulfiller(priority = 2) 37 | public class SliderClusterFulfiller 38 | implements RequirementFulfiller 39 | { 40 | private static final Logger log = LoggerFactory.getLogger(SliderClusterFulfiller.class); 41 | 42 | public static final String PACKAGE_NAME = "PRESTO"; 43 | @Inject 44 | @Named("tests.app_package.path") 45 | private String prestoPackagePath; 46 | 47 | @Inject 48 | @Named("tests.slider.binary") 49 | private String sliderBinaryPath; 50 | private final Slider slider; 51 | 52 | @Inject 53 | public SliderClusterFulfiller(@Named("yarn") SshClient yarnSshClient) 54 | { 55 | this.slider = new Slider(yarnSshClient); 56 | } 57 | 58 | @Override 59 | public Set fulfill(Set requirements) 60 | { 61 | log.info("fulfilling slider cluster"); 62 | slider.install(Paths.get(sliderBinaryPath)); 63 | 64 | Path presto_app_package = Paths.get(getPrestoAppPackagePath()); 65 | log.info("Using Presto package from: " + presto_app_package); 66 | slider.installLocalPackage(presto_app_package, PACKAGE_NAME); 67 | 68 | return ImmutableSet.of(slider); 69 | } 70 | 71 | private String getPrestoAppPackagePath() 72 | { 73 | File dir = new File(prestoPackagePath); 74 | FileFilter fileFilter = new WildcardFileFilter("presto-yarn-package*.zip"); 75 | File[] files = dir.listFiles(fileFilter); 76 | return files[0].getPath(); 77 | } 78 | 79 | @Override 80 | public void cleanup(TestStatus testStatus) 81 | { 82 | slider.uninstallPackage(PACKAGE_NAME); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /presto-yarn-docs/src/main/sphinx/installation-yarn-debugging-logging.rst: -------------------------------------------------------------------------------- 1 | ============================================== 2 | Debugging and Logging for a YARN-Based Cluster 3 | ============================================== 4 | 5 | Once the YARN application is launched, you can monitor the status at 6 | YARN ResourceManager WebUI. 7 | 8 | A successfully launched application will be in ``RUNNING`` state. 
The 9 | YARN ApplicationMaster UI (e.g. 10 | ``http://master:8088/cluster/app/application_``) will show the 11 | slider-appmaster, COORDINATOR and WORKER components and the 12 | associated containers running based on your configuration. You can 13 | also use the Slider CLI script to :ref:`check-status-label`. 14 | 15 | If Presto fails, Slider retries launching it within the same YARN application. 16 | The YARN application will still be in ``RUNNING`` state during this retry phase. 17 | Slider ultimately kills the job after 5 unsuccessful retries. 18 | 19 | If you have used :ref:`using-yarn-label`, your COORDINATOR and WORKER 20 | components will be running on the nodes that were labeled. 21 | 22 | If you have not used labels, then you can check the status either at 23 | the YARN ResourceManager (e.g. 24 | ``http://master:8088/cluster/app/application_``) or you can use 25 | :ref:`check-status-label` to get the "live" containers, and thus the 26 | nodes hosting the Presto components. 27 | 28 | If Presto is up and running, then a ``pgrep`` of PrestoServer on your 29 | NodeManager nodes will give you the process details. This should also 30 | show the directory Presto is installed in and the configuration files 31 | used by Presto. 32 | 33 | If the YARN application has failed to launch Presto, then you may want to 34 | take a look at the Slider logs created under the YARN log directory for the 35 | corresponding application. It is recommended that log aggregation of YARN application log 36 | files be enabled in YARN, using the 37 | ``yarn.log-aggregation-enable`` property in your ``yarn-site.xml``. 38 | The Slider logs created during the launch of Presto-YARN will then be 39 | available locally on your NodeManager nodes (where the slider-appmaster 40 | and the Presto COORDINATOR/WORKER components are deployed) under the 41 | container logs directory, e.g. 42 | ``/var/log/hadoop-yarn/application_/container_/``. For each 43 | retry attempted by Slider to launch Presto a new container will be 44 | launched, so you will find a new ``container_`` directory. 45 | You can look for errors in the ``errors_*.txt`` files there; there is 46 | also a ``slider-agent.log`` file which records the Slider 47 | application lifetime details. 48 | In addition, every Slider application owner has the flexibility to 49 | set the include and exclude patterns of file names that they intend 50 | to aggregate, by adding the following properties to their 51 | ``resources.json``. For example: 52 | 53 | :: 54 | 55 | "global": { 56 | "yarn.log.include.patterns": "*", 57 | "yarn.log.exclude.patterns": "*.*out" 58 | } 59 | 60 | See 61 | http://slider.incubator.apache.org/docs/configuration/resources.html#logagg 62 | for details. 63 | 64 | If there are no errors in ``slider.log``, then you may want to look at the Presto 65 | logs for any errors. Presto logs are available under the standard Presto data 66 | directory location. By default this is the ``/var/lib/presto/data/var/log`` 67 | directory, where ``/var/lib/presto/data`` is the default data 68 | directory ``site.global.data_dir`` configured in the Slider ``appConfig.json``. 69 | You can find both ``server.log`` and ``http-request.log`` files here. Please note that 70 | log rotation of these Presto log files has to be manually 71 | enabled (e.g. using 72 | http://linuxcommand.org/man_pages/logrotate8.html). 73 | 74 | The Presto configuration files will be in the ``/var/lib/presto/etc`` 75 | directory if you are using the default ``appConfig.json`` property 76 | ``site.global.config_dir``.
The configuration files here will be 77 | generated by Slider and overwritten for every application restart. 78 | These files should NOT be modified manually. 79 | -------------------------------------------------------------------------------- /presto-yarn-package/src/main/slider/package/scripts/configure.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Licensed to the Apache Software Foundation (ASF) under one 4 | or more contributor license agreements. See the NOTICE file 5 | distributed with this work for additional information 6 | regarding copyright ownership. The ASF licenses this file 7 | to you under the Apache License, Version 2.0 (the 8 | "License"); you may not use this file except in compliance 9 | with the License. You may obtain a copy of the License at 10 | 11 | http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | Unless required by applicable law or agreed to in writing, software 14 | distributed under the License is distributed on an "AS IS" BASIS, 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | See the License for the specific language governing permissions and 17 | limitations under the License. 18 | 19 | """ 20 | from resource_management import * 21 | import ast, os, shutil 22 | 23 | def set_configuration(component=None): 24 | """ 25 | Set configuration based on the component role. The jinja2 templates are populated from params.py 26 | :param component: COORDINATOR or WORKER 27 | :return: 28 | """ 29 | import params 30 | 31 | if (os.path.exists(format("{conf_dir}"))): 32 | shutil.rmtree(format("{conf_dir}")) 33 | 34 | _directory(params.conf_dir, params) 35 | _directory(params.catalog_dir, params) 36 | _directory(params.pid_dir, params) 37 | _directory(params.log_dir, params) 38 | 39 | _template_config("{params.conf_dir}/config.properties", params, component) 40 | _template_config("{params.conf_dir}/node.properties", params) 41 | if (component == "COORDINATOR"): 42 | _template_config("{params.conf_dir}/resource-groups.properties", params) 43 | _template_config("{params.conf_dir}/resource_groups.json", params) 44 | with open(format("{params.conf_dir}/README.txt"), 'a') as fw: 45 | warning_string = 'DO NOT manually modify the configuration here. This is deployed via YARN-Slider and overwritten every time the application starts.' 
46 | fw.write("%s" % warning_string) 47 | 48 | if params.jvm_args: 49 | _parse_array_and_write(params.jvm_args, format("{params.conf_dir}/jvm.config")) 50 | 51 | if params.log_properties: 52 | _parse_array_and_write(params.log_properties, format("{params.conf_dir}/log.properties")) 53 | 54 | if params.event_listener_properties: 55 | _parse_array_and_write(params.event_listener_properties, format("{params.conf_dir}/event-listener.properties")) 56 | 57 | if params.additional_config_properties: 58 | _parse_array_and_write(params.additional_config_properties, format("{params.conf_dir}/config.properties")) 59 | 60 | if params.additional_node_properties: 61 | _parse_array_and_write(params.additional_node_properties, format("{params.conf_dir}/node.properties")) 62 | 63 | if params.catalog_properties: 64 | catalog_dict = ast.literal_eval(params.catalog_properties) 65 | for key, value in catalog_dict.iteritems(): 66 | _store_configuration(value, format("{params.catalog_dir}/{key}.properties")) 67 | 68 | if params.addon_plugins: 69 | plugins_dict = ast.literal_eval(params.addon_plugins) 70 | for key, value in plugins_dict.iteritems(): 71 | plugin_dir = os.path.join(params.presto_plugin_dir, key) 72 | if not os.path.exists(plugin_dir): 73 | os.makedirs(plugin_dir) 74 | for jar in value: 75 | shutil.copy2(os.path.join(params.source_plugin_dir, jar), plugin_dir) 76 | 77 | 78 | def _parse_array_and_write(parameters, path): 79 | arg_list = ast.literal_eval(parameters) 80 | _store_configuration(arg_list, path) 81 | 82 | def _store_configuration(parameters, path): 83 | with open(path, 'a') as fw: 84 | for parameter in parameters: 85 | fw.write("%s\n" % parameter) 86 | 87 | def _directory(path, params): 88 | Directory(path, 89 | owner=params.presto_user, 90 | group=params.user_group, 91 | recursive=True 92 | ) 93 | 94 | 95 | def _template_config(path, params, template_tag=None): 96 | TemplateConfig(format(path), 97 | owner=params.presto_user, 98 | group=params.user_group, 99 | template_tag=template_tag 100 | ) 101 | -------------------------------------------------------------------------------- /presto-yarn-package/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 15 | 16 | 17 | 4.0.0 18 | 19 | 20 | com.teradata.presto-yarn 21 | presto-yarn 22 | 1.6-SNAPSHOT 23 | ../pom.xml 24 | 25 | 26 | presto-yarn-package 27 | presto-yarn-package 28 | Presto App Package 29 | 30 | 31 | ${project.basedir}/src/main/slider 32 | ${project.build.directory}/slider-filtered-files 33 | ${project.basedir}/src/main/resources 34 | 35 | ${project.build.directory}/dep-package-tmp 36 | presto-server-${presto.version} 37 | ${dep.pkg.basename}.tar.gz 38 | ${dep.pkg.dir}/${dep.pkg.name} 39 | 40 | 41 | 42 | 43 | com.facebook.presto 44 | presto-server 45 | ${presto.version} 46 | tar.gz 47 | 48 | 49 | 50 | 51 | 52 | 53 | ${main.dir} 54 | true 55 | ${main.dir.filtered} 56 | 57 | 58 | ${main.resources.dir} 59 | true 60 | ${project.build.directory}/test-classes 61 | 62 | 63 | 64 | 65 | 66 | 67 | org.apache.maven.plugins 68 | maven-dependency-plugin 69 | ${maven-dependency-plugin.version} 70 | 71 | 72 | copy-dependencies 73 | process-resources 74 | 75 | copy-dependencies 76 | 77 | 78 | presto-server 79 | tar.gz 80 | true 81 | ${dep.pkg.dir} 82 | 83 | 84 | 85 | 86 | 87 | 88 | org.apache.maven.plugins 89 | maven-assembly-plugin 90 | ${maven-assembly-plugin.version} 91 | 92 | ${project.basedir}/src/main/assembly/presto.xml 93 | false 94 | ${app.package.name} 95 | 96 | 97 | 98 | build-app-package 99 | 
package 100 | 101 | single 102 | 103 | 104 | 105 | 106 | 107 | 108 | org.apache.maven.plugins 109 | maven-jar-plugin 110 | ${maven-jar-plugin.version} 111 | 112 | 113 | 114 | test-jar 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | -------------------------------------------------------------------------------- /presto-yarn-test/src/main/resources/conf/yarn/yarn-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | yarn.application.classpath 6 | 7 | /etc/hadoop/conf, 8 | /usr/lib/hadoop/*, 9 | /usr/lib/hadoop/lib/*, 10 | /usr/lib/hadoop-hdfs/*, 11 | /usr/lib/hadoop-hdfs/lib/*, 12 | /usr/lib/hadoop-yarn/*, 13 | /usr/lib/hadoop-yarn/lib/*, 14 | /usr/lib/hadoop-mapreduce/*, 15 | /usr/lib/hadoop-mapreduce/lib/*, 16 | /usr/hdp/current/hadoop-client/*, 17 | /usr/hdp/current/hadoop-client/lib/*, 18 | /usr/hdp/current/hadoop-hdfs-client/*, 19 | /usr/hdp/current/hadoop-hdfs-client/lib/*, 20 | /usr/hdp/current/hadoop-yarn-client/*, 21 | /usr/hdp/current/hadoop-yarn-client/lib/*, 22 | /usr/hdp/current/hadoop-mapreduce-client/*, 23 | /usr/hdp/current/hadoop-mapreduce-client/lib/* 24 | 25 | 26 | 27 | yarn.log-aggregation-enable 28 | true 29 | 30 | 31 | yarn.log-aggregation.retain-seconds 32 | 2592000 33 | 34 | 35 | yarn.log.server.url 36 | hadoop-master:19888/jobhistory/logs 37 | 38 | 39 | yarn.nodemanager.address 40 | 0.0.0.0:45454 41 | 42 | 43 | yarn.nodemanager.aux-services 44 | mapreduce_shuffle 45 | 46 | 47 | yarn.nodemanager.aux-services.mapreduce_shuffle.class 48 | org.apache.hadoop.mapred.ShuffleHandler 49 | 50 | 51 | yarn.nodemanager.delete.debug-delay-sec 52 | 86400 53 | 54 | 55 | yarn.nodemanager.log-aggregation.compression-type 56 | gz 57 | 58 | 59 | yarn.nodemanager.log-dirs 60 | /var/log/hadoop-yarn 61 | 62 | 63 | yarn.nodemanager.log.retain-second 64 | 86400 65 | 66 | 67 | yarn.nodemanager.remote-app-log-dir 68 | /var/log/hadoop-yarn 69 | 70 | 71 | yarn.nodemanager.remote-app-log-dir-suffix 72 | logs 73 | 74 | 75 | yarn.resourcemanager.address 76 | hadoop-master:8032 77 | 78 | 79 | yarn.resourcemanager.hostname 80 | hadoop-master 81 | 82 | 83 | yarn.resourcemanager.webapp.address 84 | hadoop-master:8088 85 | 86 | 87 | yarn.scheduler.maximum-allocation-vcores 88 | 2 89 | 90 | 91 | 92 | 93 | yarn.node-labels.enabled 94 | true 95 | 96 | 97 | yarn.resourcemanager.scheduler.class 98 | org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler 99 | 100 | 101 | yarn.node-labels.manager-class 102 | org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager 103 | 104 | 105 | yarn.node-labels.fs-store.root-dir 106 | hdfs://hadoop-master:8020/user/yarn/node-labels/ 107 | 108 | 109 | 110 | 111 | yarn.nodemanager.container-executor.class 112 | org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor 113 | 114 | 115 | yarn.nodemanager.linux-container-executor.resources-handler.class 116 | org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler 117 | 118 | 119 | yarn.nodemanager.linux-container-executor.cgroups.hierarchy 120 | yarn 121 | 122 | 123 | yarn.nodemanager.linux-container-executor.cgroups.mount 124 | true 125 | 126 | 127 | yarn.nodemanager.linux-container-executor.cgroups.mount-path 128 | /sys/fs/cgroup 129 | 130 | 131 | yarn.nodemanager.linux-container-executor.group 132 | yarn 133 | 134 | 135 | yarn.nodemanager.linux-container-executor.cgroups.strict-resource-usage 136 | true 137 | 138 | 139 | 
yarn.nodemanager.linux-container-executor.nonsecure-mode.local-user 140 | yarn 141 | 142 | 143 | yarn.nodemanager.linux-container-executor.nonsecure-mode.limit-users 144 | false 145 | 146 | 147 | yarn.nodemanager.linux-container-executor.path 148 | /usr/lib/hadoop-yarn/bin/container-executor 149 | 150 | 151 | 152 | 153 | yarn.nodemanager.vmem-check-enabled 154 | false 155 | 156 | 157 | 158 | -------------------------------------------------------------------------------- /presto-yarn-test/src/main/java/com/teradata/presto/yarn/test/slider/Slider.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package com.teradata.presto.yarn.test.slider; 15 | 16 | import com.teradata.tempto.context.State; 17 | import com.teradata.tempto.process.CommandExecutionException; 18 | import com.teradata.tempto.ssh.SshClient; 19 | import org.slf4j.Logger; 20 | import org.slf4j.LoggerFactory; 21 | 22 | import java.nio.file.Path; 23 | import java.util.Optional; 24 | 25 | import static com.google.common.base.Preconditions.checkState; 26 | import static com.teradata.presto.yarn.test.utils.Resources.extractResource; 27 | 28 | public class Slider 29 | implements State 30 | { 31 | private static final Logger log = LoggerFactory.getLogger(Slider.class); 32 | 33 | private static final String SLIDER_REMOTE_CONF_DIR = "slider-0.80.0-incubating/conf/"; 34 | private final SshClient sshClient; 35 | 36 | public Slider(SshClient sshClient) 37 | { 38 | this.sshClient = sshClient; 39 | } 40 | 41 | public void install(final Path sliderBinary) 42 | { 43 | if (isInstalled()) { 44 | log.info("Slider is already installed on cluster"); 45 | return; 46 | } 47 | 48 | sshClient.command("unzip " + String.valueOf(upload(sliderBinary))); 49 | 50 | sshClient.upload(extractResource("/conf/slider/log4j.properties"), SLIDER_REMOTE_CONF_DIR); 51 | sshClient.upload(extractResource("/conf/slider/slider-client.xml"), SLIDER_REMOTE_CONF_DIR); 52 | sshClient.upload(extractResource("/conf/slider/slider-env.sh"), SLIDER_REMOTE_CONF_DIR); 53 | } 54 | 55 | private Path upload(Path path) 56 | { 57 | sshClient.upload(path, "."); 58 | return path.getFileName(); 59 | } 60 | 61 | private boolean isInstalled() 62 | { 63 | try { 64 | action("help"); 65 | return true; 66 | } 67 | catch (CommandExecutionException e) { 68 | log.debug("Checking if slider is installed", e); 69 | return false; 70 | } 71 | } 72 | 73 | public void installLocalPackage(Path clusterPackage, final String packageName) 74 | { 75 | upload(clusterPackage); 76 | Path remotePackage = clusterPackage.getFileName(); 77 | action("package --install --name " + packageName + " --package " + remotePackage + " --replacepkg"); 78 | } 79 | 80 | public void uninstallPackage(final String packageName) 81 | { 82 | action("package --delete --name " + packageName); 83 | } 84 | 85 | public void cleanup(final String appName) 86 | { 87 | try { 88 | stop(appName, true); 89 | } 90 | 
catch (CommandExecutionException e) { 91 | if (e.getExitStatus() == 69) { 92 | log.warn("Unable to stop cluster (it is not started)"); 93 | } 94 | else { 95 | throw e; 96 | } 97 | } 98 | 99 | try { 100 | action("destroy " + appName); 101 | } 102 | catch (CommandExecutionException e) { 103 | log.warn("Unable to destroy cluster (is it not created?)", e); 104 | } 105 | } 106 | 107 | public void create(final String appName, final Path template, final Path resource) 108 | { 109 | action("create " + appName + " --template " + String.valueOf(upload(template)) + " --resources " + String.valueOf(upload(resource))); 110 | action("exists " + appName + " --live"); 111 | } 112 | 113 | public Optional status(final String appName) 114 | { 115 | int count = 0; 116 | int maxRetries = 10; 117 | while (true) { 118 | try { 119 | action("status " + appName + " --out status_file"); 120 | return Optional.of(new SliderStatus(sshClient.command("cat status_file"))); 121 | } 122 | catch (CommandExecutionException e) { 123 | if (e.getExitStatus() == 70) { 124 | log.warn("Unable to retrieve status, application is not yet running"); 125 | return Optional.empty(); 126 | } 127 | else if (e.getExitStatus() == 56) { 128 | log.warn("Unable to retrieve status, node is unreachable temporarily. Retrying.."); 129 | if ((count = ++count) == maxRetries) { 130 | throw e; 131 | } 132 | } 133 | else { 134 | throw e; 135 | } 136 | } 137 | } 138 | } 139 | 140 | public void stop(String clusterName, boolean force) 141 | { 142 | String forceArgument = force ? "--force" : ""; 143 | action("stop " + clusterName + " " + forceArgument); 144 | } 145 | 146 | public void stop(String clusterName) 147 | { 148 | stop(clusterName, false); 149 | } 150 | 151 | public void flex(final String clusterName, final String component_name, final int component_count) 152 | { 153 | action("flex " + clusterName + " --component " + component_name + " " + String.valueOf(component_count)); 154 | } 155 | 156 | public void action(final String arg) 157 | { 158 | sshClient.command("slider-0.80.0-incubating/bin/slider " + arg); 159 | } 160 | 161 | @Override 162 | public Optional getName() 163 | { 164 | return Optional.empty(); 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 17 | 18 | 4.0.0 19 | 20 | 21 | org.sonatype.oss 22 | oss-parent 23 | 9 24 | 25 | 26 | com.teradata.presto-yarn 27 | presto-yarn 28 | 1.6-SNAPSHOT 29 | pom 30 | presto-yarn 31 | Presto-YARN Integration 32 | https://github.com/prestodb/presto-yarn 33 | 34 | 35 | 36 | The Apache License, Version 2.0 37 | http://www.apache.org/licenses/LICENSE-2.0.txt 38 | 39 | 40 | 41 | 42 | 43 | Teradata Center for Hadoop 44 | EB230060@teradata.com 45 | 46 | 47 | 48 | 49 | scm:git:git@github.com:prestodb/presto-yarn.git 50 | scm:git:git@github.com:prestodb/presto-yarn.git 51 | git@github.com:prestodb/presto-yarn.git 52 | HEAD 53 | 54 | 55 | 2015 56 | 57 | 58 | presto-yarn-package 59 | presto-yarn-test 60 | presto-yarn-docs 61 | 62 | 63 | 64 | 0.167 65 | 0.80.0-incubating 66 | 4.11 67 | 1.4 68 | 0.13.1-2 69 | 1.7.5 70 | 71 | 72 | 2.4 73 | 3.2 74 | 2.18.1 75 | 2.18.1 76 | 2.10 77 | 2.6 78 | 2.6 79 | 80 | UTF-8 81 | ${encoding} 82 | 83 | presto-yarn-package-${project.version}-${presto.version} 84 | 85 | 86 | 87 | 88 | ossrh 89 | https://oss.sonatype.org/content/repositories/snapshots 90 | 91 | 92 | ossrh 93 | https://oss.sonatype.org/service/local/staging/deploy/maven2/ 94 | 
95 | 96 | 97 | 98 | 99 | teradata 100 | Teradata public maven repository 101 | http://teradata-presto.s3.amazonaws.com/maven/repository/release/ 102 | 103 | 104 | 105 | 106 | 107 | 108 | org.sonatype.plugins 109 | nexus-staging-maven-plugin 110 | 1.6.3 111 | true 112 | 113 | ossrh 114 | https://oss.sonatype.org/ 115 | false 116 | 117 | 118 | 119 | org.apache.maven.plugins 120 | maven-release-plugin 121 | 2.5 122 | 123 | true 124 | true 125 | release 126 | deploy 127 | 128 | 129 | 130 | org.apache.maven.plugins 131 | maven-compiler-plugin 132 | ${maven-compiler-plugin.version} 133 | 134 | 1.8 135 | 1.8 136 | 137 | 138 | 139 | kr.motd.maven 140 | sphinx-maven-plugin 141 | 1.3.1.Final 142 | 143 | 144 | 145 | 146 | 147 | 148 | release 149 | 150 | 151 | 152 | org.apache.maven.plugins 153 | maven-gpg-plugin 154 | 1.5 155 | 156 | 157 | sign-artifacts 158 | verify 159 | 160 | sign 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | -------------------------------------------------------------------------------- /presto-yarn-test/src/main/java/com/teradata/presto/yarn/test/PrestoCluster.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | package com.teradata.presto.yarn.test; 15 | 16 | import com.facebook.presto.jdbc.PrestoDriver; 17 | import com.google.common.collect.ImmutableList; 18 | import com.google.common.util.concurrent.SettableFuture; 19 | import com.teradata.presto.yarn.test.slider.Slider; 20 | import com.teradata.presto.yarn.test.slider.SliderStatus; 21 | import com.teradata.presto.yarn.test.utils.SimpleJdbcQueryExecutor; 22 | import com.teradata.tempto.assertions.QueryAssert; 23 | import com.teradata.tempto.hadoop.hdfs.HdfsClient; 24 | import com.teradata.tempto.query.QueryExecutionException; 25 | import com.teradata.tempto.query.QueryExecutor; 26 | import org.slf4j.Logger; 27 | import org.slf4j.LoggerFactory; 28 | 29 | import java.nio.file.Path; 30 | import java.nio.file.Paths; 31 | import java.sql.Connection; 32 | import java.sql.SQLException; 33 | import java.util.Collection; 34 | import java.util.List; 35 | import java.util.Optional; 36 | import java.util.Properties; 37 | import java.util.concurrent.Future; 38 | 39 | import static com.google.common.base.Preconditions.checkState; 40 | import static com.teradata.presto.yarn.test.utils.Closures.withMethodHelper; 41 | import static com.teradata.presto.yarn.test.utils.TimeUtils.retryUntil; 42 | import static java.util.concurrent.TimeUnit.MINUTES; 43 | 44 | public class PrestoCluster 45 | { 46 | private static final Logger log = LoggerFactory.getLogger(PrestoCluster.class); 47 | 48 | public static final String COORDINATOR_COMPONENT = "COORDINATOR"; 49 | public static final String WORKER_COMPONENT = "WORKER"; 50 | public static final String APP_NAME = "presto_cluster"; 51 | 52 | private final Path resource; 53 | private final Path template; 54 | private final Slider slider; 55 | private final HdfsClient hdfsClient; 56 | 57 | public PrestoCluster(Slider slider, HdfsClient hdfsClient, String sliderConfDir, String resource, String template) 58 | { 59 | this.hdfsClient = hdfsClient; 60 | this.slider = slider; 61 | this.resource = Paths.get(sliderConfDir, resource); 62 | this.template = Paths.get(sliderConfDir, template); 63 | } 64 | 65 | public void withPrestoCluster(Runnable closure) 66 | { 67 | withMethodHelper(this::create, closure, this::cleanup); 68 | } 69 | 70 | public void create() 71 | { 72 | cleanup(); 73 | checkState(!hdfsClient.exist(".slider/cluster/" + APP_NAME)); 74 | 75 | slider.create(APP_NAME, template, resource); 76 | } 77 | 78 | public void cleanup() 79 | { 80 | slider.cleanup(APP_NAME); 81 | } 82 | 83 | public void assertThatPrestoIsUpAndRunning(int workersCount) 84 | { 85 | waitForComponents(workersCount); 86 | 87 | QueryExecutor queryExecutor = waitForPrestoServer(); 88 | QueryAssert.assertThat(queryExecutor.executeQuery("SELECT 1")).containsExactly(QueryAssert.Row.row(1)); 89 | } 90 | 91 | public void waitForComponents(int workersCount) 92 | { 93 | waitForComponentsCount(COORDINATOR_COMPONENT, 1); 94 | waitForComponentsCount(WORKER_COMPONENT, workersCount); 95 | } 96 | 97 | private void waitForComponentsCount(final String component, final int expectedCount) 98 | { 99 | retryUntil(() -> getComponentHosts(component).size() == expectedCount, MINUTES.toMillis(3)); 100 | } 101 | 102 | public List getComponentHosts(String component) 103 | { 104 | Optional status = status(); 105 | if (status.isPresent()) { 106 | return status.get().getLiveComponentsHost(component); 107 | } 108 | else { 109 | return ImmutableList.of(); 110 | } 111 | } 112 | 113 | public Integer getLiveContainers(String component) 114 | { 115 | Optional status = status(); 
116 | if (status.isPresent()) { 117 | return status.get().getLiveContainers(component); 118 | } 119 | else { 120 | return 0; 121 | } 122 | } 123 | 124 | public QueryExecutor waitForPrestoServer() 125 | { 126 | QueryExecutor queryExecutor = getQueryExecutor(); 127 | retryUntil(() -> isPrestoAccessible(queryExecutor), MINUTES.toMillis(5)); 128 | return queryExecutor; 129 | } 130 | 131 | public QueryExecutor getQueryExecutor() 132 | { 133 | String url = "jdbc:presto://" + getCoordinatorHost() + ":8080"; 134 | log.info("Waiting for Presto at connection url: " + url + "..."); 135 | 136 | return new SimpleJdbcQueryExecutor(getPrestoConnection(url)); 137 | } 138 | 139 | private Connection getPrestoConnection(String url) 140 | { 141 | PrestoDriver prestoDriver = new PrestoDriver(); 142 | Properties properties = new Properties(); 143 | properties.setProperty("user", "user"); 144 | properties.setProperty("password", "password"); 145 | 146 | try { 147 | return prestoDriver.connect(url, properties); 148 | } 149 | catch (SQLException e) { 150 | throw new RuntimeException(e); 151 | } 152 | } 153 | 154 | private boolean isPrestoAccessible(QueryExecutor queryExecutor) 155 | { 156 | try { 157 | log.debug("Trying to connect presto..."); 158 | queryExecutor.executeQuery("SELECT 1"); 159 | log.debug("Connected"); 160 | return true; 161 | } 162 | catch (QueryExecutionException ex) { 163 | return false; 164 | } 165 | } 166 | 167 | public Optional status() 168 | { 169 | return slider.status(APP_NAME); 170 | } 171 | 172 | public void stop() 173 | { 174 | slider.stop(APP_NAME); 175 | } 176 | 177 | public void flex(String component_name, int component_count) 178 | { 179 | slider.flex(APP_NAME, component_name, component_count); 180 | } 181 | 182 | public Collection getAllNodes() 183 | { 184 | return ImmutableList.builder().addAll(getWorkerHosts()).add(getCoordinatorHost()).build(); 185 | } 186 | 187 | public List getWorkerHosts() 188 | { 189 | return getComponentHosts(WORKER_COMPONENT); 190 | } 191 | 192 | public String getCoordinatorHost() 193 | { 194 | String[] coordinatorHost = new String[1]; 195 | retryUntil(() -> { 196 | List componentHosts = getComponentHosts(COORDINATOR_COMPONENT); 197 | if (componentHosts.size() == 1) { 198 | coordinatorHost[0] = componentHosts.get(0); 199 | return true; 200 | } 201 | return false; 202 | }, MINUTES.toMillis(4)); 203 | return coordinatorHost[0]; 204 | } 205 | } 206 | -------------------------------------------------------------------------------- /presto-yarn-test/src/main/java/com/teradata/presto/yarn/test/fulfillment/PrerequisitesClusterFulfiller.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 
13 | */ 14 | package com.teradata.presto.yarn.test.fulfillment; 15 | 16 | import com.google.common.collect.ImmutableList; 17 | import com.google.common.collect.ImmutableSet; 18 | import com.google.inject.Inject; 19 | import com.teradata.presto.yarn.test.utils.NodeSshUtils; 20 | import com.teradata.tempto.Requirement; 21 | import com.teradata.tempto.context.State; 22 | import com.teradata.tempto.fulfillment.RequirementFulfiller; 23 | import com.teradata.tempto.fulfillment.TestStatus; 24 | import com.teradata.tempto.process.CommandExecutionException; 25 | import com.teradata.tempto.ssh.SshClient; 26 | import com.teradata.tempto.ssh.SshClientFactory; 27 | import org.slf4j.Logger; 28 | import org.slf4j.LoggerFactory; 29 | 30 | import javax.inject.Named; 31 | 32 | import java.util.HashMap; 33 | import java.util.List; 34 | import java.util.Map; 35 | import java.util.Set; 36 | import java.util.stream.Collectors; 37 | 38 | import static com.teradata.presto.yarn.test.PrestoCluster.COORDINATOR_COMPONENT; 39 | import static com.teradata.presto.yarn.test.PrestoCluster.WORKER_COMPONENT; 40 | import static com.teradata.presto.yarn.test.utils.Resources.extractResource; 41 | import static java.util.Arrays.asList; 42 | import static java.util.Collections.singletonList; 43 | 44 | @RequirementFulfiller.AutoSuiteLevelFulfiller(priority = 1) 45 | public class PrerequisitesClusterFulfiller 46 | implements RequirementFulfiller 47 | { 48 | private static final Logger log = LoggerFactory.getLogger(PrerequisitesClusterFulfiller.class); 49 | private static final String REMOTE_HADOOP_CONF_DIR = "/etc/hadoop/conf/"; 50 | 51 | @Inject 52 | @Named("cluster.master") 53 | private String master; 54 | @Inject 55 | @Named("cluster.slaves") 56 | private List slaves; 57 | @Inject 58 | @Named("ssh.roles.yarn.password") 59 | private String yarnPassword; 60 | 61 | private final SshClientFactory sshClientFactory; 62 | private final NodeSshUtils nodeSshUtils; 63 | 64 | @Inject 65 | public PrerequisitesClusterFulfiller(SshClientFactory sshClientFactory, @Named("yarn") SshClient yarnSshClient) 66 | { 67 | this.sshClientFactory = sshClientFactory; 68 | this.nodeSshUtils = new NodeSshUtils(sshClientFactory, yarnSshClient); 69 | } 70 | 71 | @Override 72 | public Set fulfill(Set requirements) 73 | { 74 | if (isPrepared()) { 75 | log.info("Skipping cluster prerequisites fulfillment as it is already prepared"); 76 | return ImmutableSet.of(nodeSshUtils); 77 | } 78 | 79 | runOnMaster(singletonList("echo \'" + yarnPassword + "\' | passwd --stdin yarn")); 80 | 81 | fixHdp(); 82 | 83 | setupCgroup(); 84 | 85 | setupYarnResourceManager(); 86 | 87 | restartResourceManager(); 88 | 89 | Map node_labels = getNodeLabels(); 90 | 91 | nodeSshUtils.createLabels(node_labels); 92 | 93 | useLabelsForSchedulerQueues(); 94 | 95 | runOnAll(asList("supervisorctl stop yarn-nodemanager")); 96 | 97 | restartResourceManager(); 98 | 99 | runOnAll(asList("supervisorctl start yarn-nodemanager")); 100 | 101 | nodeSshUtils.labelNodes(node_labels); 102 | 103 | runOnAll(asList( 104 | "mkdir -p /var/lib/presto", 105 | "chown yarn:yarn /var/lib/presto")); 106 | 107 | runOnMaster(asList("touch prepared")); 108 | 109 | return ImmutableSet.of(nodeSshUtils); 110 | } 111 | 112 | private boolean isPrepared() { 113 | try { 114 | runOnMaster(asList("ls prepared")); 115 | return true; 116 | } 117 | catch (CommandExecutionException e) { 118 | log.debug("Checking if cluster is prepared", e); 119 | return false; 120 | } 121 | } 122 | 123 | private void fixHdp() 124 | { 125 | 
nodeSshUtils.withSshClient(master, sshClient -> { 126 | sshClient.upload(extractResource("/fix_hdp_mapreduce.sh"), "/tmp/"); 127 | return sshClient.command("sh /tmp/fix_hdp_mapreduce.sh || true"); 128 | }); 129 | runOnAll(asList( 130 | "test -x /usr/lib/hadoop-yarn || ln -s /usr/hdp/2.3.*/hadoop-yarn /usr/lib/hadoop-yarn", 131 | "test -x /var/log/hadoop-yarn || (mkdir -p /var/log/hadoop-yarn && chown yarn:hadoop /var/log/hadoop-yarn)")); 132 | } 133 | 134 | private Map getNodeLabels() 135 | { 136 | Map nodeLabels = new HashMap<>(); 137 | 138 | nodeLabels.put(master, COORDINATOR_COMPONENT.toLowerCase()); 139 | slaves.forEach(slave -> nodeLabels.put(slave, WORKER_COMPONENT.toLowerCase())); 140 | return nodeLabels; 141 | } 142 | 143 | private void setupYarnResourceManager() 144 | { 145 | nodeSshUtils.withSshClient(getAllNodes(), sshClient -> { 146 | sshClient.upload(extractResource("/conf/yarn/yarn-site.xml"), REMOTE_HADOOP_CONF_DIR); 147 | sshClient.upload(extractResource("/conf/yarn/container-executor.cfg"), REMOTE_HADOOP_CONF_DIR); 148 | return null; 149 | }); 150 | 151 | runOnMaster(asList( 152 | "su hdfs -c 'hadoop fs -mkdir -p /user/yarn'", 153 | "su hdfs -c 'hadoop fs -chown yarn:yarn /user/yarn'")); 154 | } 155 | 156 | private void useLabelsForSchedulerQueues() 157 | { 158 | nodeSshUtils.withSshClient(master, sshClient -> { 159 | sshClient.upload(extractResource("/conf/yarn/capacity-scheduler.xml"), REMOTE_HADOOP_CONF_DIR); 160 | return null; 161 | }); 162 | } 163 | 164 | private void restartResourceManager() 165 | { 166 | runOnMaster(singletonList("supervisorctl restart yarn-resourcemanager")); 167 | } 168 | 169 | private void setupCgroup() 170 | { 171 | runOnAll(asList( 172 | "find / -name container-executor | xargs chown root:yarn", 173 | "find / -name container-executor | xargs chmod 6050")); 174 | 175 | nodeSshUtils.withSshClient(getAllNodes(), sshClient -> { 176 | sshClient.upload(extractResource("/conf/cgroup/cgrules.conf"), "/etc/"); 177 | sshClient.upload(extractResource("/conf/cgroup/cgconfig.conf"), "/etc/"); 178 | return null; 179 | }); 180 | 181 | String restartCgroupCmd= "/etc/init.d/cgconfig restart"; 182 | runOnAll(asList( 183 | restartCgroupCmd, 184 | "chmod -R 777 /sys/fs/cgroup")); 185 | } 186 | 187 | private void runOnMaster(List commands) 188 | { 189 | nodeSshUtils.runOnNode(master, commands); 190 | } 191 | 192 | private List runOnAll(List commands) 193 | { 194 | return getAllNodes().stream() 195 | .map(node -> nodeSshUtils.runOnNode(node, commands)) 196 | .flatMap(List::stream) 197 | .collect(Collectors.toList()); 198 | } 199 | 200 | private List getAllNodes() 201 | { 202 | return ImmutableList.builder().addAll(slaves).add(master).build(); 203 | } 204 | 205 | @Override 206 | public void cleanup(TestStatus testStatus) 207 | { 208 | } 209 | } 210 | -------------------------------------------------------------------------------- /mvnw: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # ---------------------------------------------------------------------------- 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. 
You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | # ---------------------------------------------------------------------------- 20 | 21 | # ---------------------------------------------------------------------------- 22 | # Maven2 Start Up Batch script 23 | # 24 | # Required ENV vars: 25 | # ------------------ 26 | # JAVA_HOME - location of a JDK home dir 27 | # 28 | # Optional ENV vars 29 | # ----------------- 30 | # M2_HOME - location of maven2's installed home dir 31 | # MAVEN_OPTS - parameters passed to the Java VM when running Maven 32 | # e.g. to debug Maven itself, use 33 | # set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000 34 | # MAVEN_SKIP_RC - flag to disable loading of mavenrc files 35 | # ---------------------------------------------------------------------------- 36 | 37 | if [ -z "$MAVEN_SKIP_RC" ] ; then 38 | 39 | if [ -f /etc/mavenrc ] ; then 40 | . /etc/mavenrc 41 | fi 42 | 43 | if [ -f "$HOME/.mavenrc" ] ; then 44 | . "$HOME/.mavenrc" 45 | fi 46 | 47 | fi 48 | 49 | # OS specific support. $var _must_ be set to either true or false. 50 | cygwin=false; 51 | darwin=false; 52 | mingw=false 53 | case "`uname`" in 54 | CYGWIN*) cygwin=true ;; 55 | MINGW*) mingw=true;; 56 | Darwin*) darwin=true 57 | # 58 | # Look for the Apple JDKs first to preserve the existing behaviour, and then look 59 | # for the new JDKs provided by Oracle. 60 | # 61 | if [ -z "$JAVA_HOME" ] && [ -L /System/Library/Frameworks/JavaVM.framework/Versions/CurrentJDK ] ; then 62 | # 63 | # Apple JDKs 64 | # 65 | export JAVA_HOME=/System/Library/Frameworks/JavaVM.framework/Versions/CurrentJDK/Home 66 | fi 67 | 68 | if [ -z "$JAVA_HOME" ] && [ -L /System/Library/Java/JavaVirtualMachines/CurrentJDK ] ; then 69 | # 70 | # Apple JDKs 71 | # 72 | export JAVA_HOME=/System/Library/Java/JavaVirtualMachines/CurrentJDK/Contents/Home 73 | fi 74 | 75 | if [ -z "$JAVA_HOME" ] && [ -L "/Library/Java/JavaVirtualMachines/CurrentJDK" ] ; then 76 | # 77 | # Oracle JDKs 78 | # 79 | export JAVA_HOME=/Library/Java/JavaVirtualMachines/CurrentJDK/Contents/Home 80 | fi 81 | 82 | if [ -z "$JAVA_HOME" ] && [ -x "/usr/libexec/java_home" ]; then 83 | # 84 | # Apple JDKs 85 | # 86 | export JAVA_HOME=`/usr/libexec/java_home` 87 | fi 88 | ;; 89 | esac 90 | 91 | if [ -z "$JAVA_HOME" ] ; then 92 | if [ -r /etc/gentoo-release ] ; then 93 | JAVA_HOME=`java-config --jre-home` 94 | fi 95 | fi 96 | 97 | if [ -z "$M2_HOME" ] ; then 98 | ## resolve links - $0 may be a link to maven's home 99 | PRG="$0" 100 | 101 | # need this for relative symlinks 102 | while [ -h "$PRG" ] ; do 103 | ls=`ls -ld "$PRG"` 104 | link=`expr "$ls" : '.*-> \(.*\)$'` 105 | if expr "$link" : '/.*' > /dev/null; then 106 | PRG="$link" 107 | else 108 | PRG="`dirname "$PRG"`/$link" 109 | fi 110 | done 111 | 112 | saveddir=`pwd` 113 | 114 | M2_HOME=`dirname "$PRG"`/.. 
115 | 116 | # make it fully qualified 117 | M2_HOME=`cd "$M2_HOME" && pwd` 118 | 119 | cd "$saveddir" 120 | # echo Using m2 at $M2_HOME 121 | fi 122 | 123 | # For Cygwin, ensure paths are in UNIX format before anything is touched 124 | if $cygwin ; then 125 | [ -n "$M2_HOME" ] && 126 | M2_HOME=`cygpath --unix "$M2_HOME"` 127 | [ -n "$JAVA_HOME" ] && 128 | JAVA_HOME=`cygpath --unix "$JAVA_HOME"` 129 | [ -n "$CLASSPATH" ] && 130 | CLASSPATH=`cygpath --path --unix "$CLASSPATH"` 131 | fi 132 | 133 | # For Migwn, ensure paths are in UNIX format before anything is touched 134 | if $mingw ; then 135 | [ -n "$M2_HOME" ] && 136 | M2_HOME="`(cd "$M2_HOME"; pwd)`" 137 | [ -n "$JAVA_HOME" ] && 138 | JAVA_HOME="`(cd "$JAVA_HOME"; pwd)`" 139 | # TODO classpath? 140 | fi 141 | 142 | if [ -z "$JAVA_HOME" ]; then 143 | javaExecutable="`which javac`" 144 | if [ -n "$javaExecutable" ] && ! [ "`expr \"$javaExecutable\" : '\([^ ]*\)'`" = "no" ]; then 145 | # readlink(1) is not available as standard on Solaris 10. 146 | readLink=`which readlink` 147 | if [ ! `expr "$readLink" : '\([^ ]*\)'` = "no" ]; then 148 | if $darwin ; then 149 | javaHome="`dirname \"$javaExecutable\"`" 150 | javaExecutable="`cd \"$javaHome\" && pwd -P`/javac" 151 | else 152 | javaExecutable="`readlink -f \"$javaExecutable\"`" 153 | fi 154 | javaHome="`dirname \"$javaExecutable\"`" 155 | javaHome=`expr "$javaHome" : '\(.*\)/bin'` 156 | JAVA_HOME="$javaHome" 157 | export JAVA_HOME 158 | fi 159 | fi 160 | fi 161 | 162 | if [ -z "$JAVACMD" ] ; then 163 | if [ -n "$JAVA_HOME" ] ; then 164 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 165 | # IBM's JDK on AIX uses strange locations for the executables 166 | JAVACMD="$JAVA_HOME/jre/sh/java" 167 | else 168 | JAVACMD="$JAVA_HOME/bin/java" 169 | fi 170 | else 171 | JAVACMD="`which java`" 172 | fi 173 | fi 174 | 175 | if [ ! -x "$JAVACMD" ] ; then 176 | echo "Error: JAVA_HOME is not defined correctly." >&2 177 | echo " We cannot execute $JAVACMD" >&2 178 | exit 1 179 | fi 180 | 181 | if [ -z "$JAVA_HOME" ] ; then 182 | echo "Warning: JAVA_HOME environment variable is not set." 183 | fi 184 | 185 | CLASSWORLDS_LAUNCHER=org.codehaus.plexus.classworlds.launcher.Launcher 186 | 187 | # For Cygwin, switch paths to Windows format before running java 188 | if $cygwin; then 189 | [ -n "$M2_HOME" ] && 190 | M2_HOME=`cygpath --path --windows "$M2_HOME"` 191 | [ -n "$JAVA_HOME" ] && 192 | JAVA_HOME=`cygpath --path --windows "$JAVA_HOME"` 193 | [ -n "$CLASSPATH" ] && 194 | CLASSPATH=`cygpath --path --windows "$CLASSPATH"` 195 | fi 196 | 197 | # traverses directory structure from process work directory to filesystem root 198 | # first directory with .mvn subdirectory is considered project base directory 199 | find_maven_basedir() { 200 | local basedir=$(pwd) 201 | local wdir=$(pwd) 202 | while [ "$wdir" != '/' ] ; do 203 | if [ -d "$wdir"/.mvn ] ; then 204 | basedir=$wdir 205 | break 206 | fi 207 | wdir=$(cd "$wdir/.."; pwd) 208 | done 209 | echo "${basedir}" 210 | } 211 | 212 | # concatenates all lines of a file 213 | concat_lines() { 214 | if [ -f "$1" ]; then 215 | echo "$(tr -s '\n' ' ' < "$1")" 216 | fi 217 | } 218 | 219 | export MAVEN_PROJECTBASEDIR=${MAVEN_BASEDIR:-$(find_maven_basedir)} 220 | MAVEN_OPTS="$(concat_lines "$MAVEN_PROJECTBASEDIR/.mvn/jvm.config") $MAVEN_OPTS" 221 | 222 | # Provide a "standardized" way to retrieve the CLI args that will 223 | # work with both Windows and non-Windows executions. 
224 | MAVEN_CMD_LINE_ARGS="$MAVEN_CONFIG $@" 225 | export MAVEN_CMD_LINE_ARGS 226 | 227 | WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain 228 | 229 | exec "$JAVACMD" \ 230 | $MAVEN_OPTS \ 231 | -classpath "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar" \ 232 | "-Dmaven.home=${M2_HOME}" "-Dmaven.multiModuleProjectDirectory=${MAVEN_PROJECTBASEDIR}" \ 233 | ${WRAPPER_LAUNCHER} $MAVEN_CMD_LINE_ARGS 234 | 235 | -------------------------------------------------------------------------------- /presto-yarn-test/src/main/java/com/teradata/presto/yarn/test/utils/NodeSshUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package com.teradata.presto.yarn.test.utils; 15 | 16 | import com.google.common.base.Joiner; 17 | import com.google.common.base.Splitter; 18 | import com.teradata.tempto.context.State; 19 | import com.teradata.tempto.ssh.SshClient; 20 | import com.teradata.tempto.ssh.SshClientFactory; 21 | import org.apache.commons.lang3.StringUtils; 22 | import org.slf4j.Logger; 23 | import org.slf4j.LoggerFactory; 24 | 25 | import java.io.IOException; 26 | import java.util.Collection; 27 | import java.util.List; 28 | import java.util.Map; 29 | import java.util.Optional; 30 | import java.util.concurrent.TimeUnit; 31 | import java.util.function.BinaryOperator; 32 | import java.util.function.Function; 33 | import java.util.stream.Stream; 34 | 35 | import static com.google.common.base.CharMatcher.anyOf; 36 | import static com.google.common.base.Preconditions.checkState; 37 | import static com.google.common.collect.Sets.newHashSet; 38 | import static com.teradata.presto.yarn.test.utils.TimeUtils.retryUntil; 39 | import static java.lang.Long.parseLong; 40 | import static java.util.Collections.singletonList; 41 | import static java.util.concurrent.TimeUnit.MINUTES; 42 | import static java.util.stream.Collectors.toList; 43 | import static java.util.stream.Collectors.toMap; 44 | 45 | public class NodeSshUtils 46 | implements State 47 | { 48 | private static final Logger log = LoggerFactory.getLogger(NodeSshUtils.class); 49 | 50 | private final SshClientFactory sshClientFactory; 51 | private final SshClient yarnSshClient; 52 | 53 | public NodeSshUtils(SshClientFactory sshClientFactory, SshClient yarnSshClient) 54 | { 55 | this.yarnSshClient = yarnSshClient; 56 | this.sshClientFactory = sshClientFactory; 57 | } 58 | 59 | public boolean isPrestoProcessRunning(String host) 60 | { 61 | return withSshClient(host, sshClient -> { 62 | String prestoProcessesCountRow = sshClient.command("ps aux | grep PrestoServer | grep -v grep || true").trim(); 63 | int processesCount = prestoProcessesCountRow.split("\n").length; 64 | if (StringUtils.isEmpty(prestoProcessesCountRow)) { 65 | processesCount = 0; 66 | } 67 | 68 | log.info("Presto processes count on {}: {}", host, processesCount); 69 | checkState(processesCount == 0 || processesCount == 1, "Unexpected number of 
presto proceses: %s on: %s", processesCount, host); 70 | return processesCount == 1; 71 | }); 72 | } 73 | 74 | public void killPrestoProcesses(String host) 75 | { 76 | runOnNode(host, singletonList("pkill -9 -f 'java.*PrestoServer.*'")); 77 | retryUntil(() -> !isPrestoProcessRunning(host), TimeUnit.SECONDS.toMillis(10)); 78 | } 79 | 80 | public long getPrestoJvmMemory(String host) 81 | { 82 | return withSshClient(host, sshClient -> { 83 | String prestoServerPid = sshClient.command("pgrep -f PrestoServer").trim(); 84 | long prestoProcessJvm = parseLong(sshClient.command("jmap -heap " + prestoServerPid + " | grep capacity | awk 'NR == 1' | awk '{print $3}'")); 85 | log.info("Presto jvm memory " + host + ": " + prestoProcessJvm); 86 | return prestoProcessJvm; 87 | }); 88 | } 89 | 90 | public String getPrestoJvmProcess(String host) 91 | { 92 | return withSshClient(host, sshClient -> sshClient.command("ps aux | grep PrestoServer | grep -v grep").trim()); 93 | } 94 | 95 | public String createLabels(Map labels) 96 | { 97 | return commandOnYarn("yarn rmadmin -addToClusterNodeLabels " + Joiner.on(",").join(newHashSet(labels.values()))); 98 | } 99 | 100 | public void labelNodes(Map labels) 101 | { 102 | waitForNodeManagers(labels.size()); 103 | List nodeIds = getNodeIds(); 104 | 105 | Map nodeToNodeIds = labels.keySet().stream() 106 | .collect(toMap( 107 | node -> node, 108 | node -> nodeIds.stream().filter(nodeId -> nodeId.contains(node)).findFirst().get())); 109 | 110 | String replaceLabelsArgument = labels.keySet().stream().map(node -> node + "," + labels.get(node)).reduce(joinOn(" ")).get(); 111 | commandOnYarn("yarn rmadmin -replaceLabelsOnNode \'" + replaceLabelsArgument + "\'"); 112 | commandOnYarn("yarn rmadmin -refreshQueues"); 113 | 114 | checkThatLabelsAreSetCorrectly(labels, nodeToNodeIds); 115 | } 116 | 117 | private static BinaryOperator joinOn(String separator) 118 | { 119 | return (first, second) -> first + separator + second; 120 | } 121 | 122 | private void checkThatLabelsAreSetCorrectly(Map labels, Map nodeToNodeIds) 123 | { 124 | String clusterNodeLabels = commandOnYarn("yarn queue -status default | grep 'Accessible Node Labels'"); 125 | labels.values().forEach(label -> checkState(clusterNodeLabels.contains(label), "Cluster node labels '{}', does not contain label '{}'", clusterNodeLabels, label)); 126 | labels.entrySet().stream().forEach(entry -> { 127 | String node = entry.getKey(); 128 | String label = entry.getValue(); 129 | String nodeLabels = commandOnYarn("yarn node -status " + nodeToNodeIds.get(node) + " | grep \'Node-Labels\'"); 130 | checkState(nodeLabels.contains(label), "Node labels '{}' on node '{}' does not contain label '{}'", nodeLabels, node, label); 131 | }); 132 | } 133 | 134 | private void waitForNodeManagers(int numberOfNodes) 135 | { 136 | log.info("Waiting for NodeManagers..."); 137 | retryUntil(() -> getNodeIds().size() >= numberOfNodes, MINUTES.toMillis(2)); 138 | } 139 | 140 | public List getNodeIds() 141 | { 142 | return Stream.of(commandOnYarn("yarn node -list").split("\n")) 143 | .filter(line -> line.contains("RUNNING")) 144 | .map(line -> Splitter.on(anyOf(" \t")).omitEmptyStrings().trimResults().split(line).iterator().next()) 145 | .collect(toList()); 146 | } 147 | 148 | public String commandOnYarn(String command) 149 | { 150 | return yarnSshClient.command("source /etc/profile && " + command).trim(); 151 | } 152 | 153 | public void runOnNode(String node, String command) 154 | { 155 | runOnNode(node, singletonList(command)); 156 | } 157 | 158 | 
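// Runs each command in order over a single SSH connection to the given node and returns the
// per-command outputs in the same order.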
public List runOnNode(String node, List commands) 159 | { 160 | return withSshClient(node, sshClient -> { 161 | return commands.stream() 162 | .map(sshClient::command) 163 | .collect(toList()); 164 | }); 165 | } 166 | 167 | public List withSshClient(Collection hosts, Function closure) 168 | { 169 | return hosts.stream() 170 | .map(host -> withSshClient(host, closure)) 171 | .collect(toList()); 172 | } 173 | 174 | public T withSshClient(String host, Function function) 175 | { 176 | try (SshClient sshClient = sshClientFactory.create(host)) { 177 | return function.apply(sshClient); 178 | } 179 | catch (IOException e) { 180 | throw new RuntimeException(e); 181 | } 182 | } 183 | 184 | @Override 185 | public Optional getName() 186 | { 187 | return Optional.empty(); 188 | } 189 | } 190 | -------------------------------------------------------------------------------- /presto-yarn-docs/src/main/sphinx/installation-yarn-manual.rst: -------------------------------------------------------------------------------- 1 | =========================================== 2 | Manual Installation on a YARN-Based Cluster 3 | =========================================== 4 | 5 | You can use `Apache Slider`_. to manually install Presto on a YARN-based cluster. 6 | 7 | .. contents:: Installing and Integrating Presto with YARN 8 | 9 | Deploying Presto on a YARN-Based Cluster 10 | ======================================== 11 | 12 | The installation procedures assume that you have a basic knowledge of Presto 13 | and the configuration files and properties it uses. 14 | 15 | .. note:: 16 | 17 | All example files referred to are from: 18 | https://github.com/prestodb/presto-yarn/ 19 | 20 | ----- 21 | 22 | Pre-Requisites 23 | -------------- 24 | 25 | - A cluster with HDP 2.2+ or CDH5.4+ installed 26 | - Apache Slider 0.80.0 (download from https://slider.incubator.apache.org/) 27 | - JDK 1.8 28 | - Zookeeper 29 | - openssl >= 1.0.1e-16 30 | 31 | .. _Package: https:www.teradata.com/presto 32 | .. _Apache slider: https://slider.incubator.apache.org/ 33 | 34 | .. 35 | BELOW CONTENT IS GENERATED BY PANDOC FROM PRESTO-YARN README.md file, except 36 | - added pre-requisities section 37 | - inner links got fixed 38 | - links section updates 39 | - added note where example files are stored 40 | 41 | ----- 42 | 43 | Presto Installation Directory Structure 44 | --------------------------------------- 45 | 46 | When you use Slider to install Presto on a YARN-based cluster, the Presto 47 | installation directory structure differs from the standard structure. 48 | 49 | For more information, see: :doc:`Presto Installation Directory Structure for YARN-Based Clusters `. 50 | 51 | ----- 52 | 53 | Presto Installation Configuration Options 54 | ----------------------------------------- 55 | 56 | Before installation, you must configure the .json files required for running Presto. 57 | 58 | For more information, see: :doc:`Presto Configuration Options for YARN-Based Clusters `. 59 | 60 | ----- 61 | 62 | Using Apache Slider to Manually Install Presto on a YARN-Based Cluster 63 | ====================================================================== 64 | 65 | 1. Download the slider 0.80.0 installation file from 66 | http://slider.incubator.apache.org/index.html to one of your nodes in 67 | the cluster. 68 | 69 | :: 70 | 71 | tar -xvf slider-0.80.0-incubating-all.tar.gz 72 | 73 | 2. 
Now configure Slider with JAVA\_HOME and HADOOP\_CONF\_DIR in 74 | ``slider-0.80.0-incubating/conf/slider-env.sh`` 75 | 76 | :: 77 | 78 | export JAVA_HOME=/usr/lib/jvm/java 79 | export HADOOP_CONF_DIR=/etc/hadoop/conf 80 | 81 | 3. Configure zookeeper in ``conf/slider-client.xml``. In case zookeper 82 | is listening on ``master:2181`` you need to add there the following 83 | section: 84 | 85 | :: 86 | 87 | 88 | slider.zookeeper.quorum 89 | master:2181 90 | 91 | 92 | 4. Configure path where slider packages will be installed 93 | 94 | :: 95 | 96 | 97 | fs.defaultFS 98 | hdfs://master/ 99 | 100 | 101 | 5. Make sure the user running slider, which should be same as 102 | ``site.global.app_user`` in ``appConfig.json``, has a home dir in 103 | HDFS (See note here: :ref:`appconfig-json-label`). 104 | 105 | For more details about :ref:`appconfig-json-label` and 106 | :ref:`resources-json-label`, see :doc:`Presto Configuration Options for YARN-Based Clusters ` 107 | 108 | .. code-block:: none 109 | 110 | su hdfs 111 | $ hdfs dfs -mkdir -p /user/ 112 | $ hdfs dfs -chown : -R /user/ 113 | 114 | 6. Now run Slider: 115 | 116 | :: 117 | 118 | su 119 | cd slider-0.80.0-incubating 120 | bin/slider package --install --name PRESTO --package ../presto-yarn-package-*.zip 121 | bin/slider create presto1 --template appConfig.json --resources resources.json (using modified .json files as per your requirement) 122 | 123 | This should start your application, and you can see it under the Yarn 124 | ResourceManager webUI.If your application is successfully run, it should continuously be available in the 125 | YARN resource manager as a "RUNNING" application. If the job fails, please be sure to check the job history's logs 126 | along with the logs on the node's disk. See :doc:`Debugging and Logging for YARN-Based Clusters `. 127 | 128 | ----- 129 | 130 | Additional Slider Commands 131 | -------------------------- 132 | 133 | You can use the following Slider commands to manage your existing Presto 134 | application. 135 | 136 | .. _check-status-label: 137 | 138 | Check the Status 139 | ^^^^^^^^^^^^^^^^ 140 | 141 | If you want to check the status of running application you run the 142 | following, and you will have status printed to a file ``status_file`` 143 | 144 | :: 145 | 146 | bin/slider status presto1 --out status_file 147 | 148 | Check where the coordinator is running 149 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 150 | 151 | Use the following command to check what is the host and port of presto coordinator after deployment, so that you can connect to it. You can use output of this command to specify --server flag on presto command line. 152 | 153 | :: 154 | 155 | bin/slider registry --name presto1 --getexp presto 156 | 157 | You can also view this information through Slider REST API and YARN Application UI. 158 | 159 | Destroy the App and Re-create 160 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 161 | 162 | If you want to re-create the app due to some failures or you want to 163 | reconfigure Presto (eg: add a new connector) 164 | 165 | :: 166 | 167 | bin/slider destroy presto1 168 | bin/slider create presto1 --template appConfig.json --resources resources.json 169 | 170 | Completely Remove the App 171 | ^^^^^^^^^^^^^^^^^^^^^^^^^ 172 | 173 | Delete the app including the app package. 174 | 175 | 176 | :: 177 | 178 | bin/slider package --delete --name PRESTO 179 | 180 | 'Flex'ible App 181 | ^^^^^^^^^^^^^^ 182 | 183 | Flex the number of Presto workers to the new value. 
If greater than 184 | before, new copies of the worker will be requested. If less, component 185 | instances will be destroyed. 186 | 187 | Changes are immediate and depend on the availability of resources in the 188 | YARN cluster. Make sure while flex that there are extra nodes 189 | available(if adding) with YARN nodemanagers running and also Presto data 190 | directory pre-created/owned by ``yarn`` user. Also make sure these nodes 191 | do not have a Presto component already running, which may cause flex-ing 192 | to deploy worker on these nodes and eventually failing. 193 | 194 | eg: Asumme there are 2 nodes (with YARN nodemanagers running) in the 195 | cluster and you initially deployed only one of the nodes with Presto via 196 | Slider. If you want to deploy and start Presto WORKER component on the 197 | second node (assuming it meets all resource requirements) and thus have 198 | the total number of WORKERS to be 2, then run: 199 | 200 | :: 201 | 202 | bin/slider flex presto1 --component WORKER 2 203 | 204 | Please note that if your cluster already had 3 WORKER nodes running, the 205 | above command will destroy one of them and retain 2 WORKERs. 206 | 207 | ----- 208 | 209 | Advanced Configuration Options 210 | ------------------------------ 211 | 212 | The following advanced configuration options are available: 213 | 214 | + Configuring memory, CPU, and YARN CGroups 215 | + Failure policy 216 | + YARN label 217 | 218 | For more information, see :doc:`Advanced Configuration Options for YARN-Based Clusters `. 219 | 220 | ----- 221 | 222 | Debugging and Logging 223 | ===================== 224 | 225 | For more information, see: :doc:`Debugging and Logging for YARN-Based Clusters `. 226 | 227 | ----- 228 | 229 | Links 230 | ===== 231 | 232 | - http://slider.incubator.apache.org/docs/getting\_started.html 233 | - http://docs.hortonworks.com/HDPDocuments/Ambari-2.0.1.0/bk\_Installing\_HDP\_AMB/content/ch\_Installing\_Ambari.html 234 | -------------------------------------------------------------------------------- /presto-yarn-docs/src/main/sphinx/installation-yarn-configuration-options-advanced.rst: -------------------------------------------------------------------------------- 1 | ====================================================== 2 | Advanced Configuration Options for YARN-Based Clusters 3 | ====================================================== 4 | 5 | The following sections explain a few advanced configuration options. 6 | 7 | Configuring Memory, CPU, and YARN CGroups 8 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 9 | 10 | Memory and CPU related configuration properties must be modified as per 11 | your cluster configuration and requirements. 12 | 13 | 1. Memory 14 | 15 | ``yarn.memory`` in ``resources.json`` declares the amount of memory to 16 | ask for in YARN containers. It should be defined for each component, 17 | COORDINATOR and WORKER based on the expected memory consumption, 18 | measured in MB. A YARN cluster is usually configured with a minimum 19 | container allocation, set in ``yarn-site.xml`` by the configuration 20 | parameter ``yarn.scheduler.minimum-allocation-mb``. It will also have a 21 | maximum size set in ``yarn.scheduler.maximum-allocation-mb``. Asking for 22 | more than this will result in the request being rejected. 23 | 24 | The heapsize defined as -Xmx of ``site.global.jvm_args`` in 25 | ``appConfig.json``, is used by the Presto JVM itself. Slider suggests 26 | that the value of ``yarn.memory`` must be bigger than this heapsize. 
The 27 | value of ``yarn.memory`` MUST be bigger than the heap size allocated to 28 | any JVM and Slider suggests using atleast 50% more appears to work, 29 | though some experimentation will be needed. 30 | 31 | In addition, set other memory specific properties 32 | ``presto_query_max_memory`` and ``presto_query_max_memory_per_node`` in 33 | ``appConfig.json`` as you would set the properties ``query.max-memory`` 34 | and ``query.max-memory-per-node`` in Presto's config.properties. 35 | 36 | 2. CPU 37 | 38 | Slider also supports configuring the YARN virtual cores to use for the 39 | process which can be defined per component. ``yarn.vcores`` declares the 40 | number of "virtual cores" to request. Ask for more vcores if your 41 | process needs more CPU time. 42 | 43 | See 44 | http://slider.incubator.apache.org/docs/configuration/resources.html#core 45 | for more details. 46 | 47 | 3. CGroups in YARN 48 | 49 | If you are using CPU scheduling (using the DominantResourceCalculator), 50 | you should also use CGroups to constrain and manage CPU processes. 51 | CGroups compliments CPU scheduling by providing CPU resource isolation. 52 | With CGroups strict enforcement turned on, each CPU process gets only 53 | the resources it asks for. This way, we can guarantee that containers 54 | hosting Presto services is assigned with a percentage of CPU. If you 55 | have another process that needs to run on a node that also requires CPU 56 | resources, you can lower the percentage of CPU allocated to YARN to free 57 | up resources for the other process. 58 | 59 | See Hadoop documentation on how to configure CGroups in YARN: 60 | https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/NodeManagerCgroups.html. 61 | Once you have CGroups configured, Presto on YARN containers will be 62 | configured in the CGroups hierarchy like any other YARN application 63 | containers. 64 | 65 | Slider can also define YARN queues to submit the application creation 66 | request to, which can set the priority, resource limits and other values 67 | of the application. But this configuration is global to Slider and 68 | defined in ``conf/slider-client.xml``. You can define the queue name and 69 | also the priority within the queue. All containers created in the Slider 70 | cluster will share this same queue. 71 | 72 | :: 73 | 74 | 75 | slider.yarn.queue 76 | default 77 | 78 | 79 | 80 | slider.yarn.queue.priority 81 | 1 82 | 83 | 84 | Failure Policy 85 | ~~~~~~~~~~~~~~ 86 | 87 | Follow this section if you want to change the default Slider failure 88 | policy. Yarn containers hosting Presto may fail due to some 89 | misconfiguration in Presto or some other conflicts. The number of times 90 | the component may fail within a failure window is defined in 91 | ``resources.json``. 92 | 93 | The related properties are: 94 | 95 | 1. The duration of a failure window, a time period in which failures are 96 | counted. The related properties are 97 | ``yarn.container.failure.window.days``, 98 | ``yarn.container.failure.window.hours``, 99 | ``yarn.container.failure.window.minutes`` and should be set in the 100 | global section as it relates just to slider. The default value is 101 | ``yarn.container.failure.window.hours=6``. The initial window is 102 | measured from the start of the slider application master —once the 103 | duration of that window is exceeded, all failure counts are reset, 104 | and the window begins again. 105 | 2. The maximum number of failures of any component in this time period. 
106 | ``yarn.container.failure.threshold`` is the property for this and in 107 | most cases, should be set proportional to the the number of instances 108 | of the component. For Presto clusters, where there will be one 109 | coordinator and some number of workers it is reasonable to have a 110 | failure threshold for workers more than that of coordinator. This is 111 | because a higher failure rate of worker nodes is to be expected if 112 | the cause of the failure is due to the underlying hardware. At the 113 | same time the threshold should be low enough to detect any Presto 114 | configuration issues causing the workers to fail rapidly and breach 115 | the threshold sooner. 116 | 117 | These failure thresholds are all heuristics. When initially configuring 118 | an application instance, low thresholds reduce the disruption caused by 119 | components which are frequently failing due to configuration problems. 120 | In a production application, large failure thresholds and/or shorter 121 | windows ensures that the application is resilient to transient failures 122 | of the underlying YARN cluster and hardware. 123 | 124 | Based on the placement policy there are two more failure related 125 | properties you can set. 126 | 127 | 1. The configuration property ``yarn.node.failure.threshold`` defines 128 | how "unreliable" a node must be before it is skipped for placement 129 | requests. This is only used for the default 130 | yarn.component.placement.policy where unreliable nodes are avoided. 131 | 2. ``yarn.placement.escalate.seconds`` is the timeout after which slider 132 | will escalate the request of pending containers to be launched on 133 | other nodes. For strict placement policy where the requested 134 | components are deployed on all nodes, this property is irrelevant. 135 | For other placement policies this property is relevant and the higher 136 | the cost of migrating a component instance from one host to another, 137 | the longer value of escalation timeout is recommended. Thus slider 138 | will wait longer before the component instance is escalated to be 139 | started on other nodes. During restart, for cases where redeploying 140 | the component instances on the same node as before is beneficial (due 141 | to locality of data or similar reasons), a higher escalation timeout 142 | is recommended. 143 | 144 | Take a look here: 145 | http://slider.incubator.apache.org/docs/configuration/resources.html#failurepolicy 146 | for more details on failure policy. 147 | 148 | .. _using-yarn-label: 149 | 150 | Using YARN label 151 | ~~~~~~~~~~~~~~~~ 152 | 153 | This is an optional feature and is not required to run Presto in YARN. 154 | To guarantee that a certain set of nodes are reserved for deploying 155 | Presto or to configure a particular node for a component type we can 156 | make use of YARN label expressions. 157 | 158 | 1. First assign the nodes/subset of nodes with appropriate labels. See 159 | http://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.3.0/bk_yarn_resource_mgt/content/ch_node_labels.html 160 | 2. Then set the components in ``resource.json`` with 161 | ``yarn.label.expression`` to have labels to be used when allocating 162 | containers for Presto. 163 | 3. Create the application using 164 | ``bin/slider create .. --queue ``. ``queuename`` will be 165 | the queue defined in step one for the appropriate label. 166 | 167 | If a label expression is specified for the slider-appmaster component 168 | then it also becomes the default label expression for all component. 
169 | Sample ``resources.json`` may look like: 170 | 171 | .. code-block:: none 172 | 173 | "COORDINATOR": { 174 | "yarn.role.priority": "1", 175 | "yarn.component.instances": "1", 176 | "yarn.component.placement.policy": "1", 177 | "yarn.label.expression":"coordinator" 178 | }, 179 | "WORKER": { 180 | "yarn.role.priority": "2", 181 | "yarn.component.instances": "2", 182 | "yarn.component.placement.policy": "1", 183 | "yarn.label.expression":"worker" 184 | } 185 | 186 | where coordinator and worker are the node labels created and configured 187 | with a scheduler queue in YARN 188 | 189 | -------------------------------------------------------------------------------- /presto-yarn-docs/src/main/sphinx/installation-yarn-automated.rst: -------------------------------------------------------------------------------- 1 | ============================================== 2 | Automated Installation on a YARN-Based Cluster 3 | ============================================== 4 | 5 | If you are planning to use HDP distribution, you can use Ambari and 6 | `Apache Slider`_. to perform automated Presto installation and 7 | integration on a YARN-based cluster. During installation, both the 8 | Apache Slider package and Presto are installed. 9 | 10 | 11 | .. contents:: Installing and Integrating Presto with YARN 12 | 13 | Deploying Presto on a YARN-Based Cluster 14 | ======================================== 15 | 16 | The installation procedures assume that you have a basic knowledge of Presto 17 | and the configuration files and properties it uses. 18 | 19 | .. note:: 20 | 21 | All example files referred to are from: 22 | https://github.com/prestodb/presto-yarn/ 23 | 24 | ----- 25 | 26 | Pre-Requisites 27 | -------------- 28 | 29 | - A cluster with HDP 2.2+ or CDH5.4+ installed 30 | - Apache Slider 0.80.0 (download from https://slider.incubator.apache.org/) 31 | - JDK 1.8 32 | - Zookeeper 33 | - openssl >= 1.0.1e-16 34 | - Ambari 2.1 35 | 36 | .. _Package: https:www.teradata.com/presto 37 | .. _Apache slider: https://slider.incubator.apache.org/ 38 | 39 | .. 40 | BELOW CONTENT IS GENERATED BY PANDOC FROM PRESTO-YARN README.md file, except 41 | - added pre-requisities section 42 | - inner links got fixed 43 | - links section updates 44 | - added note where example files are stored 45 | 46 | ----- 47 | 48 | Presto Installation Directory Structure 49 | --------------------------------------- 50 | 51 | When you use Ambari Slider View to install Presto on a YARN-based cluster, the 52 | Presto installation directory structure differs from the standard structure. 53 | 54 | For more information, see :doc:`Presto Installation Directory Structure for YARN-Based Clusters `. 55 | 56 | ----- 57 | 58 | Presto Installation Configuration Options 59 | ----------------------------------------- 60 | 61 | During installation, Ambari Slider View allows you to select configuration 62 | options required for running Presto. 63 | 64 | For more information, see :doc:`Presto Configuration Options for YARN-Based Clusters `. 65 | 66 | ----- 67 | 68 | Using Ambari Slider View to Install Presto on a YARN-Based Cluster 69 | ================================================================== 70 | 71 | Ambari supports deploying Slider application packages using Slider View and 72 | provides Slider integration. Slider View for Ambari allows you to deploy and 73 | manage Slider apps from Ambari Web. 74 | 75 | Use Ambari Slider View and the following steps to deploy Presto on YARN: 76 | 77 | 1. Install the Ambari server. 
See: 78 | http://docs.hortonworks.com/HDPDocuments/Ambari-2.1.0.0/bk_Installing_HDP_AMB/content/ch_Installing_Ambari.html. 79 | 80 | 2. Download the Apache Slider package. See: 81 | https://slider.incubator.apache.org/ 82 | 83 | 3. Copy the Presto app package 84 | ``presto-yarn-package--.zip`` to 85 | ``/var/lib/ambari-server/resources/apps/`` directory on your Ambari 86 | server node. 87 | 88 | 4. Restart ambari-server. 89 | 90 | 5. Log on to Apache Ambari, ``http://ambariserver_ip:8080`` 91 | #username-admin password-admin 92 | 93 | 6. Name your cluster, provide the configuration of the cluster, and 94 | follow the steps on the WebUI. 95 | 96 | 7. Customize/configure the services and install them. A minimum of HDFS, 97 | YARN, Zookeeper is required for Slider to work. You must also 98 | select Slider to be installed. 99 | 100 | 8. For the Slider client installed, you need to update its configuration if 101 | you are not using the default installation paths for Hadoop and Zookeeper. 102 | Thus ``slider-env.sh`` should point to your JAVA\_HOME and HADOOP\_CONF\_DIR 103 | 104 | :: 105 | 106 | export JAVA_HOME=/usr/lib/jvm/java 107 | export HADOOP_CONF_DIR=/etc/hadoop/conf 108 | 109 | 9. For zookeeper, if you are using a different installation directory from the 110 | default one at ``/usr/lib/zookeeper``: 111 | 112 | * Add a custom property to the ``slider-client`` section in 113 | Slider configuration with the key: 114 | ``zk.home`` and value: ``path_to_your_zookeeper``. 115 | * If using a different port from the default ``2181``, then add the key 116 | ``slider.zookeeper.quorum`` and value: ``master:5181``, where ``master`` 117 | is the node and ``5181`` is the port. 118 | 119 | 10. Once you have all the services up and running on the cluster, you can 120 | configure Slider in Ambari to create and manage your application by creating 121 | a "View". 122 | 123 | a. Go to ``admin`` (top right corner) -> ``Manage Ambari``. 124 | b. From the left pane, select ``Views``. 125 | c. Create a Slider View by populating all the necessary fields with a preferred instance name (for example, Slider). ``ambari.server.url`` can be of the format ``http://:8080/api/v1/clusters/``, where ```` is what you have named your Ambari cluster. 126 | d. Select the "Views" control icon in the upper right. 127 | e. Select the instance you created in the previous step (for example, "Slider"). 128 | f. Click ``Create App`` to create a new Presto YARN application. 129 | 130 | 11. Provide details of the Presto service. By default, the UI will be 131 | populated with the values you have in the ``*-default.json`` files in 132 | your ``presto-yarn-package-*.zip``. 133 | 134 | 12. The app name should be of lower case. For example: presto1. 135 | 136 | 13. You can set the configuration property fields required for your cluster. For example, 137 | if you want to set a connector for Presto, you can update the ``global.catalog`` property. See 138 | the following for an explanation of each configuration property. 139 | 140 | | :doc:`Presto Configuration Options for YARN-Based Clusters ` 141 | 142 | 14. Prepare HDFS for Slider. The user directory you create here should be 143 | for the same user you set in the ``global.app_user`` field. If the 144 | ``app_user`` is going to be ``yarn`` then do the following: 145 | 146 | :: 147 | 148 | su hdfs hdfs dfs -mkdir -p /user/yarn 149 | su hdfs hdfs dfs -chown yarn:yarn /user/yarn 150 | 151 | 15. 
Change the ``global.presto_server_port`` from 8080 to another unused port, for 152 | example, 8089, since Ambari by default uses 8080. 153 | 154 | 16. Pre-create the data directory in the UI (added in ``appConfig-default.json`` 155 | eg: ``/var/lib/presto/``) on all nodes. The directory must be owned by 156 | ``global.app_user``, otherwise Slider will fail to start Presto due to 157 | permission errors. 158 | 159 | :: 160 | 161 | mkdir -p /var/lib/presto/data 162 | chown -R yarn:hadoop /var/lib/presto/data 163 | 164 | 18. If you want to add any additional custom properties, use the Custom 165 | property section. Additional properties currently supported are: 166 | 167 | * ``site.global.plugin`` 168 | * ``site.global.additional_config_properties`` 169 | * ``site.global.additional_node_properties``. 170 | 171 | For the requirements and format of these properties, see: 172 | 173 | | :doc:`Presto Configuration Options for YARN-Based Clusters ` 174 | 175 | 19. Click Finish. This is the equivalent of ``package --install`` and ``create`` 176 | performed with the bin/slider script. If successfully deployed, you will see the YARN 177 | application started for Presto. You can do the following: 178 | 179 | * Click ``app launched`` and monitor the status from Slider view. 180 | * Click``Quick Links``, which should take you to the YARN WebUI. 181 | 182 | If your application is running successfully, it should always be available 183 | in the YARN resource manager as a "RUNNING" application. 184 | 185 | 20. If the job fails, check the job history’s logs and the logs on the node’s disk. 186 | See :doc:`Debugging and Logging for YARN-Based Clusters `. 187 | 188 | 21. You can manage the application lifecycle (for example: start, stop, flex, and 189 | destroy) from the View UI. 190 | 191 | ----- 192 | 193 | Additional Configuration Options 194 | ================================ 195 | 196 | After you install Presto and Slider, you can reconfigure Presto or perform 197 | additional configuration. 198 | 199 | Reconfiguring Presto in Slider View 200 | ----------------------------------- 201 | 202 | After you launch Presto you can update its configuration. For example, you 203 | can add a new connector. 204 | 205 | 1. On the Slider View instance screen, go to ``Actions.`` 206 | 2. Stop the running Presto application. 207 | 3. Click `Destroy`` to remove the existing Presto instance running in Slider. 208 | 4. Click the ``Create App`` button to re-create a new Presto instance in Slider 209 | and make configuration updates. 210 | 211 | Advanced Configuration Options 212 | ------------------------------ 213 | 214 | The following advanced configuration options are available: 215 | 216 | + Configuring memory, CPU, and YARN CGroups 217 | + Failure policy 218 | + YARN label 219 | 220 | For more information, see :doc:`Advanced Configuration Options for YARN-Based Clusters ` 221 | 222 | ----- 223 | 224 | Debugging and Logging 225 | ===================== 226 | 227 | For more information, see :doc:`Debugging and Logging for YARN-Based Clusters `. 
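As a quick reference, the Slider View actions above correspond roughly to the following
``bin/slider`` commands (a sketch using the example instance name ``presto1``; see the manual
installation guide for the full descriptions):

::

    bin/slider package --install --name PRESTO --package presto-yarn-package-*.zip
    bin/slider create presto1 --template appConfig.json --resources resources.json
    bin/slider status presto1 --out status_file
    bin/slider registry --name presto1 --getexp presto
    bin/slider flex presto1 --component WORKER 2
    bin/slider stop presto1
    bin/slider destroy presto1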
228 | 229 | ----- 230 | 231 | Links 232 | ===== 233 | 234 | - http://slider.incubator.apache.org/docs/getting\_started.html 235 | - http://docs.hortonworks.com/HDPDocuments/Ambari-2.0.1.0/bk\_Installing\_HDP\_AMB/content/ch\_Installing\_Ambari.html 236 | -------------------------------------------------------------------------------- /presto-yarn-test/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4.0.0 4 | 20 | 21 | com.teradata.presto-yarn 22 | presto-yarn 23 | 1.6-SNAPSHOT 24 | ../pom.xml 25 | 26 | 27 | presto-yarn-test 28 | presto-yarn-test 29 | Presto Yarn Test 30 | 31 | 32 | ${project.build.directory}/package 33 | presto-server-${presto.version} 34 | com.teradata.presto.yarn.test.Main 35 | 36 | 37 | 38 | 39 | 40 | ${project.basedir}/src/main/resources 41 | true 42 | ${project.build.directory}/classes 43 | 44 | 45 | 46 | 47 | org.apache.maven.plugins 48 | maven-dependency-plugin 49 | ${maven-dependency-plugin.version} 50 | 51 | 52 | unpack slider 53 | process-resources 54 | 55 | copy 56 | 57 | 58 | 59 | 60 | org.apache.slider 61 | slider-assembly 62 | ${slider.version} 63 | all 64 | zip 65 | 66 | 67 | ${package.dir} 68 | 69 | 70 | 71 | unpack presto app package 72 | process-resources 73 | 74 | unpack 75 | 76 | 77 | 78 | 79 | ${project.groupId} 80 | presto-yarn-package 81 | ${project.version} 82 | zip 83 | 84 | 85 | ${package.dir}/${app.package.name} 86 | 87 | 88 | 89 | copy presto-ml to plugins 90 | process-resources 91 | 92 | copy-dependencies 93 | 94 | 95 | presto-ml 96 | jar 97 | ${package.dir}/${app.package.name}/package/plugins 98 | 99 | 100 | 101 | unpack presto server tarball 102 | process-resources 103 | 104 | unpack 105 | 106 | 107 | 108 | 109 | com.facebook.presto 110 | presto-server 111 | ${presto.version} 112 | tar.gz 113 | 114 | 115 | ${package.dir} 116 | 117 | 118 | 119 | unpack configuration files 120 | process-resources 121 | 122 | unpack 123 | 124 | 125 | 126 | 127 | ${project.groupId} 128 | presto-yarn-package 129 | ${project.version} 130 | test-jar 131 | 132 | 133 | ${package.dir} 134 | 135 | 136 | 137 | 138 | 139 | 140 | org.apache.maven.plugins 141 | maven-assembly-plugin 142 | ${maven-assembly-plugin.version} 143 | 144 | 145 | build presto server tar excluding ml 146 | process-resources 147 | 148 | single 149 | 150 | 151 | ${project.basedir}/src/main/assembly/presto-server.xml 152 | false 153 | ${presto.server} 154 | ${package.dir}/${app.package.name}/package/files 155 | 156 | 157 | 158 | build presto app package for tests 159 | process-resources 160 | 161 | single 162 | 163 | 164 | ${project.basedir}/src/main/assembly/presto-app.xml 165 | false 166 | ${app.package.name} 167 | ${package.dir} 168 | 169 | 170 | 171 | 172 | 173 | 174 | org.apache.maven.plugins 175 | maven-shade-plugin 176 | 177 | 178 | package 179 | 180 | shade 181 | 182 | 183 | true 184 | executable 185 | 186 | 187 | 188 | ${main-class} 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | com.teradata.tempto 202 | tempto-core 203 | ${tempto.version} 204 | 205 | 206 | com.teradata.tempto 207 | tempto-runner 208 | ${tempto.version} 209 | 210 | 211 | com.facebook.presto 212 | presto-jdbc 213 | ${presto.version} 214 | 215 | 216 | com.facebook.presto.hive 217 | hive-apache-jdbc 218 | ${hive-jdbc.version} 219 | 220 | 221 | org.slf4j 222 | slf4j-log4j12 223 | ${slf4j.version} 224 | 225 | 226 | com.facebook.presto 227 | presto-ml 228 | ${presto.version} 229 | 230 | 231 | 232 | 233 | 
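A minimal sketch of how this test module is typically built from the repository root with the
bundled Maven wrapper; the exact goals, flags, and the ``-executable`` jar name below are
assumptions based on the shade-plugin configuration above, not commands documented by the project:

    # build presto-yarn-test and the modules it depends on, skipping tests
    ./mvnw -pl presto-yarn-test -am clean package -DskipTests

    # the shade plugin attaches an "executable" jar whose Main-Class is
    # com.teradata.presto.yarn.test.Main (the tempto-based test runner)
    java -jar presto-yarn-test/target/presto-yarn-test-*-executable.jar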
-------------------------------------------------------------------------------- /presto-yarn-test/src/test/resources/status_file: -------------------------------------------------------------------------------- 1 | { 2 | "version" : "1.0", 3 | "name" : "presto_cluster", 4 | "type" : "agent", 5 | "state" : 3, 6 | "createTime" : 1458564035136, 7 | "updateTime" : 1458564068143, 8 | "originConfigurationPath" : "hdfs://master/user/yarn/.slider/cluster/presto_cluster/snapshot", 9 | "generatedConfigurationPath" : "hdfs://master/user/yarn/.slider/cluster/presto_cluster/generated", 10 | "dataPath" : "hdfs://master/user/yarn/.slider/cluster/presto_cluster/database", 11 | "options" : { 12 | "internal.container.failure.shortlife" : "60000", 13 | "internal.container.failure.threshold" : "5", 14 | "internal.generated.conf.path" : "hdfs://master/user/yarn/.slider/cluster/presto_cluster/generated", 15 | "env.MALLOC_ARENA_MAX" : "4", 16 | "site.global.plugin" : "{'ml': ['presto-ml-0.130.jar']}", 17 | "internal.am.tmp.dir" : "hdfs://master/user/yarn/.slider/cluster/presto_cluster/tmp/appmaster", 18 | "internal.snapshot.conf.path" : "hdfs://master/user/yarn/.slider/cluster/presto_cluster/snapshot", 19 | "site.global.config_dir" : "/var/lib/presto/etc", 20 | "zookeeper.hosts" : "master", 21 | "site.global.coordinator_host" : "${COORDINATOR_HOST}", 22 | "internal.data.dir.path" : "hdfs://master/user/yarn/.slider/cluster/presto_cluster/database", 23 | "internal.addons.dir.path" : "hdfs://master/user/yarn/.slider/cluster/presto_cluster/tmp/addons", 24 | "site.global.user_group" : "hadoop", 25 | "site.global.data_dir" : "/var/lib/presto/data", 26 | "site.global.presto_query_max_memory" : "5GB", 27 | "site.global.presto_query_max_memory_per_node" : "600MB", 28 | "internal.application.home" : null, 29 | "internal.provider.name" : "agent", 30 | "site.global.additional_node_properties" : "['plugin.dir=${AGENT_WORK_ROOT}/app/install/presto-server-0.130/plugin']", 31 | "yarn.vcores" : "1", 32 | "site.global.security_enabled" : "false", 33 | "site.global.catalog" : "{'hive': ['connector.name=hive-hadoop2', 'hive.metastore.uri=thrift://${NN_HOST}:9083'], 'tpch': ['connector.name=tpch'], 'jmx': ['connector.name=jmx']}", 34 | "java_home" : "/usr/lib/jvm/java", 35 | "zookeeper.quorum" : "master:5181", 36 | "slider.cluster.directory.permissions" : "0770", 37 | "internal.tmp.dir" : "hdfs://master/user/yarn/.slider/cluster/presto_cluster/tmp", 38 | "slider.data.directory.permissions" : "0770", 39 | "application.def" : ".slider/package/PRESTO/presto-yarn-package-1.1-SNAPSHOT-0.130.zip", 40 | "site.global.app_name" : "presto-server-0.130", 41 | "zookeeper.path" : "/services/slider/users/yarn/presto_cluster", 42 | "site.global.app_user" : "yarn", 43 | "site.global.app_pkg_plugin" : "${AGENT_WORK_ROOT}/app/definition/package/plugins/", 44 | "site.global.jvm_args" : "['-server', '-Xmx1024M', '-XX:+UseG1GC', '-XX:G1HeapRegionSize=32M', '-XX:+UseGCOverheadLimit', '-XX:+ExplicitGCInvokesConcurrent', '-XX:+HeapDumpOnOutOfMemoryError', '-XX:OnOutOfMemoryError=kill -9 %p', '-DHADOOP_USER_NAME=hdfs', '-Duser.timezone=UTC']", 45 | "internal.application.image.path" : null, 46 | "internal.appdef.dir.path" : "hdfs://master/user/yarn/.slider/cluster/presto_cluster/tmp/appdef", 47 | "site.fs.defaultFS" : "hdfs://master/", 48 | "site.global.presto_server_port" : "8080", 49 | "site.global.singlenode" : "true", 50 | "site.fs.default.name" : "hdfs://master/", 51 | "application.name" : "presto_cluster" 52 | }, 53 | "info" : { 54 | "yarn.vcores" : 
"2", 55 | "info.am.app.id" : "application_1458560616982_0004", 56 | "yarn.memory" : "8192", 57 | "info.am.web.url" : "http://kogut-vsphere-default-slave1:1025/", 58 | "info.am.rpc.port" : "1024", 59 | "info.am.hostname" : "kogut-vsphere-default-slave1", 60 | "info.am.web.port" : "1025", 61 | "info.am.container.id" : "container_1458560616982_0004_01_000001", 62 | "info.am.attempt.id" : "appattempt_1458560616982_0004_000001", 63 | "status.application.build.info" : "Slider Core-0.80.0-incubating Built against commit# d7e3449fa6 on Java 1.7.0_60 by gsaha", 64 | "status.hadoop.build.info" : "2.6.0", 65 | "status.hadoop.deployed.info" : "branch-2.6.0 @18e43357c8f927c0695f1e9522859d6a", 66 | "live.time" : "21 Mar 2016 12:40:35 GMT", 67 | "live.time.millis" : "1458564035136", 68 | "create.time" : "21 Mar 2016 12:40:35 GMT", 69 | "create.time.millis" : "1458564035136", 70 | "containers.at.am-restart" : "0", 71 | "status.time" : "21 Mar 2016 12:42:10 GMT", 72 | "status.time.millis" : "1458564130023", 73 | "info.am.agent.status.url" : "https://kogut-vsphere-default-slave1:50810/", 74 | "info.am.agent.ops.url" : "https://kogut-vsphere-default-slave1:34663/", 75 | "info.am.agent.ops.port" : "34663", 76 | "info.am.agent.status.port" : "50810" 77 | }, 78 | "statistics" : { 79 | "COORDINATOR" : { 80 | "containers.failed.preempted" : 0, 81 | "containers.completed" : 0, 82 | "containers.failed" : 0, 83 | "containers.requested" : 1, 84 | "containers.desired" : 1, 85 | "containers.start.failed" : 0, 86 | "containers.start.started" : 0, 87 | "containers.failed.node" : 0, 88 | "containers.active.requests" : 0, 89 | "containers.live" : 1, 90 | "containers.failed.recently" : 0 91 | }, 92 | "slider-appmaster" : { 93 | "containers.completed" : 0, 94 | "containers.failed" : 0, 95 | "containers.unknown.completed" : 0, 96 | "containers.surplus" : 0, 97 | "containers.start.failed" : 0, 98 | "containers.start.started" : 4, 99 | "containers.live" : 5 100 | }, 101 | "WORKER" : { 102 | "containers.failed.preempted" : 0, 103 | "containers.completed" : 0, 104 | "containers.failed" : 0, 105 | "containers.requested" : 3, 106 | "containers.desired" : 3, 107 | "containers.start.failed" : 0, 108 | "containers.start.started" : 0, 109 | "containers.failed.node" : 0, 110 | "containers.active.requests" : 0, 111 | "containers.live" : 3, 112 | "containers.failed.recently" : 0 113 | } 114 | }, 115 | "instances" : { 116 | "COORDINATOR" : [ "container_1458560616982_0004_01_000002" ], 117 | "slider-appmaster" : [ "container_1458560616982_0004_01_000001" ], 118 | "WORKER" : [ "container_1458560616982_0004_01_000004", "container_1458560616982_0004_01_000003", "container_1458560616982_0004_01_000005" ] 119 | }, 120 | "roles" : { 121 | "COORDINATOR" : { 122 | "yarn.vcores" : "1", 123 | "yarn.label.expression" : "coordinator", 124 | "yarn.role.priority" : "1", 125 | "yarn.memory" : "1500", 126 | "role.releasing.instances" : "0", 127 | "role.failed.node.instances" : "0", 128 | "role.requested.instances" : "0", 129 | "role.actual.instances" : "1", 130 | "role.failed.recently.instances" : "0", 131 | "role.failed.starting.instances" : "0", 132 | "yarn.component.instances" : "1", 133 | "role.failed.preempted.instances" : "0", 134 | "role.failed.instances" : "0", 135 | "yarn.component.placement.policy" : "1" 136 | }, 137 | "slider-appmaster" : { 138 | "yarn.vcores" : "1", 139 | "yarn.component.instances" : "1", 140 | "yarn.memory" : "1024", 141 | "role.releasing.instances" : "0", 142 | "role.failed.preempted.instances" : "0", 143 | 
"role.failed.node.instances" : "0", 144 | "role.requested.instances" : "0", 145 | "role.failed.instances" : "0", 146 | "role.actual.instances" : "1", 147 | "role.failed.recently.instances" : "0", 148 | "role.failed.starting.instances" : "0" 149 | }, 150 | "WORKER" : { 151 | "yarn.vcores" : "1", 152 | "yarn.label.expression" : "worker", 153 | "yarn.role.priority" : "2", 154 | "yarn.memory" : "1500", 155 | "role.releasing.instances" : "0", 156 | "role.failed.node.instances" : "0", 157 | "role.requested.instances" : "0", 158 | "role.actual.instances" : "3", 159 | "role.failed.recently.instances" : "0", 160 | "role.failed.starting.instances" : "0", 161 | "yarn.component.instances" : "3", 162 | "role.failed.preempted.instances" : "0", 163 | "role.failed.instances" : "0", 164 | "yarn.component.placement.policy" : "1" 165 | } 166 | }, 167 | "clientProperties" : { }, 168 | "status" : { 169 | "live" : { 170 | "COORDINATOR" : { 171 | "container_1458560616982_0004_01_000002" : { 172 | "name" : "container_1458560616982_0004_01_000002", 173 | "role" : "COORDINATOR", 174 | "roleId" : 1, 175 | "createTime" : 1458564073147, 176 | "startTime" : 1458564078740, 177 | "released" : false, 178 | "host" : "kogut-vsphere-default-master", 179 | "hostUrl" : "http://kogut-vsphere-default-master:8042", 180 | "state" : 3, 181 | "exitCode" : 0, 182 | "command" : "python ./infra/agent/slider-agent/agent/main.py --label container_1458560616982_0004_01_000002___COORDINATOR --zk-quorum master:5181 --zk-reg-path /registry/users/yarn/services/org-apache-slider/presto_cluster > /slider-agent.out 2>&1 ; ", 183 | "environment" : [ "LANGUAGE=\"en_US.UTF-8\"", "HADOOP_USER_NAME=\"yarn\"", "PYTHONPATH=\"./infra/agent/slider-agent/\"", "AGENT_LOG_ROOT=\"\"", "SLIDER_PASSPHRASE=\"06CMt5y5Z2fuDsAdTmpuBLexUk7124pkIhvyGCvRjQVBy3xFDQ\"", "LC_ALL=\"en_US.UTF-8\"", "AGENT_WORK_ROOT=\"$PWD\"", "LANG=\"en_US.UTF-8\"" ] 184 | } 185 | }, 186 | "slider-appmaster" : { 187 | "container_1458560616982_0004_01_000001" : { 188 | "name" : "container_1458560616982_0004_01_000001", 189 | "role" : "slider-appmaster", 190 | "roleId" : 0, 191 | "createTime" : 1458564035203, 192 | "startTime" : 1458564035203, 193 | "released" : false, 194 | "host" : "kogut-vsphere-default-slave1", 195 | "hostUrl" : "http://kogut-vsphere-default-slave1:1025", 196 | "state" : 3, 197 | "exitCode" : 0 198 | } 199 | }, 200 | "WORKER" : { 201 | "container_1458560616982_0004_01_000003" : { 202 | "name" : "container_1458560616982_0004_01_000003", 203 | "role" : "WORKER", 204 | "roleId" : 2, 205 | "createTime" : 1458564073167, 206 | "startTime" : 1458564077965, 207 | "released" : false, 208 | "host" : "kogut-vsphere-default-slave2", 209 | "hostUrl" : "http://kogut-vsphere-default-slave2:8042", 210 | "state" : 3, 211 | "exitCode" : 0, 212 | "command" : "python ./infra/agent/slider-agent/agent/main.py --label container_1458560616982_0004_01_000003___WORKER --zk-quorum master:5181 --zk-reg-path /registry/users/yarn/services/org-apache-slider/presto_cluster > /slider-agent.out 2>&1 ; ", 213 | "environment" : [ "LANGUAGE=\"en_US.UTF-8\"", "HADOOP_USER_NAME=\"yarn\"", "PYTHONPATH=\"./infra/agent/slider-agent/\"", "AGENT_LOG_ROOT=\"\"", "SLIDER_PASSPHRASE=\"06CMt5y5Z2fuDsAdTmpuBLexUk7124pkIhvyGCvRjQVBy3xFDQ\"", "LC_ALL=\"en_US.UTF-8\"", "AGENT_WORK_ROOT=\"$PWD\"", "LANG=\"en_US.UTF-8\"" ] 214 | }, 215 | "container_1458560616982_0004_01_000004" : { 216 | "name" : "container_1458560616982_0004_01_000004", 217 | "role" : "WORKER", 218 | "roleId" : 2, 219 | "createTime" : 1458564074091, 220 | 
"startTime" : 1458564078396, 221 | "released" : false, 222 | "host" : "kogut-vsphere-default-slave1", 223 | "hostUrl" : "http://kogut-vsphere-default-slave1:8042", 224 | "state" : 3, 225 | "exitCode" : 0, 226 | "command" : "python ./infra/agent/slider-agent/agent/main.py --label container_1458560616982_0004_01_000004___WORKER --zk-quorum master:5181 --zk-reg-path /registry/users/yarn/services/org-apache-slider/presto_cluster > /slider-agent.out 2>&1 ; ", 227 | "environment" : [ "LANGUAGE=\"en_US.UTF-8\"", "HADOOP_USER_NAME=\"yarn\"", "PYTHONPATH=\"./infra/agent/slider-agent/\"", "AGENT_LOG_ROOT=\"\"", "SLIDER_PASSPHRASE=\"06CMt5y5Z2fuDsAdTmpuBLexUk7124pkIhvyGCvRjQVBy3xFDQ\"", "LC_ALL=\"en_US.UTF-8\"", "AGENT_WORK_ROOT=\"$PWD\"", "LANG=\"en_US.UTF-8\"" ] 228 | }, 229 | "container_1458560616982_0004_01_000005" : { 230 | "name" : "container_1458560616982_0004_01_000005", 231 | "role" : "WORKER", 232 | "roleId" : 2, 233 | "createTime" : 1458564073208, 234 | "startTime" : 1458564074461, 235 | "released" : false, 236 | "host" : "kogut-vsphere-default-slave3", 237 | "hostUrl" : "http://kogut-vsphere-default-slave3:8042", 238 | "state" : 3, 239 | "exitCode" : 0, 240 | "command" : "python ./infra/agent/slider-agent/agent/main.py --label container_1458560616982_0004_01_000005___WORKER --zk-quorum master:5181 --zk-reg-path /registry/users/yarn/services/org-apache-slider/presto_cluster > /slider-agent.out 2>&1 ; ", 241 | "environment" : [ "LANGUAGE=\"en_US.UTF-8\"", "HADOOP_USER_NAME=\"yarn\"", "PYTHONPATH=\"./infra/agent/slider-agent/\"", "AGENT_LOG_ROOT=\"\"", "SLIDER_PASSPHRASE=\"06CMt5y5Z2fuDsAdTmpuBLexUk7124pkIhvyGCvRjQVBy3xFDQ\"", "LC_ALL=\"en_US.UTF-8\"", "AGENT_WORK_ROOT=\"$PWD\"", "LANG=\"en_US.UTF-8\"" ] 242 | } 243 | } 244 | } 245 | }, 246 | "liveness" : { 247 | "allRequestsSatisfied" : true, 248 | "requestsOutstanding" : 0 249 | } 250 | } -------------------------------------------------------------------------------- /presto-yarn-docs/src/main/sphinx/installation-yarn-configuration-options.rst: -------------------------------------------------------------------------------- 1 | ==================================================== 2 | Presto Configuration Options for YARN-Based Clusters 3 | ==================================================== 4 | 5 | If you are using Ambari to install the Presto App package you can update 6 | the configuration properties from the Ambari Slider View UI. If you are 7 | using Slider to install Presto on YARN manually, you must edit the 8 | configuration files manually. 9 | 10 | The ``appConfig.json`` and ``resources-[singlenode|mutlinode].json`` files 11 | are the two major configuration files you need to configure before you 12 | can get Presto running on YARN. Sample configuration options files for the 13 | Presto App package are available in the repository in the following directory: 14 | 15 | ``presto-yarn-package/src/main/resources`` 16 | 17 | The "default" values listed for the sections 18 | :ref:`appconfig-json-label` and :ref:`resources-json-label` are from 19 | ``presto-yarn-package/src/main/resources/appConfig.json`` and 20 | ``presto-yarn-package/src/main/resources/resources-multinode.json`` 21 | files respectively. These default values will be auto-populated on the 22 | Slider View UI for installation using Ambari Slider View. But you 23 | can modify the properties on the UI as per your requirements. 
33 | 34 | Follow the steps below and configure the presto-yarn configuration files 35 | to match your cluster requirements. Optional properties are marked (optional). 36 | Please do not change any variables other than the ones listed below. 37 | 38 | .. _appconfig-json-label: 39 | 40 | appConfig.json 41 | ~~~~~~~~~~~~~~ 42 | 43 | 1. ``site.global.app_user`` (default - ``yarn``): This is the user that 44 | launches the YARN application for Presto, so all Slider 45 | commands (using the ``bin/slider`` script) will be run as this user. Make 46 | sure that an HDFS home directory exists for the 47 | ``app_user``. E.g., for user ``yarn`` create ``/user/yarn`` 48 | owned by the ``yarn`` user: 49 | 50 | :: 51 | 52 | hdfs dfs -mkdir -p /user/yarn 53 | hdfs dfs -chown yarn:yarn /user/yarn 54 | 55 | ``Note``: Operations involving the Hive connector in Presto, especially 56 | INSERT, ALTER TABLE, etc., may require that the user running Presto has 57 | access to HDFS directories such as the Hive warehouse directories. Make sure 58 | that the ``app_user`` you set has appropriate access permissions to 59 | those HDFS directories. For example, ``/apps/hive/warehouse`` is usually 60 | where the Presto user needs access for various DML operations involving the 61 | Hive connector, and it is owned by ``hdfs`` in most cases. In that case, one 62 | way to fix the permission issue is to set ``site.global.app_user`` to 63 | user ``hdfs`` and also create the ``/user/hdfs`` directory in HDFS if it is not 64 | already there (as above). You will also need to run any Slider 65 | scripts (``bin/slider``) as user ``hdfs`` in this case. 66 | 67 | 2. ``site.global.user_group`` (default - ``hadoop``): The group owning 68 | the application. 69 | 70 | 3. ``site.global.data_dir`` (default - ``/var/lib/presto/data``): This is 71 | the data directory used by Presto. The configured directory must 72 | be pre-created on all nodes and must be owned by user ``yarn``, 73 | otherwise Slider will fail to start Presto with permission errors. 74 | 75 | :: 76 | 77 | mkdir -p /var/lib/presto/data 78 | chown -R yarn:hadoop /var/lib/presto/data 79 | 80 | 81 | 4. ``site.global.config_dir`` (default - ``/var/lib/presto/etc``): The 82 | configuration directory on the cluster where the Presto configuration files 83 | ``node.properties``, ``jvm.config``, ``config.properties`` and the connector 84 | configuration files are deployed. These files get their configuration 85 | values from the templates 86 | ``presto-yarn-package/package/templates/*.j2`` and the relevant 87 | ``appConfig.json`` parameters. 88 | 89 | 5. ``site.global.singlenode`` (default - ``true``): If set to ``true``, the 90 | node will act as both coordinator and worker (single-node mode). For 91 | a multi-node setup, this should be set to ``false``. 92 | 93 | 6. ``site.global.presto_query_max_memory`` (default - ``50GB``): This 94 | will be used as ``query.max-memory`` in Presto's config.properties 95 | file. 96 | 97 | 7. ``site.global.presto_query_max_memory_per_node`` (default - ``1GB``): 98 | This will be used as ``query.max-memory-per-node`` in Presto's 99 | config.properties file. 100 | 101 | 8. ``site.global.presto_server_port`` (default - ``8080``): Presto 102 | server's HTTP port. 103 |
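For reference, properties 5 through 8 above all go into the ``global`` section of ``appConfig.json``. A trimmed fragment for a multi-node cluster, using the default values described above (this is not a complete ``appConfig.json``; the sample files contain the full structure), could look like:

::

    "global": {
        "site.global.singlenode": "false",
        "site.global.presto_query_max_memory": "50GB",
        "site.global.presto_query_max_memory_per_node": "1GB",
        "site.global.presto_server_port": "8080"
    }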
104 | 9. ``site.global.catalog`` (optional) (default - configures ``tpch`` 105 | connector): This property is used to configure connectors for Presto. 106 | Its value should match the properties you would normally add 107 | to a connector's ``.properties`` file in a non-YARN based Presto installation. 108 | 109 | It should be of the format (note the single quotes around 110 | each value) - {'connector1' : ['key1=value1', 'key2=value2'..], 111 | 'connector2' : ['key1=value1', 'key2=value2'..]..}. This will create 112 | the files connector1.properties and connector2.properties for Presto with 113 | entries key1=value1 etc. 114 | For example, to have hive.properties for CDH Hive: 115 | 116 | :: 117 | 118 | "site.global.catalog": "{'hive': ['connector.name=hive-hadoop2', 'hive.metastore.uri=thrift://${NN_HOST}:9083'], 'tpch': ['connector.name=tpch']}" 119 | 120 | ``Note``: The ``NN_HOST`` used in ``hive.metastore.uri`` is a variable 121 | for your HDFS NameNode, and it expects that your Hive metastore is up 122 | and running on your HDFS NameNode host. You do not have to replace it 123 | with your actual NameNode hostname; the variable will be substituted 124 | with your NameNode hostname at runtime. If your Hive metastore is 125 | running elsewhere, make sure you update ``NN_HOST`` with the appropriate 126 | hostname. 127 | 128 | 10. ``site.global.jvm_args`` (default - as in example below): This 129 | configures Presto's ``jvm.config`` file, and the default heap size is 130 | ``1GB``. Since Presto needs the ``jvm.config`` format to be a list of 131 | options, one per line, this property must be a string representation 132 | of a list of strings. Each entry of this list will be a new line in 133 | your jvm.config. For example, the configuration could look like: 134 | 135 | :: 136 | 137 | "site.global.jvm_args": "['-server', '-Xmx1024M', '-XX:+UseG1GC', '-XX:G1HeapRegionSize=32M', '-XX:+UseGCOverheadLimit', '-XX:+ExplicitGCInvokesConcurrent', '-XX:+HeapDumpOnOutOfMemoryError', '-XX:OnOutOfMemoryError=kill -9 %p']", 138 | 139 | 11. ``site.global.log_properties``: This allows you to configure 140 | logging levels in Presto. The default value is 141 | "['com.facebook.presto=INFO']", which is equivalent to the default 142 | logging level INFO. Since Presto needs the ``log.properties`` 143 | file to be a list of options, one per line, this property must be 144 | a string representation of a list of strings. Each entry of this 145 | list will be a new line in your ``log.properties``. For example, the 146 | configuration below will change the logging level for Hive to WARN 147 | while keeping the logging level of the Presto server at INFO. 148 | 149 | :: 150 | 151 | "site.global.log_properties": "['com.facebook.presto.hive=WARN', 152 | 'com.facebook.presto.server=INFO']" 153 |
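Because each list entry becomes its own line, the setting above results in a ``log.properties`` file containing:

::

    com.facebook.presto.hive=WARN
    com.facebook.presto.server=INFO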
154 | 12. ``site.global.additional_node_properties`` and 155 | ``site.global.additional_config_properties`` (optional) (default - 156 | None): Presto launched via Slider will use ``config.properties`` 157 | and ``node.properties`` created from the templates 158 | ``presto-yarn-package/package/templates/config.properties*.j2`` 159 | and ``presto-yarn-package/package/templates/node.properties.j2`` 160 | respectively. If you want to add any additional properties to 161 | these configuration files, add 162 | ``site.global.additional_config_properties`` and 163 | ``site.global.additional_node_properties`` to your 164 | ``appConfig.json``. The value of each must be a string 165 | representation of an array of ``key=value`` entries that should go 166 | into the corresponding ``.properties`` file. E.g.: 167 | 168 | :: 169 | 170 | "site.global.additional_config_properties": 171 | "['task.max-worker-threads=50', 'distributed-joins-enabled=true']" 172 | 173 | 174 | 13. ``site.global.plugin`` (optional) (default - None): This allows you 175 | to copy additional jars to the 176 | ``presto-server-/plugin/`` directory in addition 177 | to what is already available there. It should be of the format 178 | {'connector1' : ['jar1', 'jar2'..], 'connector2' : ['jar3', 179 | 'jar4'..]..}. This will copy jar1 and jar2 to the Presto plugin 180 | directory under plugin/connector1, and jar3 and jar4 under plugin/connector2. 181 | Make sure you have the plugin jars you want to add to 182 | Presto available at 183 | ``presto-yarn-package/src/main/slider/package/plugins/`` prior to 184 | building the presto-yarn app package, so that the built app package 185 | ``presto-yarn-package--.zip`` will have the 186 | jars under its ``package/plugins`` directory. 187 | 188 | :: 189 | 190 | "site.global.plugin": "{'ml': ['presto-ml-${presto.version}.jar']}", 191 | 192 | 14. ``site.global.app_name`` (optional) (default - ``presto-server-0.130``): 193 | This value should be the name of the tar.gz file contained within 194 | the zip file produced by presto-yarn (in package/files/ within the 195 | zip). If you use a custom Presto server distribution or anything 196 | other than the default presto-yarn package settings, please be 197 | sure to modify this. 198 | 199 | 15. ``application.def``: For Slider users, when the command to install the 200 | Presto package is run, the logs will explicitly tell the user 201 | which value to use for this parameter. Changing this is only 202 | required if you are using a custom-built Presto package. 203 | 204 | 16. ``java_home`` (default - ``/usr/lib/jvm/java``): Presto requires Java 205 | 1.8, so either make JDK 8 the default Java or point ``java_home`` here to a JDK 8 installation. 206 | 207 | 17. Variables in ``appConfig.json`` like ``${COORDINATOR_HOST}``, 208 | ``${AGENT_WORK_ROOT}`` etc. do not need any substitution and will be 209 | appropriately configured at runtime. 210 | 211 | 212 | 18. ``site.global.event_listener_properties``: This allows you to configure a Presto `Event Listener `_. By default no listener is configured. Since Presto needs the ``event-listener.properties`` file to be a list of options, one per line, this property must be a string representation of a list of strings. Each entry of this list will be a new line in your ``event-listener.properties``. For example, the 213 | configuration below will add the event listener named ``custom-event-listener`` with two custom properties, ``custom-property1`` and ``custom-property2``. 214 | 215 | :: 216 | 217 | "site.global.event_listener_properties": "['event-listener.name=custom-event-listener', 218 | 'custom-property1=custom-value1','custom-property2=custom-value2']" 219 | 220 | 221 | .. _resources-json-label: 222 | 223 | resources.json 224 | ~~~~~~~~~~~~~~ 225 | 226 | The configuration here can be set either globally (applying to both COORDINATOR and 227 | WORKER) or per component, as sketched below. See: 228 | 229 | :doc:`Advanced Configuration Options `. 230 | 231 |
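For orientation, global settings go in the ``global`` section of ``resources.json`` and per-component settings under ``components``. A trimmed sketch using the default values described below (not a complete file; it omits ``schema``, ``metadata`` and the ``slider-appmaster`` component, so refer to ``resources-multinode.json`` for the full version) looks like:

::

    "global": {
        "yarn.vcores": "1"
    },
    "components": {
        "COORDINATOR": {
            "yarn.component.instances": "1",
            "yarn.memory": "1500"
        },
        "WORKER": {
            "yarn.component.instances": "3",
            "yarn.memory": "1500"
        }
    }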
232 | 1. ``yarn.vcores`` (default - ``1``): By default this is set globally. 233 | 234 | 2. ``yarn.component.instances`` (default - ``1`` for COORDINATOR and 235 | ``3`` for WORKER): The multinode 236 | ``presto-yarn-package/src/main/resources/resources-multinode.json`` 237 | sample file is configured for a 4-node cluster where there will 238 | be 1 coordinator and 3 workers with a strict placement policy, meaning 239 | there will be one component instance running on every node 240 | irrespective of failure history. If there is an insufficient number of 241 | NodeManager nodes in your cluster to accommodate the number of workers 242 | requested, the application launch will fail. The number of workers 243 | could be ``number of nodemanagers in your cluster - 1``, with 1 node 244 | reserved for the coordinator, if you want Presto to be on all YARN 245 | nodes. 246 | If you want to deploy Presto on a single node 247 | (``site.global.singlenode`` set to ``true``), set ``yarn.component.instances`` to 1 for the 248 | COORDINATOR and simply omit the WORKER component section (refer to 249 | ``presto-yarn-package/src/main/resources/resources-singlenode.json``). 250 | Alternatively, you can set ``yarn.component.instances`` to 0 for WORKER in 251 | this case. 252 | 253 | 3. ``yarn.memory`` (default - ``1500MB``): The heap size defined via ``-Xmx`` 254 | in ``site.global.jvm_args`` in ``appConfig.json`` is used by the 255 | Presto JVM itself. The value of ``yarn.memory`` MUST 256 | be bigger than the heap size allocated to that JVM; Slider suggests 257 | that using at least 50% more appears to work, though some 258 | experimentation will be needed. 259 | 260 | 261 | 4. ``yarn.label.expression`` (optional) (default - ``coordinator`` for 262 | COORDINATOR and ``worker`` for WORKER): The YARN node label expression to use for each component. 263 | 264 | Now you are ready to deploy Presto on YARN either manually or by using 265 | Ambari. 266 | --------------------------------------------------------------------------------