├── .DS_Store ├── package ├── .DS_Store ├── templates │ ├── env.rc.j2 │ └── init.sh.j2 └── scripts │ ├── status_params.py │ ├── params.py │ ├── kylin_query.py │ └── kylin_master.py ├── screenshots └── kylin.png ├── role_command_order.json ├── quicklinks └── quicklinks.json ├── README.md ├── LICENSE.txt ├── metainfo.xml └── configuration └── kylin.xml /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cas-packone/ambari-kylin-service/HEAD/.DS_Store -------------------------------------------------------------------------------- /package/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cas-packone/ambari-kylin-service/HEAD/package/.DS_Store -------------------------------------------------------------------------------- /screenshots/kylin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cas-packone/ambari-kylin-service/HEAD/screenshots/kylin.png -------------------------------------------------------------------------------- /package/templates/env.rc.j2: -------------------------------------------------------------------------------- 1 | source /etc/profile 2 | export KYLIN_HOME={{install_dir}}/latest 3 | export HIVE_CONF=/usr/hdp/current/hive-client/conf 4 | export HCAT_HOME=/usr/hdp/{{hdp_version}}/hive-hcatalog 5 | export JAVA_HOME='/usr/jdk64/default' 6 | -------------------------------------------------------------------------------- /package/scripts/status_params.py: -------------------------------------------------------------------------------- 1 | from resource_management import * 2 | from resource_management.libraries.script.script import Script 3 | 4 | # server configurations 5 | config = Script.get_config() 6 | install_dir = config['configurations']['kylin']['install.dir'] 7 | 
-------------------------------------------------------------------------------- /role_command_order.json: -------------------------------------------------------------------------------- 1 | { 2 | "general_deps" : { 3 | "_comment" : "dependencies for Kylin", 4 | "KYLIN_ALL-START": ["HBASE_MASTER-START", "HBASE_REGIONSERVER-START"], 5 | "KYLIN_QUERY-START": ["HBASE_MASTER-START", "HBASE_REGIONSERVER-START"] 6 | } 7 | } 8 | 9 | -------------------------------------------------------------------------------- /quicklinks/quicklinks.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "default", 3 | "description": "default quick links configuration", 4 | "configuration": { 5 | 6 | "links": [ 7 | { 8 | "name": "kylin_ui", 9 | "label": "KYLIN UI", 10 | "requires_user_name": "false", 11 | "component_name": "KYLIN_ALL", 12 | "url":"%@://%@:%@/kylin", 13 | "port":{ 14 | "http_property": "7070", 15 | "http_default_port": "7070", 16 | "https_property": "7070", 17 | "https_default_port": "7070", 18 | "regex": "^(\\d+)$", 19 | "site": "kylin" 20 | } 21 | } 22 | ] 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /package/templates/init.sh.j2: -------------------------------------------------------------------------------- 1 | export KYLIN_HOME={{install_dir}}/latest 2 | mkdir -p $KYLIN_HOME/conf/hadoop_conf 3 | rm -rf $KYLIN_HOME/conf/hadoop_conf/core-site.xml 4 | rm -rf $KYLIN_HOME/conf/hadoop_conf/hbase-site.xml 5 | rm -rf $KYLIN_HOME/conf/hadoop_conf/hdfs-site.xml 6 | rm -rf $KYLIN_HOME/conf/hadoop_conf/yarn-site.xml 7 | rm -rf $KYLIN_HOME/conf/hadoop_conf/hive-site.xml 8 | ln -s /etc/hadoop/conf/core-site.xml $KYLIN_HOME/conf/hadoop_conf/core-site.xml 9 | ln -s /etc/hbase/conf/hbase-site.xml $KYLIN_HOME/conf/hadoop_conf/hbase-site.xml 10 | ln -s /etc/hadoop/conf/hdfs-site.xml $KYLIN_HOME/conf/hadoop_conf/hdfs-site.xml 11 | ln -s /etc/hadoop/conf/yarn-site.xml 
$KYLIN_HOME/conf/hadoop_conf/yarn-site.xml 12 | ln -s /etc/hive/conf/hive-site.xml $KYLIN_HOME/conf/hadoop_conf/hive-site.xml -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ambari-kylin-service 2 | === 3 | 4 | ## To download the Kylin service folder, run the commands below 5 | 6 | ``` 7 | VERSION=`hdp-select status hadoop-client | sed 's/hadoop-client - \([0-9]\.[0-9]\).*/\1/'` 8 | sudo git clone https://github.com/cas-bigdatalab/ambari-kylin-service.git /var/lib/ambari-server/resources/stacks/HDP/$VERSION/services/KYLIN 9 | ``` 10 | ## Restart Ambari 11 | \#sandbox 12 | service ambari restart 13 | 14 | \#non sandbox 15 | sudo service ambari-server restart 16 | 17 | During installation, when setting the configuration, set `download.location` to the URL of the `apache-kylin.tar` package from one of the [releases](https://github.com/cas-packone/ambari-kylin-service/releases). 
18 | 19 | ## SUMMARY 20 | ![Image](../master/screenshots/kylin.png?raw=true) 21 | -------------------------------------------------------------------------------- /package/scripts/params.py: -------------------------------------------------------------------------------- 1 | from resource_management import * 2 | from resource_management.libraries.script.script import Script 3 | import sys, os, glob,socket 4 | 5 | # server configurations 6 | config = Script.get_config() 7 | service_packagedir = os.path.realpath(__file__).split('/scripts')[0] 8 | tmp_dir = Script.get_tmp_dir() 9 | kylin_properties=config['configurations']['kylin']['kylin_properties'] 10 | hdp_version = default("/commandParams/version", None) 11 | downloadlocation = config['configurations']['kylin']['download.location'] 12 | #install_dir = config['configurations']['kylin']['install.dir'] 13 | install_dir = format('/opt/kylin') 14 | current_host_name = socket.gethostname() 15 | server_mode = "query" 16 | server_masters = config['clusterHostInfo']['kylin_all_hosts'][0] 17 | server_clusters_arr = config['clusterHostInfo']['kylin_all_hosts'] + (config['clusterHostInfo'].has_key('kylin_query_hosts') and config['clusterHostInfo']['kylin_query_hosts'] or [] ) 18 | server_port = "7070" 19 | server_clusters = ','.join(i + ":" + server_port for i in server_clusters_arr) 20 | kylin_servers = ';'.join( "server " + i + ":" + server_port for i in server_clusters_arr) + ";" 21 | hadoop_conf_dir = install_dir + "/latest/conf/hadoop_conf" 22 | nginx_conf=config['configurations']['nginx']['nginx_conf'] 23 | nginx_port=config['configurations']['nginx']['nginx_port'] 24 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, cas-bigdatalab 2 | All rights reserved. 
3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of scidb nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /metainfo.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 2.0 4 | 5 | 6 | KYLIN 7 | Kylin 8 | an open source Distributed Analytics Engine. 
9 | 3.1.1 10 | 11 | 12 | KYLIN_ALL 13 | Kylin All 14 | MASTER 15 | 1 16 | 17 | 18 | PYTHON 19 | 600 20 | 21 | 22 | 23 | KYLIN_QUERY 24 | Kylin Query 25 | SLAVE 26 | 0+ 27 | 28 | 29 | PYTHON 30 | 600 31 | 32 | 33 | 34 | 35 | 36 | HDFS 37 | HIVE 38 | HBASE 39 | 40 | 41 | 42 | 43 | redhat5,redhat6,redhat7 44 | 45 | 46 | epel-release 47 | 48 | 49 | 50 | 51 | 52 | 53 | quicklinks.json 54 | true 55 | 56 | 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /package/scripts/kylin_query.py: -------------------------------------------------------------------------------- 1 | import os 2 | import base64 3 | from time import sleep 4 | from resource_management import * 5 | 6 | class KylinQuery(Script): 7 | 8 | def install(self, env): 9 | import params 10 | self.install_packages(env) 11 | Directory([params.install_dir], 12 | mode=0755, 13 | cd_access='a', 14 | create_parents=True 15 | ) 16 | Execute('cd ' + params.install_dir + '; wget ' + params.downloadlocation + ' -O kylin.tar.gz ') 17 | Execute('cd ' + params.install_dir + '; tar -xvf kylin.tar.gz') 18 | Execute('cd ' + params.install_dir + ';rm -rf latest; ln -s apache-kylin* latest') 19 | 20 | 21 | def configure(self, env): 22 | import params 23 | env.set_params(params) 24 | kylin_properties = InlineTemplate(params.kylin_properties) 25 | File(format("{install_dir}/latest/conf/kylin.properties"), content=kylin_properties) 26 | 27 | File(format("{tmp_dir}/kylin_init.sh"), 28 | content=Template("init.sh.j2"), 29 | mode=0o700 30 | ) 31 | File(format("{tmp_dir}/kylin_env.rc"), 32 | content=Template("env.rc.j2"), 33 | mode=0o700 34 | ) 35 | Execute(format("bash {tmp_dir}/kylin_init.sh")) 36 | 37 | def start(self, env): 38 | import params 39 | env.set_params(params) 40 | self.configure(env) 41 | Execute(format(". 
{tmp_dir}/kylin_env.rc;{install_dir}/latest/bin/kylin.sh start")) 42 | sleep(5) 43 | Execute("ps -ef | grep java | grep kylin | grep -v grep | awk '{print $2}'>"+format("{install_dir}/latest/pid")) 44 | Execute(format("rm -rf /var/run/kylin.pid;cp {install_dir}/latest/pid /var/run/kylin.pid")) 45 | 46 | 47 | def stop(self, env): 48 | import params 49 | env.set_params(params) 50 | self.configure(env) 51 | Execute(format(". {tmp_dir}/kylin_env.rc;{install_dir}/latest/bin/kylin.sh stop")) 52 | 53 | 54 | def restart(self, env): 55 | self.stop(env) 56 | self.start(env) 57 | 58 | def status(self, env): 59 | check_process_status("/var/run/kylin.pid") 60 | 61 | 62 | if __name__ == "__main__": 63 | KylinQuery().execute() 64 | -------------------------------------------------------------------------------- /package/scripts/kylin_master.py: -------------------------------------------------------------------------------- 1 | import os 2 | import base64 3 | from time import sleep 4 | from resource_management import * 5 | 6 | class KylinMaster(Script): 7 | 8 | def install(self, env): 9 | import params 10 | self.install_packages(env) 11 | Directory([params.install_dir], 12 | mode=0755, 13 | cd_access='a', 14 | create_parents=True 15 | ) 16 | Execute('cd ' + params.install_dir + '; wget ' + params.downloadlocation + ' -O kylin.tar.gz ') 17 | Execute('cd ' + params.install_dir + '; tar -xvf kylin.tar.gz') 18 | Execute('cd ' + params.install_dir + ';rm -rf latest; ln -s apache-kylin* latest') 19 | 20 | #mkdir 21 | Execute('sudo -uhdfs hadoop fs -mkdir -p /kylin') 22 | Execute('sudo -uhdfs hadoop fs -chown -R kylin:kylin /kylin') 23 | 24 | 25 | def configure(self, env): 26 | import params 27 | params.server_mode="all" 28 | env.set_params(params) 29 | kylin_properties = InlineTemplate(params.kylin_properties) 30 | File(format("{install_dir}/latest/conf/kylin.properties"), content=kylin_properties) 31 | 32 | File(format("{tmp_dir}/kylin_init.sh"), 33 | content=Template("init.sh.j2"), 
34 | mode=0o700 35 | ) 36 | File(format("{tmp_dir}/kylin_env.rc"), 37 | content=Template("env.rc.j2"), 38 | mode=0o700 39 | ) 40 | Execute(format("bash {tmp_dir}/kylin_init.sh")) 41 | 42 | def start(self, env): 43 | import params 44 | env.set_params(params) 45 | self.configure(env) 46 | Execute(format(". {tmp_dir}/kylin_env.rc;{install_dir}/latest/bin/kylin.sh start")) 47 | sleep(5) 48 | Execute("ps -ef | grep java | grep kylin | grep -v grep | awk '{print $2}' >"+format("{install_dir}/latest/pid")) 49 | Execute(format("rm -rf /var/run/kylin.pid;cp {install_dir}/latest/pid /var/run/kylin.pid")) 50 | 51 | 52 | 53 | 54 | def stop(self, env): 55 | import params 56 | env.set_params(params) 57 | self.configure(env) 58 | Execute(format(". {tmp_dir}/kylin_env.rc;{install_dir}/latest/bin/kylin.sh stop")) 59 | 60 | 61 | def restart(self, env): 62 | self.stop(env) 63 | self.start(env) 64 | 65 | def status(self, env): 66 | check_process_status("/var/run/kylin.pid") 67 | 68 | 69 | if __name__ == "__main__": 70 | KylinMaster().execute() 71 | -------------------------------------------------------------------------------- /configuration/kylin.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | kylin_properties 8 | This is the jinja template for init.conf file 9 | 10 | # 11 | # Licensed to the Apache Software Foundation (ASF) under one or more 12 | # contributor license agreements. See the NOTICE file distributed with 13 | # this work for additional information regarding copyright ownership. 14 | # The ASF licenses this file to You under the Apache License, Version 2.0 15 | # (the "License"); you may not use this file except in compliance with 16 | # the License. 
You may obtain a copy of the License at 17 | # 18 | # http://www.apache.org/licenses/LICENSE-2.0 19 | # 20 | # Unless required by applicable law or agreed to in writing, software 21 | # distributed under the License is distributed on an "AS IS" BASIS, 22 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 23 | # See the License for the specific language governing permissions and 24 | # limitations under the License. 25 | # 26 | 27 | ### METADATA | ENV ### 28 | 29 | # The metadata store in hbase 30 | kylin.metadata.url=kylin_metadata@hbase 31 | 32 | # Working folder in HDFS, make sure user has the right access to the hdfs directory 33 | kylin.env.hdfs-working-dir=/kylin 34 | 35 | # DEV|QA|PROD. DEV will turn on some dev features, QA and PROD has no difference in terms of functions. 36 | kylin.env=QA 37 | 38 | ### SERVER | WEB ### 39 | 40 | # Kylin server mode, valid value [all, query, job] 41 | kylin.server.mode={{server_mode}} 42 | 43 | # List of web servers in use, this enables one web server instance to sync up with other servers. 
44 | kylin.server.cluster-servers={{server_clusters}} 45 | 46 | # Display timezone on UI,format like[GMT+N or GMT-N] 47 | kylin.web.timezone=GMT+8 48 | 49 | kylin.web.cross-domain-enabled=true 50 | 51 | 52 | ### SOURCE ### 53 | 54 | # Hive client, valid value [cli, beeline] 55 | kylin.source.hive.client=cli 56 | 57 | # Parameters for beeline client, only necessary if hive client is beeline 58 | #kylin.source.hive.beeline-params=-n root --hiveconf hive.security.authorization.sqlstd.confwhitelist.append='mapreduce.job.*|dfs.*' -u jdbc:hive2://localhost:10000 59 | 60 | kylin.source.hive.keep-flat-table=false 61 | 62 | # Hive database name for putting the intermediate flat tables 63 | kylin.source.hive.database-for-flat-table=default 64 | 65 | # Whether redistribute the intermediate flat table before building 66 | kylin.source.hive.redistribute-flat-table=true 67 | 68 | ## Configuration for HDP 2.6.x when kylin.source.hive.client=beeline 69 | #kylin.source.hive.sparksql-beeline-shell=/usr/hdp/current/spark2-client/bin/beeline 70 | #kylin.source.hive.sparksql-beeline-params=-n root --hiveconf hive.security.authorization.sqlstd.confwhitelist.append='mapreduce.job.*|dfs.*' -u jdbc:hive2://:10000 71 | 72 | 73 | ### STORAGE ### 74 | 75 | # The storage for cube is hbase 76 | kylin.storage.url=hbase 77 | 78 | # Compression codec for htable, valid value [none, snappy, lzo, gzip, lz4] 79 | kylin.storage.hbase.compression-codec=none 80 | 81 | # HBase Cluster FileSystem, which serving hbase, format as hdfs://hbase-cluster:8020 82 | # Leave empty if hbase running on same cluster with hive and mapreduce 83 | #kylin.storage.hbase.cluster-fs= 84 | 85 | # The cut size for hbase region, in GB. 86 | kylin.storage.hbase.region-cut-gb=5 87 | 88 | # The hfile size of GB, smaller hfile leading to the converting hfile MR has more reducers and be faster. 89 | # Set 0 to disable this optimization. 
90 | kylin.storage.hbase.hfile-size-gb=2 91 | 92 | kylin.storage.hbase.min-region-count=1 93 | kylin.storage.hbase.max-region-count=500 94 | 95 | # Optional information for the owner of kylin platform, it can be your team's email 96 | # Currently it will be attached to each kylin's htable attribute 97 | kylin.storage.hbase.owner-tag=whoami@kylin.apache.org 98 | 99 | kylin.storage.hbase.coprocessor-mem-gb=3 100 | 101 | # By default kylin can spill query's intermediate results to disks when it's consuming too much memory. 102 | # Set it to false if you want query to abort immediately in such condition. 103 | kylin.storage.partition.aggr-spill-enabled=true 104 | 105 | # The maximum number of bytes each coprocessor is allowed to scan. 106 | # To allow arbitrary large scan, you can set it to 0. 107 | kylin.storage.partition.max-scan-bytes=3221225472 108 | 109 | # The default coprocessor timeout is (hbase.rpc.timeout * 0.9) / 1000 seconds, 110 | # You can set it to a smaller value. 0 means use default. 
111 | # kylin.storage.hbase.coprocessor-timeout-seconds=0 112 | 113 | 114 | ### JOB ### 115 | 116 | # Max job retry on error, default 0: no retry 117 | kylin.job.retry=0 118 | 119 | # Max count of concurrent jobs running 120 | kylin.job.max-concurrent-jobs=10 121 | 122 | # The percentage of the sampling, default 100% 123 | kylin.job.sampling-percentage=100 124 | 125 | # Whether get job status from resource manager with kerberos authentication 126 | kylin.job.status.with.kerberos=false 127 | 128 | # Timeout in seconds 129 | kylin.job.step.timeout=7200 130 | 131 | # If true, will send email notification on job complete 132 | #kylin.job.notification-enabled=true 133 | #kylin.job.notification-mail-enable-starttls=true 134 | #kylin.job.notification-mail-host=smtp.office365.com 135 | #kylin.job.notification-mail-port=587 136 | #kylin.job.notification-mail-username=kylin@example.com 137 | #kylin.job.notification-mail-password=mypassword 138 | #kylin.job.notification-mail-sender=kylin@example.com 139 | 140 | 141 | ### ENGINE ### 142 | 143 | # Time interval to check hadoop job status 144 | kylin.engine.mr.yarn-check-interval-seconds=10 145 | 146 | kylin.engine.mr.reduce-input-mb=500 147 | 148 | kylin.engine.mr.max-reducer-number=500 149 | 150 | kylin.engine.mr.mapper-input-rows=1000000 151 | 152 | # Enable dictionary building in MR reducer 153 | kylin.engine.mr.build-dict-in-reducer=true 154 | 155 | # Number of reducers for fetching UHC column distinct values 156 | kylin.engine.mr.uhc-reducer-count=1 157 | 158 | ### CUBE | DICTIONARY ### 159 | 160 | # 'auto', 'inmem' or 'layer' 161 | kylin.cube.algorithm=layer 162 | 163 | # A smaller threshold prefers layer, a larger threshold prefers in-mem 164 | kylin.cube.algorithm.layer-or-inmem-threshold=7 165 | 166 | kylin.cube.aggrgroup.max-combination=4096 167 | 168 | kylin.snapshot.max-mb=300 169 | 170 | 171 | ### QUERY ### 172 | 173 | # Controls the maximum number of bytes a query is allowed to scan storage. 
174 | # The default value 0 means no limit. 175 | # The counterpart kylin.storage.partition.max-scan-bytes sets the maximum per coprocessor. 176 | kylin.query.max-scan-bytes=0 177 | 178 | kylin.query.udf.version=org.apache.kylin.query.udf.VersionUDF 179 | kylin.query.udf.concat=org.apache.kylin.query.udf.ConcatUDF 180 | 181 | kylin.query.cache-enabled=true 182 | 183 | 184 | ### SECURITY ### 185 | 186 | # Spring security profile, options: testing, ldap, saml 187 | # with "testing" profile, user can use pre-defined name/pwd like KYLIN/ADMIN to login 188 | kylin.security.profile=testing 189 | 190 | # Default roles and admin roles in LDAP, for ldap and saml 191 | kylin.security.acl.default-role=ROLE_ANALYST,ROLE_MODELER 192 | kylin.security.acl.admin-role=ROLE_ADMIN 193 | 194 | # LDAP authentication configuration 195 | kylin.security.ldap.connection-server=ldap://ldap_server:389 196 | kylin.security.ldap.connection-username= 197 | kylin.security.ldap.connection-password= 198 | 199 | # LDAP user account directory; 200 | kylin.security.ldap.user-search-base= 201 | kylin.security.ldap.user-search-pattern= 202 | kylin.security.ldap.user-group-search-base= 203 | 204 | # LDAP service account directory 205 | kylin.security.ldap.service-search-base= 206 | kylin.security.ldap.service-search-pattern= 207 | kylin.security.ldap.service-group-search-base= 208 | 209 | ## SAML configurations for SSO 210 | # SAML IDP metadata file location 211 | kylin.security.saml.metadata-file=classpath:sso_metadata.xml 212 | kylin.security.saml.metadata-entity-base-url=https://hostname/kylin 213 | kylin.security.saml.context-scheme=https 214 | kylin.security.saml.context-server-name=hostname 215 | kylin.security.saml.context-server-port=443 216 | kylin.security.saml.context-path=/kylin 217 | 218 | 219 | ### Spark Engine Configs ### 220 | 221 | # Hadoop conf folder, will export this as "HADOOP_CONF_DIR" to run spark-submit 222 | # This must contain site xmls of core, yarn, hive, and hbase in one 
folder 223 | kylin.env.hadoop-conf-dir={{hadoop_conf_dir}} 224 | 225 | # Estimate the RDD partition numbers 226 | kylin.engine.spark.rdd-partition-cut-mb=10 227 | 228 | # Minimal partition numbers of rdd 229 | kylin.engine.spark.min-partition=1 230 | 231 | # Max partition numbers of rdd 232 | kylin.engine.spark.max-partition=5000 233 | 234 | ## Spark conf (default is in spark/conf/spark-defaults.conf) 235 | #kylin.engine.spark-conf.spark.master=yarn 236 | #kylin.engine.spark-conf.spark.submit.deployMode=cluster 237 | #kylin.engine.spark-conf.spark.yarn.queue=default 238 | #kylin.engine.spark-conf.spark.executor.memory=4G 239 | #kylin.engine.spark-conf.spark.executor.cores=4 240 | #kylin.engine.spark-conf.spark.executor.instances=2 241 | #kylin.engine.spark-conf.spark.eventLog.enabled=true 242 | #kylin.engine.spark-conf.spark.eventLog.dir=hdfs\:///kylin/spark-history 243 | #kylin.engine.spark-conf.spark.history.fs.logDirectory=hdfs\:///kylin/spark-history 244 | 245 | ## Copy the following file /spark/spark-libs.jar from the kylin .tar package to HDFS to avoid uploading dependencies at runtime 246 | #kylin.engine.spark-conf.spark.yarn.archive=hdfs://:8020/kylin/spark/spark-libs.jar 247 | 248 | ## or manually upload spark-assembly jar to HDFS and then set this property will avoid repeatedly uploading jar at runtime 249 | #kylin.engine.spark-conf.spark.yarn.jar=hdfs://packone165:8020/kylin/spark/spark-assembly-1.6.3-hadoop2.6.0.jar 250 | #kylin.engine.spark-conf.spark.io.compression.codec=org.apache.spark.io.SnappyCompressionCodec 251 | 252 | ## uncomment for HDP 253 | #kylin.engine.spark-conf.spark.driver.extraJavaOptions=-Dhdp.version={{hdp_version}} 254 | #kylin.engine.spark-conf.spark.yarn.am.extraJavaOptions=-Dhdp.version={{hdp_version}} 255 | #kylin.engine.spark-conf.spark.executor.extraJavaOptions=-Dhdp.version={{hdp_version}} 256 | 257 | 258 | 259 | 260 | download.location 261 | 
https://github.com/alfonsonishikawa/ambari-kylin-service/releases/download/3.1.1/apache-kylin.tar 262 | Location to download the package Apache Kylin 3.1.1 + spark 2.2.0 + HBase 1.2.0 263 | 264 | 265 | 266 | --------------------------------------------------------------------------------