├── .DS_Store
├── package
├── .DS_Store
├── templates
│ ├── env.rc.j2
│ └── init.sh.j2
└── scripts
│ ├── status_params.py
│ ├── params.py
│ ├── kylin_query.py
│ └── kylin_master.py
├── screenshots
└── kylin.png
├── role_command_order.json
├── quicklinks
└── quicklinks.json
├── README.md
├── LICENSE.txt
├── metainfo.xml
└── configuration
└── kylin.xml
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cas-packone/ambari-kylin-service/HEAD/.DS_Store
--------------------------------------------------------------------------------
/package/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cas-packone/ambari-kylin-service/HEAD/package/.DS_Store
--------------------------------------------------------------------------------
/screenshots/kylin.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cas-packone/ambari-kylin-service/HEAD/screenshots/kylin.png
--------------------------------------------------------------------------------
/package/templates/env.rc.j2:
--------------------------------------------------------------------------------
# Kylin runtime environment (rendered by Ambari from env.rc.j2 and sourced
# before every kylin.sh start/stop invocation).
source /etc/profile
# "latest" is the symlink to the unpacked apache-kylin-* release, created at install time.
export KYLIN_HOME={{install_dir}}/latest
# Hive client configuration and HCatalog home on an HDP cluster.
export HIVE_CONF=/usr/hdp/current/hive-client/conf
export HCAT_HOME=/usr/hdp/{{hdp_version}}/hive-hcatalog
# NOTE(review): hard-coded JDK path — assumes the Ambari-installed jdk64 layout; confirm per cluster.
export JAVA_HOME='/usr/jdk64/default'
--------------------------------------------------------------------------------
/package/scripts/status_params.py:
--------------------------------------------------------------------------------
from resource_management import *
from resource_management.libraries.script.script import Script

# Minimal server configuration used by the STATUS command only; keeping this
# module small avoids pulling the heavier params.py into every status poll.
config = Script.get_config()
# Base directory where the Kylin tarball is unpacked.
# NOTE(review): params.py hard-codes install_dir to /opt/kylin instead of
# reading this key — confirm the two stay in agreement.
install_dir = config['configurations']['kylin']['install.dir']
--------------------------------------------------------------------------------
/role_command_order.json:
--------------------------------------------------------------------------------
1 | {
2 | "general_deps" : {
3 | "_comment" : "dependencies for Kylin",
4 | "KYLIN_ALL-START": ["HBASE_MASTER-START", "HBASE_REGIONSERVER-START"],
5 | "KYLIN_QUERY-START": ["HBASE_MASTER-START", "HBASE_REGIONSERVER-START"]
6 | }
7 | }
8 |
9 |
--------------------------------------------------------------------------------
/quicklinks/quicklinks.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "default",
3 | "description": "default quick links configuration",
4 | "configuration": {
5 |
6 | "links": [
7 | {
8 | "name": "kylin_ui",
9 | "label": "KYLIN UI",
10 | "requires_user_name": "false",
11 | "component_name": "KYLIN_ALL",
12 | "url":"%@://%@:%@/kylin",
13 | "port":{
14 | "http_property": "7070",
15 | "http_default_port": "7070",
16 | "https_property": "7070",
17 | "https_default_port": "7070",
18 | "regex": "^(\\d+)$",
19 | "site": "kylin"
20 | }
21 | }
22 | ]
23 | }
24 | }
25 |
--------------------------------------------------------------------------------
/package/templates/init.sh.j2:
--------------------------------------------------------------------------------
#!/bin/bash
# Link the cluster's live Hadoop/HBase/Hive site configs into Kylin's private
# hadoop_conf directory so Kylin (and its Spark engine) always sees current
# settings. Rendered from init.sh.j2 and executed by configure() in the
# kylin_master/kylin_query scripts.
export KYLIN_HOME={{install_dir}}/latest
mkdir -p "$KYLIN_HOME/conf/hadoop_conf"

# link_conf SRC NAME: recreate the symlink so a stale file or link from a
# previous deploy never shadows the cluster config. Variables are quoted to
# survive an install_dir containing spaces.
link_conf() {
    rm -rf "$KYLIN_HOME/conf/hadoop_conf/$2"
    ln -s "$1" "$KYLIN_HOME/conf/hadoop_conf/$2"
}

link_conf /etc/hadoop/conf/core-site.xml core-site.xml
link_conf /etc/hbase/conf/hbase-site.xml hbase-site.xml
link_conf /etc/hadoop/conf/hdfs-site.xml hdfs-site.xml
link_conf /etc/hadoop/conf/yarn-site.xml yarn-site.xml
link_conf /etc/hive/conf/hive-site.xml hive-site.xml
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ambari-kylin-service
2 | ===
3 |
4 | ## To download the Kylin service folder, run below
5 |
6 | ```
7 | VERSION=`hdp-select status hadoop-client | sed 's/hadoop-client - \([0-9]\.[0-9]\).*/\1/'`
8 | sudo git clone https://github.com/cas-bigdatalab/ambari-kylin-service.git /var/lib/ambari-server/resources/stacks/HDP/$VERSION/services/KYLIN
9 | ```
10 | ## Restart Ambari
11 | \#sandbox
12 | service ambari restart
13 |
14 | \#non sandbox
15 | sudo service ambari-server restart
16 |
17 | During the install process, when setting the configuration, set `download.location` to the path of the package `apache-kylin.tar` from one of the [releases](https://github.com/cas-packone/ambari-kylin-service/releases).
18 |
19 | ## SUMMARY
20 | 
21 |
--------------------------------------------------------------------------------
/package/scripts/params.py:
--------------------------------------------------------------------------------
from resource_management import *
from resource_management.libraries.script.script import Script
import sys, os, glob, socket

# Server configurations shared by kylin_master.py / kylin_query.py.
config = Script.get_config()
# Root of the service package (…/KYLIN/package), derived from this script's own path.
service_packagedir = os.path.realpath(__file__).split('/scripts')[0]
tmp_dir = Script.get_tmp_dir()
# Raw kylin.properties Jinja template text supplied through Ambari config.
kylin_properties = config['configurations']['kylin']['kylin_properties']
hdp_version = default("/commandParams/version", None)
downloadlocation = config['configurations']['kylin']['download.location']
# NOTE(review): the kylin config declares install.dir (status_params.py reads it),
# but this module deliberately overrides it with a fixed path — confirm which wins.
install_dir = '/opt/kylin'
current_host_name = socket.gethostname()
# Default mode for query nodes; KylinMaster.configure() overrides this to "all".
server_mode = "query"
server_masters = config['clusterHostInfo']['kylin_all_hosts'][0]
# All Kylin hosts: the KYLIN_ALL host(s) plus any optional KYLIN_QUERY hosts.
# dict.get() replaces the Python-2-only has_key()/and-or idiom (has_key was
# removed in Python 3) while returning the same value in every case.
server_clusters_arr = (config['clusterHostInfo']['kylin_all_hosts']
                       + config['clusterHostInfo'].get('kylin_query_hosts', []))
server_port = "7070"
# Comma-separated host:port list for kylin.server.cluster-servers.
server_clusters = ','.join(i + ":" + server_port for i in server_clusters_arr)
# nginx upstream entries, e.g. "server host1:7070;server host2:7070;"
kylin_servers = ';'.join("server " + i + ":" + server_port for i in server_clusters_arr) + ";"
hadoop_conf_dir = install_dir + "/latest/conf/hadoop_conf"
nginx_conf = config['configurations']['nginx']['nginx_conf']
nginx_port = config['configurations']['nginx']['nginx_port']
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2016, cas-bigdatalab
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 |
7 | * Redistributions of source code must retain the above copyright notice, this
8 | list of conditions and the following disclaimer.
9 |
10 | * Redistributions in binary form must reproduce the above copyright notice,
11 | this list of conditions and the following disclaimer in the documentation
12 | and/or other materials provided with the distribution.
13 |
14 | * Neither the name of scidb nor the names of its
15 | contributors may be used to endorse or promote products derived from
16 | this software without specific prior written permission.
17 |
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/metainfo.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | 2.0
4 |
5 |
6 | KYLIN
7 | Kylin
8 | an open source Distributed Analytics Engine.
9 | 3.1.1
10 |
11 |
12 | KYLIN_ALL
13 | Kylin All
14 | MASTER
15 | 1
16 |
17 |
18 | PYTHON
19 | 600
20 |
21 |
22 |
23 | KYLIN_QUERY
24 | Kylin Query
25 | SLAVE
26 | 0+
27 |
28 |
29 | PYTHON
30 | 600
31 |
32 |
33 |
34 |
35 |
36 | HDFS
37 | HIVE
38 | HBASE
39 |
40 |
41 |
42 |
43 | redhat5,redhat6,redhat7
44 |
45 |
46 | epel-release
47 |
48 |
49 |
50 |
51 |
52 |
53 | quicklinks.json
54 | true
55 |
56 |
57 |
58 |
59 |
60 |
--------------------------------------------------------------------------------
/package/scripts/kylin_query.py:
--------------------------------------------------------------------------------
1 | import os
2 | import base64
3 | from time import sleep
4 | from resource_management import *
5 |
class KylinQuery(Script):
    """Ambari control script for the KYLIN_QUERY (slave) component.

    The lifecycle mirrors KylinMaster, except that params.server_mode keeps
    its default "query" value and no HDFS working directory is created.
    """

    def install(self, env):
        """Download the Kylin tarball into install_dir and point 'latest' at it."""
        import params
        self.install_packages(env)
        # 0o755 instead of the Python-2-only literal 0755 — the old spelling is
        # a SyntaxError on Python 3 and inconsistent with the 0o700 used below.
        Directory([params.install_dir],
                  mode=0o755,
                  cd_access='a',
                  create_parents=True)
        Execute('cd ' + params.install_dir + '; wget ' + params.downloadlocation + ' -O kylin.tar.gz ')
        Execute('cd ' + params.install_dir + '; tar -xvf kylin.tar.gz')
        Execute('cd ' + params.install_dir + ';rm -rf latest; ln -s apache-kylin* latest')

    def configure(self, env):
        """Render kylin.properties and the init/env helper scripts, then run init."""
        import params
        env.set_params(params)
        kylin_properties = InlineTemplate(params.kylin_properties)
        File(format("{install_dir}/latest/conf/kylin.properties"), content=kylin_properties)

        File(format("{tmp_dir}/kylin_init.sh"),
             content=Template("init.sh.j2"),
             mode=0o700)
        File(format("{tmp_dir}/kylin_env.rc"),
             content=Template("env.rc.j2"),
             mode=0o700)
        # Links the cluster's Hadoop/HBase/Hive site files into Kylin's conf dir.
        Execute(format("bash {tmp_dir}/kylin_init.sh"))

    def start(self, env):
        """Start Kylin and record its PID for later status() checks."""
        import params
        env.set_params(params)
        self.configure(env)
        Execute(format(". {tmp_dir}/kylin_env.rc;{install_dir}/latest/bin/kylin.sh start"))
        # kylin.sh returns before the JVM shows up in ps; give it a moment.
        sleep(5)
        Execute("ps -ef | grep java | grep kylin | grep -v grep | awk '{print $2}'>" + format("{install_dir}/latest/pid"))
        Execute(format("rm -rf /var/run/kylin.pid;cp {install_dir}/latest/pid /var/run/kylin.pid"))

    def stop(self, env):
        """Stop the Kylin server via kylin.sh."""
        import params
        env.set_params(params)
        self.configure(env)
        Execute(format(". {tmp_dir}/kylin_env.rc;{install_dir}/latest/bin/kylin.sh stop"))

    def restart(self, env):
        self.stop(env)
        self.start(env)

    def status(self, env):
        # Ambari's periodic check against the PID file written by start().
        check_process_status("/var/run/kylin.pid")


if __name__ == "__main__":
    KylinQuery().execute()
--------------------------------------------------------------------------------
/package/scripts/kylin_master.py:
--------------------------------------------------------------------------------
1 | import os
2 | import base64
3 | from time import sleep
4 | from resource_management import *
5 |
class KylinMaster(Script):
    """Ambari control script for the KYLIN_ALL (master) component.

    Unlike KylinQuery, install() also prepares the /kylin HDFS working
    directory and configure() switches the server into "all" mode.
    """

    def install(self, env):
        """Download the Kylin tarball, point 'latest' at it, and prepare HDFS."""
        import params
        self.install_packages(env)
        # 0o755 instead of the Python-2-only literal 0755 — the old spelling is
        # a SyntaxError on Python 3 and inconsistent with the 0o700 used below.
        Directory([params.install_dir],
                  mode=0o755,
                  cd_access='a',
                  create_parents=True)
        Execute('cd ' + params.install_dir + '; wget ' + params.downloadlocation + ' -O kylin.tar.gz ')
        Execute('cd ' + params.install_dir + '; tar -xvf kylin.tar.gz')
        Execute('cd ' + params.install_dir + ';rm -rf latest; ln -s apache-kylin* latest')

        # Create Kylin's HDFS working dir (kylin.env.hdfs-working-dir) as hdfs,
        # then hand ownership to the kylin user.
        Execute('sudo -uhdfs hadoop fs -mkdir -p /kylin')
        Execute('sudo -uhdfs hadoop fs -chown -R kylin:kylin /kylin')

    def configure(self, env):
        """Render kylin.properties (in "all" mode) and the helper scripts, then run init."""
        import params
        # The master serves both query and job roles.
        params.server_mode = "all"
        env.set_params(params)
        kylin_properties = InlineTemplate(params.kylin_properties)
        File(format("{install_dir}/latest/conf/kylin.properties"), content=kylin_properties)

        File(format("{tmp_dir}/kylin_init.sh"),
             content=Template("init.sh.j2"),
             mode=0o700)
        File(format("{tmp_dir}/kylin_env.rc"),
             content=Template("env.rc.j2"),
             mode=0o700)
        # Links the cluster's Hadoop/HBase/Hive site files into Kylin's conf dir.
        Execute(format("bash {tmp_dir}/kylin_init.sh"))

    def start(self, env):
        """Start Kylin and record its PID for later status() checks."""
        import params
        env.set_params(params)
        self.configure(env)
        Execute(format(". {tmp_dir}/kylin_env.rc;{install_dir}/latest/bin/kylin.sh start"))
        # kylin.sh returns before the JVM shows up in ps; give it a moment.
        sleep(5)
        Execute("ps -ef | grep java | grep kylin | grep -v grep | awk '{print $2}' >" + format("{install_dir}/latest/pid"))
        Execute(format("rm -rf /var/run/kylin.pid;cp {install_dir}/latest/pid /var/run/kylin.pid"))

    def stop(self, env):
        """Stop the Kylin server via kylin.sh."""
        import params
        env.set_params(params)
        self.configure(env)
        Execute(format(". {tmp_dir}/kylin_env.rc;{install_dir}/latest/bin/kylin.sh stop"))

    def restart(self, env):
        self.stop(env)
        self.start(env)

    def status(self, env):
        # Ambari's periodic check against the PID file written by start().
        check_process_status("/var/run/kylin.pid")


if __name__ == "__main__":
    KylinMaster().execute()
--------------------------------------------------------------------------------
/configuration/kylin.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | kylin_properties
8 | This is the jinja template for init.conf file
9 |
10 | #
11 | # Licensed to the Apache Software Foundation (ASF) under one or more
12 | # contributor license agreements. See the NOTICE file distributed with
13 | # this work for additional information regarding copyright ownership.
14 | # The ASF licenses this file to You under the Apache License, Version 2.0
15 | # (the "License"); you may not use this file except in compliance with
16 | # the License. You may obtain a copy of the License at
17 | #
18 | # http://www.apache.org/licenses/LICENSE-2.0
19 | #
20 | # Unless required by applicable law or agreed to in writing, software
21 | # distributed under the License is distributed on an "AS IS" BASIS,
22 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
23 | # See the License for the specific language governing permissions and
24 | # limitations under the License.
25 | #
26 |
27 | ### METADATA | ENV ###
28 |
29 | # The metadata store in hbase
30 | kylin.metadata.url=kylin_metadata@hbase
31 |
32 | # Working folder in HDFS, make sure user has the right access to the hdfs directory
33 | kylin.env.hdfs-working-dir=/kylin
34 |
35 | # DEV|QA|PROD. DEV will turn on some dev features; QA and PROD have no difference in terms of functions.
36 | kylin.env=QA
37 |
38 | ### SERVER | WEB ###
39 |
40 | # Kylin server mode, valid value [all, query, job]
41 | kylin.server.mode={{server_mode}}
42 |
43 | # List of web servers in use, this enables one web server instance to sync up with other servers.
44 | kylin.server.cluster-servers={{server_clusters}}
45 |
46 | # Display timezone on UI, in the format GMT+N or GMT-N
47 | kylin.web.timezone=GMT+8
48 |
49 | kylin.web.cross-domain-enabled=true
50 |
51 |
52 | ### SOURCE ###
53 |
54 | # Hive client, valid value [cli, beeline]
55 | kylin.source.hive.client=cli
56 |
57 | # Parameters for beeline client, only necessary if hive client is beeline
58 | #kylin.source.hive.beeline-params=-n root --hiveconf hive.security.authorization.sqlstd.confwhitelist.append='mapreduce.job.*|dfs.*' -u jdbc:hive2://localhost:10000
59 |
60 | kylin.source.hive.keep-flat-table=false
61 |
62 | # Hive database name for putting the intermediate flat tables
63 | kylin.source.hive.database-for-flat-table=default
64 |
65 | # Whether redistribute the intermediate flat table before building
66 | kylin.source.hive.redistribute-flat-table=true
67 |
68 | ## Configuration for HDP 2.6.x when kylin.source.hive.client=beeline
69 | #kylin.source.hive.sparksql-beeline-shell=/usr/hdp/current/spark2-client/bin/beeline
70 | #kylin.source.hive.sparksql-beeline-params=-n root --hiveconf hive.security.authorization.sqlstd.confwhitelist.append='mapreduce.job.*|dfs.*' -u jdbc:hive2://:10000
71 |
72 |
73 | ### STORAGE ###
74 |
75 | # The storage for cube is hbase
76 | kylin.storage.url=hbase
77 |
78 | # Compression codec for htable, valid value [none, snappy, lzo, gzip, lz4]
79 | kylin.storage.hbase.compression-codec=none
80 |
81 | # HBase Cluster FileSystem, which serving hbase, format as hdfs://hbase-cluster:8020
82 | # Leave empty if hbase running on same cluster with hive and mapreduce
83 | #kylin.storage.hbase.cluster-fs=
84 |
85 | # The cut size for hbase region, in GB.
86 | kylin.storage.hbase.region-cut-gb=5
87 |
88 | # The hfile size in GB; a smaller hfile gives the HFile-converting MR job more reducers and makes it faster.
89 | # Set 0 to disable this optimization.
90 | kylin.storage.hbase.hfile-size-gb=2
91 |
92 | kylin.storage.hbase.min-region-count=1
93 | kylin.storage.hbase.max-region-count=500
94 |
95 | # Optional information for the owner of kylin platform, it can be your team's email
96 | # Currently it will be attached to each kylin's htable attribute
97 | kylin.storage.hbase.owner-tag=whoami@kylin.apache.org
98 |
99 | kylin.storage.hbase.coprocessor-mem-gb=3
100 |
101 | # By default kylin can spill query's intermediate results to disks when it's consuming too much memory.
102 | # Set it to false if you want query to abort immediately in such condition.
103 | kylin.storage.partition.aggr-spill-enabled=true
104 |
105 | # The maximum number of bytes each coprocessor is allowed to scan.
106 | # To allow arbitrary large scan, you can set it to 0.
107 | kylin.storage.partition.max-scan-bytes=3221225472
108 |
109 | # The default coprocessor timeout is (hbase.rpc.timeout * 0.9) / 1000 seconds,
110 | # You can set it to a smaller value. 0 means use default.
111 | # kylin.storage.hbase.coprocessor-timeout-seconds=0
112 |
113 |
114 | ### JOB ###
115 |
116 | # Max job retry on error, default 0: no retry
117 | kylin.job.retry=0
118 |
119 | # Max count of concurrent jobs running
120 | kylin.job.max-concurrent-jobs=10
121 |
122 | # The percentage of the sampling, default 100%
123 | kylin.job.sampling-percentage=100
124 |
125 | # Whether get job status from resource manager with kerberos authentication
126 | kylin.job.status.with.kerberos=false
127 |
128 | # Timeout in seconds
129 | kylin.job.step.timeout=7200
130 |
131 | # If true, will send email notification on job complete
132 | #kylin.job.notification-enabled=true
133 | #kylin.job.notification-mail-enable-starttls=true
134 | #kylin.job.notification-mail-host=smtp.office365.com
135 | #kylin.job.notification-mail-port=587
136 | #kylin.job.notification-mail-username=kylin@example.com
137 | #kylin.job.notification-mail-password=mypassword
138 | #kylin.job.notification-mail-sender=kylin@example.com
139 |
140 |
141 | ### ENGINE ###
142 |
143 | # Time interval to check hadoop job status
144 | kylin.engine.mr.yarn-check-interval-seconds=10
145 |
146 | kylin.engine.mr.reduce-input-mb=500
147 |
148 | kylin.engine.mr.max-reducer-number=500
149 |
150 | kylin.engine.mr.mapper-input-rows=1000000
151 |
152 | # Enable dictionary building in MR reducer
153 | kylin.engine.mr.build-dict-in-reducer=true
154 |
155 | # Number of reducers for fetching UHC column distinct values
156 | kylin.engine.mr.uhc-reducer-count=1
157 |
158 | ### CUBE | DICTIONARY ###
159 |
160 | # 'auto', 'inmem' or 'layer'
161 | kylin.cube.algorithm=layer
162 |
163 | # A smaller threshold prefers layer, a larger threshold prefers in-mem
164 | kylin.cube.algorithm.layer-or-inmem-threshold=7
165 |
166 | kylin.cube.aggrgroup.max-combination=4096
167 |
168 | kylin.snapshot.max-mb=300
169 |
170 |
171 | ### QUERY ###
172 |
173 | # Controls the maximum number of bytes a query is allowed to scan storage.
174 | # The default value 0 means no limit.
175 | # The counterpart kylin.storage.partition.max-scan-bytes sets the maximum per coprocessor.
176 | kylin.query.max-scan-bytes=0
177 |
178 | kylin.query.udf.version=org.apache.kylin.query.udf.VersionUDF
179 | kylin.query.udf.concat=org.apache.kylin.query.udf.ConcatUDF
180 |
181 | kylin.query.cache-enabled=true
182 |
183 |
184 | ### SECURITY ###
185 |
186 | # Spring security profile, options: testing, ldap, saml
187 | # with "testing" profile, user can use pre-defined name/pwd like KYLIN/ADMIN to login
188 | kylin.security.profile=testing
189 |
190 | # Default roles and admin roles in LDAP, for ldap and saml
191 | kylin.security.acl.default-role=ROLE_ANALYST,ROLE_MODELER
192 | kylin.security.acl.admin-role=ROLE_ADMIN
193 |
194 | # LDAP authentication configuration
195 | kylin.security.ldap.connection-server=ldap://ldap_server:389
196 | kylin.security.ldap.connection-username=
197 | kylin.security.ldap.connection-password=
198 |
199 | # LDAP user account directory;
200 | kylin.security.ldap.user-search-base=
201 | kylin.security.ldap.user-search-pattern=
202 | kylin.security.ldap.user-group-search-base=
203 |
204 | # LDAP service account directory
205 | kylin.security.ldap.service-search-base=
206 | kylin.security.ldap.service-search-pattern=
207 | kylin.security.ldap.service-group-search-base=
208 |
209 | ## SAML configurations for SSO
210 | # SAML IDP metadata file location
211 | kylin.security.saml.metadata-file=classpath:sso_metadata.xml
212 | kylin.security.saml.metadata-entity-base-url=https://hostname/kylin
213 | kylin.security.saml.context-scheme=https
214 | kylin.security.saml.context-server-name=hostname
215 | kylin.security.saml.context-server-port=443
216 | kylin.security.saml.context-path=/kylin
217 |
218 |
219 | ### Spark Engine Configs ###
220 |
221 | # Hadoop conf folder, will export this as "HADOOP_CONF_DIR" to run spark-submit
222 | # This must contain site xmls of core, yarn, hive, and hbase in one folder
223 | kylin.env.hadoop-conf-dir={{hadoop_conf_dir}}
224 |
225 | # Estimate the RDD partition numbers
226 | kylin.engine.spark.rdd-partition-cut-mb=10
227 |
228 | # Minimal partition numbers of rdd
229 | kylin.engine.spark.min-partition=1
230 |
231 | # Max partition numbers of rdd
232 | kylin.engine.spark.max-partition=5000
233 |
234 | ## Spark conf (default is in spark/conf/spark-defaults.conf)
235 | #kylin.engine.spark-conf.spark.master=yarn
236 | #kylin.engine.spark-conf.spark.submit.deployMode=cluster
237 | #kylin.engine.spark-conf.spark.yarn.queue=default
238 | #kylin.engine.spark-conf.spark.executor.memory=4G
239 | #kylin.engine.spark-conf.spark.executor.cores=4
240 | #kylin.engine.spark-conf.spark.executor.instances=2
241 | #kylin.engine.spark-conf.spark.eventLog.enabled=true
242 | #kylin.engine.spark-conf.spark.eventLog.dir=hdfs\:///kylin/spark-history
243 | #kylin.engine.spark-conf.spark.history.fs.logDirectory=hdfs\:///kylin/spark-history
244 |
245 | ## Copy the following file /spark/spark-libs.jar from the kylin .tar package to HDFS to avoid uploading dependencies at runtime
246 | #kylin.engine.spark-conf.spark.yarn.archive=hdfs://:8020/kylin/spark/spark-libs.jar
247 |
248 | ## or manually upload spark-assembly jar to HDFS and then set this property will avoid repeatedly uploading jar at runtime
249 | #kylin.engine.spark-conf.spark.yarn.jar=hdfs://packone165:8020/kylin/spark/spark-assembly-1.6.3-hadoop2.6.0.jar
250 | #kylin.engine.spark-conf.spark.io.compression.codec=org.apache.spark.io.SnappyCompressionCodec
251 |
252 | ## uncomment for HDP
253 | #kylin.engine.spark-conf.spark.driver.extraJavaOptions=-Dhdp.version={{hdp_version}}
254 | #kylin.engine.spark-conf.spark.yarn.am.extraJavaOptions=-Dhdp.version={{hdp_version}}
255 | #kylin.engine.spark-conf.spark.executor.extraJavaOptions=-Dhdp.version={{hdp_version}}
256 |
257 |
258 |
259 |
260 | download.location
261 | https://github.com/alfonsonishikawa/ambari-kylin-service/releases/download/3.1.1/apache-kylin.tar
262 | Location to download the package Apache Kylin 3.1.1 + spark 2.2.0 + HBase 1.2.0
263 |
264 |
265 |
266 |
--------------------------------------------------------------------------------