├── .gitignore ├── LICENSE ├── README.md ├── ansible.cfg ├── base-ubuntu-2004 └── files │ ├── etc │ ├── apt │ │ ├── sources.list.mirror-aarch64 │ │ └── sources.list.mirror-x86_64 │ ├── ssh │ │ └── sshd_config │ ├── supervisor │ │ └── conf.d │ │ │ └── sshd.conf │ └── supervisord.conf │ ├── opt │ └── dev │ │ └── site-override.xslt │ ├── root │ └── .ssh │ │ ├── authorized_keys │ │ └── id_rsa_hadoop_testing.pub │ └── usr │ ├── local │ └── bin │ │ ├── apply-all-site-xml-overrides │ │ ├── apply-site-xml-override │ │ ├── create-service-ready-mark │ │ ├── remove-service-ready-mark │ │ ├── wait-port-ready │ │ └── wait-service-ready-mark │ └── sbin │ └── install_packages ├── build.yaml ├── dev └── checkout_pr.sh ├── docs └── imgs │ ├── deployment_architecture.excalidraw │ ├── deployment_architecture.png │ ├── namenode-ui.png │ ├── switchy-omega-1.png │ ├── switchy-omega-2.png │ └── switchy-omega-3.png ├── download └── .gitkeep ├── files ├── etc │ ├── grafana │ │ └── provisioning │ │ │ ├── dashboards │ │ │ └── default.yaml │ │ │ └── datasources │ │ │ ├── loki.yaml │ │ │ └── prometheus.yaml │ ├── loki │ │ └── config.yaml │ └── prometheus │ │ └── prometheus.yml └── var │ └── lib │ └── grafana │ └── dashboards │ ├── kyuubi.json │ └── zookeeper_10465_rev4.json ├── host_vars └── local.yaml ├── hosts ├── kdc ├── Dockerfile ├── README.md └── files │ ├── etc │ ├── krb5.conf │ ├── krb5kdc │ │ ├── kadm5-other.acl │ │ ├── kadm5.acl │ │ └── kdc.conf │ ├── supervisord.conf │ └── supervisord.d │ │ └── kdc.conf │ ├── opt │ ├── entrypoint.sh │ └── service-ready.d │ │ ├── 001-wait-kdc-ready.sh │ │ ├── 002-create-service-principals.sh │ │ └── 003-create-kdc-ready-mark.sh │ └── usr │ └── local │ └── bin │ └── create_principal ├── mysql ├── Dockerfile └── files │ └── docker-entrypoint-initdb.d │ ├── hive-schema-2.3.0.mysql.sql │ ├── hive-txn-schema-2.3.0.mysql.sql │ ├── kyuubi-schema-1.8.0.mysql.sql │ └── ranger-database-init.mysql.sql ├── node-template.yaml ├── requirements.txt ├── templates ├── .env.j2 ├── base-ubuntu-2004 │ └── Dockerfile.j2 ├── build-image.sh.j2 ├── compose.yaml.j2 ├── download.sh.j2 ├── hadoop-common │ └── files │ │ ├── etc │ │ ├── hadoop │ │ │ └── conf │ │ │ │ ├── capacity-scheduler.xml │ │ │ │ ├── container-executor.cfg │ │ │ │ ├── core-site.xml.j2 │ │ │ │ ├── hadoop-env.sh.j2 │ │ │ │ ├── hdfs-site.xml.j2 │ │ │ │ ├── log4j.properties │ │ │ │ ├── mapred-site.xml.j2 │ │ │ │ └── yarn-site.xml.j2 │ │ ├── krb5.conf │ │ ├── supervisor │ │ │ └── conf.d │ │ │ │ └── trino-server.conf │ │ └── trino │ │ │ └── conf │ │ │ ├── config.properties │ │ │ ├── jvm.config │ │ │ ├── log.properties │ │ │ └── node.properties │ │ └── opt │ │ ├── hadoop-init.d │ │ └── init-hdfs.sh │ │ └── trino-init.d │ │ └── init-workdir.sh ├── hadoop-master │ ├── Dockerfile.j2 │ └── files │ │ ├── etc │ │ ├── flink │ │ │ ├── conf │ │ │ │ ├── flink-conf.yaml │ │ │ │ ├── log4j-cli.properties │ │ │ │ ├── log4j-console.properties │ │ │ │ ├── log4j-session.properties │ │ │ │ └── log4j.properties │ │ │ └── conf_history_server │ │ │ │ ├── flink-conf.yaml │ │ │ │ └── log4j-console.properties │ │ ├── hive │ │ │ └── conf │ │ │ │ ├── beeline-site.xml.j2 │ │ │ │ ├── hive-env.sh │ │ │ │ ├── hive-log4j2.properties │ │ │ │ └── hive-site.xml.j2 │ │ ├── kyuubi │ │ │ └── conf │ │ │ │ ├── kyuubi-defaults.conf.j2 │ │ │ │ ├── kyuubi-env.sh │ │ │ │ └── log4j2.xml.j2 │ │ ├── ranger │ │ │ ├── conf │ │ │ │ └── install.properties │ │ │ └── startup.sh │ │ ├── spark │ │ │ └── conf │ │ │ │ ├── log4j2.xml.j2 │ │ │ │ ├── spark-defaults.conf.j2 │ │ │ │ └── spark-env.sh 
│ │ ├── supervisor │ │ │ └── conf.d │ │ │ │ ├── flink-history-server.conf │ │ │ │ ├── hdfs-namenode.conf │ │ │ │ ├── hive-metastore.conf │ │ │ │ ├── hive-server2.conf │ │ │ │ ├── kyuubi-server.conf │ │ │ │ ├── mapred-history-server.conf │ │ │ │ ├── ranger-admin.conf │ │ │ │ ├── spark-history-server.conf │ │ │ │ ├── yarn-resourcemanager.conf │ │ │ │ ├── zeppelin-server.conf │ │ │ │ └── zookeeper.conf │ │ ├── zeppelin │ │ │ └── conf │ │ │ │ ├── configuration.xsl │ │ │ │ ├── interpreter-list.j2 │ │ │ │ ├── interpreter.json.j2 │ │ │ │ ├── log4j.properties │ │ │ │ ├── log4j2.properties │ │ │ │ ├── log4j_yarn_cluster.properties │ │ │ │ ├── shiro.ini.template │ │ │ │ ├── zeppelin-env.sh │ │ │ │ └── zeppelin-site.xml │ │ └── zookeeper │ │ │ └── conf │ │ │ ├── zoo.cfg │ │ │ └── zookeeper-env.sh │ │ ├── opt │ │ ├── entrypoint.sh │ │ ├── hadoop-init.d │ │ │ └── init-hdfs.sh │ │ ├── parquet │ │ │ └── bin │ │ │ │ └── parquet │ │ ├── service-ready.d │ │ │ ├── 001-wait-hdfs-ready.sh │ │ │ ├── 002-create-hdfs-dirs.sh.j2 │ │ │ ├── 003-create-hdfs-ready-mark.sh │ │ │ └── 004-kinit-spark.sh │ │ ├── start-hive-server2.sh │ │ ├── start-namenode.sh.j2 │ │ ├── start-resourcemanager.sh │ │ └── start-spark-history-server.sh │ │ └── root │ │ ├── .ssh │ │ ├── config │ │ └── id_rsa_hadoop_testing │ │ ├── HELP.j2 │ │ └── kyuubi-batch-spark-pi.yaml └── hadoop-worker │ ├── Dockerfile.j2 │ └── files │ └── etc │ └── supervisor │ └── conf.d │ ├── hdfs-datanode.conf.j2 │ └── yarn-nodemanager.conf └── test-ssh.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | *.iml 2 | *.ipr 3 | *.iws 4 | *.log 5 | .DS_Store 6 | .idea 7 | .python-version 8 | base-ubuntu-2004/Dockerfile 9 | depends 10 | download/ 11 | flags 12 | graphviz 13 | dependency_graph.svg 14 | *.swp 15 | build/tmp 16 | .env 17 | build-image.sh 18 | download.sh 19 | compose.yaml 20 | hadoop-master1/ 21 | hadoop-worker1/ 22 | hadoop-worker2/ 23 | hadoop-worker3/ 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /ansible.cfg: -------------------------------------------------------------------------------- 1 | [defaults] 2 | inventory=hosts 3 | module_name=shell 4 | gathering=explicit 5 | host_key_checking=False 6 | -------------------------------------------------------------------------------- /base-ubuntu-2004/files/etc/apt/sources.list.mirror-aarch64: -------------------------------------------------------------------------------- 1 | deb http://mirrors.cloud.tencent.com/ubuntu-ports/ focal main restricted universe multiverse 2 | # deb-src http://mirrors.cloud.tencent.com/ubuntu-ports/ focal main restricted universe multiverse 3 | 4 | deb http://mirrors.cloud.tencent.com/ubuntu-ports/ focal-security main restricted universe multiverse 5 | # deb-src http://mirrors.cloud.tencent.com/ubuntu-ports/ focal-security main restricted universe multiverse 6 | 7 | deb http://mirrors.cloud.tencent.com/ubuntu-ports/ focal-updates main restricted universe multiverse 8 | # deb-src http://mirrors.cloud.tencent.com/ubuntu-ports/ focal-updates main restricted universe multiverse 9 | 10 | # deb http://mirrors.cloud.tencent.com/ubuntu-ports/ focal-proposed main restricted universe multiverse 11 | # deb-src http://mirrors.cloud.tencent.com/ubuntu-ports/ focal-proposed main restricted universe multiverse 12 | 13 | deb http://mirrors.cloud.tencent.com/ubuntu-ports/ focal-backports main restricted universe multiverse 14 | # deb-src http://mirrors.cloud.tencent.com/ubuntu-ports/ focal-backports main restricted universe multiverse 15 | -------------------------------------------------------------------------------- /base-ubuntu-2004/files/etc/apt/sources.list.mirror-x86_64: -------------------------------------------------------------------------------- 1 | deb http://mirrors.cloud.tencent.com/ubuntu/ focal main restricted universe multiverse 2 | # deb-src http://mirrors.cloud.tencent.com/ubuntu/ focal main restricted universe multiverse 3 | 4 | deb http://mirrors.cloud.tencent.com/ubuntu/ focal-security main restricted universe multiverse 5 | # deb-src http://mirrors.cloud.tencent.com/ubuntu/ focal-security main restricted universe multiverse 6 | 7 | deb http://mirrors.cloud.tencent.com/ubuntu/ focal-updates main restricted universe multiverse 8 | # deb-src http://mirrors.cloud.tencent.com/ubuntu/ focal-updates main restricted universe multiverse 9 | 10 | # deb http://mirrors.cloud.tencent.com/ubuntu/ focal-proposed main restricted universe multiverse 11 | # deb-src http://mirrors.cloud.tencent.com/ubuntu/ focal-proposed main restricted universe multiverse 12 | 13 | deb http://mirrors.cloud.tencent.com/ubuntu/ focal-backports main restricted universe multiverse 14 | # deb-src http://mirrors.cloud.tencent.com/ubuntu/ focal-backports main restricted universe multiverse 15 | -------------------------------------------------------------------------------- /base-ubuntu-2004/files/etc/ssh/sshd_config: -------------------------------------------------------------------------------- 1 | Include /etc/ssh/sshd_config.d/*.conf 2 | 3 | # Change to yes to enable challenge-response passwords (beware issues with 4 | # some PAM modules and threads) 5 | KbdInteractiveAuthentication no 6 | 7 | # Set this to 'yes' to enable PAM authentication, account processing, 8 | # and session processing. If this is enabled, PAM authentication will 9 | # be allowed through the KbdInteractiveAuthentication and 10 | # PasswordAuthentication. 
Depending on your PAM configuration, 11 | # PAM authentication via KbdInteractiveAuthentication may bypass 12 | # the setting of "PermitRootLogin without-password". 13 | # If you just want the PAM account and session checks to run without 14 | # PAM authentication, then enable this but set PasswordAuthentication 15 | # and KbdInteractiveAuthentication to 'no'. 16 | UsePAM yes 17 | 18 | X11Forwarding yes 19 | PrintMotd no 20 | 21 | # Allow client to pass locale environment variables 22 | AcceptEnv LANG LC_* 23 | 24 | # override default of no subsystems 25 | Subsystem sftp /usr/lib/openssh/sftp-server 26 | 27 | RSAAuthentication yes 28 | PubkeyAuthentication yes 29 | -------------------------------------------------------------------------------- /base-ubuntu-2004/files/etc/supervisor/conf.d/sshd.conf: -------------------------------------------------------------------------------- 1 | [program:sshd] 2 | directory=/usr/sbin 3 | command=/usr/sbin/sshd -D 4 | startsecs=30 5 | stopwaitsecs=10 6 | redirect_stderr=true 7 | stdout_logfile=/var/log/sshd.log 8 | autostart=true 9 | autorestart=true -------------------------------------------------------------------------------- /base-ubuntu-2004/files/etc/supervisord.conf: -------------------------------------------------------------------------------- 1 | [supervisord] 2 | logfile=/var/log/supervisord.log 3 | logfile_maxbytes = 50MB 4 | logfile_backups=10 5 | loglevel=info 6 | pidfile=/var/run/supervisord.pid 7 | nodaemon=true 8 | directory=/tmp 9 | strip_ansi=false 10 | 11 | [unix_http_server] 12 | file=/var/run/supervisor.sock 13 | chmod=0777 14 | 15 | [rpcinterface:supervisor] 16 | supervisor.rpcinterface_factory=supervisor.rpcinterface:make_main_rpcinterface 17 | 18 | [supervisorctl] 19 | serverurl=unix:///var/run/supervisor.sock ; use a unix:// URL for a unix socket 20 | 21 | ; The [include] section can just contain the "files" setting. This 22 | ; setting can list multiple files (separated by whitespace or 23 | ; newlines). It can also contain wildcards. The filenames are 24 | ; interpreted as relative to this file. Included files *cannot* 25 | ; include files themselves. 
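; Note: in this base image the only conf.d entry is sshd.conf (the
; [program:sshd] block above); the hadoop-master and hadoop-worker images add
; further program definitions under the same directory, and the [include]
; section below is what picks them all up at supervisord startup.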
26 | 27 | [include] 28 | files=/etc/supervisor/conf.d/*.conf 29 | -------------------------------------------------------------------------------- /base-ubuntu-2004/files/opt/dev/site-override.xslt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /base-ubuntu-2004/files/root/.ssh/authorized_keys: -------------------------------------------------------------------------------- 1 | ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDAv6UOSS3R77Rq7VLO2U3s2YS1j5jDjTGNIwLqvsV0gJRA72JQjGdGilWyRZXi9oaFMmIpjUS2VdNCMveQ9pVaySIFnfbBNINpDqTDgKpqQadLijmQwSn6Rc8U2+Wetca26hufuH0qSMY61EkK0zqy1xzzWbyCOaOR1WioSvNDlJrv867661ZvctNVL4gG7JChktmCktb3lA3F1APgRsm+zhJSJeaV0WUnWMIS5eHTye0IRAeZUGrnNsF5gb5XLATTD8bVZY5B8Tn4LQHgkXah+sJZnGcrxuvmJSfQbHNmxCzXpRhXWwUkd2eIzszo2Lqi+AeM2BkVyfR+Ygm7bQXq8Q+dqioYM4Z6tY6cgyhDkq1eiWWSAGxF+UTIIUN0o8Wkm/mNzML3BhfBhyMmRcAX3zZyAkFUuijQ/BJRJ7Wl+D50y3O0Hnrq9qwghWTD3m8evRgH8+L9EFndIH1RnYoRyLA57LwoXKpzKV+1J6stQJXwQOaWp8kgplMz8mGsflk= root@hadoop-testing 2 | -------------------------------------------------------------------------------- /base-ubuntu-2004/files/root/.ssh/id_rsa_hadoop_testing.pub: -------------------------------------------------------------------------------- 1 | ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDAv6UOSS3R77Rq7VLO2U3s2YS1j5jDjTGNIwLqvsV0gJRA72JQjGdGilWyRZXi9oaFMmIpjUS2VdNCMveQ9pVaySIFnfbBNINpDqTDgKpqQadLijmQwSn6Rc8U2+Wetca26hufuH0qSMY61EkK0zqy1xzzWbyCOaOR1WioSvNDlJrv867661ZvctNVL4gG7JChktmCktb3lA3F1APgRsm+zhJSJeaV0WUnWMIS5eHTye0IRAeZUGrnNsF5gb5XLATTD8bVZY5B8Tn4LQHgkXah+sJZnGcrxuvmJSfQbHNmxCzXpRhXWwUkd2eIzszo2Lqi+AeM2BkVyfR+Ygm7bQXq8Q+dqioYM4Z6tY6cgyhDkq1eiWWSAGxF+UTIIUN0o8Wkm/mNzML3BhfBhyMmRcAX3zZyAkFUuijQ/BJRJ7Wl+D50y3O0Hnrq9qwghWTD3m8evRgH8+L9EFndIH1RnYoRyLA57LwoXKpzKV+1J6stQJXwQOaWp8kgplMz8mGsflk= root@hadoop-testing 2 | -------------------------------------------------------------------------------- /base-ubuntu-2004/files/usr/local/bin/apply-all-site-xml-overrides: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euo pipefail 4 | 5 | fail() { 6 | echo "$(basename "$0"): $*" >&2 7 | exit 1 8 | } 9 | 10 | if [ $# -ne 1 ]; then 11 | fail "Usage: $0 " >&2 12 | fi 13 | 14 | overrides_dir="$1" 15 | 16 | for file in $(find $overrides_dir -name '*.xml'); do 17 | target_filename="${file#"$overrides_dir"}" 18 | echo "Applying configuration override from $file to $target_filename" 19 | apply-site-xml-override "$target_filename" "$file" 20 | done 21 | -------------------------------------------------------------------------------- /base-ubuntu-2004/files/usr/local/bin/apply-site-xml-override: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euo pipefail 4 | 5 | fail() { 6 | echo "$(basename "$0"): $*" >&2 7 | exit 1 8 | } 9 | 10 | if [ $# -ne 2 ]; then 11 | fail "Usage: $0 " >&2 12 | fi 13 | 14 | site_xml="$1" 15 | overrides="$2" 16 | site_xml_new="$1.new" 17 | 18 | test -f "${site_xml}" || fail "${site_xml} does not exist or is not a file" 19 | test -f "${overrides}" || fail "${overrides} does not exist or is not a file" 20 | test ! 
-e "${site_xml_new}" || fail "${site_xml_new} already exists" 21 | 22 | xsltproc --param override-path "'${overrides}'" "/opt/dev/site-override.xslt" "${site_xml}" > "${site_xml_new}" 23 | cat "${site_xml_new}" > "${site_xml}" # Preserve file owner & permissions 24 | rm "${site_xml_new}" 25 | -------------------------------------------------------------------------------- /base-ubuntu-2004/files/usr/local/bin/create-service-ready-mark: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | dir=/share/marks/ready.d/ 4 | 5 | set -euo pipefail 6 | 7 | function usage() { 8 | echo "Usage: $0 [-d ] -s " >&2 9 | exit 1 10 | } 11 | 12 | while getopts "d:s:" o; do 13 | case "${o}" in 14 | d) 15 | dir="$OPTARG" 16 | ;; 17 | s) 18 | service="$OPTARG" 19 | ;; 20 | *) 21 | esac 22 | done 23 | 24 | if [[ ! -v service ]]; then 25 | usage 26 | fi 27 | 28 | mkdir -p $dir 29 | touch $dir/$service 30 | -------------------------------------------------------------------------------- /base-ubuntu-2004/files/usr/local/bin/remove-service-ready-mark: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | dir=/share/marks/ready.d/ 4 | 5 | set -euo pipefail 6 | 7 | function usage() { 8 | echo "Usage: $0 [-d ] -s " >&2 9 | exit 1 10 | } 11 | 12 | while getopts "d:s:" o; do 13 | case "${o}" in 14 | d) 15 | dir="$OPTARG" 16 | ;; 17 | s) 18 | service="$OPTARG" 19 | ;; 20 | *) 21 | esac 22 | done 23 | 24 | if [[ ! -v service ]]; then 25 | usage 26 | fi 27 | 28 | rm -f $dir/$service 29 | 30 | -------------------------------------------------------------------------------- /base-ubuntu-2004/files/usr/local/bin/wait-port-ready: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | timeout=60 4 | interval=1 5 | 6 | set -euo pipefail 7 | 8 | function usage() { 9 | echo "Usage: $0 [-t ] [-i ] -p " >&2 10 | exit 1 11 | } 12 | 13 | while getopts "t:i:p:" o; do 14 | case "${o}" in 15 | t) 16 | timeout="$OPTARG" 17 | ;; 18 | i) 19 | interval="$OPTARG" 20 | ;; 21 | p) 22 | port="$OPTARG" 23 | ;; 24 | *) 25 | esac 26 | done 27 | 28 | if [[ ! -v port ]]; then 29 | usage 30 | fi 31 | 32 | end_time=$((SECONDS + timeout)) 33 | 34 | while [ $SECONDS -lt $end_time ]; do 35 | if /opt/busybox/nc $(hostname) $port -e true; then 36 | echo "Port $port is now available" 37 | break 38 | else 39 | echo "Port $port is not available, retrying in $interval seconds..." 40 | sleep $interval 41 | fi 42 | done 43 | 44 | if [ $SECONDS -ge $end_time ]; then 45 | echo "Timeout reached. Port $port is still not available." 46 | exit 1 47 | fi -------------------------------------------------------------------------------- /base-ubuntu-2004/files/usr/local/bin/wait-service-ready-mark: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | timeout=60 4 | interval=1 5 | dir=/share/marks/ready.d/ 6 | 7 | set -euo pipefail 8 | 9 | function usage() { 10 | echo "Usage: $0 [-t ] [-i ] [-d ] -s " >&2 11 | exit 1 12 | } 13 | 14 | while getopts "t:i:d:s:" o; do 15 | case "${o}" in 16 | t) 17 | timeout="$OPTARG" 18 | ;; 19 | i) 20 | interval="$OPTARG" 21 | ;; 22 | d) 23 | dir="$OPTARG" 24 | ;; 25 | s) 26 | service="$OPTARG" 27 | ;; 28 | *) 29 | esac 30 | done 31 | 32 | if [[ ! 
-v service ]]; then 33 | usage 34 | fi 35 | 36 | end_time=$((SECONDS + timeout)) 37 | 38 | while [ $SECONDS -lt $end_time ]; do 39 | if [[ -f $dir/$service ]]; then 40 | echo "Service $service is now ready" 41 | break 42 | else 43 | echo "Service $service is not ready, retrying in $interval seconds..." 44 | sleep $interval 45 | fi 46 | done 47 | 48 | if [ $SECONDS -ge $end_time ]; then 49 | echo "Timeout reached. Service $service is still not ready." 50 | exit 1 51 | fi 52 | -------------------------------------------------------------------------------- /base-ubuntu-2004/files/usr/sbin/install_packages: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | set -u 4 | export DEBIAN_FRONTEND=noninteractive 5 | n=0 6 | max=2 7 | until [ $n -gt $max ]; do 8 | set +e 9 | ( 10 | apt-get update -qq && 11 | apt-get install -y --no-install-recommends "$@" 12 | ) 13 | CODE=$? 14 | set -e 15 | if [ $CODE -eq 0 ]; then 16 | break 17 | fi 18 | if [ $n -eq $max ]; then 19 | exit $CODE 20 | fi 21 | echo "apt failed, retrying" 22 | n=$(($n + 1)) 23 | done 24 | rm -r /var/lib/apt/lists /var/cache/apt/archives 25 | -------------------------------------------------------------------------------- /build.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: compile template files 3 | 4 | hosts: local 5 | 6 | tasks: 7 | - name: orchestrate basic files 8 | ansible.builtin.template: 9 | src: templates/{{ item.file }}.j2 10 | dest: "{{ playbook_dir }}/{{ item.file }}" 11 | mode: "{{ item.mode }}" 12 | with_items: 13 | - { file: ".env", mode: "a+x" } 14 | - { file: "build-image.sh", mode: "a+x" } 15 | - { file: "download.sh", mode: "a+x" } 16 | - { file: "base-ubuntu-2004/Dockerfile", mode: ~ } 17 | 18 | - include_tasks: node-template.yaml 19 | loop: 20 | - name: "hadoop-master1" 21 | group: "hadoop-master" 22 | - name: "hadoop-worker1" 23 | group: "hadoop-worker" 24 | - name: "hadoop-worker2" 25 | group: "hadoop-worker" 26 | - name: "hadoop-worker3" 27 | group: "hadoop-worker" 28 | loop_control: 29 | loop_var: node 30 | 31 | - name: orchestrate compose file 32 | ansible.builtin.template: 33 | src: templates/compose.yaml.j2 34 | dest: "{{ playbook_dir }}/compose.yaml" 35 | -------------------------------------------------------------------------------- /dev/checkout_pr.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
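# Fetches the head of GitHub pull request <pr-number> from the "upstream"
# remote into a local branch named PR_<pr-number> and checks it out;
# -f force-updates an existing local branch of that name.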
17 | # 18 | 19 | set -o pipefail 20 | set -e 21 | set -x 22 | 23 | function usage { 24 | echo "Usage: $(basename "${0}") [-f] " 2>&1 25 | echo ' -f force overwrite of local branch (default: fail if exists)' 26 | exit 1 27 | } 28 | 29 | if [[ ${#} -eq 0 ]]; then 30 | usage 31 | fi 32 | 33 | FORCE="" 34 | while getopts ":f" arg; do 35 | case "${arg}" in 36 | f) 37 | FORCE="--force" 38 | ;; 39 | ?) 40 | usage 41 | ;; 42 | esac 43 | done 44 | shift "$(($OPTIND -1))" 45 | 46 | PR_NUM=$1 47 | 48 | git fetch upstream pull/${PR_NUM}/head:PR_${PR_NUM} ${FORCE} 49 | git checkout PR_${PR_NUM} 50 | -------------------------------------------------------------------------------- /docs/imgs/deployment_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awesome-kyuubi/hadoop-testing/e35aa5c439064898fd4fcba3abeccdb2f623b97e/docs/imgs/deployment_architecture.png -------------------------------------------------------------------------------- /docs/imgs/namenode-ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awesome-kyuubi/hadoop-testing/e35aa5c439064898fd4fcba3abeccdb2f623b97e/docs/imgs/namenode-ui.png -------------------------------------------------------------------------------- /docs/imgs/switchy-omega-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awesome-kyuubi/hadoop-testing/e35aa5c439064898fd4fcba3abeccdb2f623b97e/docs/imgs/switchy-omega-1.png -------------------------------------------------------------------------------- /docs/imgs/switchy-omega-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awesome-kyuubi/hadoop-testing/e35aa5c439064898fd4fcba3abeccdb2f623b97e/docs/imgs/switchy-omega-2.png -------------------------------------------------------------------------------- /docs/imgs/switchy-omega-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awesome-kyuubi/hadoop-testing/e35aa5c439064898fd4fcba3abeccdb2f623b97e/docs/imgs/switchy-omega-3.png -------------------------------------------------------------------------------- /download/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awesome-kyuubi/hadoop-testing/e35aa5c439064898fd4fcba3abeccdb2f623b97e/download/.gitkeep -------------------------------------------------------------------------------- /files/etc/grafana/provisioning/dashboards/default.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | providers: 4 | - name: Default # A uniquely identifiable name for the provider 5 | folder: ~ # The folder where to place the dashboards 6 | type: file 7 | options: 8 | path: /var/lib/grafana/dashboards 9 | -------------------------------------------------------------------------------- /files/etc/grafana/provisioning/datasources/loki.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | datasources: 4 | - name: Loki 5 | type: loki 6 | access: proxy 7 | orgId: 1 8 | url: http://loki:3100 9 | basicAuth: false 10 | isDefault: false 11 | version: 1 12 | editable: false 13 | jsonData: 14 | maxLines: 100000 15 | -------------------------------------------------------------------------------- 
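Note: the Grafana/Loki/Prometheus provisioning files in this directory are consumed by containers defined in the generated compose file (rendered from templates/compose.yaml.j2, which is not shown in this listing). As a rough hand-written sketch only — the service names, image tags, and mount paths below are assumptions inferred from the datasource URLs and the versions in host_vars/local.yaml, not the repository's actual template:

services:
  grafana:
    image: grafana/grafana:11.5.2            # grafana_version in host_vars/local.yaml
    ports: ["3000:3000"]
    volumes:
      - ./files/etc/grafana/provisioning:/etc/grafana/provisioning
      - ./files/var/lib/grafana/dashboards:/var/lib/grafana/dashboards
  loki:
    image: grafana/loki:3.4.2                # loki_version
    command: ["-config.file=/etc/loki/config.yaml"]
    ports: ["3100:3100"]
    volumes:
      - ./files/etc/loki/config.yaml:/etc/loki/config.yaml
  prometheus:
    image: prom/prometheus:v2.53.3           # prometheus_version
    ports: ["9090:9090"]
    volumes:
      - ./files/etc/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml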
/files/etc/grafana/provisioning/datasources/prometheus.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | datasources: 4 | - name: Prometheus 5 | type: prometheus 6 | access: proxy 7 | orgId: 1 8 | url: http://prometheus:9090 9 | isDefault: true 10 | version: 1 11 | editable: false 12 | -------------------------------------------------------------------------------- /files/etc/loki/config.yaml: -------------------------------------------------------------------------------- 1 | auth_enabled: false 2 | 3 | server: 4 | http_listen_port: 3100 5 | grpc_listen_port: 9096 6 | grpc_server_max_recv_msg_size: 67108864 7 | grpc_server_max_send_msg_size: 67108864 8 | 9 | common: 10 | instance_addr: 0.0.0.0 11 | path_prefix: /loki 12 | storage: 13 | filesystem: 14 | chunks_directory: /loki/chunks 15 | rules_directory: /loki/rules 16 | replication_factor: 1 17 | ring: 18 | kvstore: 19 | store: inmemory 20 | 21 | query_range: 22 | results_cache: 23 | cache: 24 | embedded_cache: 25 | enabled: true 26 | max_size_mb: 100 27 | 28 | limits_config: 29 | retention_period: 7d 30 | max_entries_limit_per_query: 100000 31 | 32 | compactor: 33 | working_directory: /loki/compactor 34 | delete_request_store: filesystem 35 | # shared_store: filesystem 36 | # deletion_mode: filter-and-delete 37 | compaction_interval: 10m 38 | retention_enabled: true 39 | retention_delete_delay: 1h 40 | retention_delete_worker_count: 150 41 | 42 | schema_config: 43 | configs: 44 | - from: 2024-01-01 45 | store: tsdb 46 | object_store: filesystem 47 | schema: v13 48 | index: 49 | prefix: index_ 50 | period: 24h 51 | -------------------------------------------------------------------------------- /files/etc/prometheus/prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 3s 3 | scrape_timeout: 3s 4 | evaluation_interval: 5s 5 | 6 | scrape_configs: 7 | - job_name: "prometheus" 8 | scheme: "http" 9 | metrics_path: "/metrics" 10 | static_configs: 11 | - targets: 12 | - "prometheus:9090" 13 | 14 | - job_name: "kyuubi" 15 | scheme: "http" 16 | metrics_path: "/metrics" 17 | static_configs: 18 | - targets: 19 | - "hadoop-master1.orb.local:10019" 20 | 21 | - job_name: "zookeeper" 22 | scheme: "http" 23 | metrics_path: "/metrics" 24 | static_configs: 25 | - targets: 26 | - "hadoop-master1.orb.local:7000" 27 | -------------------------------------------------------------------------------- /host_vars/local.yaml: -------------------------------------------------------------------------------- 1 | project_version: 1-SNAPSHOT 2 | 3 | # base 4 | zulu8_version: 8.82.0.21 5 | jdk8_version: 8.0.432 6 | 7 | zulu11_version: 11.76.21 8 | jdk11_version: 11.0.25 9 | 10 | zulu17_version: 17.54.21 11 | jdk17_version: 17.0.13 12 | 13 | jdk21_enabled: false 14 | zulu21_version: 21.38.21 15 | jdk21_version: 21.0.5 16 | 17 | s3_enabled: false 18 | aws_java_sdk_version: 1.12.620 19 | 20 | # components 21 | kerberos_enabled: false 22 | 23 | hadoop_version: 3.3.6 24 | 25 | hive_version: 2.3.9 26 | 27 | zeppelin_enabled: true 28 | zeppelin_version: 0.12.0 29 | zeppelin_custom_name: all 30 | 31 | zookeeper_version: 3.8.4 32 | 33 | spark_enabled: true 34 | spark_version: 3.5.5 35 | spark_binary_version: 3.5 36 | spark_scala_binary_version: 2.12 37 | spark_hadoop_version: 3.3.4 38 | spark_custom_name: hadoop3 39 | 40 | flink_enabled: false 41 | flink_binary_version: '1.20' 42 | flink_version: 1.20.1 43 | flink_hive_version: 2.3.10 44 | 
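# Note: the component switches and versions in this file are consumed by
# build.yaml, which renders the templates/*.j2 files into .env, build-image.sh,
# download.sh, compose.yaml and the per-node hadoop-master1/hadoop-worker*
# directories.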
45 | kyuubi_enabled: true 46 | kyuubi_version: 1.10.1 47 | kyuubi_hadoop_version: 3.3.6 48 | 49 | ranger_enabled: false 50 | ranger_version: 2.4.0 51 | 52 | trino_enabled: false 53 | trino_version: 436 54 | 55 | parquet_enabled: true 56 | parquet_version: 1.15.0 57 | 58 | iceberg_enabled: true 59 | iceberg_version: 1.8.1 60 | 61 | hudi_enabled: false 62 | hudi_version: 0.14.1 63 | 64 | kafka_enabled: false 65 | kafka_version: 3.6.2 66 | kafka_ui_enabled: false 67 | kafka_ui_version: 1.1.0 68 | 69 | grafana_enabled: false 70 | grafana_version: 11.5.2 71 | 72 | prometheus_enabled: false 73 | prometheus_version: 2.53.3 74 | 75 | loki_enabled: false 76 | loki_version: 3.4.2 77 | loki_log4j2_appender_version: 0.9.32 78 | 79 | # repository 80 | repository_apache: https://mirrors.cloud.tencent.com/apache 81 | repository_maven: https://mirrors.cloud.tencent.com/maven 82 | 83 | apt_mirror_enabled: true 84 | -------------------------------------------------------------------------------- /hosts: -------------------------------------------------------------------------------- 1 | local ansible_connection=local 2 | 3 | [hadoop-master] 4 | hadoop-master1 5 | 6 | [hadoop-worker] 7 | hadoop-worker1 8 | hadoop-worker2 9 | hadoop-worker3 10 | -------------------------------------------------------------------------------- /kdc/Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | # See the License for the specific language governing permissions and 11 | # limitations under the License. 
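#
# This image runs an MIT Kerberos KDC with two realms: TEST.ORG (the default
# database) and OTHER.ORG (a second database under
# /var/lib/krb5kdc/principal-other), plus cross-realm krbtgt principals so that
# TEST.ORG can trust OTHER.ORG. supervisord keeps krb5kdc and kadmind running,
# and /opt/entrypoint.sh executes the scripts in /opt/service-ready.d to create
# the service principals under /share/keytabs and publish the "kdc" ready mark.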
12 | 13 | ARG PROJECT_VERSION 14 | FROM hadoop-testing/base-ubuntu-2004:$PROJECT_VERSION 15 | 16 | # COPY CONFIGURATION 17 | COPY ./files / 18 | 19 | # INSTALL KERBEROS 20 | RUN install_packages krb5-kdc krb5-admin-server krb5-user 21 | 22 | # CREATE KERBEROS DATABASE 23 | RUN /usr/sbin/kdb5_util create -s -P password 24 | 25 | # CREATE ANOTHER KERBEROS DATABASE 26 | RUN mkdir -p /var/lib/krb5kdc && \ 27 | /usr/sbin/kdb5_util create -s -P password -r OTHER.ORG -d /var/lib/krb5kdc/principal-other 28 | 29 | # MAKE '.TEST.ORG' TRUST 'OTHER.ORG' 30 | RUN /usr/sbin/kadmin.local -q "addprinc -pw 123456 krbtgt/.TEST.ORG@OTHER.ORG" 31 | RUN /usr/sbin/kadmin.local -q "addprinc -pw 123456 krbtgt/TEST.ORG" -r OTHER.ORG -d /var/lib/krb5kdc/principal-other 32 | 33 | # EXPOSE KERBEROS PORTS 34 | EXPOSE 88 35 | EXPOSE 89 36 | EXPOSE 749 37 | 38 | CMD ["supervisord", "-c", "/etc/supervisord.conf"] 39 | ENTRYPOINT ["/opt/entrypoint.sh"] 40 | -------------------------------------------------------------------------------- /kdc/README.md: -------------------------------------------------------------------------------- 1 | KDC 2 | === 3 | 4 | ## Commands 5 | 6 | Launch container 7 | ```shell 8 | docker run --rm -it -h kdc --name kdc hadoop-testing/kdc:${PROJECT_VERSION} 9 | ``` 10 | 11 | Login shell 12 | ```shell 13 | docker exec -it kdc bash 14 | ``` -------------------------------------------------------------------------------- /kdc/files/etc/krb5.conf: -------------------------------------------------------------------------------- 1 | [logging] 2 | default = FILE:/var/log/krb5libs.log 3 | kdc = FILE:/var/log/krb5kdc.log 4 | admin_server = FILE:/var/log/kadmind.log 5 | 6 | [libdefaults] 7 | default_realm = TEST.ORG 8 | dns_lookup_realm = false 9 | dns_lookup_kdc = false 10 | forwardable = true 11 | allow_weak_crypto = true 12 | 13 | [realms] 14 | TEST.ORG = { 15 | kdc = kdc.orb.local:88 16 | admin_server = kdc.orb.local 17 | } 18 | OTHER.ORG = { 19 | kdc = kdc.orb.local:89 20 | admin_server = kdc.orb.local 21 | } 22 | -------------------------------------------------------------------------------- /kdc/files/etc/krb5kdc/kadm5-other.acl: -------------------------------------------------------------------------------- 1 | */admin@OTHER.ORG * 2 | -------------------------------------------------------------------------------- /kdc/files/etc/krb5kdc/kadm5.acl: -------------------------------------------------------------------------------- 1 | */admin@TEST.ORG * 2 | -------------------------------------------------------------------------------- /kdc/files/etc/krb5kdc/kdc.conf: -------------------------------------------------------------------------------- 1 | [kdcdefaults] 2 | kdc_ports = 88 3 | kdc_tcp_ports = 88 4 | 5 | [realms] 6 | TEST.ORG = { 7 | acl_file = /etc/krb5kdc/kadm5.acl 8 | dict_file = /usr/share/dict/words 9 | admin_keytab = /etc/krb5kdc/kadm5.keytab 10 | } 11 | 12 | OTHER.ORG = { 13 | acl_file = /etc/krb5kdc/kadm5-other.acl 14 | dict_file = /usr/share/dict/words 15 | admin_keytab = /etc/krb5kdc/kadm5-other.keytab 16 | kdc_listen = 89 17 | kdc_tcp_listen = 89 18 | kdc_ports = 89 19 | kdc_tcp_ports = 89 20 | } 21 | 22 | -------------------------------------------------------------------------------- /kdc/files/etc/supervisord.conf: -------------------------------------------------------------------------------- 1 | [supervisord] 2 | logfile = /var/log/supervisord.log 3 | logfile_maxbytes = 50MB 4 | logfile_backups=10 5 | loglevel = info 6 | pidfile = /var/run/supervisord.pid 7 | nodaemon = 
true 8 | directory = /tmp 9 | strip_ansi = false 10 | 11 | [unix_http_server] 12 | file = /tmp/supervisor.sock 13 | 14 | [rpcinterface:supervisor] 15 | supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface 16 | 17 | [supervisorctl] 18 | serverurl = unix:///tmp/supervisor.sock 19 | 20 | [include] 21 | files = /etc/supervisord.d/*.conf 22 | -------------------------------------------------------------------------------- /kdc/files/etc/supervisord.d/kdc.conf: -------------------------------------------------------------------------------- 1 | [program:krb5kdc] 2 | command=/bin/bash -c "exec /usr/sbin/krb5kdc -P /var/run/krb5kdc.pid -n -r TEST.ORG -n -d /var/lib/krb5kdc/principal-other -r OTHER.ORG" 3 | autostart=true 4 | autorestart=true 5 | redirect_stderr=true 6 | stdout_logfile=/dev/stdout 7 | stdout_logfile_maxbytes=0 8 | 9 | [program:kadmind] 10 | command=/bin/bash -c "exec /usr/sbin/kadmind -P /var/run/kadmind.pid -nofork -r TEST.ORG" 11 | autostart=true 12 | autorestart=true 13 | redirect_stderr=true 14 | stdout_logfile=/dev/stdout 15 | stdout_logfile_maxbytes=0 16 | -------------------------------------------------------------------------------- /kdc/files/opt/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -exuo pipefail 4 | 5 | "$@" & 6 | 7 | if [[ -v POST_BOOTSTRAP_COMMAND ]]; then 8 | $POST_BOOTSTRAP_COMMAND 9 | fi 10 | 11 | if [[ -d /opt/service-ready.d ]]; then 12 | for init_script in /opt/service-ready.d/*; do 13 | bash "${init_script}" 14 | done 15 | fi 16 | 17 | wait 18 | -------------------------------------------------------------------------------- /kdc/files/opt/service-ready.d/001-wait-kdc-ready.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -exuo pipefail 4 | 5 | wait-port-ready -p 88 6 | wait-port-ready -p 89 7 | wait-port-ready -p 749 -------------------------------------------------------------------------------- /kdc/files/opt/service-ready.d/002-create-service-principals.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -exuo pipefail 4 | 5 | rm -rf /share/keytabs/* 6 | 7 | mkdir -p /share/keytabs/hadoop-master1 8 | # HDFS NameNode 9 | create_principal -p nn/hadoop-master1.orb.local -k /share/keytabs/hadoop-master1/nn.service.keytab 10 | create_principal -p host/hadoop-master1.orb.local -k /share/keytabs/hadoop-master1/nn.service.keytab 11 | create_principal -p HTTP/hadoop-master1.orb.local -k /share/keytabs/hadoop-master1/nn.service.keytab 12 | 13 | # YARN ResourceManager 14 | create_principal -p rm/hadoop-master1.orb.local -k /share/keytabs/hadoop-master1/rm.service.keytab 15 | create_principal -p host/hadoop-master1.orb.local -k /share/keytabs/hadoop-master1/rm.service.keytab 16 | 17 | # MapReduce JobHistory Server 18 | create_principal -p jhs/hadoop-master1.orb.local -k /share/keytabs/hadoop-master1/jhs.service.keytab 19 | 20 | # Hive MetaStore & Hive Server2 21 | create_principal -p hive/hadoop-master1.orb.local -k /share/keytabs/hadoop-master1/hive.service.keytab 22 | 23 | # Spark History Server 24 | create_principal -p spark/hadoop-master1.orb.local -k /share/keytabs/hadoop-master1/spark.service.keytab 25 | 26 | # Flink History Server 27 | create_principal -p flink/hadoop-master1.orb.local -k /share/keytabs/hadoop-master1/flink.service.keytab 28 | 29 | # Kyuubi Server 30 | create_principal -p 
kyuubi/hadoop-master1.orb.local -k /share/keytabs/hadoop-master1/kyuubi.service.keytab 31 | 32 | for i in {1..3}; do 33 | mkdir -p /share/keytabs/hadoop-worker$i 34 | # HDFS DataNode 35 | create_principal -p dn/hadoop-worker$i.orb.local -k /share/keytabs/hadoop-worker$i/dn.service.keytab 36 | create_principal -p host/hadoop-worker$i.orb.local -k /share/keytabs/hadoop-worker$i/dn.service.keytab 37 | # YARN NodeManger 38 | create_principal -p nm/hadoop-worker$i.orb.local -k /share/keytabs/hadoop-worker$i/nm.service.keytab 39 | create_principal -p host/hadoop-worker$i.orb.local -k /share/keytabs/hadoop-worker$i/nm.service.keytab 40 | done 41 | 42 | chmod -R a+r /share/keytabs -------------------------------------------------------------------------------- /kdc/files/opt/service-ready.d/003-create-kdc-ready-mark.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | create-service-ready-mark -s kdc 4 | -------------------------------------------------------------------------------- /kdc/files/usr/local/bin/create_principal: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euo pipefail 4 | 5 | function usage() { 6 | if [ $# -ne 2 ]; then 7 | echo "Usage: $0 [-o] -p -k " >&2 8 | echo >&2 9 | echo "-o - use non default realm" >&2 10 | exit 1 11 | fi 12 | } 13 | 14 | DEFAULT_REALM='TEST.ORG' 15 | OTHER_REALM='OTHER.ORG' 16 | OTHER_REALM_DATABASE='/var/lib/krb5kdc/principal-other' 17 | 18 | realm="$DEFAULT_REALM" 19 | kadmin_opts='' 20 | 21 | while getopts "op:k:" o; do 22 | case "${o}" in 23 | o) 24 | realm="$OTHER_REALM" 25 | kadmin_opts="-r $OTHER_REALM -d $OTHER_REALM_DATABASE" 26 | ;; 27 | p) 28 | principal="$OPTARG" 29 | ;; 30 | k) 31 | keytab="$OPTARG" 32 | ;; 33 | *) 34 | esac 35 | done 36 | 37 | if [[ ! -v principal ]]; then 38 | usage 39 | fi 40 | 41 | if [[ ! -v keytab ]]; then 42 | usage 43 | fi 44 | 45 | /usr/sbin/kadmin.local $kadmin_opts -q "addprinc -randkey $principal@$realm" 46 | /usr/sbin/kadmin.local $kadmin_opts -q "ktadd -norandkey -k $keytab $principal" 47 | -------------------------------------------------------------------------------- /mysql/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM bitnami/mysql:8.0 2 | 3 | COPY ./files / 4 | 5 | ENV MYSQL_ROOT_PASSWORD=root \ 6 | MYSQL_AUTHENTICATION_PLUGIN=mysql_native_password \ 7 | MYSQL_DATABASE=metastore 8 | -------------------------------------------------------------------------------- /mysql/files/docker-entrypoint-initdb.d/hive-txn-schema-2.3.0.mysql.sql: -------------------------------------------------------------------------------- 1 | -- Licensed to the Apache Software Foundation (ASF) under one or more 2 | -- contributor license agreements. See the NOTICE file distributed with 3 | -- this work for additional information regarding copyright ownership. 4 | -- The ASF licenses this file to You under the Apache License, Version 2.0 5 | -- (the "License"); you may not use this file except in compliance with 6 | -- the License. You may obtain a copy of the License at 7 | -- 8 | -- http://www.apache.org/licenses/LICENSE-2.0 9 | -- 10 | -- Unless required by applicable law or agreed to in writing, software 11 | -- distributed under the License is distributed on an "AS IS" BASIS, 12 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | -- See the License for the specific language governing permissions and 14 | -- limitations under the License. 15 | 16 | -- 17 | -- Tables for transaction management 18 | -- 19 | 20 | CREATE TABLE TXNS ( 21 | TXN_ID bigint PRIMARY KEY, 22 | TXN_STATE char(1) NOT NULL, 23 | TXN_STARTED bigint NOT NULL, 24 | TXN_LAST_HEARTBEAT bigint NOT NULL, 25 | TXN_USER varchar(128) NOT NULL, 26 | TXN_HOST varchar(128) NOT NULL, 27 | TXN_AGENT_INFO varchar(128), 28 | TXN_META_INFO varchar(128), 29 | TXN_HEARTBEAT_COUNT int 30 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 31 | 32 | CREATE TABLE TXN_COMPONENTS ( 33 | TC_TXNID bigint NOT NULL, 34 | TC_DATABASE varchar(128) NOT NULL, 35 | TC_TABLE varchar(128) NOT NULL, 36 | TC_PARTITION varchar(767), 37 | TC_OPERATION_TYPE char(1) NOT NULL, 38 | FOREIGN KEY (TC_TXNID) REFERENCES TXNS (TXN_ID) 39 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 40 | 41 | CREATE INDEX TC_TXNID_INDEX ON TXN_COMPONENTS (TC_TXNID); 42 | 43 | CREATE TABLE COMPLETED_TXN_COMPONENTS ( 44 | CTC_TXNID bigint NOT NULL, 45 | CTC_DATABASE varchar(128) NOT NULL, 46 | CTC_TABLE varchar(256), 47 | CTC_PARTITION varchar(767) 48 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 49 | 50 | CREATE TABLE NEXT_TXN_ID ( 51 | NTXN_NEXT bigint NOT NULL 52 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 53 | INSERT INTO NEXT_TXN_ID VALUES(1); 54 | 55 | CREATE TABLE HIVE_LOCKS ( 56 | HL_LOCK_EXT_ID bigint NOT NULL, 57 | HL_LOCK_INT_ID bigint NOT NULL, 58 | HL_TXNID bigint, 59 | HL_DB varchar(128) NOT NULL, 60 | HL_TABLE varchar(128), 61 | HL_PARTITION varchar(767), 62 | HL_LOCK_STATE char(1) not null, 63 | HL_LOCK_TYPE char(1) not null, 64 | HL_LAST_HEARTBEAT bigint NOT NULL, 65 | HL_ACQUIRED_AT bigint, 66 | HL_USER varchar(128) NOT NULL, 67 | HL_HOST varchar(128) NOT NULL, 68 | HL_HEARTBEAT_COUNT int, 69 | HL_AGENT_INFO varchar(128), 70 | HL_BLOCKEDBY_EXT_ID bigint, 71 | HL_BLOCKEDBY_INT_ID bigint, 72 | PRIMARY KEY(HL_LOCK_EXT_ID, HL_LOCK_INT_ID), 73 | KEY HIVE_LOCK_TXNID_INDEX (HL_TXNID) 74 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 75 | 76 | CREATE INDEX HL_TXNID_IDX ON HIVE_LOCKS (HL_TXNID); 77 | 78 | CREATE TABLE NEXT_LOCK_ID ( 79 | NL_NEXT bigint NOT NULL 80 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 81 | INSERT INTO NEXT_LOCK_ID VALUES(1); 82 | 83 | CREATE TABLE COMPACTION_QUEUE ( 84 | CQ_ID bigint PRIMARY KEY, 85 | CQ_DATABASE varchar(128) NOT NULL, 86 | CQ_TABLE varchar(128) NOT NULL, 87 | CQ_PARTITION varchar(767), 88 | CQ_STATE char(1) NOT NULL, 89 | CQ_TYPE char(1) NOT NULL, 90 | CQ_TBLPROPERTIES varchar(2048), 91 | CQ_WORKER_ID varchar(128), 92 | CQ_START bigint, 93 | CQ_RUN_AS varchar(128), 94 | CQ_HIGHEST_TXN_ID bigint, 95 | CQ_META_INFO varbinary(2048), 96 | CQ_HADOOP_JOB_ID varchar(32) 97 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 98 | 99 | CREATE TABLE COMPLETED_COMPACTIONS ( 100 | CC_ID bigint PRIMARY KEY, 101 | CC_DATABASE varchar(128) NOT NULL, 102 | CC_TABLE varchar(128) NOT NULL, 103 | CC_PARTITION varchar(767), 104 | CC_STATE char(1) NOT NULL, 105 | CC_TYPE char(1) NOT NULL, 106 | CC_TBLPROPERTIES varchar(2048), 107 | CC_WORKER_ID varchar(128), 108 | CC_START bigint, 109 | CC_END bigint, 110 | CC_RUN_AS varchar(128), 111 | CC_HIGHEST_TXN_ID bigint, 112 | CC_META_INFO varbinary(2048), 113 | CC_HADOOP_JOB_ID varchar(32) 114 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 115 | 116 | CREATE TABLE NEXT_COMPACTION_QUEUE_ID ( 117 | NCQ_NEXT bigint NOT NULL 118 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 119 | INSERT INTO NEXT_COMPACTION_QUEUE_ID VALUES(1); 120 | 121 | CREATE TABLE AUX_TABLE ( 122 | MT_KEY1 
varchar(128) NOT NULL, 123 | MT_KEY2 bigint NOT NULL, 124 | MT_COMMENT varchar(255), 125 | PRIMARY KEY(MT_KEY1, MT_KEY2) 126 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 127 | 128 | CREATE TABLE WRITE_SET ( 129 | WS_DATABASE varchar(128) NOT NULL, 130 | WS_TABLE varchar(128) NOT NULL, 131 | WS_PARTITION varchar(767), 132 | WS_TXNID bigint NOT NULL, 133 | WS_COMMIT_ID bigint NOT NULL, 134 | WS_OPERATION_TYPE char(1) NOT NULL 135 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 136 | -------------------------------------------------------------------------------- /mysql/files/docker-entrypoint-initdb.d/kyuubi-schema-1.8.0.mysql.sql: -------------------------------------------------------------------------------- 1 | -- the metadata table ddl 2 | 3 | CREATE DATABASE IF NOT EXISTS kyuubi; 4 | 5 | USE kyuubi; 6 | 7 | CREATE TABLE IF NOT EXISTS metadata( 8 | key_id bigint PRIMARY KEY AUTO_INCREMENT COMMENT 'the auto increment key id', 9 | identifier varchar(36) NOT NULL COMMENT 'the identifier id, which is an UUID', 10 | session_type varchar(32) NOT NULL COMMENT 'the session type, SQL or BATCH', 11 | real_user varchar(255) NOT NULL COMMENT 'the real user', 12 | user_name varchar(255) NOT NULL COMMENT 'the user name, might be a proxy user', 13 | ip_address varchar(128) COMMENT 'the client ip address', 14 | kyuubi_instance varchar(1024) COMMENT 'the kyuubi instance that creates this', 15 | state varchar(128) NOT NULL COMMENT 'the session state', 16 | resource varchar(1024) COMMENT 'the main resource', 17 | class_name varchar(1024) COMMENT 'the main class name', 18 | request_name varchar(1024) COMMENT 'the request name', 19 | request_conf mediumtext COMMENT 'the request config map', 20 | request_args mediumtext COMMENT 'the request arguments', 21 | create_time BIGINT NOT NULL COMMENT 'the metadata create time', 22 | engine_type varchar(32) NOT NULL COMMENT 'the engine type', 23 | cluster_manager varchar(128) COMMENT 'the engine cluster manager', 24 | engine_open_time bigint COMMENT 'the engine open time', 25 | engine_id varchar(128) COMMENT 'the engine application id', 26 | engine_name mediumtext COMMENT 'the engine application name', 27 | engine_url varchar(1024) COMMENT 'the engine tracking url', 28 | engine_state varchar(32) COMMENT 'the engine application state', 29 | engine_error mediumtext COMMENT 'the engine application diagnose', 30 | end_time bigint COMMENT 'the metadata end time', 31 | priority int NOT NULL DEFAULT 10 COMMENT 'the application priority, high value means high priority', 32 | peer_instance_closed boolean default '0' COMMENT 'closed by peer kyuubi instance', 33 | UNIQUE INDEX unique_identifier_index(identifier), 34 | INDEX user_name_index(user_name), 35 | INDEX engine_type_index(engine_type), 36 | INDEX create_time_index(create_time), 37 | -- See more detail about this index in ./005-KYUUBI-5327.mysql.sql 38 | INDEX priority_create_time_index(priority DESC, create_time ASC) 39 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; 40 | -------------------------------------------------------------------------------- /mysql/files/docker-entrypoint-initdb.d/ranger-database-init.mysql.sql: -------------------------------------------------------------------------------- 1 | -- Licensed to the Apache Software Foundation (ASF) under one or more 2 | -- contributor license agreements. See the NOTICE file distributed with 3 | -- this work for additional information regarding copyright ownership. 
4 | -- The ASF licenses this file to You under the Apache License, Version 2.0 5 | -- (the "License"); you may not use this file except in compliance with 6 | -- the License. You may obtain a copy of the License at 7 | -- 8 | -- http://www.apache.org/licenses/LICENSE-2.0 9 | -- 10 | -- Unless required by applicable law or agreed to in writing, software 11 | -- distributed under the License is distributed on an "AS IS" BASIS, 12 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | -- See the License for the specific language governing permissions and 14 | -- limitations under the License. 15 | 16 | CREATE DATABASE IF NOT EXISTS ranger; 17 | -------------------------------------------------------------------------------- /node-template.yaml: -------------------------------------------------------------------------------- 1 | # Required Parameter: node 2 | 3 | - name: orchestrate {{ node.name }} - clean 4 | ansible.builtin.file: 5 | path: "{{ node.name }}" 6 | state: absent 7 | 8 | - name: orchestrate {{ node.name }} - hadoop-common - mkdir 9 | ansible.builtin.file: 10 | path: "{{ node.name }}/{{ item.path }}" 11 | state: directory 12 | with_community.general.filetree: "templates/hadoop-common" 13 | when: item.state == "directory" 14 | 15 | - name: orchestrate {{ node.name }} - hadoop-common - template 16 | ansible.builtin.template: 17 | src: "{{ item.src }}" 18 | dest: "{{ node.name }}/{{ item.path | regex_replace('\\.j2$', '') }}" 19 | mode: "a+x" 20 | with_community.general.filetree: "templates/hadoop-common" 21 | when: item.state == "file" 22 | 23 | - name: orchestrate {{ node.name }} - {{ node.group }} - mkdir 24 | ansible.builtin.file: 25 | path: "{{ node.name }}/{{ item.path }}" 26 | state: directory 27 | with_community.general.filetree: "templates/{{ node.group }}" 28 | when: item.state == "directory" 29 | 30 | - name: orchestrate {{ node.name }} - {{ node.group }} - template 31 | ansible.builtin.template: 32 | src: "{{ item.src }}" 33 | dest: "{{ node.name }}/{{ item.path | regex_replace('\\.j2$', '') }}" 34 | mode: "a+x" 35 | with_community.general.filetree: "templates/{{ node.group }}" 36 | when: item.state == "file" 37 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ansible==8.7.0 2 | ansible-core==2.15.13 3 | cffi==1.17.1 4 | cryptography==44.0.0 5 | Jinja2==3.1.5 6 | MarkupSafe==3.0.2 7 | packaging==24.2 8 | pycparser==2.22 9 | PyYAML==6.0.2 10 | resolvelib==1.0.1 11 | -------------------------------------------------------------------------------- /templates/.env.j2: -------------------------------------------------------------------------------- 1 | PROJECT_VERSION={{ project_version }} 2 | 3 | ZULU8_VERSION={{ zulu8_version }} 4 | ZULU11_VERSION={{ zulu11_version }} 5 | ZULU17_VERSION={{ zulu17_version }} 6 | ZULU21_VERSION={{ zulu21_version }} 7 | JDK8_VERSION={{ jdk8_version }} 8 | JDK11_VERSION={{ jdk11_version }} 9 | JDK17_VERSION={{ jdk17_version }} 10 | JDK21_VERSION={{ jdk21_version }} 11 | 12 | AWS_JAVA_SDK_VERSION={{ aws_java_sdk_version }} 13 | HADOOP_VERSION={{ hadoop_version }} 14 | HIVE_VERSION={{ hive_version }} 15 | ICEBERG_VERSION={{ iceberg_version }} 16 | HUDI_VERSION={{ hudi_version }} 17 | KAFKA_VERSION={{ kafka_version }} 18 | KAFKA_UI_VERSION={{ kafka_ui_version }} 19 | KYUUBI_VERSION={{ kyuubi_version }} 20 | KYUUBI_HADOOP_VERSION={{ kyuubi_hadoop_version }} 21 | LOKI_APPENDER_VERSION={{ 
loki_log4j2_appender_version }} 22 | MYSQL_JDBC_VERSION=8.0.33 23 | SPARK_SCALA_BINARY_VERSION={{ spark_scala_binary_version }} 24 | SPARK_VERSION={{ spark_version }} 25 | SPARK_BINARY_VERSION={{ spark_binary_version }} 26 | SPARK_HADOOP_VERSION={{ spark_hadoop_version }} 27 | FLINK_BINARY_VERSION={{ flink_binary_version }} 28 | FLINK_VERSION={{ flink_version }} 29 | FLINK_HIVE_VERSION={{ flink_hive_version }} 30 | ZEPPELIN_VERSION={{ zeppelin_version }} 31 | ZOOKEEPER_VERSION={{ zookeeper_version }} 32 | RANGER_VERSION={{ ranger_version }} 33 | TRINO_VERSION={{ trino_version }} 34 | PARQUET_VERSION={{ parquet_version }} 35 | 36 | GRAFANA_VERSION={{ grafana_version }} 37 | PROMETHEUS_VERSION={{ prometheus_version }} 38 | LOKI_VERSION={{ loki_version }} 39 | 40 | # export APACHE_MIRROR=https://dlcdn.apache.org 41 | # export MAVEN_MIRROR=https://maven-central-asia.storage-download.googleapis.com/maven2 42 | APACHE_MIRROR=${APACHE_MIRROR:-{{ repository_apache }}} 43 | MAVEN_MIRROR=${MAVEN_MIRROR:-{{ repository_maven }}} 44 | -------------------------------------------------------------------------------- /templates/base-ubuntu-2004/Dockerfile.j2: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | # See the License for the specific language governing permissions and 11 | # limitations under the License. 
12 | 13 | FROM ubuntu:20.04 14 | 15 | ARG JDK8_TAR_NAME 16 | ARG JDK11_TAR_NAME 17 | ARG JDK17_TAR_NAME 18 | ARG JDK21_TAR_NAME 19 | 20 | COPY ./files / 21 | 22 | {% if apt_mirror_enabled %} 23 | RUN cat /etc/apt/sources.list.mirror-$(uname -m) > /etc/apt/sources.list 24 | {% endif %} 25 | 26 | RUN set -xeu && \ 27 | ln -snf /usr/bin/bash /usr/bin/sh && \ 28 | install_packages busybox python3-pip supervisor xsltproc curl tree jq python-is-python3 \ 29 | openssh-client openssh-server sqlite3 libcap-dev \ 30 | {% if kerberos_enabled %}krb5-user jsvc libssl1.1{% endif %} && \ 31 | mkdir /run/sshd && chmod 0755 /run/sshd && \ 32 | mkdir /opt/busybox && busybox --install /opt/busybox 33 | 34 | ADD download/${JDK8_TAR_NAME}.tar.gz /opt 35 | ADD download/${JDK11_TAR_NAME}.tar.gz /opt 36 | ADD download/${JDK17_TAR_NAME}.tar.gz /opt 37 | {% if jdk21_enabled %} 38 | ADD download/${JDK21_TAR_NAME}.tar.gz /opt 39 | {% endif %} 40 | 41 | RUN ln -snf /opt/${JDK8_TAR_NAME} /opt/openjdk-8 42 | RUN ln -snf /opt/${JDK11_TAR_NAME} /opt/openjdk-11 43 | RUN ln -snf /opt/${JDK17_TAR_NAME} /opt/openjdk-17 44 | {% if jdk21_enabled %} 45 | RUN ln -snf /opt/${JDK21_TAR_NAME} /opt/openjdk-21 46 | {% endif %} 47 | 48 | ENV JAVA_HOME=/opt/openjdk-8 49 | ENV PATH="$JAVA_HOME/bin:$PATH:/opt/busybox" 50 | ENV LC_ALL=C.UTF-8 51 | 52 | RUN groupadd hadoop --gid 6000 && \ 53 | useradd hadoop --gid 6000 --uid 6000 -m && \ 54 | groupadd hdfs --gid 6001 && \ 55 | useradd hdfs --gid 6001 --uid 6001 --groups hadoop -m && \ 56 | groupadd yarn --gid 6002 && \ 57 | useradd yarn --gid 6002 --uid 6002 --groups hadoop -m && \ 58 | groupadd mapred --gid 6003 && \ 59 | useradd mapred --gid 6003 --uid 6003 --groups hadoop -m && \ 60 | groupadd hive --gid 6004 && \ 61 | useradd hive --gid 6004 --uid 6004 -m && \ 62 | groupadd hbase --gid 6005 && \ 63 | useradd hbase --gid 6005 --uid 6005 -m && \ 64 | groupadd kafka --gid 6006 && \ 65 | useradd kafka --gid 6006 --uid 6006 -m && \ 66 | groupadd zookeeper --gid 6007 && \ 67 | useradd zookeeper --gid 6007 --uid 6007 -m && \ 68 | groupadd spark --gid 6008 && \ 69 | useradd spark --gid 6008 --uid 6008 -m && \ 70 | groupadd kyuubi --gid 6009 && \ 71 | useradd kyuubi --gid 6009 --uid 6009 -m && \ 72 | groupadd hue --gid 6010 && \ 73 | useradd hue --gid 6010 --uid 6010 -m && \ 74 | groupadd ranger --gid 6011 && \ 75 | useradd ranger --gid 6011 --uid 6011 -m && \ 76 | groupadd flink --gid 6012 && \ 77 | useradd flink --gid 6012 --uid 6012 -m && \ 78 | groupadd trino --gid 6013 && \ 79 | useradd trino --gid 6013 --uid 6013 -m && \ 80 | groupadd zeppelin --gid 6014 && \ 81 | useradd zeppelin --gid 6014 --uid 6014 -m 82 | -------------------------------------------------------------------------------- /templates/build-image.sh.j2: -------------------------------------------------------------------------------- 1 | #jinja2: trim_blocks: True, lstrip_blocks: True 2 | #!/usr/bin/env bash 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # 16 | 17 | set -xe 18 | 19 | SELF_DIR="$(cd "$(dirname "$0")"; pwd)" 20 | 21 | source "${SELF_DIR}/.env" 22 | 23 | # use fast copy (CoW) 24 | # https://github.com/pkolaczk/fclones/issues/219 25 | CP="cp --reflink=auto" 26 | if [ "$(uname)" == "Darwin" ]; then 27 | CP="cp -c" 28 | fi 29 | 30 | mkdir -p base-ubuntu-2004/download 31 | if [ $(uname -m) = "arm64" ] || [ $(uname -m) = "aarch64" ]; then JDK8_TAR_NAME=zulu${ZULU8_VERSION}-ca-jdk${JDK8_VERSION}-linux_aarch64; else JDK8_TAR_NAME=zulu${ZULU8_VERSION}-ca-jdk${JDK8_VERSION}-linux_x64; fi 32 | $CP download/${JDK8_TAR_NAME}.tar.gz base-ubuntu-2004/download/${JDK8_TAR_NAME}.tar.gz 33 | if [ $(uname -m) = "arm64" ] || [ $(uname -m) = "aarch64" ]; then JDK11_TAR_NAME=zulu${ZULU11_VERSION}-ca-jdk${JDK11_VERSION}-linux_aarch64; else JDK11_TAR_NAME=zulu${ZULU11_VERSION}-ca-jdk${JDK11_VERSION}-linux_x64; fi 34 | $CP download/${JDK11_TAR_NAME}.tar.gz base-ubuntu-2004/download/${JDK11_TAR_NAME}.tar.gz 35 | if [ $(uname -m) = "arm64" ] || [ $(uname -m) = "aarch64" ]; then JDK17_TAR_NAME=zulu${ZULU17_VERSION}-ca-jdk${JDK17_VERSION}-linux_aarch64; else JDK17_TAR_NAME=zulu${ZULU17_VERSION}-ca-jdk${JDK17_VERSION}-linux_x64; fi 36 | $CP download/${JDK17_TAR_NAME}.tar.gz base-ubuntu-2004/download/${JDK17_TAR_NAME}.tar.gz 37 | {% if jdk21_enabled %} 38 | if [ $(uname -m) = "arm64" ] || [ $(uname -m) = "aarch64" ]; then JDK21_TAR_NAME=zulu${ZULU21_VERSION}-ca-jdk${JDK21_VERSION}-linux_aarch64; else JDK21_TAR_NAME=zulu${ZULU21_VERSION}-ca-jdk${JDK21_VERSION}-linux_x64; fi 39 | $CP download/${JDK21_TAR_NAME}.tar.gz base-ubuntu-2004/download/${JDK21_TAR_NAME}.tar.gz 40 | {% endif %} 41 | 42 | docker build \ 43 | --file "${SELF_DIR}/base-ubuntu-2004/Dockerfile" \ 44 | --build-arg JDK8_TAR_NAME=${JDK8_TAR_NAME} \ 45 | --build-arg JDK11_TAR_NAME=${JDK11_TAR_NAME} \ 46 | --build-arg JDK17_TAR_NAME=${JDK17_TAR_NAME} \ 47 | --build-arg JDK21_TAR_NAME=${JDK21_TAR_NAME} \ 48 | --tag hadoop-testing/base-ubuntu-2004:${PROJECT_VERSION} \ 49 | "${SELF_DIR}/base-ubuntu-2004" $@ 50 | 51 | rm -rf base-ubuntu-2004/download/* 52 | 53 | {% if kerberos_enabled %} 54 | docker build \ 55 | --build-arg PROJECT_VERSION=${PROJECT_VERSION} \ 56 | --file "${SELF_DIR}/kdc/Dockerfile" \ 57 | --tag hadoop-testing/kdc:${PROJECT_VERSION} \ 58 | "${SELF_DIR}/kdc" $@ 59 | {% endif %} 60 | 61 | function build_hadoop_master_image() { 62 | local INDEX=$1 63 | mkdir -p hadoop-master${INDEX}/download 64 | {% if zeppelin_enabled %} 65 | $CP download/zeppelin-${ZEPPELIN_VERSION}-bin{{ '-%s' % zeppelin_custom_name if zeppelin_custom_name }}.tgz hadoop-master${INDEX}/download/zeppelin-${ZEPPELIN_VERSION}-bin{{ '-%s' % zeppelin_custom_name if zeppelin_custom_name }}.tgz 66 | {% endif %} 67 | $CP download/apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz hadoop-master${INDEX}/download/apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz 68 | if [ $(uname -m) = "arm64" ] || [ $(uname -m) = "aarch64" ]; then HADOOP_TAR_NAME=hadoop-${HADOOP_VERSION}-aarch64; else HADOOP_TAR_NAME=hadoop-${HADOOP_VERSION}; fi 69 | $CP download/${HADOOP_TAR_NAME}.tar.gz hadoop-master${INDEX}/download/hadoop-${HADOOP_VERSION}.tar.gz 70 | $CP download/apache-hive-${HIVE_VERSION}-bin.tar.gz hadoop-master${INDEX}/download/apache-hive-${HIVE_VERSION}-bin.tar.gz 71 | {% if spark_enabled %} 72 | $CP download/spark-${SPARK_VERSION}-bin-{{ spark_custom_name }}.tgz hadoop-master${INDEX}/download/spark-${SPARK_VERSION}-bin-{{ spark_custom_name }}.tgz 73 | {% endif %} 74 | {% if flink_enabled %} 75 | $CP 
download/flink-${FLINK_VERSION}-bin-scala_2.12.tgz hadoop-master${INDEX}/download/flink-${FLINK_VERSION}-bin-scala_2.12.tgz 76 | $CP download/flink-sql-connector-hive-${FLINK_HIVE_VERSION}_2.12-${FLINK_VERSION}.jar hadoop-master${INDEX}/download/flink-sql-connector-hive-${FLINK_HIVE_VERSION}_2.12-${FLINK_VERSION}.jar 77 | {% endif %} 78 | $CP download/apache-kyuubi-${KYUUBI_VERSION}-bin.tgz hadoop-master${INDEX}/download/apache-kyuubi-${KYUUBI_VERSION}-bin.tgz 79 | $CP download/kyuubi-hive-jdbc-shaded-${KYUUBI_VERSION}.jar hadoop-master${INDEX}/download/kyuubi-hive-jdbc-shaded-${KYUUBI_VERSION}.jar 80 | {% if ranger_enabled %} 81 | $CP download/ranger-${RANGER_VERSION}-admin.tar.gz hadoop-master${INDEX}/download/ranger-${RANGER_VERSION}-admin.tar.gz 82 | {% endif %} 83 | {% if spark_enabled %} 84 | $CP download/kyuubi-spark-connector-tpch_${SPARK_SCALA_BINARY_VERSION}-${KYUUBI_VERSION}.jar hadoop-master${INDEX}/download/kyuubi-spark-connector-tpch_${SPARK_SCALA_BINARY_VERSION}-${KYUUBI_VERSION}.jar 85 | $CP download/kyuubi-spark-connector-tpcds_${SPARK_SCALA_BINARY_VERSION}-${KYUUBI_VERSION}.jar hadoop-master${INDEX}/download/kyuubi-spark-connector-tpcds_${SPARK_SCALA_BINARY_VERSION}-${KYUUBI_VERSION}.jar 86 | {% endif %} 87 | $CP download/mysql-connector-j-${MYSQL_JDBC_VERSION}.jar hadoop-master${INDEX}/download/mysql-connector-j-${MYSQL_JDBC_VERSION}.jar 88 | $CP download/log4j2-appender-nodep-${LOKI_APPENDER_VERSION}.jar hadoop-master${INDEX}/download/log4j2-appender-nodep-${LOKI_APPENDER_VERSION}.jar 89 | {% if spark_enabled and iceberg_enabled %} 90 | $CP download/iceberg-spark-runtime-${SPARK_BINARY_VERSION}_${SPARK_SCALA_BINARY_VERSION}-${ICEBERG_VERSION}.jar hadoop-master${INDEX}/download/iceberg-spark-runtime-${SPARK_BINARY_VERSION}_${SPARK_SCALA_BINARY_VERSION}-${ICEBERG_VERSION}.jar 91 | {% endif %} 92 | {% if flink_enabled and iceberg_enabled %} 93 | $CP download/iceberg-flink-runtime-${FLINK_BINARY_VERSION}-${ICEBERG_VERSION}.jar hadoop-master${INDEX}/download/iceberg-flink-runtime-${FLINK_BINARY_VERSION}-${ICEBERG_VERSION}.jar 94 | {% endif %} 95 | {% if spark_enabled and hudi_enabled %} 96 | $CP download/hudi-spark${SPARK_BINARY_VERSION}-bundle_${SPARK_SCALA_BINARY_VERSION}-${HUDI_VERSION}.jar hadoop-master${INDEX}/download/hudi-spark${SPARK_BINARY_VERSION}-bundle_${SPARK_SCALA_BINARY_VERSION}-${HUDI_VERSION}.jar 97 | {% endif %} 98 | $CP download/jcl-over-slf4j-1.7.36.jar hadoop-master${INDEX}/download/jcl-over-slf4j-1.7.36.jar 99 | $CP download/log4j-layout-template-json-2.20.0.jar hadoop-master${INDEX}/download/log4j-layout-template-json-2.20.0.jar 100 | {% if trino_enabled %} 101 | $CP download/trino-server-${TRINO_VERSION}.tar.gz hadoop-master${INDEX}/download/trino-server-${TRINO_VERSION}.tar.gz 102 | $CP download/trino-cli-${TRINO_VERSION}-executable.jar hadoop-master${INDEX}/download/trino-cli-${TRINO_VERSION}-executable.jar 103 | {% endif %} 104 | {% if parquet_enabled %} 105 | $CP download/parquet-cli-${PARQUET_VERSION}-runtime.jar hadoop-master${INDEX}/download/parquet-cli-${PARQUET_VERSION}-runtime.jar 106 | {% endif %} 107 | 108 | docker build \ 109 | --build-arg PROJECT_VERSION=${PROJECT_VERSION} \ 110 | --build-arg ZEPPELIN_VERSION=${ZEPPELIN_VERSION} \ 111 | --build-arg ZOOKEEPER_VERSION=${ZOOKEEPER_VERSION} \ 112 | --build-arg HADOOP_VERSION=${HADOOP_VERSION} \ 113 | --build-arg HIVE_VERSION=${HIVE_VERSION} \ 114 | --build-arg SPARK_VERSION=${SPARK_VERSION} \ 115 | --build-arg SPARK_BINARY_VERSION=${SPARK_BINARY_VERSION} \ 116 | --build-arg 
FLINK_VERSION=${FLINK_VERSION} \ 117 | --build-arg FLINK_BINARY_VERSION=${FLINK_BINARY_VERSION} \ 118 | --build-arg FLINK_HIVE_VERSION=${FLINK_HIVE_VERSION} \ 119 | --build-arg SPARK_SCALA_BINARY_VERSION=${SPARK_SCALA_BINARY_VERSION} \ 120 | --build-arg KYUUBI_VERSION=${KYUUBI_VERSION} \ 121 | --build-arg RANGER_VERSION=${RANGER_VERSION} \ 122 | --build-arg MYSQL_JDBC_VERSION=${MYSQL_JDBC_VERSION} \ 123 | --build-arg ICEBERG_VERSION=${ICEBERG_VERSION} \ 124 | --build-arg HUDI_VERSION=${HUDI_VERSION} \ 125 | --build-arg LOKI_APPENDER_VERSION=${LOKI_APPENDER_VERSION} \ 126 | --build-arg TRINO_VERSION=${TRINO_VERSION} \ 127 | --build-arg PARQUET_VERSION=${PARQUET_VERSION} \ 128 | --file "${SELF_DIR}/hadoop-master${INDEX}/Dockerfile" \ 129 | --tag hadoop-testing/hadoop-master${INDEX}:${PROJECT_VERSION} \ 130 | "${SELF_DIR}/hadoop-master${INDEX}" $2 131 | 132 | rm -rf hadoop-master${INDEX}/download/* 133 | } 134 | 135 | build_hadoop_master_image 1 "$@" 136 | 137 | function build_hadoop_worker_image() { 138 | local INDEX=$1 139 | mkdir -p hadoop-worker${INDEX}/download 140 | if [ $(uname -m) = "arm64" ] || [ $(uname -m) = "aarch64" ]; then HADOOP_TAR_NAME=hadoop-${HADOOP_VERSION}-aarch64; else HADOOP_TAR_NAME=hadoop-${HADOOP_VERSION}; fi 141 | $CP download/${HADOOP_TAR_NAME}.tar.gz hadoop-worker${INDEX}/download/hadoop-${HADOOP_VERSION}.tar.gz 142 | {% if spark_enabled %} 143 | $CP download/spark-${SPARK_VERSION}-bin-{{ spark_custom_name }}.tgz hadoop-worker${INDEX}/download/spark-${SPARK_VERSION}-bin-{{ spark_custom_name }}.tgz 144 | {% endif %} 145 | {% if trino_enabled %} 146 | $CP download/trino-server-${TRINO_VERSION}.tar.gz hadoop-worker${INDEX}/download/trino-server-${TRINO_VERSION}.tar.gz 147 | {% endif %} 148 | {% if spark_enabled %} 149 | tar -xzf hadoop-worker${INDEX}/download/spark-${SPARK_VERSION}-bin-{{ spark_custom_name }}.tgz -C hadoop-worker${INDEX}/download spark-${SPARK_VERSION}-bin-{{ spark_custom_name }}/yarn 150 | {% endif %} 151 | docker build \ 152 | --build-arg PROJECT_VERSION=${PROJECT_VERSION} \ 153 | --build-arg HADOOP_VERSION=${HADOOP_VERSION} \ 154 | --build-arg SPARK_VERSION=${SPARK_VERSION} \ 155 | --build-arg TRINO_VERSION=${TRINO_VERSION} \ 156 | --file "${SELF_DIR}/hadoop-worker${INDEX}/Dockerfile" \ 157 | --tag hadoop-testing/hadoop-worker${INDEX}:${PROJECT_VERSION} \ 158 | "${SELF_DIR}/hadoop-worker${INDEX}" $2 159 | 160 | rm -rf hadoop-worker${INDEX}/download/* 161 | } 162 | 163 | build_hadoop_worker_image 1 "$@" 164 | build_hadoop_worker_image 2 "$@" 165 | build_hadoop_worker_image 3 "$@" 166 | -------------------------------------------------------------------------------- /templates/compose.yaml.j2: -------------------------------------------------------------------------------- 1 | #jinja2: trim_blocks: True, lstrip_blocks: True 2 | services: 3 | mysql: 4 | image: bitnami/mysql:8.0 5 | hostname: mysql.orb.local 6 | container_name: mysql 7 | networks: 8 | - hadoop_net 9 | environment: 10 | - MYSQL_ROOT_PASSWORD=root 11 | - MYSQL_AUTHENTICATION_PLUGIN=mysql_native_password 12 | - MYSQL_DATABASE=metastore 13 | volumes: 14 | - ./mysql/files/docker-entrypoint-initdb.d:/docker-entrypoint-initdb.d 15 | ports: 16 | - 3306:3306 17 | 18 | socks5: 19 | image: serjs/go-socks5-proxy 20 | hostname: socks5.orb.local 21 | container_name: socks5 22 | networks: 23 | - hadoop_net 24 | environment: 25 | - PROXY_PORT=18070 26 | ports: 27 | - 18070:18070 28 | 29 | hadoop-master1: 30 | image: hadoop-testing/hadoop-master1:${PROJECT_VERSION} 31 | hostname: 
hadoop-master1.orb.local 32 | container_name: hadoop-master1 33 | networks: 34 | - hadoop_net 35 | volumes: 36 | - share:/share 37 | ports: 38 | - 8088:8088 39 | - 9870:9870 40 | - 10009:10009 41 | - 10099:10099 42 | - 18080:18080 43 | - 19888:19888 44 | - 6080:6080 45 | depends_on: 46 | - mysql 47 | 48 | hadoop-worker1: 49 | image: hadoop-testing/hadoop-worker1:${PROJECT_VERSION} 50 | hostname: hadoop-worker1.orb.local 51 | container_name: hadoop-worker1 52 | networks: 53 | - hadoop_net 54 | privileged: true 55 | volumes: 56 | - share:/share 57 | depends_on: 58 | - hadoop-master1 59 | 60 | hadoop-worker2: 61 | image: hadoop-testing/hadoop-worker2:${PROJECT_VERSION} 62 | hostname: hadoop-worker2.orb.local 63 | container_name: hadoop-worker2 64 | networks: 65 | - hadoop_net 66 | privileged: true 67 | volumes: 68 | - share:/share 69 | depends_on: 70 | - hadoop-master1 71 | 72 | hadoop-worker3: 73 | image: hadoop-testing/hadoop-worker3:${PROJECT_VERSION} 74 | hostname: hadoop-worker3.orb.local 75 | container_name: hadoop-worker3 76 | networks: 77 | - hadoop_net 78 | privileged: true 79 | volumes: 80 | - share:/share 81 | depends_on: 82 | - hadoop-master1 83 | 84 | {% if kerberos_enabled %} 85 | kdc: 86 | image: hadoop-testing/kdc:${PROJECT_VERSION} 87 | hostname: kdc.orb.local 88 | container_name: kdc 89 | networks: 90 | - hadoop_net 91 | volumes: 92 | - share:/share 93 | ports: 94 | - 88:88 95 | - 89:89 96 | {% endif %} 97 | 98 | {% if prometheus_enabled %} 99 | prometheus: 100 | image: prom/prometheus:v${PROMETHEUS_VERSION} 101 | hostname: prometheus.orb.local 102 | container_name: prometheus 103 | networks: 104 | - hadoop_net 105 | volumes: 106 | - ./files/etc/prometheus:/etc/prometheus 107 | ports: 108 | - 9090:9090 109 | {% endif %} 110 | 111 | {% if loki_enabled %} 112 | loki: 113 | image: grafana/loki:${LOKI_VERSION} 114 | hostname: loki.orb.local 115 | container_name: loki 116 | networks: 117 | - hadoop_net 118 | volumes: 119 | - ./files/etc/loki:/etc/loki 120 | ports: 121 | - 3100:3100 122 | command: -config.file=/etc/loki/config.yaml 123 | {% endif %} 124 | 125 | {% if grafana_enabled %} 126 | grafana: 127 | image: grafana/grafana-oss:${GRAFANA_VERSION} 128 | hostname: grafana.orb.local 129 | container_name: grafana 130 | networks: 131 | - hadoop_net 132 | environment: 133 | - GF_PATHS_PROVISIONING=/etc/grafana/provisioning 134 | - GF_AUTH_ANONYMOUS_ENABLED=true 135 | - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin 136 | volumes: 137 | - ./files/etc/grafana/provisioning:/etc/grafana/provisioning 138 | - ./files/var/lib/grafana/dashboards:/var/lib/grafana/dashboards 139 | ports: 140 | - 3000:3000 141 | {% endif %} 142 | 143 | {% if kafka_enabled %} 144 | kafka: 145 | image: bitnami/kafka:${KAFKA_VERSION} 146 | hostname: kafka.orb.local 147 | container_name: kafka 148 | networks: 149 | - hadoop_net 150 | ports: 151 | - 9092:9092 152 | environment: 153 | - KAFKA_ZOOKEEPER_CONNECT=hadoop-master1.orb.local:2181 154 | - ALLOW_PLAINTEXT_LISTENER=yes 155 | depends_on: 156 | - hadoop-master1 157 | {% endif %} 158 | 159 | {% if kafka_enabled and kafka_ui_enabled %} 160 | kafka-ui: 161 | image: kafbat/kafka-ui:v${KAFKA_UI_VERSION} 162 | hostname: kafka-ui.orb.local 163 | container_name: kafka-ui 164 | networks: 165 | - hadoop_net 166 | ports: 167 | - 19092:19092 168 | environment: 169 | SERVER_PORT: 19092 170 | KAFKA_CLUSTERS_0_BOOTSTRAPSERVERS: kafka.orb.local:9092 171 | {% endif %} 172 | 173 | volumes: 174 | share: 175 | 176 | networks: 177 | hadoop_net: 178 | name: orb.local 179 | driver: bridge 
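# Usage sketch (an illustration, not part of the upstream template): once this compose template
# and .env.j2 have been rendered into the same directory, the stack is typically brought up with
# Docker Compose, which reads PROJECT_VERSION and the other variables from ./.env automatically:
#
#   docker compose up -d
#   docker compose ps
#   docker compose logs -f hadoop-master1
#
# The host port mappings above publish, for example, the YARN ResourceManager UI on 8088,
# the HDFS NameNode UI on 9870, Kyuubi on 10009, and the Spark History Server on 18080.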
-------------------------------------------------------------------------------- /templates/download.sh.j2: -------------------------------------------------------------------------------- 1 | #jinja2: trim_blocks: True, lstrip_blocks: True 2 | #!/usr/bin/env bash 3 | 4 | SELF_DIR="$(cd "$(dirname "$0")"; pwd)" 5 | 6 | source "${SELF_DIR}/.env" 7 | 8 | function download_if_not_exists() { 9 | local url=$1 10 | local filename=${url##*/} 11 | if [ ! -s "${SELF_DIR}/download/$filename" ]; then 12 | echo "delete empty $filename ..." 13 | rm -f "${SELF_DIR}/download/$filename" 14 | fi 15 | if [ ! -f "${SELF_DIR}/download/$filename" ]; then 16 | echo "downloading $filename ..." 17 | wget -O "${SELF_DIR}/download/$filename" $url 18 | else 19 | echo "skip downloading existing $filename" 20 | fi 21 | } 22 | 23 | mkdir -p "${SELF_DIR}/download" 24 | 25 | if [ $(uname -m) = "arm64" ] || [ $(uname -m) = "aarch64" ]; then JDK8_TAR_NAME=zulu${ZULU8_VERSION}-ca-jdk${JDK8_VERSION}-linux_aarch64; else JDK8_TAR_NAME=zulu${ZULU8_VERSION}-ca-jdk${JDK8_VERSION}-linux_x64; fi 26 | download_if_not_exists https://cdn.azul.com/zulu/bin/${JDK8_TAR_NAME}.tar.gz 27 | 28 | if [ $(uname -m) = "arm64" ] || [ $(uname -m) = "aarch64" ]; then JDK11_TAR_NAME=zulu${ZULU11_VERSION}-ca-jdk${JDK11_VERSION}-linux_aarch64; else JDK11_TAR_NAME=zulu${ZULU11_VERSION}-ca-jdk${JDK11_VERSION}-linux_x64; fi 29 | download_if_not_exists https://cdn.azul.com/zulu/bin/${JDK11_TAR_NAME}.tar.gz 30 | 31 | if [ $(uname -m) = "arm64" ] || [ $(uname -m) = "aarch64" ]; then JDK17_TAR_NAME=zulu${ZULU17_VERSION}-ca-jdk${JDK17_VERSION}-linux_aarch64; else JDK17_TAR_NAME=zulu${ZULU17_VERSION}-ca-jdk${JDK17_VERSION}-linux_x64; fi 32 | download_if_not_exists https://cdn.azul.com/zulu/bin/${JDK17_TAR_NAME}.tar.gz 33 | 34 | {% if jdk21_enabled %} 35 | if [ $(uname -m) = "arm64" ] || [ $(uname -m) = "aarch64" ]; then JDK21_TAR_NAME=zulu${ZULU21_VERSION}-ca-jdk${JDK21_VERSION}-linux_aarch64; else JDK21_TAR_NAME=zulu${ZULU21_VERSION}-ca-jdk${JDK21_VERSION}-linux_x64; fi 36 | download_if_not_exists https://cdn.azul.com/zulu/bin/${JDK21_TAR_NAME}.tar.gz 37 | {% endif %} 38 | 39 | {% if zeppelin_enabled %} 40 | download_if_not_exists ${APACHE_MIRROR}/zeppelin/zeppelin-${ZEPPELIN_VERSION}/zeppelin-${ZEPPELIN_VERSION}-bin{{ '-%s' % zeppelin_custom_name if zeppelin_custom_name }}.tgz 41 | {% endif %} 42 | download_if_not_exists ${APACHE_MIRROR}/zookeeper/zookeeper-${ZOOKEEPER_VERSION}/apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz 43 | if [ $(uname -m) = "arm64" ] || [ $(uname -m) = "aarch64" ]; then HADOOP_TAR_NAME=hadoop-${HADOOP_VERSION}-aarch64; else HADOOP_TAR_NAME=hadoop-${HADOOP_VERSION}; fi 44 | download_if_not_exists ${APACHE_MIRROR}/hadoop/core/hadoop-${HADOOP_VERSION}/${HADOOP_TAR_NAME}.tar.gz 45 | {# Hive 2 is EOL, only available at archive.apache.org/dist #} 46 | download_if_not_exists https://archive.apache.org/dist/hive/hive-${HIVE_VERSION}/apache-hive-${HIVE_VERSION}-bin.tar.gz 47 | {% if spark_enabled %} 48 | download_if_not_exists ${APACHE_MIRROR}/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-{{ spark_custom_name }}.tgz 49 | {% endif %} 50 | {% if flink_enabled %} 51 | download_if_not_exists ${APACHE_MIRROR}/flink/flink-${FLINK_VERSION}/flink-${FLINK_VERSION}-bin-scala_2.12.tgz 52 | download_if_not_exists ${MAVEN_MIRROR}/org/apache/flink/flink-sql-connector-hive-${FLINK_HIVE_VERSION}_2.12/${FLINK_VERSION}/flink-sql-connector-hive-${FLINK_HIVE_VERSION}_2.12-${FLINK_VERSION}.jar 53 | {% endif %} 54 | {% if kyuubi_enabled %} 55 | 
download_if_not_exists ${APACHE_MIRROR}/kyuubi/kyuubi-${KYUUBI_VERSION}/apache-kyuubi-${KYUUBI_VERSION}-bin.tgz 56 | download_if_not_exists ${MAVEN_MIRROR}/org/apache/kyuubi/kyuubi-hive-jdbc-shaded/${KYUUBI_VERSION}/kyuubi-hive-jdbc-shaded-${KYUUBI_VERSION}.jar 57 | {% endif %} 58 | {% if kafka_enabled %} 59 | download_if_not_exists ${MAVEN_MIRROR}/org/apache/kafka/kafka-clients/${KAFKA_VERSION}/kafka-clients-${KAFKA_VERSION}.jar 60 | {% endif %} 61 | {% if ranger_enabled %} 62 | download_if_not_exists https://github.com/pan3793/ranger/releases/download/release-ranger-${RANGER_VERSION}/ranger-${RANGER_VERSION}-admin.tar.gz 63 | {% endif %} 64 | {% if trino_enabled %} 65 | download_if_not_exists ${MAVEN_MIRROR}/io/trino/trino-server/${TRINO_VERSION}/trino-server-${TRINO_VERSION}.tar.gz 66 | download_if_not_exists ${MAVEN_MIRROR}/io/trino/trino-cli/${TRINO_VERSION}/trino-cli-${TRINO_VERSION}-executable.jar 67 | {% endif %} 68 | {% if parquet_enabled %} 69 | PARQUET_CLI_JAR_NAME=parquet-cli 70 | download_if_not_exists ${MAVEN_MIRROR}/org/apache/parquet/${PARQUET_CLI_JAR_NAME}/${PARQUET_VERSION}/${PARQUET_CLI_JAR_NAME}-${PARQUET_VERSION}-runtime.jar 71 | {% endif %} 72 | 73 | MYSQL_JDBC_JAR_NAME=mysql-connector-j 74 | download_if_not_exists ${MAVEN_MIRROR}/com/mysql/${MYSQL_JDBC_JAR_NAME}/${MYSQL_JDBC_VERSION}/${MYSQL_JDBC_JAR_NAME}-${MYSQL_JDBC_VERSION}.jar 75 | 76 | {% if spark_enabled and s3_enabled %} 77 | HADOOP_AWS_JAR_NAME=hadoop-aws 78 | download_if_not_exists ${MAVEN_MIRROR}/org/apache/hadoop/${HADOOP_AWS_JAR_NAME}/${SPARK_HADOOP_VERSION}/${HADOOP_AWS_JAR_NAME}-${SPARK_HADOOP_VERSION}.jar 79 | AWS_JAVA_SDK_BUNDLE_JAR_NAME=aws-java-sdk-bundle 80 | download_if_not_exists ${MAVEN_MIRROR}/com/amazonaws/${AWS_JAVA_SDK_BUNDLE_JAR_NAME}/${AWS_JAVA_SDK_VERSION}/${AWS_JAVA_SDK_BUNDLE_JAR_NAME}-${AWS_JAVA_SDK_VERSION}.jar 81 | SPARK_HADOOP_CLOUD_JAR_NAME=spark-hadoop-cloud_${SPARK_SCALA_BINARY_VERSION} 82 | download_if_not_exists ${MAVEN_MIRROR}/org/apache/spark/${SPARK_HADOOP_CLOUD_JAR_NAME}/${SPARK_VERSION}/${SPARK_HADOOP_CLOUD_JAR_NAME}-${SPARK_VERSION}.jar 83 | HADOOP_CLOUD_STORAGE_JAR_NAME=hadoop-cloud-storage 84 | download_if_not_exists ${MAVEN_MIRROR}/org/apache/hadoop/${HADOOP_CLOUD_STORAGE_JAR_NAME}/${SPARK_HADOOP_VERSION}/${HADOOP_CLOUD_STORAGE_JAR_NAME}-${SPARK_HADOOP_VERSION}.jar 85 | {% endif %} 86 | 87 | {% if spark_enabled %} 88 | TPCDS_CONNECTOR_JAR_NAME=kyuubi-spark-connector-tpcds_${SPARK_SCALA_BINARY_VERSION} 89 | download_if_not_exists ${MAVEN_MIRROR}/org/apache/kyuubi/${TPCDS_CONNECTOR_JAR_NAME}/${KYUUBI_VERSION}/${TPCDS_CONNECTOR_JAR_NAME}-${KYUUBI_VERSION}.jar 90 | TPCH_CONNECTOR_JAR_NAME=kyuubi-spark-connector-tpch_${SPARK_SCALA_BINARY_VERSION} 91 | download_if_not_exists ${MAVEN_MIRROR}/org/apache/kyuubi/${TPCH_CONNECTOR_JAR_NAME}/${KYUUBI_VERSION}/${TPCH_CONNECTOR_JAR_NAME}-${KYUUBI_VERSION}.jar 92 | {% endif %} 93 | 94 | {% if spark_enabled and iceberg_enabled %} 95 | ICEBERG_SPARK_JAR_NAME=iceberg-spark-runtime-${SPARK_BINARY_VERSION}_${SPARK_SCALA_BINARY_VERSION} 96 | download_if_not_exists ${MAVEN_MIRROR}/org/apache/iceberg/${ICEBERG_SPARK_JAR_NAME}/${ICEBERG_VERSION}/${ICEBERG_SPARK_JAR_NAME}-${ICEBERG_VERSION}.jar 97 | {% endif %} 98 | 99 | {% if flink_enabled and iceberg_enabled %} 100 | ICEBERG_FLINK_JAR_NAME=iceberg-flink-runtime-${FLINK_BINARY_VERSION} 101 | download_if_not_exists ${MAVEN_MIRROR}/org/apache/iceberg/${ICEBERG_FLINK_JAR_NAME}/${ICEBERG_VERSION}/${ICEBERG_FLINK_JAR_NAME}-${ICEBERG_VERSION}.jar 102 | {% endif %} 103 | 104 | {% if spark_enabled 
and hudi_enabled %} 105 | HUDI_SPARK_BUNDLE_JAR_NAME=hudi-spark${SPARK_BINARY_VERSION}-bundle_${SPARK_SCALA_BINARY_VERSION} 106 | download_if_not_exists https://github.com/yanghua/incubator-hudi/releases/download/release-${HUDI_VERSION}/${HUDI_SPARK_BUNDLE_JAR_NAME}-${HUDI_VERSION}.jar 107 | {% endif %} 108 | 109 | LOKI_APPENDER_JAR_NAME=log4j2-appender-nodep 110 | download_if_not_exists ${MAVEN_MIRROR}/pl/tkowalcz/tjahzi/${LOKI_APPENDER_JAR_NAME}/${LOKI_APPENDER_VERSION}/${LOKI_APPENDER_JAR_NAME}-${LOKI_APPENDER_VERSION}.jar 111 | 112 | download_if_not_exists ${MAVEN_MIRROR}/org/slf4j/jcl-over-slf4j/1.7.36/jcl-over-slf4j-1.7.36.jar 113 | 114 | # TODO: Remove after kyuubi ships that jar 115 | download_if_not_exists ${MAVEN_MIRROR}/org/apache/logging/log4j/log4j-layout-template-json/2.20.0/log4j-layout-template-json-2.20.0.jar 116 | -------------------------------------------------------------------------------- /templates/hadoop-common/files/etc/hadoop/conf/capacity-scheduler.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | yarn.scheduler.capacity.maximum-applications 5 | 10000 6 | 7 | Maximum number of applications that can be pending and running. 8 | 9 | 10 | 11 | yarn.scheduler.capacity.maximum-am-resource-percent 12 | 1 13 | 14 | Maximum percent of resources in the cluster which can be used to run 15 | application masters i.e. controls number of concurrent running 16 | applications. 17 | 18 | 19 | 20 | yarn.scheduler.capacity.root.queues 21 | default 22 | 23 | The queues at the this level (root is the root queue). 24 | 25 | 26 | 27 | yarn.scheduler.capacity.root.default.capacity 28 | 100 29 | Default queue target capacity. 30 | 31 | 32 | yarn.scheduler.capacity.root.default.maximum-capacity 33 | 100 34 | 35 | The maximum capacity of the default queue. 36 | 37 | 38 | 39 | yarn.scheduler.capacity.root.default.state 40 | RUNNING 41 | 42 | The state of the default queue. State can be one of RUNNING or STOPPED. 43 | 44 | 45 | 46 | yarn.scheduler.capacity.root.default.acl_submit_applications 47 | * 48 | 49 | The ACL of who can submit jobs to the default queue. 50 | 51 | 52 | 53 | yarn.scheduler.capacity.root.default.user-limit-factor 54 | 1 55 | 56 | Default queue user limit a percentage from 0.0 to 1.0. 57 | 58 | 59 | 60 | yarn.scheduler.capacity.root.default.acl_administer_queue 61 | * 62 | 63 | The ACL of who can administer jobs on the default queue. 64 | 65 | 66 | 67 | yarn.scheduler.capacity.node-locality-delay 68 | -1 69 | 70 | Number of missed scheduling opportunities after which the CapacityScheduler 71 | attempts to schedule rack-local containers. 72 | Typically this should be set to number of racks in the cluster, this 73 | feature is disabled by default, set to -1. 
74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /templates/hadoop-common/files/etc/hadoop/conf/container-executor.cfg: -------------------------------------------------------------------------------- 1 | yarn.nodemanager.linux-container-executor.group=hadoop#configured value of yarn.nodemanager.linux-container-executor.group 2 | banned.users=#comma separated list of users who can not run applications 3 | min.user.id=1000#Prevent other super-users 4 | allowed.system.users=##comma separated list of system users who CAN run applications 5 | feature.tc.enabled=false 6 | -------------------------------------------------------------------------------- /templates/hadoop-common/files/etc/hadoop/conf/core-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% if kerberos_enabled %} 4 | 5 | hadoop.security.authentication 6 | kerberos 7 | 8 | 9 | hadoop.security.authorization 10 | true 11 | 12 | 13 | hadoop.rpc.protection 14 | authentication 15 | 16 | 17 | hadoop.security.auth_to_local 18 | 19 | RULE:[2:$1/$2@$0]([ndj]n/.*@TEST.ORG)s/.*/hdfs/ 20 | RULE:[2:$1/$2@$0]([rn]m/.*@TEST.ORG)s/.*/yarn/ 21 | RULE:[2:$1/$2@$0](jhs/.*@TEST.ORG)s/.*/mapred/ 22 | DEFAULT RULE:[1:$1] RULE:[2:$1] 23 | 24 | 25 | {% endif %} 26 | 27 | fs.defaultFS 28 | hdfs://hadoop-master1.orb.local:8020 29 | 30 | 31 | hadoop.caller.context.enabled 32 | true 33 | 34 | 35 | hadoop.proxyuser.hadoop.hosts 36 | * 37 | 38 | 39 | hadoop.proxyuser.hadoop.groups 40 | * 41 | 42 | 43 | hadoop.proxyuser.hdfs.hosts 44 | * 45 | 46 | 47 | hadoop.proxyuser.hdfs.groups 48 | * 49 | 50 | 51 | hadoop.proxyuser.yarn.hosts 52 | * 53 | 54 | 55 | hadoop.proxyuser.yarn.groups 56 | * 57 | 58 | 59 | hadoop.proxyuser.mapred.hosts 60 | * 61 | 62 | 63 | hadoop.proxyuser.mapred.groups 64 | * 65 | 66 | 67 | hadoop.proxyuser.hive.hosts 68 | * 69 | 70 | 71 | hadoop.proxyuser.hive.groups 72 | * 73 | 74 | 75 | hadoop.proxyuser.HTTP.hosts 76 | * 77 | 78 | 79 | hadoop.proxyuser.HTTP.groups 80 | * 81 | 82 | 83 | hadoop.proxyuser.kyuubi.hosts 84 | * 85 | 86 | 87 | hadoop.proxyuser.kyuubi.groups 88 | * 89 | 90 | 91 | hadoop.proxyuser.hue.hosts 92 | * 93 | 94 | 95 | hadoop.proxyuser.hue.groups 96 | * 97 | 98 | 99 | -------------------------------------------------------------------------------- /templates/hadoop-common/files/etc/hadoop/conf/hadoop-env.sh.j2: -------------------------------------------------------------------------------- 1 | # Set Hadoop-specific environment variables here. 2 | # Forcing YARN-based mapreduce implementaion. 3 | # Make sure to comment out if you want to go back to the default or 4 | # if you want this to be tweakable on a per-user basis 5 | # export HADOOP_MAPRED_HOME=/usr/lib/hadoop-mapreduce 6 | 7 | # The maximum amount of heap to use, in MB. Default is 1000. 8 | export HADOOP_HEAPSIZE=256 9 | 10 | # Extra Java runtime options. Empty by default. 
11 | export HADOOP_NAMENODE_OPTS="$HADOOP_NAMENODE_OPTS -Xmx512m" 12 | export YARN_OPTS="$YARN_OPTS -Xmx256m" 13 | 14 | # Necessary to prevent map reduce jobs triggered by hive queries from dying with OOM error 15 | export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS -Xmx512m" 16 | 17 | export HADOOP_LOG_DIR=/var/log/hadoop-hdfs 18 | export HDFS_AUDIT_LOGGER=INFO,RFAAUDIT 19 | 20 | {% if kerberos_enabled %} 21 | export HDFS_DATANODE_SECURE_USER=hdfs 22 | export JSVC_HOME=/usr/bin 23 | {% endif %} -------------------------------------------------------------------------------- /templates/hadoop-common/files/etc/hadoop/conf/hdfs-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% if kerberos_enabled %} 4 | 5 | 6 | dfs.block.access.token.enable 7 | true 8 | 9 | 10 | dfs.namenode.kerberos.principal 11 | nn/_HOST@TEST.ORG 12 | 13 | 14 | dfs.namenode.keytab.file 15 | /share/keytabs/hadoop-master1/nn.service.keytab 16 | 17 | 18 | dfs.namenode.kerberos.internal.spnego.principal 19 | HTTP/_HOST@TEST.ORG 20 | 21 | 22 | dfs.web.authentication.kerberos.keytab 23 | /share/keytabs/hadoop-master1/nn.service.keytab 24 | 25 | 26 | 27 | 28 | dfs.datanode.data.dir.perm 29 | 700 30 | 31 | 32 | dfs.datanode.http.address 33 | 0.0.0.0:9864 34 | 35 | 36 | dfs.datanode.address 37 | 0.0.0.0:9866 38 | 39 | 40 | dfs.datanode.kerberos.principal 41 | dn/_HOST@TEST.ORG 42 | 43 | 44 | dfs.datanode.keytab.file 45 | /share/keytabs/{{ node.name }}/dn.service.keytab 46 | 47 | 48 | dfs.encrypt.data.transfer 49 | false 50 | 51 | {% endif %} 52 | 53 | 54 | dfs.namenode.http-address 55 | 0.0.0.0:9870 56 | 57 | 58 | dfs.namenode.rpc-address 59 | hadoop-master1.orb.local:8020 60 | 61 | 62 | dfs.permissions.enabled 63 | false 64 | 65 | 66 | 67 | dfs.safemode.threshold.pct 68 | 0 69 | 70 | 71 | dfs.namenode.name.dir 72 | /var/lib/hadoop-hdfs/cache/name/ 73 | 74 | 75 | dfs.datanode.data.dir 76 | /var/lib/hadoop-hdfs/cache/data/ 77 | 78 | 79 | -------------------------------------------------------------------------------- /templates/hadoop-common/files/etc/hadoop/conf/mapred-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% if kerberos_enabled %} 4 | 5 | mapreduce.jobhistory.keytab 6 | /share/keytabs/hadoop-master1/jhs.service.keytab 7 | 8 | 9 | mapreduce.jobhistory.principal 10 | jhs/_HOST@TEST.ORG 11 | 12 | {% endif %} 13 | 14 | mapreduce.jobtracker.address 15 | hadoop-master1.orb.local:8021 16 | 17 | 18 | mapreduce.framework.name 19 | yarn 20 | 21 | 22 | mapreduce.jobhistory.address 23 | hadoop-master1.orb.local:10020 24 | 25 | 26 | mapreduce.jobhistory.webapp.address 27 | hadoop-master1.orb.local:19888 28 | 29 | 30 | yarn.app.mapreduce.am.staging-dir 31 | /mr-staging 32 | hdfs 33 | 34 | 35 | mapreduce.jobhistory.intermediate-done-dir 36 | /mr-history/intermediate 37 | hdfs 38 | 39 | 40 | mapreduce.jobhistory.done-dir 41 | /mr-history/done 42 | hdfs 43 | 44 | 45 | mapreduce.task.tmp.dir 46 | /var/lib/hadoop-mapreduce/cache/${user.name}/tasks 47 | To set the value of tmp directory for map and reduce tasks. 
48 | 49 | 50 | yarn.app.mapreduce.am.env 51 | HADOOP_MAPRED_HOME=/opt/hadoop 52 | 53 | 54 | mapreduce.map.env 55 | HADOOP_MAPRED_HOME=/opt/hadoop 56 | 57 | 58 | mapreduce.reduce.env 59 | HADOOP_MAPRED_HOME=/opt/hadoop 60 | 61 | 62 | -------------------------------------------------------------------------------- /templates/hadoop-common/files/etc/hadoop/conf/yarn-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% if kerberos_enabled %} 4 | 5 | 6 | yarn.resourcemanager.principal 7 | rm/_HOST@TEST.ORG 8 | 9 | 10 | yarn.resourcemanager.keytab 11 | /share/keytabs/{{ node.name }}/rm.service.keytab 12 | 13 | 14 | 15 | 16 | yarn.nodemanager.principal 17 | nm/_HOST@TEST.ORG 18 | 19 | 20 | yarn.nodemanager.keytab 21 | /share/keytabs/{{ node.name }}/nm.service.keytab 22 | 23 | 24 | yarn.nodemanager.container-executor.class 25 | org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor 26 | 27 | 28 | yarn.nodemanager.linux-container-executor.group 29 | hadoop 30 | 31 | {% endif %} 32 | 33 | yarn.resourcemanager.address 34 | hadoop-master1.orb.local:8032 35 | 36 | 37 | yarn.nodemanager.aux-services 38 | mapreduce_shuffle,spark_shuffle 39 | 40 | 41 | yarn.nodemanager.aux-services.mapreduce_shuffle.class 42 | org.apache.hadoop.mapred.ShuffleHandler 43 | 44 | 45 | yarn.nodemanager.aux-services.spark_shuffle.classpath 46 | /opt/spark/yarn/* 47 | 48 | 49 | yarn.nodemanager.aux-services.spark_shuffle.class 50 | org.apache.spark.network.yarn.YarnShuffleService 51 | 52 | 53 | spark.shuffle.service.port 54 | 7001 55 | 56 | 57 | yarn.log-aggregation-enable 58 | true 59 | 60 | 61 | yarn.dispatcher.exit-on-error 62 | true 63 | 64 | 65 | yarn.nodemanager.local-dirs 66 | /var/lib/hadoop-yarn/cache/${user.name}/nm-local-dir 67 | local 68 | 69 | 70 | yarn.nodemanager.log-dirs 71 | /var/log/hadoop-yarn/containers 72 | local 73 | 74 | 75 | yarn.nodemanager.remote-app-log-dir 76 | /yarn-app-log 77 | hdfs 78 | 79 | 80 | yarn.application.classpath 81 | 82 | /etc/hadoop/conf, 83 | /opt/hadoop/share/hadoop/common/*, 84 | /opt/hadoop/share/hadoop/common/lib/*, 85 | /opt/hadoop/share/hadoop/hdfs/*, 86 | /opt/hadoop/share/hadoop/hdfs/lib/*, 87 | /opt/hadoop/share/hadoop/yarn/*, 88 | /opt/hadoop/share/hadoop/yarn/lib/* 89 | 90 | 91 | 92 | yarn.resourcemanager.hostname 93 | hadoop-master1.orb.local 94 | 95 | 96 | yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage 97 | 100 98 | 99 | 100 | yarn.nodemanager.pmem-check-enabled 101 | false 102 | 103 | 104 | yarn.nodemanager.vmem-check-enabled 105 | false 106 | 107 | 108 | yarn.nodemanager.resource.memory.enforced 109 | false 110 | 111 | 112 | yarn.nodemanager.elastic-memory-control.enabled 113 | false 114 | 115 | 116 | yarn.log.server.url 117 | http://hadoop-master1.orb.local:19888/jobhistory/logs 118 | 119 | 120 | yarn.scheduler.minimum-allocation-mb 121 | 256 122 | 123 | 124 | -------------------------------------------------------------------------------- /templates/hadoop-common/files/etc/krb5.conf: -------------------------------------------------------------------------------- 1 | [logging] 2 | default = FILE:/var/log/krb5libs.log 3 | kdc = FILE:/var/log/krb5kdc.log 4 | admin_server = FILE:/var/log/kadmind.log 5 | 6 | [libdefaults] 7 | default_realm = TEST.ORG 8 | dns_lookup_realm = false 9 | dns_lookup_kdc = false 10 | forwardable = true 11 | allow_weak_crypto = true 12 | 13 | [realms] 14 | TEST.ORG = { 15 | kdc = kdc.orb.local:88 16 | admin_server = kdc.orb.local 17 | } 18 | 19 | 
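# Sanity-check sketch (illustrative, not part of the upstream file; assumes kerberos_enabled is set
# and that the keytabs referenced by hdfs-site.xml.j2 are mounted under /share/keytabs):
#
#   kinit -kt /share/keytabs/hadoop-master1/nn.service.keytab nn/hadoop-master1.orb.local@TEST.ORG
#   klist
#
# Obtaining a ticket for the TEST.ORG realm confirms this krb5.conf reaches kdc.orb.local.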
-------------------------------------------------------------------------------- /templates/hadoop-common/files/etc/supervisor/conf.d/trino-server.conf: -------------------------------------------------------------------------------- 1 | [program:trino_server] 2 | directory=/opt/trino 3 | command=/opt/trino/bin/launcher run --etc-dir=/etc/trino/conf 4 | startsecs=30 5 | stopwaitsecs=10 6 | user=trino 7 | redirect_stderr=true 8 | stdout_logfile=/var/log/trino-server.log 9 | {% if trino_enabled %} 10 | autostart=true 11 | {% else %} 12 | autostart=false 13 | {% endif %} 14 | 15 | environment= 16 | PATH=/opt/openjdk-21/bin:%(ENV_PATH)s 17 | -------------------------------------------------------------------------------- /templates/hadoop-common/files/etc/trino/conf/config.properties: -------------------------------------------------------------------------------- 1 | coordinator=false 2 | http-server.http.port=18081 3 | discovery.uri=http://hadoop-master1.orb.local:18081 4 | -------------------------------------------------------------------------------- /templates/hadoop-common/files/etc/trino/conf/jvm.config: -------------------------------------------------------------------------------- 1 | -server 2 | -Xmx1G 3 | -XX:InitialRAMPercentage=80.0 4 | -XX:MaxRAMPercentage=80.0 5 | -XX:G1HeapRegionSize=32M 6 | -XX:+ExplicitGCInvokesConcurrent 7 | -XX:+ExitOnOutOfMemoryError 8 | -XX:+HeapDumpOnOutOfMemoryError 9 | -XX:-OmitStackTraceInFastThrow 10 | -XX:ReservedCodeCacheSize=512M 11 | -XX:PerMethodRecompilationCutoff=10000 12 | -XX:PerBytecodeRecompilationCutoff=10000 13 | -Djdk.attach.allowAttachSelf=true 14 | -Djdk.nio.maxCachedBufferSize=2000000 15 | -Dfile.encoding=UTF-8 16 | # Reduce starvation of threads by GClocker, recommend to set about the number of cpu cores (JDK-8192647) 17 | -XX:+UnlockDiagnosticVMOptions 18 | -XX:GCLockerRetryAllocationCount=32 19 | -------------------------------------------------------------------------------- /templates/hadoop-common/files/etc/trino/conf/log.properties: -------------------------------------------------------------------------------- 1 | io.trino=INFO 2 | -------------------------------------------------------------------------------- /templates/hadoop-common/files/etc/trino/conf/node.properties: -------------------------------------------------------------------------------- 1 | node.environment=production 2 | node.id=hadoop-worker1 3 | node.data-dir=/var/trino/data 4 | -------------------------------------------------------------------------------- /templates/hadoop-common/files/opt/hadoop-init.d/init-hdfs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | 3 | mkdir /var/lib/hadoop-hdfs 4 | chown -R hdfs:hdfs /var/lib/hadoop-hdfs 5 | 6 | mkdir /var/lib/hadoop-mapreduce 7 | chown -R mapred:mapred /var/lib/hadoop-mapreduce 8 | 9 | mkdir /var/lib/hadoop-yarn 10 | chown -R yarn:yarn /var/lib/hadoop-yarn 11 | 12 | mkdir /opt/hadoop/logs /var/log/hadoop-hdfs /var/log/hadoop-yarn 13 | chown -R hadoop:hadoop /opt/hadoop/logs 14 | chown -R hdfs:hadoop /var/log/hadoop-hdfs 15 | chown -R yarn:hadoop /var/log/hadoop-yarn 16 | chmod -R 770 /opt/hadoop/logs /var/log/hadoop-hdfs 17 | chmod 755 /var/log/hadoop-yarn 18 | 19 | # workaround for 'could not open session' bug as suggested here: 20 | # https://github.com/docker/docker/issues/7056#issuecomment-49371610 21 | rm -f /etc/security/limits.d/hdfs.conf 22 | -------------------------------------------------------------------------------- 
/templates/hadoop-common/files/opt/trino-init.d/init-workdir.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | 3 | mkdir /var/trino 4 | chown -R trino:trino /var/trino 5 | -------------------------------------------------------------------------------- /templates/hadoop-master/Dockerfile.j2: -------------------------------------------------------------------------------- 1 | #jinja2: trim_blocks: True, lstrip_blocks: True 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | ARG PROJECT_VERSION 15 | FROM hadoop-testing/base-ubuntu-2004:$PROJECT_VERSION 16 | 17 | ARG ZEPPELIN_VERSION 18 | ARG ZOOKEEPER_VERSION 19 | ARG HADOOP_VERSION 20 | ARG HIVE_VERSION 21 | ARG SPARK_VERSION 22 | ARG SPARK_BINARY_VERSION 23 | ARG FLINK_VERSION 24 | ARG FLINK_BINARY_VERSION 25 | ARG FLINK_HIVE_VERSION 26 | ARG SPARK_SCALA_BINARY_VERSION 27 | ARG KYUUBI_VERSION 28 | ARG MYSQL_JDBC_VERSION 29 | ARG LOKI_APPENDER_VERSION 30 | ARG RANGER_VERSION 31 | ARG ICEBERG_VERSION 32 | ARG HUDI_VERSION 33 | ARG TRINO_VERSION 34 | ARG PARQUET_VERSION 35 | 36 | ENV ZEPPELIN_HOME=/opt/zeppelin 37 | ENV ZEPPELIN_CONF_DIR=/etc/zeppelin/conf 38 | ENV ZOOKEEPER_HOME=/opt/zookeeper 39 | ENV ZOOCFGDIR=/etc/zookeeper/conf 40 | ENV HADOOP_HOME=/opt/hadoop 41 | ENV HADOOP_CONF_DIR=/etc/hadoop/conf 42 | ENV LD_LIBRARY_PATH=${HADOOP_HOME}/lib/native 43 | ENV HIVE_HOME=/opt/hive 44 | ENV HIVE_CONF_DIR=/etc/hive/conf 45 | {% if spark_enabled %} 46 | ENV SPARK_HOME=/opt/spark 47 | ENV SPARK_CONF_DIR=/etc/spark/conf 48 | {% endif %} 49 | {% if flink_enabled %} 50 | ENV FLINK_HOME=/opt/flink 51 | ENV FLINK_CONF_DIR=/etc/flink/conf 52 | {% endif %} 53 | ENV KYUUBI_HOME=/opt/kyuubi 54 | ENV KYUUBI_CONF_DIR=/etc/kyuubi/conf 55 | {% if ranger_enabled %} 56 | ENV RANGER_HOME=/opt/ranger 57 | {% endif %} 58 | {% if trino_enabled %} 59 | ENV TRINO_HOME=/opt/trino 60 | {% endif %} 61 | {% if parquet_enabled %} 62 | ENV PARQUET_HOME=/opt/parquet 63 | {% endif %} 64 | ENV MYSQL_JDBC_VERSION=${MYSQL_JDBC_VERSION} 65 | ENV PATH=${HIVE_HOME}/bin:${HADOOP_HOME}/bin:${ZEPPELIN_HOME}/bin:${ZOOKEEPER_HOME}/bin:${PATH} 66 | {% if spark_enabled %} 67 | ENV PATH=${SPARK_HOME}/bin:${PATH} 68 | {% endif %} 69 | {% if flink_enabled %} 70 | ENV PATH=${FLINK_HOME}/bin:${PATH} 71 | {% endif %} 72 | ENV PATH=${KYUUBI_HOME}/bin:${PATH} 73 | {% if parquet_enabled %} 74 | ENV PATH=${PARQUET_HOME}/bin:${PATH} 75 | {% endif %} 76 | 77 | {% if zeppelin_enabled %} 78 | ADD download/zeppelin-${ZEPPELIN_VERSION}-bin{{ '-%s' % zeppelin_custom_name if zeppelin_custom_name }}.tgz /opt 79 | {% endif %} 80 | ADD download/apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz /opt 81 | ADD download/hadoop-${HADOOP_VERSION}.tar.gz /opt 82 | ADD download/apache-hive-${HIVE_VERSION}-bin.tar.gz /opt 83 | {% if spark_enabled %} 84 | ADD download/spark-${SPARK_VERSION}-bin-{{ spark_custom_name }}.tgz /opt 85 | {% endif %} 86 | {% if flink_enabled %} 87 | ADD 
download/flink-${FLINK_VERSION}-bin-scala_2.12.tgz /opt 88 | {% endif %} 89 | ADD download/apache-kyuubi-${KYUUBI_VERSION}-bin.tgz /opt 90 | {% if ranger_enabled %} 91 | ADD download/ranger-${RANGER_VERSION}-admin.tar.gz /opt 92 | {% endif %} 93 | {% if trino_enabled %} 94 | ADD download/trino-server-${TRINO_VERSION}.tar.gz /opt 95 | {% endif %} 96 | 97 | # Copy configuration files 98 | COPY ./files / 99 | 100 | RUN chmod 600 /root/.ssh/id_rsa_hadoop_testing 101 | 102 | RUN ln -snf /opt/apache-zookeeper-${ZOOKEEPER_VERSION}-bin ${ZOOKEEPER_HOME} && \ 103 | ln -snf /opt/hadoop-${HADOOP_VERSION} ${HADOOP_HOME} && \ 104 | ln -snf /opt/apache-hive-${HIVE_VERSION}-bin ${HIVE_HOME} && \ 105 | ln -snf /opt/apache-kyuubi-${KYUUBI_VERSION}-bin ${KYUUBI_HOME} && \ 106 | ln -snf ${HIVE_CONF_DIR}/hive-site.xml ${KYUUBI_CONF_DIR}/hive-site.xml && \ 107 | mkdir -p /var/log/kyuubi && chmod -R 777 /var/log/kyuubi 108 | 109 | ADD download/log4j-layout-template-json-2.20.0.jar ${KYUUBI_HOME}/jars/ 110 | ADD download/mysql-connector-j-${MYSQL_JDBC_VERSION}.jar ${KYUUBI_HOME}/jars/ 111 | 112 | {% if spark_enabled %} 113 | RUN ln -snf /opt/spark-${SPARK_VERSION}-bin-{{ spark_custom_name }} ${SPARK_HOME} && \ 114 | ln -snf ${HIVE_CONF_DIR}/hive-site.xml ${SPARK_CONF_DIR}/hive-site.xml 115 | 116 | ADD download/mysql-connector-j-${MYSQL_JDBC_VERSION}.jar ${SPARK_HOME}/jars/ 117 | {% endif %} 118 | 119 | {% if flink_enabled %} 120 | RUN ln -snf /opt/flink-${FLINK_VERSION} ${FLINK_HOME} && \ 121 | ln -snf ${HIVE_CONF_DIR}/hive-site.xml ${FLINK_CONF_DIR}/hive-site.xml && \ 122 | ln -s ${HADOOP_HOME}/share/hadoop/client/hadoop-client-api-${HADOOP_VERSION}.jar ${FLINK_HOME}/lib/ && \ 123 | ln -s ${HADOOP_HOME}/share/hadoop/client/hadoop-client-runtime-${HADOOP_VERSION}.jar ${FLINK_HOME}/lib/ && \ 124 | mkdir /var/log/flink && chmod -R 777 /var/log/flink 125 | {% endif %} 126 | 127 | {% if zeppelin_enabled %} 128 | RUN ln -snf /opt/zeppelin-${ZEPPELIN_VERSION}-bin{{ '-%s' % zeppelin_custom_name if zeppelin_custom_name }} ${ZEPPELIN_HOME} && \ 129 | mkdir -p /var/log/zeppelin && \ 130 | mkdir -p /var/run/zeppelin && \ 131 | mkdir -p /var/run/zeppelin/webapps && \ 132 | mkdir -p /var/run/zeppelin/pid && \ 133 | mkdir -p /var/lib/zeppelin && \ 134 | mkdir -p /var/lib/zeppelin/notebook && \ 135 | cp -R ${ZEPPELIN_HOME}/notebook/* /var/lib/zeppelin/notebook && \ 136 | chown -R 6014:6014 /etc/zeppelin && \ 137 | chown -R 6014:6014 /var/run/zeppelin && \ 138 | chown -R 6014:6014 /var/lib/zeppelin && \ 139 | chown -R 6014:6014 /var/log/zeppelin 140 | 141 | ADD download/mysql-connector-j-${MYSQL_JDBC_VERSION}.jar ${ZEPPELIN_HOME}/interpreter/jdbc/ 142 | ADD download/kyuubi-hive-jdbc-shaded-${KYUUBI_VERSION}.jar ${ZEPPELIN_HOME}/interpreter/jdbc/ 143 | RUN ln -s ${HIVE_HOME}/jdbc/hive-jdbc-${HIVE_VERSION}-standalone.jar ${ZEPPELIN_HOME}/interpreter/jdbc/ && \ 144 | ln -s ${HADOOP_HOME}/share/hadoop/client/hadoop-client-api-${HADOOP_VERSION}.jar ${ZEPPELIN_HOME}/interpreter/jdbc/ && \ 145 | ln -s ${HADOOP_HOME}/share/hadoop/client/hadoop-client-runtime-${HADOOP_VERSION}.jar ${ZEPPELIN_HOME}/interpreter/jdbc/ 146 | {% endif %} 147 | 148 | {% if ranger_enabled %} 149 | {% endif %} 150 | {% if ranger_enabled %} 151 | RUN ln -snf /opt/ranger-${RANGER_VERSION}-admin ${RANGER_HOME} 152 | {% endif %} 153 | {% if trino_enabled %} 154 | RUN ln -snf /opt/trino-server-${TRINO_VERSION} ${TRINO_HOME} 155 | {% endif %} 156 | 157 | ADD download/mysql-connector-j-${MYSQL_JDBC_VERSION}.jar ${HIVE_HOME}/lib/ 158 | {% if ranger_enabled %} 159 | ADD 
download/mysql-connector-j-${MYSQL_JDBC_VERSION}.jar ${RANGER_HOME}/ 160 | {% endif %} 161 | {% if spark_enabled and iceberg_enabled %} 162 | ADD download/iceberg-spark-runtime-${SPARK_BINARY_VERSION}_${SPARK_SCALA_BINARY_VERSION}-${ICEBERG_VERSION}.jar ${SPARK_HOME}/jars/ 163 | {% endif %} 164 | {% if flink_enabled and iceberg_enabled %} 165 | ADD download/iceberg-flink-runtime-${FLINK_BINARY_VERSION}-${ICEBERG_VERSION}.jar ${FLINK_HOME}/lib/ 166 | {% endif %} 167 | {% if spark_enabled and hudi_enabled %} 168 | ADD download/hudi-spark${SPARK_BINARY_VERSION}-bundle_${SPARK_SCALA_BINARY_VERSION}-${HUDI_VERSION}.jar ${SPARK_HOME}/jars/ 169 | {% endif %} 170 | {% if flink_enabled %} 171 | ADD download/jcl-over-slf4j-1.7.36.jar ${FLINK_HOME}/lib/ 172 | ADD download/flink-sql-connector-hive-${FLINK_HIVE_VERSION}_2.12-${FLINK_VERSION}.jar ${FLINK_HOME}/lib/ 173 | {% endif %} 174 | {% if trino_enabled %} 175 | ADD --chmod=755 download/trino-cli-${TRINO_VERSION}-executable.jar ${TRINO_HOME}/bin/trino-cli 176 | {% endif %} 177 | {% if parquet_enabled %} 178 | ADD download/parquet-cli-${PARQUET_VERSION}-runtime.jar ${PARQUET_HOME}/jars/ 179 | {% endif %} 180 | 181 | ADD download/log4j2-appender-nodep-${LOKI_APPENDER_VERSION}.jar ${HIVE_HOME}/lib/ 182 | ADD download/log4j2-appender-nodep-${LOKI_APPENDER_VERSION}.jar ${KYUUBI_HOME}/jars/ 183 | 184 | {% if spark_enabled %} 185 | ADD download/kyuubi-spark-connector-tpch_${SPARK_SCALA_BINARY_VERSION}-${KYUUBI_VERSION}.jar ${SPARK_HOME}/jars/ 186 | ADD download/kyuubi-spark-connector-tpcds_${SPARK_SCALA_BINARY_VERSION}-${KYUUBI_VERSION}.jar ${SPARK_HOME}/jars/ 187 | ADD download/log4j2-appender-nodep-${LOKI_APPENDER_VERSION}.jar ${SPARK_HOME}/jars/ 188 | {% endif %} 189 | 190 | {% if ranger_enabled %} 191 | # chown would double the size of the image by introducing a new layer, but Ranger does not seem to work without it 192 | RUN chown -R ranger:hadoop /opt/ranger-${RANGER_VERSION}-admin 193 | {% endif %} 194 | 195 | RUN /opt/hadoop-init.d/init-hdfs.sh 196 | {% if trino_enabled %} 197 | RUN /opt/trino-init.d/init-workdir.sh 198 | {% endif %} 199 | 200 | # Zookeeper ports 201 | EXPOSE 2181 202 | 203 | # HDFS ports 204 | EXPOSE 8020 9864 9866 9867 9870 205 | 206 | # YARN ports 207 | EXPOSE 8030 8031 8032 8033 8040 8041 8042 8088 10020 19888 208 | 209 | # HIVE ports 210 | EXPOSE 9083 10000 211 | 212 | # SPARK ports 213 | EXPOSE 18080 214 | 215 | # Flink ports 216 | EXPOSE 8082 217 | 218 | {% if ranger_enabled %} 219 | EXPOSE 6080 220 | {% endif %} 221 | 222 | {% if trino_enabled %} 223 | EXPOSE 18081 224 | {% endif %} 225 | 226 | CMD ["supervisord", "-c", "/etc/supervisord.conf"] 227 | ENTRYPOINT ["/opt/entrypoint.sh"] 228 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/flink/conf/log4j-cli.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. 
You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | # Allows this configuration to be modified at runtime. The file will be checked every 30 seconds. 20 | monitorInterval=30 21 | 22 | rootLogger.level = INFO 23 | rootLogger.appenderRef.file.ref = FileAppender 24 | 25 | # Log all infos in the given file 26 | appender.file.name = FileAppender 27 | appender.file.type = FILE 28 | appender.file.append = false 29 | appender.file.fileName = ${sys:log.file} 30 | appender.file.layout.type = PatternLayout 31 | appender.file.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n 32 | 33 | # Log output from org.apache.flink.yarn to the console. This is used by the 34 | # CliFrontend class when using a per-job YARN cluster. 35 | logger.yarn.name = org.apache.flink.yarn 36 | logger.yarn.level = INFO 37 | logger.yarn.appenderRef.console.ref = ConsoleAppender 38 | logger.yarncli.name = org.apache.flink.yarn.cli.FlinkYarnSessionCli 39 | logger.yarncli.level = INFO 40 | logger.yarncli.appenderRef.console.ref = ConsoleAppender 41 | logger.hadoop.name = org.apache.hadoop 42 | logger.hadoop.level = INFO 43 | logger.hadoop.appenderRef.console.ref = ConsoleAppender 44 | 45 | # Make sure hive logs go to the file. 46 | logger.hive.name = org.apache.hadoop.hive 47 | logger.hive.level = INFO 48 | logger.hive.additivity = false 49 | logger.hive.appenderRef.file.ref = FileAppender 50 | 51 | # Log output from org.apache.flink.kubernetes to the console. 52 | logger.kubernetes.name = org.apache.flink.kubernetes 53 | logger.kubernetes.level = INFO 54 | logger.kubernetes.appenderRef.console.ref = ConsoleAppender 55 | 56 | appender.console.name = ConsoleAppender 57 | appender.console.type = CONSOLE 58 | appender.console.layout.type = PatternLayout 59 | appender.console.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n 60 | 61 | # suppress the warning that hadoop native libraries are not loaded (irrelevant for the client) 62 | logger.hadoopnative.name = org.apache.hadoop.util.NativeCodeLoader 63 | logger.hadoopnative.level = OFF 64 | 65 | # Suppress the irrelevant (wrong) warnings from the Netty channel handler 66 | logger.netty.name = org.jboss.netty.channel.DefaultChannelPipeline 67 | logger.netty.level = OFF 68 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/flink/conf/log4j-console.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. 
You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | # Allows this configuration to be modified at runtime. The file will be checked every 30 seconds. 20 | monitorInterval=30 21 | 22 | # This affects logging for both user code and Flink 23 | rootLogger.level = INFO 24 | rootLogger.appenderRef.console.ref = ConsoleAppender 25 | rootLogger.appenderRef.rolling.ref = RollingFileAppender 26 | 27 | # Uncomment this if you want to _only_ change Flink's logging 28 | #logger.flink.name = org.apache.flink 29 | #logger.flink.level = INFO 30 | 31 | # The following lines keep the log level of common libraries/connectors on 32 | # log level INFO. The root logger does not override this. You have to manually 33 | # change the log levels here. 34 | logger.pekko.name = org.apache.pekko 35 | logger.pekko.level = INFO 36 | logger.kafka.name= org.apache.kafka 37 | logger.kafka.level = INFO 38 | logger.hadoop.name = org.apache.hadoop 39 | logger.hadoop.level = INFO 40 | logger.zookeeper.name = org.apache.zookeeper 41 | logger.zookeeper.level = INFO 42 | logger.shaded_zookeeper.name = org.apache.flink.shaded.zookeeper3 43 | logger.shaded_zookeeper.level = INFO 44 | 45 | # Log all infos to the console 46 | appender.console.name = ConsoleAppender 47 | appender.console.type = CONSOLE 48 | appender.console.layout.type = PatternLayout 49 | appender.console.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n 50 | appender.console.filter.threshold.type = ThresholdFilter 51 | appender.console.filter.threshold.level = ${sys:console.log.level:-ALL} 52 | 53 | # Log all infos in the given rolling file 54 | appender.rolling.name = RollingFileAppender 55 | appender.rolling.type = RollingFile 56 | appender.rolling.append = true 57 | appender.rolling.fileName = ${sys:log.file} 58 | appender.rolling.filePattern = ${sys:log.file}.%i 59 | appender.rolling.layout.type = PatternLayout 60 | appender.rolling.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n 61 | appender.rolling.policies.type = Policies 62 | appender.rolling.policies.size.type = SizeBasedTriggeringPolicy 63 | appender.rolling.policies.size.size=100MB 64 | appender.rolling.policies.startup.type = OnStartupTriggeringPolicy 65 | appender.rolling.strategy.type = DefaultRolloverStrategy 66 | appender.rolling.strategy.max = ${env:MAX_LOG_FILE_NUMBER:-10} 67 | 68 | # Suppress the irrelevant (wrong) warnings from the Netty channel handler 69 | logger.netty.name = org.jboss.netty.channel.DefaultChannelPipeline 70 | logger.netty.level = OFF 71 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/flink/conf/log4j-session.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. 
See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | # Allows this configuration to be modified at runtime. The file will be checked every 30 seconds. 20 | monitorInterval=30 21 | 22 | rootLogger.level = INFO 23 | rootLogger.appenderRef.console.ref = ConsoleAppender 24 | 25 | appender.console.name = ConsoleAppender 26 | appender.console.type = CONSOLE 27 | appender.console.layout.type = PatternLayout 28 | appender.console.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n 29 | 30 | # Suppress the irrelevant (wrong) warnings from the Netty channel handler 31 | logger.netty.name = org.jboss.netty.channel.DefaultChannelPipeline 32 | logger.netty.level = OFF 33 | logger.zookeeper.name = org.apache.zookeeper 34 | logger.zookeeper.level = WARN 35 | logger.shaded_zookeeper.name = org.apache.flink.shaded.zookeeper3 36 | logger.shaded_zookeeper.level = WARN 37 | logger.curator.name = org.apache.flink.shaded.org.apache.curator.framework 38 | logger.curator.level = WARN 39 | logger.runtimeutils.name= org.apache.flink.runtime.util.ZooKeeperUtils 40 | logger.runtimeutils.level = WARN 41 | logger.runtimeleader.name = org.apache.flink.runtime.leaderretrieval.ZooKeeperLeaderRetrievalDriver 42 | logger.runtimeleader.level = WARN 43 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/flink/conf/log4j.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | # Allows this configuration to be modified at runtime. The file will be checked every 30 seconds. 
20 | monitorInterval=30 21 | 22 | # This affects logging for both user code and Flink 23 | rootLogger.level = INFO 24 | rootLogger.appenderRef.file.ref = MainAppender 25 | 26 | # Uncomment this if you want to _only_ change Flink's logging 27 | #logger.flink.name = org.apache.flink 28 | #logger.flink.level = INFO 29 | 30 | # The following lines keep the log level of common libraries/connectors on 31 | # log level INFO. The root logger does not override this. You have to manually 32 | # change the log levels here. 33 | logger.pekko.name = org.apache.pekko 34 | logger.pekko.level = INFO 35 | logger.kafka.name= org.apache.kafka 36 | logger.kafka.level = INFO 37 | logger.hadoop.name = org.apache.hadoop 38 | logger.hadoop.level = INFO 39 | logger.zookeeper.name = org.apache.zookeeper 40 | logger.zookeeper.level = INFO 41 | logger.shaded_zookeeper.name = org.apache.flink.shaded.zookeeper3 42 | logger.shaded_zookeeper.level = INFO 43 | 44 | # Log all infos in the given file 45 | appender.main.name = MainAppender 46 | appender.main.type = RollingFile 47 | appender.main.append = true 48 | appender.main.fileName = ${sys:log.file} 49 | appender.main.filePattern = ${sys:log.file}.%i 50 | appender.main.layout.type = PatternLayout 51 | appender.main.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n 52 | appender.main.policies.type = Policies 53 | appender.main.policies.size.type = SizeBasedTriggeringPolicy 54 | appender.main.policies.size.size = 100MB 55 | appender.main.policies.startup.type = OnStartupTriggeringPolicy 56 | appender.main.strategy.type = DefaultRolloverStrategy 57 | appender.main.strategy.max = ${env:MAX_LOG_FILE_NUMBER:-10} 58 | 59 | # Suppress the irrelevant (wrong) warnings from the Netty channel handler 60 | logger.netty.name = org.jboss.netty.channel.DefaultChannelPipeline 61 | logger.netty.level = OFF 62 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/flink/conf_history_server/flink-conf.yaml: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | #============================================================================== 20 | # HistoryServer 21 | #============================================================================== 22 | 23 | # The HistoryServer is started and stopped via bin/historyserver.sh (start|stop) 24 | 25 | # The address under which the web-based HistoryServer listens. 
26 | #historyserver.web.address: 0.0.0.0 27 | 28 | # The port under which the web-based HistoryServer listens. 29 | historyserver.web.port: 8082 30 | 31 | # Comma separated list of directories to monitor for completed jobs. 32 | historyserver.archive.fs.dir: hdfs://hadoop-master1.orb.local:8020/flink-history 33 | 34 | # Interval in milliseconds for refreshing the monitored directories. 35 | #historyserver.archive.fs.refresh-interval: 10000 36 | 37 | #============================================================================== 38 | # Flink Cluster Security Configuration 39 | #============================================================================== 40 | 41 | # Kerberos authentication for various components - Hadoop, ZooKeeper, and connectors - 42 | # may be enabled in four steps: 43 | # 1. configure the local krb5.conf file 44 | # 2. provide Kerberos credentials (either a keytab or a ticket cache w/ kinit) 45 | # 3. make the credentials available to various JAAS login contexts 46 | # 4. configure the connector to use JAAS/SASL 47 | 48 | # The below configure how Kerberos credentials are provided. A keytab will be used instead of 49 | # a ticket cache if the keytab path and principal are set. 50 | 51 | {% if kerberos_enabled %} 52 | security.kerberos.login.use-ticket-cache: true 53 | security.kerberos.login.keytab: /share/keytabs/hadoop-master1/flink.service.keytab 54 | security.kerberos.login.principal: flink/hadoop-master1.orb.local@TEST.ORG 55 | {% else %} 56 | # security.kerberos.login.use-ticket-cache: true 57 | # security.kerberos.login.keytab: /path/to/kerberos/keytab 58 | # security.kerberos.login.principal: flink-user 59 | {% endif %} 60 | 61 | # The configuration below defines which JAAS login contexts 62 | 63 | # security.kerberos.login.contexts: Client,KafkaClient 64 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/flink/conf_history_server/log4j-console.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | # Allows this configuration to be modified at runtime. The file will be checked every 30 seconds. 
20 | monitorInterval=30 21 | 22 | # This affects logging for both user code and Flink 23 | rootLogger.level = INFO 24 | rootLogger.appenderRef.console.ref = ConsoleAppender 25 | 26 | # Uncomment this if you want to _only_ change Flink's logging 27 | #logger.flink.name = org.apache.flink 28 | #logger.flink.level = INFO 29 | 30 | # The following lines keep the log level of common libraries/connectors on 31 | # log level INFO. The root logger does not override this. You have to manually 32 | # change the log levels here. 33 | logger.pekko.name = org.apache.pekko 34 | logger.pekko.level = INFO 35 | logger.kafka.name= org.apache.kafka 36 | logger.kafka.level = INFO 37 | logger.hadoop.name = org.apache.hadoop 38 | logger.hadoop.level = INFO 39 | logger.zookeeper.name = org.apache.zookeeper 40 | logger.zookeeper.level = INFO 41 | logger.shaded_zookeeper.name = org.apache.flink.shaded.zookeeper3 42 | logger.shaded_zookeeper.level = INFO 43 | 44 | # Log all infos to the console 45 | appender.console.name = ConsoleAppender 46 | appender.console.type = CONSOLE 47 | appender.console.layout.type = PatternLayout 48 | appender.console.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n 49 | appender.console.filter.threshold.type = ThresholdFilter 50 | appender.console.filter.threshold.level = ${sys:console.log.level:-ALL} 51 | 52 | # Suppress the irrelevant (wrong) warnings from the Netty channel handler 53 | logger.netty.name = org.jboss.netty.channel.DefaultChannelPipeline 54 | logger.netty.level = OFF 55 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/hive/conf/beeline-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% if kerberos_enabled %} 4 | 5 | beeline.hs2.jdbc.url.tcpUrl 6 | jdbc:hive2://hadoop-master1.orb.local:10000/default;principal=hive/_HOST@TEST.ORG 7 | 8 | {% else %} 9 | 10 | beeline.hs2.jdbc.url.tcpUrl 11 | jdbc:hive2://hadoop-master1.orb.local:10000/default;user=hive;password=hive 12 | 13 | {% endif %} 14 | 15 | beeline.hs2.jdbc.url.default 16 | tcpUrl 17 | 18 | 19 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/hive/conf/hive-env.sh: -------------------------------------------------------------------------------- 1 | export HADOOP_HEAPSIZE=256 2 | export HADOOP_CLIENT_OPTS="${HADOOP_CLIENT_OPTS} -Xmx256m -Djava.io.tmpdir=/tmp" 3 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/hive/conf/hive-log4j2.properties: -------------------------------------------------------------------------------- 1 | rootLogger.level = info 2 | rootLogger.appenderRef.stdout.ref = console 3 | 4 | appender.console.type = Console 5 | appender.console.name = console 6 | appender.console.target = SYSTEM_OUT 7 | appender.console.layout.type = PatternLayout 8 | appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} [%t]: %p %c{2}: %m%n 9 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/hive/conf/hive-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% if kerberos_enabled %} 4 | 5 | 6 | hive.server2.authentication 7 | kerberos 8 | 9 | 10 | hive.server2.authentication.kerberos.principal 11 | hive/_HOST@TEST.ORG 12 | 13 | 14 | hive.server2.authentication.kerberos.keytab 15 | 
/share/keytabs/hadoop-master1/hive.service.keytab 16 | 17 | 18 | 19 | 20 | hive.metastore.sasl.enabled 21 | true 22 | 23 | 24 | hive.metastore.kerberos.principal 25 | hive/_HOST@TEST.ORG 26 | 27 | 28 | hive.metastore.kerberos.keytab.file 29 | /share/keytabs/hadoop-master1/hive.service.keytab 30 | 31 | {% endif %} 32 | 33 | hive.metastore.warehouse.dir 34 | hdfs://hadoop-master1.orb.local:8020/warehouse 35 | 36 | 37 | hive.metastore.uris 38 | thrift://hadoop-master1.orb.local:9083 39 | 40 | 41 | javax.jdo.option.ConnectionURL 42 | jdbc:mysql://mysql:3306/metastore?useSSL=false 43 | 44 | 45 | javax.jdo.option.ConnectionDriverName 46 | com.mysql.cj.jdbc.Driver 47 | 48 | 49 | javax.jdo.option.ConnectionUserName 50 | root 51 | 52 | 53 | javax.jdo.option.ConnectionPassword 54 | root 55 | 56 | 57 | datanucleus.autoCreateSchema 58 | false 59 | 60 | 61 | datanucleus.fixedDatastore 62 | true 63 | 64 | 65 | datanucleus.autoStartMechanism 66 | SchemaTable 67 | 68 | 69 | hive.metastore.connect.retries 70 | 15 71 | 72 | 73 | hive.security.authorization.createtable.owner.grants 74 | ALL 75 | The set of privileges automatically granted to the owner whenever a table gets created. 76 | 77 | 78 | hive.users.in.admin.role 79 | hdfs,hive 80 | 81 | 82 | 83 | metastore.storage.schema.reader.impl 84 | org.apache.hadoop.hive.metastore.SerDeStorageSchemaReader 85 | 86 | 87 | hive.support.concurrency 88 | true 89 | 90 | 91 | hive.txn.manager 92 | org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager 93 | 94 | 95 | hive.lock.manager 96 | org.apache.hadoop.hive.ql.lockmgr.EmbeddedLockManager 97 | 98 | 99 | hive.compactor.initiator.on 100 | true 101 | 102 | 103 | hive.compactor.worker.threads 104 | 1 105 | 106 | 107 | hive.metastore.disallow.incompatible.col.type.changes 108 | false 109 | 110 | 111 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/kyuubi/conf/kyuubi-defaults.conf.j2: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | ## Kyuubi Configurations 19 | 20 | {% if kerberos_enabled %} 21 | kyuubi.authentication KERBEROS 22 | kyuubi.kinit.principal kyuubi/_HOST@TEST.ORG 23 | kyuubi.kinit.keytab /share/keytabs/hadoop-master1/kyuubi.service.keytab 24 | {% endif %} 25 | 26 | kyuubi.frontend.bind.host hadoop-master1.orb.local 27 | kyuubi.frontend.protocols THRIFT_BINARY,REST 28 | kyuubi.frontend.thrift.binary.bind.port 10009 29 | kyuubi.frontend.rest.bind.port 10099 30 | 31 | kyuubi.engine.type SPARK_SQL 32 | kyuubi.engine.share.level USER 33 | kyuubi.session.engine.initialize.timeout PT3M 34 | 35 | kyuubi.ha.addresses hadoop-master1.orb.local:2181 36 | kyuubi.ha.namespace kyuubi 37 | 38 | kyuubi.yarn.user.strategy ADMIN 39 | kyuubi.yarn.user.admin yarn 40 | 41 | kyuubi.metadata.store.jdbc.database.schema.init=false 42 | kyuubi.metadata.store.jdbc.database.type=mysql 43 | kyuubi.metadata.store.jdbc.driver=com.mysql.jdbc.Driver 44 | kyuubi.metadata.store.jdbc.url=jdbc:mysql://mysql:3306/kyuubi?useSSL=false&useUnicode=true&characterEncoding=UTF-8 45 | kyuubi.metadata.store.jdbc.user=root 46 | kyuubi.metadata.store.jdbc.password=root 47 | 48 | # spark engine 49 | kyuubi.session.engine.spark.initialize.sql \ 50 | show databases in tpcds; \ 51 | show databases in tpch 52 | 53 | spark.yarn.maxAppAttempts 1 54 | spark.submit.deployMode cluster 55 | kyuubi.session.engine.startup.waitCompletion false 56 | 57 | # hive engine 58 | kyuubi.engine.hive.extra.classpath /opt/hadoop/share/hadoop/client/*:/opt/hadoop/share/hadoop/mapreduce/* 59 | 60 | # flink engine 61 | flink.execution.target yarn-application 62 | kyuubi.engine.flink.extra.classpath /opt/hadoop/share/hadoop/client/*:/opt/hadoop/share/hadoop/mapreduce/* 63 | 64 | # Details in https://kyuubi.readthedocs.io/en/master/configuration/settings.html 65 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/kyuubi/conf/kyuubi-env.sh: -------------------------------------------------------------------------------- 1 | export KYUUBI_LOG_DIR=/var/log/kyuubi 2 | export KYUUBI_WORK_DIR_ROOT=/var/lib/kyuubi/work 3 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/kyuubi/conf/log4j2.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ${env:KYUUBI_LOG_DIR} 5 | rest-audit.log 6 | k8s-audit.log 7 | operation-audit.log 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | {% if kafka_enabled %} 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | {% endif %} 33 | 34 | 35 | 36 | 37 | {% if kafka_enabled %} 38 | 39 | {% endif %} 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | {% if kafka_enabled %} 51 | 52 | {% endif %} 53 | 54 | 55 | 56 | 57 | {% if kafka_enabled %} 58 | 59 | {% endif %} 60 | 61 | 62 | 63 | 64 | {% if kafka_enabled %} 65 | 66 | {% endif %} 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/ranger/conf/install.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 
4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # 17 | # This file provides a list of the deployment variables for the Policy Manager Web Application 18 | # 19 | 20 | #------------------------- DB CONFIG - BEGIN ---------------------------------- 21 | # Uncomment the below if the DBA steps need to be run separately 22 | setup_mode=SeparateDBA 23 | 24 | PYTHON_COMMAND_INVOKER=python3 25 | 26 | #DB_FLAVOR=MYSQL|ORACLE|POSTGRES|MSSQL|SQLA 27 | DB_FLAVOR=MYSQL 28 | # 29 | 30 | # 31 | # Location of DB client library (please check the location of the jar file) 32 | # 33 | #SQL_CONNECTOR_JAR=/usr/share/java/ojdbc6.jar 34 | #SQL_CONNECTOR_JAR=/usr/share/java/mysql-connector-java.jar 35 | #SQL_CONNECTOR_JAR=/usr/share/java/postgresql.jar 36 | #SQL_CONNECTOR_JAR=/usr/share/java/sqljdbc4.jar 37 | #SQL_CONNECTOR_JAR=/opt/sqlanywhere17/java/sajdbc4.jar 38 | SQL_CONNECTOR_JAR=/opt/ranger/mysql-connector-j-__REPLACE_MYSQL_JDBC_VERSION__.jar 39 | 40 | 41 | # 42 | # DB password for the DB admin user-id 43 | # ************************************************************************** 44 | # ** If the password is left empty or not-defined here, 45 | # ** it will try with blank password during installation process 46 | # ************************************************************************** 47 | # 48 | #db_root_user=root|SYS|postgres|sa|dba 49 | #db_host=host:port # for DB_FLAVOR=MYSQL|POSTGRES|SQLA|MSSQL #for example: db_host=localhost:3306 50 | #db_host=host:port:SID # for DB_FLAVOR=ORACLE #for SID example: db_host=localhost:1521:ORCL 51 | #db_host=host:port/ServiceName # for DB_FLAVOR=ORACLE #for Service example: db_host=localhost:1521/XE 52 | db_root_user=root 53 | db_root_password=root 54 | db_host=mysql:3306 55 | #SSL config 56 | db_ssl_enabled=false 57 | db_ssl_required=false 58 | db_ssl_verifyServerCertificate=false 59 | #db_ssl_auth_type=1-way|2-way, where 1-way represents standard one way ssl authentication and 2-way represents mutual ssl authentication 60 | db_ssl_auth_type=2-way 61 | javax_net_ssl_keyStore= 62 | javax_net_ssl_keyStorePassword= 63 | javax_net_ssl_trustStore= 64 | javax_net_ssl_trustStorePassword= 65 | javax_net_ssl_trustStore_type=jks 66 | javax_net_ssl_keyStore_type=jks 67 | 68 | # For postgresql db 69 | db_ssl_certificate_file= 70 | 71 | # 72 | # DB UserId used for the Ranger schema 73 | # 74 | db_name=ranger 75 | db_user=root 76 | db_password=root 77 | 78 | #For over-riding the jdbc url. 79 | is_override_db_connection_string=false 80 | db_override_connection_string= 81 | 82 | 83 | # change password. Password for below mentioned users can be changed only once using this property. 84 | #PLEASE NOTE :: Password should be minimum 8 characters with min one alphabet and one numeric. 85 | rangerAdmin_password=Ranger@admin123 86 | rangerTagsync_password=Ranger@admin123 87 | rangerUsersync_password=Ranger@admin123 88 | keyadmin_password=Ranger@admin123 89 | 90 | 91 | #Source for Audit Store. 
Currently solr, elasticsearch and cloudwatch logs are supported. 92 | # * audit_store is solr 93 | audit_store= 94 | 95 | # * audit_solr_url Elasticsearch Host(s). E.g. 127.0.0.1 96 | audit_elasticsearch_urls= 97 | audit_elasticsearch_port= 98 | audit_elasticsearch_protocol= 99 | audit_elasticsearch_user= 100 | audit_elasticsearch_password= 101 | audit_elasticsearch_index= 102 | audit_elasticsearch_bootstrap_enabled=false 103 | 104 | 105 | # * audit_solr_url URL to Solr. E.g. http://:6083/solr/ranger_audits 106 | audit_solr_urls= 107 | audit_solr_user= 108 | audit_solr_password= 109 | audit_solr_zookeepers= 110 | 111 | audit_solr_collection_name=ranger_audits 112 | #solr Properties for cloud mode 113 | audit_solr_config_name=ranger_audits 114 | audit_solr_configset_location= 115 | audit_solr_no_shards=1 116 | audit_solr_no_replica=1 117 | audit_solr_max_shards_per_node=1 118 | audit_solr_acl_user_list_sasl=solr,infra-solr 119 | audit_solr_bootstrap_enabled=false 120 | 121 | # * audit to amazon cloudwatch properties 122 | audit_cloudwatch_region= 123 | audit_cloudwatch_log_group= 124 | audit_cloudwatch_log_stream_prefix= 125 | 126 | #------------------------- DB CONFIG - END ---------------------------------- 127 | 128 | # 129 | # ------- PolicyManager CONFIG ---------------- 130 | # 131 | 132 | policymgr_external_url=http://localhost:6080 133 | policymgr_http_enabled=true 134 | policymgr_https_keystore_file= 135 | policymgr_https_keystore_keyalias=rangeradmin 136 | policymgr_https_keystore_password= 137 | 138 | #Add Supported Components list below separated by semi-colon, default value is empty string to support all components 139 | #Example : policymgr_supportedcomponents=hive,hbase,hdfs 140 | policymgr_supportedcomponents= 141 | 142 | # 143 | # ------- PolicyManager CONFIG - END --------------- 144 | # 145 | 146 | 147 | # 148 | # ------- UNIX User CONFIG ---------------- 149 | # 150 | unix_user=ranger 151 | unix_user_pwd=ranger@admin 152 | unix_group=ranger 153 | 154 | # 155 | # ------- UNIX User CONFIG - END ---------------- 156 | # 157 | # 158 | 159 | # 160 | # UNIX authentication service for Policy Manager 161 | # 162 | # PolicyManager can authenticate using UNIX username/password 163 | # The UNIX server specified here as authServiceHostName needs to be installed with ranger-unix-ugsync package. 164 | # Once the service is installed on authServiceHostName, the UNIX username/password from the host can be used to login into policy manager 165 | # 166 | # ** The installation of ranger-unix-ugsync package can be installed after the policymanager installation is finished. 
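# Note: with authentication_method=NONE (set just below), Ranger Admin authenticates against its internal user store, so the web UI at policymgr_external_url is reached with the built-in admin user and the rangerAdmin_password configured above.
# As a rough sketch only (not used by this template), switching to UNIX authentication would look something like:
#   authentication_method=UNIX
#   remoteLoginEnabled=true
#   authServiceHostName=<host running the ranger unixauth service>
#   authServicePort=5151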
167 | # 168 | #LDAP|ACTIVE_DIRECTORY|UNIX|NONE 169 | authentication_method=NONE 170 | remoteLoginEnabled=true 171 | authServiceHostName=localhost 172 | authServicePort=5151 173 | ranger_unixauth_keystore=keystore.jks 174 | ranger_unixauth_keystore_password=password 175 | ranger_unixauth_truststore=cacerts 176 | ranger_unixauth_truststore_password=changeit 177 | 178 | ####LDAP settings - Required only if have selected LDAP authentication #### 179 | # 180 | # Sample Settings 181 | # 182 | #xa_ldap_url=ldap://127.0.0.1:389 183 | #xa_ldap_userDNpattern=uid={0},ou=users,dc=xasecure,dc=net 184 | #xa_ldap_groupSearchBase=ou=groups,dc=xasecure,dc=net 185 | #xa_ldap_groupSearchFilter=(member=uid={0},ou=users,dc=xasecure,dc=net) 186 | #xa_ldap_groupRoleAttribute=cn 187 | #xa_ldap_base_dn=dc=xasecure,dc=net 188 | #xa_ldap_bind_dn=cn=admin,ou=users,dc=xasecure,dc=net 189 | #xa_ldap_bind_password= 190 | #xa_ldap_referral=follow|ignore 191 | #xa_ldap_userSearchFilter=(uid={0}) 192 | 193 | xa_ldap_url= 194 | xa_ldap_userDNpattern= 195 | xa_ldap_groupSearchBase= 196 | xa_ldap_groupSearchFilter= 197 | xa_ldap_groupRoleAttribute= 198 | xa_ldap_base_dn= 199 | xa_ldap_bind_dn= 200 | xa_ldap_bind_password= 201 | xa_ldap_referral= 202 | xa_ldap_userSearchFilter= 203 | ####ACTIVE_DIRECTORY settings - Required only if have selected AD authentication #### 204 | # 205 | # Sample Settings 206 | # 207 | #xa_ldap_ad_domain=xasecure.net 208 | #xa_ldap_ad_url=ldap://127.0.0.1:389 209 | #xa_ldap_ad_base_dn=dc=xasecure,dc=net 210 | #xa_ldap_ad_bind_dn=cn=administrator,ou=users,dc=xasecure,dc=net 211 | #xa_ldap_ad_bind_password= 212 | #xa_ldap_ad_referral=follow|ignore 213 | #xa_ldap_ad_userSearchFilter=(sAMAccountName={0}) 214 | 215 | xa_ldap_ad_domain= 216 | xa_ldap_ad_url= 217 | xa_ldap_ad_base_dn= 218 | xa_ldap_ad_bind_dn= 219 | xa_ldap_ad_bind_password= 220 | xa_ldap_ad_referral= 221 | xa_ldap_ad_userSearchFilter= 222 | 223 | #------------ Kerberos Config ----------------- 224 | spnego_principal= 225 | spnego_keytab= 226 | token_valid=30 227 | cookie_domain= 228 | cookie_path=/ 229 | admin_principal= 230 | admin_keytab= 231 | lookup_principal= 232 | lookup_keytab= 233 | hadoop_conf=/etc/hadoop/conf 234 | # 235 | #-------- SSO CONFIG - Start ------------------ 236 | # 237 | sso_enabled=false 238 | sso_providerurl=https://127.0.0.1:8443/gateway/knoxsso/api/v1/websso 239 | sso_publickey= 240 | 241 | # 242 | #-------- SSO CONFIG - END ------------------ 243 | 244 | # Custom log directory path 245 | RANGER_ADMIN_LOG_DIR=$PWD 246 | RANGER_ADMIN_LOGBACK_CONF_FILE= 247 | 248 | # PID file path 249 | RANGER_PID_DIR_PATH=$PWD 250 | 251 | # ################# DO NOT MODIFY ANY VARIABLES BELOW ######################### 252 | # 253 | # --- These deployment variables are not to be modified unless you understand the full impact of the changes 254 | # 255 | ################################################################################ 256 | XAPOLICYMGR_DIR=$PWD 257 | app_home=$PWD/ews/webapp 258 | TMPFILE=$PWD/.fi_tmp 259 | LOGFILE=$PWD/logfile 260 | LOGFILES="$LOGFILE" 261 | 262 | JAVA_BIN='java' 263 | JAVA_VERSION_REQUIRED='1.8' 264 | JAVA_ORACLE='Java(TM) SE Runtime Environment' 265 | 266 | ranger_admin_max_heap_size=512m 267 | #retry DB and Java patches after the given time in seconds. 
268 | PATCH_RETRY_INTERVAL=120 269 | STALE_PATCH_ENTRY_HOLD_TIME=10 270 | 271 | #mysql_create_user_file=${PWD}/db/mysql/create_dev_user.sql 272 | mysql_core_file=db/mysql/optimized/current/ranger_core_db_mysql.sql 273 | mysql_audit_file=db/mysql/xa_audit_db.sql 274 | #mysql_asset_file=${PWD}/db/mysql/reset_asset.sql 275 | 276 | #oracle_create_user_file=${PWD}/db/oracle/create_dev_user_oracle.sql 277 | oracle_core_file=db/oracle/optimized/current/ranger_core_db_oracle.sql 278 | oracle_audit_file=db/oracle/xa_audit_db_oracle.sql 279 | #oracle_asset_file=${PWD}/db/oracle/reset_asset_oracle.sql 280 | # 281 | postgres_core_file=db/postgres/optimized/current/ranger_core_db_postgres.sql 282 | postgres_audit_file=db/postgres/xa_audit_db_postgres.sql 283 | # 284 | sqlserver_core_file=db/sqlserver/optimized/current/ranger_core_db_sqlserver.sql 285 | sqlserver_audit_file=db/sqlserver/xa_audit_db_sqlserver.sql 286 | # 287 | sqlanywhere_core_file=db/sqlanywhere/optimized/current/ranger_core_db_sqlanywhere.sql 288 | sqlanywhere_audit_file=db/sqlanywhere/xa_audit_db_sqlanywhere.sql 289 | cred_keystore_filename=$app_home/WEB-INF/classes/conf/.jceks/rangeradmin.jceks 290 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/ranger/startup.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | 16 | set -xe 17 | 18 | function setup_config() { 19 | cp /etc/ranger/conf/* ${RANGER_HOME}/ 20 | sed -i 's/__REPLACE_MYSQL_JDBC_VERSION__/'"${MYSQL_JDBC_VERSION}"'/g' "${RANGER_HOME}/install.properties" 21 | } 22 | 23 | function setup_ranger() { 24 | cd ${RANGER_HOME} 25 | sh setup.sh && sh ews/ranger-admin-services.sh start 26 | } 27 | 28 | setup_config 29 | 30 | setup_ranger 31 | 32 | RANGER_ADMIN_PID=`ps -ef | grep -v grep | grep -i "org.apache.ranger.server.tomcat.EmbeddedServer" | awk '{print $2}'` 33 | 34 | tail --pid=$RANGER_ADMIN_PID -f /dev/null 35 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/spark/conf/log4j2.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | {% if loki_enabled %} 8 | 9 | loki 10 | 3100 11 | 12 | %X{tid} [%t] %d{yyyy-MM-dd HH:mm:ss.SSS} %5p %c{1} - %m%n%exception{full} 13 | 14 | 17 | {% endif %} 18 | 19 | 20 | 21 | 22 | {% if loki_enabled %} 23 | 24 | {% endif %} 25 | 26 | 27 | 28 | {% if loki_enabled %} 29 | 30 | {% endif %} 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/spark/conf/spark-defaults.conf.j2: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. 
See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Default system properties included when running spark-submit. 19 | # This is useful for setting default environmental settings. 20 | 21 | spark.master=yarn 22 | spark.submit.deployMode=client 23 | spark.driver.memory=512m 24 | spark.executor.cores=1 25 | spark.executor.memory=512m 26 | 27 | spark.yarn.appMasterEnv.JAVA_HOME=/opt/openjdk-17 28 | spark.executorEnv.JAVA_HOME=/opt/openjdk-17 29 | 30 | spark.driver.extraLibraryPath=/opt/hadoop/lib/native 31 | spark.executor.extraLibraryPath=/opt/hadoop/lib/native 32 | 33 | spark.eventLog.enabled=true 34 | spark.eventLog.dir=hdfs://hadoop-master1.orb.local:8020/spark-history 35 | 36 | spark.history.ui.port=18080 37 | spark.history.fs.logDirectory=hdfs://hadoop-master1.orb.local:8020/spark-history 38 | {% if kerberos_enabled %} 39 | spark.history.kerberos.enabled=true 40 | spark.history.kerberos.principal=spark/hadoop-master1.orb.local@TEST.ORG 41 | spark.history.kerberos.keytab=/share/keytabs/hadoop-master1/spark.service.keytab 42 | {% endif %} 43 | spark.yarn.historyServer.address=hadoop-master1.orb.local:18080 44 | 45 | spark.shuffle.service.enabled=true 46 | 47 | spark.sql.catalog.tpcds=org.apache.kyuubi.spark.connector.tpcds.TPCDSCatalog 48 | spark.sql.catalog.tpch=org.apache.kyuubi.spark.connector.tpch.TPCHCatalog 49 | 50 | {% if iceberg_enabled %} 51 | spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions 52 | 53 | spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkSessionCatalog 54 | spark.sql.catalog.spark_catalog.type=hive 55 | {% endif %} 56 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/spark/conf/spark-env.sh: -------------------------------------------------------------------------------- 1 | export JAVA_HOME=/opt/openjdk-17 2 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/supervisor/conf.d/flink-history-server.conf: -------------------------------------------------------------------------------- 1 | [program:flink-history-server] 2 | directory=/opt/flink 3 | command=/opt/flink/bin/historyserver.sh start-foreground 4 | startsecs=30 5 | stopwaitsecs=10 6 | user=flink 7 | redirect_stderr=true 8 | stdout_logfile=/var/log/flink-history-server.log 9 | {% if flink_enabled %} 10 | autostart=true 11 | {% else %} 12 | autostart=false 13 | {% endif %} 14 | 15 | environment= 16 | JAVA_HOME=/opt/openjdk-8, 17 | FLINK_CONF_DIR=/etc/flink/conf_history_server, 18 | FLINK_NO_DAEMONIZE=1 19 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/supervisor/conf.d/hdfs-namenode.conf: -------------------------------------------------------------------------------- 1 | 
[program:hdfs-namenode] 2 | directory=/opt 3 | command=bash /opt/start-namenode.sh 4 | startsecs=30 5 | stopwaitsecs=10 6 | user=hdfs 7 | redirect_stderr=true 8 | stdout_logfile=/var/log/hdfs-namenode.log 9 | autostart=true 10 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/supervisor/conf.d/hive-metastore.conf: -------------------------------------------------------------------------------- 1 | [program:hive-metastore] 2 | directory=/opt/hive 3 | command=hive --service metastore 4 | startsecs=30 5 | stopwaitsecs=10 6 | user=hive 7 | redirect_stderr=true 8 | stdout_logfile=/var/log/hive-metastore.log 9 | autostart=true 10 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/supervisor/conf.d/hive-server2.conf: -------------------------------------------------------------------------------- 1 | [program:hive-server2] 2 | directory=/opt/hive 3 | command=/opt/start-hive-server2.sh 4 | startsecs=30 5 | stopwaitsecs=10 6 | user=hive 7 | redirect_stderr=true 8 | stdout_logfile=/var/log/hive-server2.log 9 | autostart=true 10 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/supervisor/conf.d/kyuubi-server.conf: -------------------------------------------------------------------------------- 1 | [program:kyuubi-server] 2 | directory=/opt/kyuubi 3 | command=kyuubi run 4 | startsecs=30 5 | stopwaitsecs=10 6 | user=kyuubi 7 | redirect_stderr=true 8 | stdout_logfile=/var/log/kyuubi-server.log 9 | autostart=true 10 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/supervisor/conf.d/mapred-history-server.conf: -------------------------------------------------------------------------------- 1 | [program:mapred-history-server] 2 | directory=/opt/hadoop 3 | command=/opt/hadoop/bin/mapred historyserver 4 | startsecs=30 5 | stopwaitsecs=10 6 | user=mapred 7 | redirect_stderr=true 8 | stdout_logfile=/var/log/mapred-history-server.log 9 | autostart=true 10 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/supervisor/conf.d/ranger-admin.conf: -------------------------------------------------------------------------------- 1 | [program:ranger-admin] 2 | directory=/etc/ranger 3 | command=bash startup.sh 4 | startsecs=30 5 | stopwaitsecs=10 6 | user=ranger 7 | redirect_stderr=true 8 | stdout_logfile=/var/log/ranger-admin.log 9 | {% if ranger_enabled %} 10 | autostart=true 11 | {% else %} 12 | autostart=false 13 | {% endif %} 14 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/supervisor/conf.d/spark-history-server.conf: -------------------------------------------------------------------------------- 1 | [program:spark-history-server] 2 | directory=/opt/spark 3 | command=/opt/start-spark-history-server.sh 4 | startsecs=30 5 | stopwaitsecs=10 6 | user=spark 7 | redirect_stderr=true 8 | stdout_logfile=/var/log/spark-history-server.log 9 | {% if spark_enabled %} 10 | autostart=true 11 | {% else %} 12 | autostart=false 13 | {% endif %} 14 | 15 | environment= 16 | JAVA_HOME=/opt/openjdk-17, 17 | SPARK_NO_DAEMONIZE=1 18 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/supervisor/conf.d/yarn-resourcemanager.conf: 
-------------------------------------------------------------------------------- 1 | [program:yarn-resourcemanager] 2 | directory=/opt/hadoop 3 | command=/opt/start-resourcemanager.sh 4 | startsecs=30 5 | stopwaitsecs=10 6 | user=yarn 7 | redirect_stderr=true 8 | stdout_logfile=/var/log/yarn-resourcemanager.log 9 | autostart=true 10 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/supervisor/conf.d/zeppelin-server.conf: -------------------------------------------------------------------------------- 1 | [program:zeppelin-server] 2 | directory=/opt/zeppelin 3 | command=/opt/zeppelin/bin/zeppelin.sh 4 | startsecs=30 5 | stopwaitsecs=10 6 | user=zeppelin 7 | redirect_stderr=true 8 | stdout_logfile=/var/log/zeppelin-server.log 9 | {% if zeppelin_enabled %} 10 | autostart=true 11 | {% else %} 12 | autostart=false 13 | {% endif %} 14 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/supervisor/conf.d/zookeeper.conf: -------------------------------------------------------------------------------- 1 | [program:zookeeper] 2 | directory=/opt/zookeeper 3 | command=zkServer.sh start-foreground 4 | startsecs=30 5 | stopwaitsecs=10 6 | user=zookeeper 7 | redirect_stderr=true 8 | stdout_logfile=/var/log/zookeeper.log 9 | autostart=true 10 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/zeppelin/conf/configuration.xsl: -------------------------------------------------------------------------------- 1 | 2 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 |
(configuration.xsl: XSL markup lost during extraction; the stylesheet renders a configuration XML file as an HTML table with name, value, and description columns)
42 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/zeppelin/conf/interpreter-list.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | # 18 | # [name] [maven artifact] [description] 19 | 20 | alluxio org.apache.zeppelin:zeppelin-alluxio:{{ zeppelin_version }} Alluxio interpreter 21 | angular org.apache.zeppelin:zeppelin-angular:{{ zeppelin_version }} HTML and AngularJS view rendering 22 | bigquery org.apache.zeppelin:zeppelin-bigquery:{{ zeppelin_version }} BigQuery interpreter 23 | cassandra org.apache.zeppelin:zeppelin-cassandra:{{ zeppelin_version }} Cassandra interpreter 24 | elasticsearch org.apache.zeppelin:zeppelin-elasticsearch:{{ zeppelin_version }} Elasticsearch interpreter 25 | file org.apache.zeppelin:zeppelin-file:{{ zeppelin_version }} HDFS file interpreter 26 | flink org.apache.zeppelin:zeppelin-flink:{{ zeppelin_version }} Flink interpreter 27 | groovy org.apache.zeppelin:zeppelin-groovy:{{ zeppelin_version }} Groovy interpreter 28 | hbase org.apache.zeppelin:zeppelin-hbase:{{ zeppelin_version }} Hbase interpreter 29 | java org.apache.zeppelin:zeppelin-java:{{ zeppelin_version }} Java interpreter 30 | jdbc org.apache.zeppelin:zeppelin-jdbc:{{ zeppelin_version }} Jdbc interpreter 31 | livy org.apache.zeppelin:zeppelin-livy:{{ zeppelin_version }} Livy interpreter 32 | md org.apache.zeppelin:zeppelin-markdown:{{ zeppelin_version }} Markdown support 33 | neo4j org.apache.zeppelin:zeppelin-neo4j:{{ zeppelin_version }} Neo4j interpreter 34 | python org.apache.zeppelin:zeppelin-python:{{ zeppelin_version }} Python interpreter 35 | shell org.apache.zeppelin:zeppelin-shell:{{ zeppelin_version }} Shell command 36 | sparql org.apache.zeppelin:zeppelin-sparql:{{ zeppelin_version }} Sparql interpreter 37 | submarine org.apache.zeppelin:zeppelin-submarine:{{ zeppelin_version }} Submarine interpreter 38 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/zeppelin/conf/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | log4j.rootLogger = INFO, dailyfile 19 | 20 | log4j.appender.stdout = org.apache.log4j.ConsoleAppender 21 | log4j.appender.stdout.layout = org.apache.log4j.PatternLayout 22 | log4j.appender.stdout.layout.ConversionPattern=%5p [%d] ([%t] %F[%M]:%L) - %m%n 23 | 24 | log4j.appender.dailyfile.DatePattern=.yyyy-MM-dd 25 | log4j.appender.dailyfile = org.apache.log4j.DailyRollingFileAppender 26 | log4j.appender.dailyfile.File = ${zeppelin.log.file} 27 | log4j.appender.dailyfile.layout = org.apache.log4j.PatternLayout 28 | log4j.appender.dailyfile.layout.ConversionPattern=%5p [%d] ([%t] %F[%M]:%L) - %m%n -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/zeppelin/conf/log4j2.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | # This affects logging for both user code and Flink 20 | rootLogger.level = INFO 21 | rootLogger.appenderRef.file.ref = MainAppender 22 | 23 | # Uncomment this if you want to _only_ change Flink's logging 24 | #logger.flink.name = org.apache.flink 25 | #logger.flink.level = INFO 26 | 27 | # The following lines keep the log level of common libraries/connectors on 28 | # log level INFO. The root logger does not override this. You have to manually 29 | # change the log levels here. 
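# As an illustration only (not enabled in this template), pinning one more library to a quieter level follows the same pattern:
# logger.jetty.name = org.eclipse.jetty
# logger.jetty.level = WARN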
30 | logger.akka.name = akka 31 | logger.akka.level = INFO 32 | logger.kafka.name= org.apache.kafka 33 | logger.kafka.level = INFO 34 | logger.hadoop.name = org.apache.hadoop 35 | logger.hadoop.level = INFO 36 | logger.zookeeper.name = org.apache.zookeeper 37 | logger.zookeeper.level = INFO 38 | 39 | # Log all infos in the given file 40 | appender.main.name = MainAppender 41 | appender.main.type = File 42 | appender.main.append = false 43 | appender.main.fileName = ${sys:zeppelin.log.file} 44 | appender.main.layout.type = PatternLayout 45 | appender.main.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n 46 | 47 | # Suppress the irrelevant (wrong) warnings from the Netty channel handler 48 | logger.netty.name = org.apache.flink.shaded.akka.org.jboss.netty.channel.DefaultChannelPipeline 49 | logger.netty.level = OFF -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/zeppelin/conf/log4j_yarn_cluster.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | log4j.rootLogger = INFO, stdout 19 | 20 | log4j.appender.stdout = org.apache.log4j.ConsoleAppender 21 | log4j.appender.stdout.layout = org.apache.log4j.PatternLayout 22 | log4j.appender.stdout.layout.ConversionPattern=%5p [%d] ([%t] %F[%M]:%L) - %m%n 23 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/zeppelin/conf/shiro.ini.template: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | [users] 19 | # List of users with their password allowed to access Zeppelin. 20 | # To use a different strategy (LDAP / Database / ...) 
check the shiro doc at http://shiro.apache.org/configuration.html#Configuration-INISections 21 | # To enable admin user, uncomment the following line and set an appropriate password. 22 | #admin = password1, admin 23 | user1 = password2, role1, role2 24 | user2 = password3, role3 25 | user3 = password4, role2 26 | 27 | # Sample LDAP configuration, for user Authentication, currently tested for single Realm 28 | [main] 29 | ### A sample for configuring Active Directory Realm 30 | #activeDirectoryRealm = org.apache.zeppelin.realm.ActiveDirectoryGroupRealm 31 | #activeDirectoryRealm.systemUsername = userNameA 32 | 33 | #use either systemPassword or hadoopSecurityCredentialPath, more details in http://zeppelin.apache.org/docs/latest/security/shiroauthentication.html 34 | #activeDirectoryRealm.systemPassword = passwordA 35 | #activeDirectoryRealm.hadoopSecurityCredentialPath = jceks://file/user/zeppelin/zeppelin.jceks 36 | #activeDirectoryRealm.searchBase = CN=Users,DC=SOME_GROUP,DC=COMPANY,DC=COM 37 | #activeDirectoryRealm.url = ldap://ldap.test.com:389 38 | #activeDirectoryRealm.groupRolesMap = "CN=admin,OU=groups,DC=SOME_GROUP,DC=COMPANY,DC=COM":"admin","CN=finance,OU=groups,DC=SOME_GROUP,DC=COMPANY,DC=COM":"finance","CN=hr,OU=groups,DC=SOME_GROUP,DC=COMPANY,DC=COM":"hr" 39 | #activeDirectoryRealm.authorizationCachingEnabled = false 40 | 41 | ### A sample for configuring LDAP Directory Realm 42 | #ldapRealm = org.apache.zeppelin.realm.LdapGroupRealm 43 | ## search base for ldap groups (only relevant for LdapGroupRealm): 44 | #ldapRealm.contextFactory.environment[ldap.searchBase] = dc=COMPANY,dc=COM 45 | #ldapRealm.contextFactory.url = ldap://ldap.test.com:389 46 | #ldapRealm.userDnTemplate = uid={0},ou=Users,dc=COMPANY,dc=COM 47 | #ldapRealm.contextFactory.authenticationMechanism = simple 48 | 49 | ### A sample PAM configuration 50 | #pamRealm=org.apache.zeppelin.realm.PamRealm 51 | #pamRealm.service=sshd 52 | 53 | ## A same for configuring Knox SSO Realm 54 | #knoxJwtRealm = org.apache.zeppelin.realm.jwt.KnoxJwtRealm 55 | #knoxJwtRealm.providerUrl = https://domain.example.com/ 56 | #knoxJwtRealm.login = gateway/knoxsso/knoxauth/login.html 57 | #knoxJwtRealm.logout = gateway/knoxssout/api/v1/webssout 58 | #knoxJwtRealm.logoutAPI = true 59 | #knoxJwtRealm.redirectParam = originalUrl 60 | #knoxJwtRealm.cookieName = hadoop-jwt 61 | #knoxJwtRealm.publicKeyPath = /etc/zeppelin/conf/knox-sso.pem 62 | # 63 | #authc = org.apache.zeppelin.realm.jwt.KnoxAuthenticationFilter 64 | 65 | ### A sample for configuring Kerberos Realm 66 | # krbRealm = org.apache.zeppelin.realm.kerberos.KerberosRealm 67 | # krbRealm.principal = HTTP/zeppelin.fqdn.domain.com@EXAMPLE.COM 68 | # krbRealm.keytab = /etc/security/keytabs/spnego.service.keytab 69 | # krbRealm.nameRules = DEFAULT 70 | # krbRealm.signatureSecretFile = /etc/security/http_secret 71 | # krbRealm.tokenValidity = 36000 72 | # krbRealm.cookieDomain = domain.com 73 | # krbRealm.cookiePath = / 74 | # krbRealm.logout = /logout 75 | # krbRealm.logoutAPI = true 76 | # krbRealm.providerUrl = https://domain.example.com/ 77 | # krbRealm.redirectParam = originalUrl 78 | # authc = org.apache.zeppelin.realm.kerberos.KerberosAuthenticationFilter 79 | 80 | sessionManager = org.apache.shiro.web.session.mgt.DefaultWebSessionManager 81 | 82 | ### If caching of user is required then uncomment below lines 83 | #cacheManager = org.apache.shiro.cache.MemoryConstrainedCacheManager 84 | #securityManager.cacheManager = $cacheManager 85 | 86 | ### Enables 'HttpOnly' flag in 
Zeppelin cookies 87 | cookie = org.apache.shiro.web.servlet.SimpleCookie 88 | cookie.name = JSESSIONID 89 | cookie.httpOnly = true 90 | ### Uncomment the below line only when Zeppelin is running over HTTPS 91 | #cookie.secure = true 92 | sessionManager.sessionIdCookie = $cookie 93 | 94 | securityManager.sessionManager = $sessionManager 95 | # 86,400,000 milliseconds = 24 hour 96 | securityManager.sessionManager.globalSessionTimeout = 86400000 97 | shiro.loginUrl = /api/login 98 | 99 | [roles] 100 | role1 = * 101 | role2 = * 102 | role3 = * 103 | admin = * 104 | 105 | [urls] 106 | # This section is used for url-based security. For details see the shiro.ini documentation. 107 | # 108 | # You can secure interpreter, configuration and credential information by urls. 109 | # Comment or uncomment the below urls that you want to hide: 110 | # anon means the access is anonymous. 111 | # authc means form based auth Security. 112 | # 113 | # IMPORTANT: Order matters: URL path expressions are evaluated against an incoming request 114 | # in the order they are defined and the FIRST MATCH WINS. 115 | # 116 | # To allow anonymous access to all but the stated urls, 117 | # uncomment the line second last line (/** = anon) and comment the last line (/** = authc) 118 | # 119 | /api/version = anon 120 | /api/cluster/address = anon 121 | # Allow all authenticated users to restart interpreters on a notebook page. 122 | # Comment out the following line if you would like to authorize only admin users to restart interpreters. 123 | /api/interpreter/setting/restart/** = authc 124 | /api/interpreter/** = authc, roles[admin] 125 | /api/notebook-repositories/** = authc, roles[admin] 126 | /api/configurations/** = authc, roles[admin] 127 | /api/credential/** = authc, roles[admin] 128 | /api/admin/** = authc, roles[admin] 129 | #/** = anon 130 | /** = authc 131 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/zeppelin/conf/zeppelin-env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | export JAVA_HOME=/opt/openjdk-11 20 | export ZEPPELIN_LOG_DIR=/var/log/zeppelin 21 | export ZEPPELIN_PID_DIR=/var/run/zeppelin/pid 22 | 23 | # export JAVA_HOME= 24 | # export USE_HADOOP= # Whether include hadoop jars into zeppelin server process. (true or false) 25 | # export SPARK_MASTER= # Spark master url. eg. spark://master_addr:7077. Leave empty if you want to use local mode. 
26 | # export ZEPPELIN_ADDR # Bind address (default 127.0.0.1) 27 | # export ZEPPELIN_PORT # port number to listen (default 8080) 28 | # export ZEPPELIN_LOCAL_IP # Zeppelin's thrift server ip address, if not specified, one random IP address will be choosen. 29 | # export ZEPPELIN_JAVA_OPTS # Additional jvm options. for example, export ZEPPELIN_JAVA_OPTS="-Dspark.executor.memory=8g -Dspark.cores.max=16" 30 | # export ZEPPELIN_MEM # Zeppelin jvm mem options Default -Xms1024m -Xmx1024m -XX:MaxMetaspaceSize=512m 31 | # export ZEPPELIN_INTP_MEM # zeppelin interpreter process jvm mem options. Default -Xms1024m -Xmx1024m -XX:MaxMetaspaceSize=512m 32 | # export ZEPPELIN_INTP_JAVA_OPTS # zeppelin interpreter process jvm options. 33 | # export ZEPPELIN_SSL_PORT # ssl port (used when ssl environment variable is set to true) 34 | # export ZEPPELIN_JMX_ENABLE # Enable JMX feature by defining "true" 35 | # export ZEPPELIN_JMX_PORT # Port number which JMX uses. If not set, JMX won't be enabled 36 | 37 | # export ZEPPELIN_LOG_DIR # Where log files are stored. PWD by default. 38 | # export ZEPPELIN_PID_DIR # The pid files are stored. ${ZEPPELIN_HOME}/run by default. 39 | # export ZEPPELIN_WAR_TEMPDIR # The location of jetty temporary directory. 40 | # export ZEPPELIN_NOTEBOOK_DIR # Where notebook saved 41 | # export ZEPPELIN_NOTEBOOK_HOMESCREEN # Id of notebook to be displayed in homescreen. ex) 2A94M5J1Z 42 | # export ZEPPELIN_NOTEBOOK_HOMESCREEN_HIDE # hide homescreen notebook from list when this value set to "true". default "false" 43 | 44 | # export ZEPPELIN_NOTEBOOK_S3_BUCKET # Bucket where notebook saved 45 | # export ZEPPELIN_NOTEBOOK_S3_ENDPOINT # Endpoint of the bucket 46 | # export ZEPPELIN_NOTEBOOK_S3_USER # User in bucket where notebook saved. For example bucket/user/notebook/2A94M5J1Z/note.json 47 | # export ZEPPELIN_NOTEBOOK_S3_KMS_KEY_ID # AWS KMS key ID 48 | # export ZEPPELIN_NOTEBOOK_S3_KMS_KEY_REGION # AWS KMS key region 49 | # export ZEPPELIN_NOTEBOOK_S3_SSE # Server-side encryption enabled for notebooks 50 | # export ZEPPELIN_NOTEBOOK_S3_PATH_STYLE_ACCESS # Path style access for S3 bucket 51 | 52 | # export ZEPPELIN_NOTEBOOK_GCS_STORAGE_DIR # GCS "directory" (prefix) under which notebooks are saved. E.g. gs://example-bucket/path/to/dir 53 | # export GOOGLE_APPLICATION_CREDENTIALS # Provide a service account key file for GCS and BigQuery API calls (overrides application default credentials) 54 | 55 | # export ZEPPELIN_NOTEBOOK_MONGO_URI # MongoDB connection URI used to connect to a MongoDB database server. Default "mongodb://localhost" 56 | # export ZEPPELIN_NOTEBOOK_MONGO_DATABASE # Database name to store notebook. Default "zeppelin" 57 | # export ZEPPELIN_NOTEBOOK_MONGO_COLLECTION # Collection name to store notebook. Default "notes" 58 | # export ZEPPELIN_NOTEBOOK_MONGO_AUTOIMPORT # If "true" import local notes under ZEPPELIN_NOTEBOOK_DIR on startup. Default "false" 59 | 60 | # export ZEPPELIN_IDENT_STRING # A string representing this instance of zeppelin. $USER by default. 61 | # export ZEPPELIN_NICENESS # The scheduling priority for daemons. Defaults to 0. 
62 | # export ZEPPELIN_INTERPRETER_LOCALREPO # Local repository for interpreter's additional dependency loading 63 | # export ZEPPELIN_INTERPRETER_DEP_MVNREPO # Remote principal repository for interpreter's additional dependency loading 64 | # export ZEPPELIN_HELIUM_NODE_INSTALLER_URL # Remote Node installer url for Helium dependency loader 65 | # export ZEPPELIN_HELIUM_NPM_INSTALLER_URL # Remote Npm installer url for Helium dependency loader 66 | # export ZEPPELIN_HELIUM_YARNPKG_INSTALLER_URL # Remote Yarn package installer url for Helium dependency loader 67 | # export ZEPPELIN_NOTEBOOK_STORAGE # Refers to pluggable notebook storage class, can have two classes simultaneously with a sync between them (e.g. local and remote). 68 | # export ZEPPELIN_NOTEBOOK_ONE_WAY_SYNC # If there are multiple notebook storages, should we treat the first one as the only source of truth? 69 | # export ZEPPELIN_NOTEBOOK_PUBLIC # Make notebook public by default when created, private otherwise 70 | 71 | # export DOCKER_TIME_ZONE # Set to the same time zone as the zeppelin server. E.g, "America/New_York" or "Asia/Shanghai" 72 | 73 | #### Spark interpreter configuration #### 74 | 75 | ## Kerberos ticket refresh setting 76 | ## 77 | #export KINIT_FAIL_THRESHOLD # (optional) How many times should kinit retry. The default value is 5. 78 | #export KERBEROS_REFRESH_INTERVAL # (optional) The refresh interval for Kerberos ticket. The default value is 1d. 79 | 80 | ## Use provided spark installation ## 81 | ## defining SPARK_HOME makes Zeppelin run spark interpreter process using spark-submit 82 | ## 83 | # export SPARK_HOME # (required) When it is defined, load it instead of Zeppelin embedded Spark libraries 84 | # export SPARK_SUBMIT_OPTIONS # (optional) extra options to pass to spark submit. eg) "--driver-memory 512M --executor-memory 1G". 85 | # export SPARK_APP_NAME # (optional) The name of spark application. 86 | # export SPARK_CONF_DIR # (optional) In the zeppelin interpreter on docker mode, Need to set the local spark conf folder path 87 | 88 | ## Use embedded spark binaries ## 89 | ## without SPARK_HOME defined, Zeppelin still able to run spark interpreter process using embedded spark binaries. 90 | ## however, it is not encouraged when you can define SPARK_HOME 91 | ## 92 | # Options read in YARN client mode 93 | # export HADOOP_CONF_DIR # yarn-site.xml is located in configuration directory in HADOOP_CONF_DIR. 94 | # Pyspark (supported with Spark 1.2.1 and above) 95 | # To configure pyspark, you need to set spark distribution's path to 'spark.home' property in Interpreter setting screen in Zeppelin GUI 96 | # export PYSPARK_PYTHON # path to the python command. must be the same path on the driver(Zeppelin) and all workers. 97 | # export PYTHONPATH 98 | 99 | ## Spark interpreter options ## 100 | ## 101 | # export ZEPPELIN_SPARK_USEHIVECONTEXT # Use HiveContext instead of SQLContext if set true. true by default. 102 | # export ZEPPELIN_SPARK_CONCURRENTSQL # Execute multiple SQL concurrently if set true. false by default. 103 | # export ZEPPELIN_SPARK_IMPORTIMPLICIT # Import implicits, UDF collection, and sql if set true. true by default. 104 | # export ZEPPELIN_SPARK_MAXRESULT # Max number of Spark SQL result to display. 1000 by default. 105 | # export ZEPPELIN_WEBSOCKET_MAX_TEXT_MESSAGE_SIZE # Size in characters of the maximum text message to be received by websocket. 
Defaults to 1024000 106 | 107 | #### HBase interpreter configuration #### 108 | 109 | ## To connect to HBase running on a cluster, either HBASE_HOME or HBASE_CONF_DIR must be set 110 | 111 | # export HBASE_HOME= # (required) Directory under which the HBase scripts and configuration live 112 | # export HBASE_CONF_DIR= # (optional) Alternatively, the configuration directory can be set to point to the directory that has hbase-site.xml 113 | 114 | #### Zeppelin impersonation configuration 115 | # export ZEPPELIN_IMPERSONATE_CMD # Optional, when you want to run the interpreter as the end web user. eg) 'sudo -H -u ${ZEPPELIN_IMPERSONATE_USER} bash -c ' 116 | # export ZEPPELIN_IMPERSONATE_SPARK_PROXY_USER # Optional, true by default; set to false if you don't want to use the --proxy-user option with the Spark interpreter when impersonation is enabled 117 |
-------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/zeppelin/conf/zeppelin-site.xml: --------------------------------------------------------------------------------
1 | <?xml version="1.0"?> 2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> 3 | <!-- 4 | Licensed to the Apache Software Foundation (ASF) under one or more 5 | contributor license agreements. See the NOTICE file distributed with 6 | this work for additional information regarding copyright ownership. 7 | The ASF licenses this file to You under the Apache License, Version 2.0 8 | (the "License"); you may not use this file except in compliance with 9 | the License. You may obtain a copy of the License at 10 | 11 | http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | Unless required by applicable law or agreed to in writing, software 14 | distributed under the License is distributed on an "AS IS" BASIS, 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | See the License for the specific language governing permissions and 17 | limitations under the License. 18 | --> 19 | 20 | <configuration> 21 |
22 | <property> 23 | <name>zeppelin.server.addr</name> 24 | <value>hadoop-master1.orb.local</value> 25 | <description>Server binding address. If you cannot connect to your web browser on WSL or Windows, change 127.0.0.1 to 0.0.0.0. It, however, causes security issues when you open your machine to the public</description> 26 | </property> 27 |
28 | <property> 29 | <name>zeppelin.server.port</name> 30 | <value>8081</value> 31 | <description>Server port.</description> 32 | </property> 33 |
34 | <property> 35 | <name>zeppelin.cluster.addr</name> 36 | <value></value> 37 | <description>Server cluster address, e.g. 127.0.0.1:6000,127.0.0.2:6000,127.0.0.3:6000</description> 38 | </property> 39 |
40 | <property> 41 | <name>zeppelin.server.context.path</name> 42 | <value>/</value> 43 | <description>Context Path of the Web Application</description> 44 | </property> 45 |
46 | <property> 47 | <name>zeppelin.war.tempdir</name> 48 | <value>/var/run/zeppelin/webapps</value> 49 | <description>Location of jetty temporary directory</description> 50 | </property> 51 |
52 | <property> 53 | <name>zeppelin.notebook.dir</name> 54 | <value>file:///var/lib/zeppelin/notebook</value> 55 | <description>Path or URI for notebook persistence</description> 56 | </property> 57 |
58 | <property> 59 | <name>zeppelin.note.file.exclude.fields</name> 60 | <value></value> 61 | <description>Fields to be excluded from being saved in note files; a Paragraph prefix means the field belongs to Paragraph, e.g. Paragraph.results</description> 62 | </property> 63 |
64 | <property> 65 | <name>zeppelin.interpreter.include</name> 66 | <value>md,spark,jdbc</value> 67 | <description>All the interpreters that you would like to include. You can only specify either 'zeppelin.interpreter.include' or 'zeppelin.interpreter.exclude'. Specifying them together is not allowed.</description> 68 | </property> 69 |
70 | <property> 71 | <name>zeppelin.interpreter.exclude</name> 72 | <value></value> 73 | <description>All the interpreters that you would like to exclude. You can only specify either 'zeppelin.interpreter.include' or 'zeppelin.interpreter.exclude'. Specifying them together is not allowed.</description> 74 | </property> 75 |
76 | <property> 77 | <name>zeppelin.notebook.collaborative.mode.enable</name> 78 | <value>true</value> 79 | <description>Enable collaborative mode</description> 80 | </property> 81 |
82 | <property> 83 | <name>zeppelin.notebook.versioned.mode.enable</name> 84 | <value>true</value> 85 | <description>Value to enable/disable version control support in Notes</description> 86 | </property> 87 |
88 | 101 |
102 | <property> 103 | <name>zeppelin.notebook.storage</name> 104 | <value>org.apache.zeppelin.notebook.repo.FileSystemNotebookRepo</value> 105 | <description>Versioned notebook persistence layer implementation</description> 106 | </property> 107 |
108 | <property> 109 | <name>zeppelin.interpreter.dir</name> 110 | <value>interpreter</value> 111 | <description>Interpreter implementation base directory</description> 112 | </property> 113 |
114 | <property> 115 | <name>zeppelin.interpreter.localRepo</name> 116 | <value>/var/lib/zeppelin/local-repo</value> 117 | <description>Local repository for interpreter's additional dependency loading</description> 118 | </property> 119 |
120 | <property> 121 | <name>zeppelin.interpreter.dep.mvnRepo</name> 122 | <value>https://mirrors.cloud.tencent.com/maven/</value> 123 | <description>Remote principal repository for interpreter's additional dependency loading</description> 124 | </property> 125 |
126 | <property> 127 | <name>zeppelin.dep.localrepo</name> 128 | <value>/var/lib/zeppelin/local-repo</value> 129 | <description>Local repository for dependency loader</description> 130 | </property> 131 |
132 | <property> 133 | <name>zeppelin.helium.node.installer.url</name> 134 | <value>https://nodejs.org/dist/</value> 135 | <description>Remote Node installer url for Helium dependency loader</description> 136 | </property> 137 |
138 | <property> 139 | <name>zeppelin.helium.npm.installer.url</name> 140 | <value>https://registry.npmjs.org/</value> 141 | <description>Remote Npm installer url for Helium dependency loader</description> 142 | </property> 143 |
144 | <property> 145 | <name>zeppelin.helium.yarnpkg.installer.url</name> 146 | <value>https://github.com/yarnpkg/yarn/releases/download/</value> 147 | <description>Remote Yarn package installer url for Helium dependency loader</description> 148 | </property> 149 |
150 | <property> 151 | <name>zeppelin.interpreter.group.default</name> 152 | <value>spark</value> 153 | <description></description> 154 | </property> 155 |
156 | <property> 157 | <name>zeppelin.interpreter.connect.timeout</name> 158 | <value>600s</value> 159 | <description>Interpreter process connect timeout. Default time unit is msec.</description> 160 | </property> 161 |
162 | <property> 163 | <name>zeppelin.interpreter.output.limit</name> 164 | <value>102400</value> 165 | <description>Output message from interpreter exceeding the limit will be truncated</description> 166 | </property> 167 |
168 | <property> 169 | <name>zeppelin.server.allowed.origins</name> 170 | <value>*</value> 171 | <description>Allowed sources for REST and WebSocket requests (i.e. http://onehost:8080,http://otherhost.com). If you leave * you are vulnerable to https://issues.apache.org/jira/browse/ZEPPELIN-173</description> 172 | </property> 173 |
174 | <property> 175 | <name>zeppelin.username.force.lowercase</name> 176 | <value>false</value> 177 | <description>Force convert username case to lower case, useful for Active Directory/LDAP. Default is not to change case</description> 178 | </property> 179 |
180 | <property> 181 | <name>zeppelin.notebook.default.owner.username</name> 182 | <value></value> 183 | <description>Set owner role by default</description> 184 | </property> 185 |
186 | <property> 187 | <name>zeppelin.notebook.public</name> 188 | <value>true</value> 189 | <description>Make notebook public by default when created, private otherwise</description> 190 | </property> 191 |
192 | <property> 193 | <name>zeppelin.websocket.max.text.message.size</name> 194 | <value>10240000</value> 195 | <description>Size in characters of the maximum text message to be received by websocket. Defaults to 10240000</description> 196 | </property> 197 |
198 | <property> 199 | <name>zeppelin.server.default.dir.allowed</name> 200 | <value>false</value> 201 | <description>Enable directory listings on server.</description> 202 | </property> 203 |
204 | <property> 205 | <name>zeppelin.interpreter.yarn.monitor.interval_secs</name> 206 | <value>10</value> 207 | <description>Check interval in secs for yarn apps monitors</description> 208 | </property> 209 |
210 | <property> 211 | <name>zeppelin.server.jetty.name</name> 212 | <value></value> 213 | <description>Hardcoding Application Server name to Prevent Fingerprinting</description> 214 | </property> 215 |
216 | <property> 217 | <name>zeppelin.server.xframe.options</name> 218 | <value>SAMEORIGIN</value> 219 | <description>The X-Frame-Options HTTP response header can be used to indicate whether or not a browser should be allowed to render a page in a frame/iframe/object.</description> 220 | </property> 221 |
222 | <property> 223 | <name>zeppelin.server.xxss.protection</name> 224 | <value>1; mode=block</value> 225 | <description>The HTTP X-XSS-Protection response header is a feature of Internet Explorer, Chrome and Safari that stops pages from loading when they detect reflected cross-site scripting (XSS) attacks. When value is set to 1 and a cross-site scripting attack is detected, the browser will sanitize the page (remove the unsafe parts).</description> 226 | </property> 227 |
228 | <property> 229 | <name>zeppelin.server.xcontent.type.options</name> 230 | <value>nosniff</value> 231 | <description>The HTTP X-Content-Type-Options response header helps to prevent MIME type sniffing attacks. It directs the browser to honor the type specified in the Content-Type header, rather than trying to determine the type from the content itself. The default value "nosniff" is really the only meaningful value. This header is supported on all browsers except Safari and Safari on iOS.</description> 232 | </property> 233 |
234 | 246 |
247 | <property> 248 | <name>zeppelin.run.mode</name> 249 | <value>auto</value> 250 | <description>'auto|local|k8s|docker'</description> 251 | </property> 252 |
253 | <property> 254 | <name>zeppelin.search.enable</name> 255 | <value>false</value> 256 | </property> 257 |
258 | <property> 259 | <name>zeppelin.search.index.rebuild</name> 260 | <value>false</value> 261 | <description>Whether to rebuild the index when Zeppelin starts. If true, all notes are read and the index is rebuilt, which can consume a lot of memory if you have a large number of notes, so it defaults to false</description> 262 | </property> 263 |
264 | <property> 265 | <name>zeppelin.search.use.disk</name> 266 | <value>true</value> 267 | <description>Whether to use disk for storing the search index; if false, memory is used instead.</description> 268 | </property> 269 |
270 | <property> 271 | <name>zeppelin.search.index.path</name> 272 | <value>/var/run/zeppelin/search-index</value> 273 | <description>Path for storing the search index on disk.</description> 274 | </property> 275 |
276 | <property> 277 | <name>zeppelin.jobmanager.enable</name> 278 | <value>false</value> 279 | <description>The Job tab in the Zeppelin UI is not very useful and costs a lot of memory, affecting performance. 280 | Disabling it can save a lot of memory</description> 281 | </property> 282 |
283 | <property> 284 | <name>zeppelin.spark.only_yarn_cluster</name> 285 | <value>false</value> 286 | <description>Whether to only allow yarn-cluster mode</description> 287 | </property> 288 |
289 | </configuration> 290 |
-------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/zookeeper/conf/zoo.cfg: --------------------------------------------------------------------------------
1 | # The number of milliseconds of each tick 2 | tickTime=2000 3 | # The number of ticks that the initial 4 | # synchronization phase can take 5 | initLimit=10 6 | # The number of ticks that can pass between 7 | # sending a request and getting an acknowledgement 8 | syncLimit=5 9 | # the directory where the snapshot is stored. 10 | # do not use /tmp for storage, /tmp here is just 11 | # example sakes. 12 | dataDir=/var/lib/zookeeper 13 | # the port at which the clients will connect 14 | clientPort=2181 15 | # the maximum number of client connections. 16 | # increase this if you need to handle more clients 17 | #maxClientCnxns=60 18 | # 19 | # Be sure to read the maintenance section of the 20 | # administrator guide before turning on autopurge.
21 | # 22 | # https://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance 23 | # 24 | # The number of snapshots to retain in dataDir 25 | #autopurge.snapRetainCount=3 26 | # Purge task interval in hours 27 | # Set to "0" to disable auto purge feature 28 | #autopurge.purgeInterval=1 29 | 30 | ## Metrics Providers 31 | # 32 | # https://prometheus.io Metrics Exporter 33 | metricsProvider.className=org.apache.zookeeper.metrics.prometheus.PrometheusMetricsProvider 34 | metricsProvider.httpHost=0.0.0.0 35 | metricsProvider.httpPort=7000 36 | metricsProvider.exportJvmInfo=true 37 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/zookeeper/conf/zookeeper-env.sh: -------------------------------------------------------------------------------- 1 | export ZK_SERVER_HEAP=384 2 | 3 | # ZOOKEEPER-1177 (3.6.0) 4 | # https://xie.infoq.cn/article/f346a8284f59e16bb7f89188e 5 | export SERVER_JVMFLAGS="-Dzookeeper.watchManagerName=org.apache.zookeeper.server.watch.WatchManagerOptimized" 6 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/opt/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -exuo pipefail 4 | 5 | remove-service-ready-mark -s hdfs 6 | 7 | "$@" & 8 | 9 | if [[ -v POST_BOOTSTRAP_COMMAND ]]; then 10 | $POST_BOOTSTRAP_COMMAND 11 | fi 12 | 13 | if [[ -d /opt/service-ready.d ]]; then 14 | for init_script in /opt/service-ready.d/*; do 15 | bash "${init_script}" 16 | done 17 | fi 18 | 19 | wait 20 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/opt/hadoop-init.d/init-hdfs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | 3 | mkdir /var/lib/zookeeper 4 | chown -R zookeeper:zookeeper /var/lib/zookeeper 5 | 6 | mkdir /var/lib/kyuubi 7 | chown -R kyuubi:root /var/lib/kyuubi 8 | 9 | mkdir /var/lib/kyuubi/work 10 | chmod -R 777 /var/lib/kyuubi/work 11 | 12 | mkdir /var/lib/hadoop-hdfs 13 | chown -R hdfs:hdfs /var/lib/hadoop-hdfs 14 | 15 | mkdir /var/lib/hadoop-mapreduce 16 | chown -R mapred:mapred /var/lib/hadoop-mapreduce 17 | 18 | mkdir /var/lib/hadoop-yarn 19 | chown -R yarn:yarn /var/lib/hadoop-yarn 20 | 21 | mkdir /opt/hadoop/logs /var/log/hadoop-hdfs /var/log/hadoop-yarn 22 | chown -R hadoop.hadoop /opt/hadoop/logs 23 | chown -R hdfs.hadoop /var/log/hadoop-hdfs 24 | chown -R yarn.hadoop /var/log/hadoop-yarn 25 | chmod -R 770 /opt/hadoop/logs /var/log/hadoop-hdfs 26 | chmod -R 755 /var/log/hadoop-yarn 27 | 28 | touch /var/log/hdfs-namenode.log 29 | chown hdfs /var/log/hdfs-namenode.log 30 | 31 | # Additional libs 32 | # cp -av /opt/hadoop/lib/native/Linux-amd64-64/* /usr/lib64/ 33 | # mkdir -v /opt/hive/auxlib || test -d /opt/hive-client/auxlib 34 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/opt/parquet/bin/parquet: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | ${JAVA_HOME}/bin/java -cp "${PARQUET_HOME}/jars/*:`${HADOOP_HOME}/bin/hadoop classpath`" org.apache.parquet.cli.Main "$@" 3 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/opt/service-ready.d/001-wait-hdfs-ready.sh: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env bash 2 | 3 | set -exuo pipefail 4 | 5 | wait-port-ready -p 8020 -t ${HDFS_READY_TIMEOUT_SEC:-180} 6 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/opt/service-ready.d/002-create-hdfs-dirs.sh.j2: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -exuo pipefail 4 | 5 | HADOOP="$HADOOP_HOME/bin/hadoop" 6 | {% if kerberos_enabled %} 7 | kinit -kt /share/keytabs/hadoop-master1/nn.service.keytab nn/hadoop-master1.orb.local@TEST.ORG 8 | {% else %} 9 | HADOOP="HADOOP_USER_NAME=hdfs $HADOOP" 10 | {% endif %} 11 | 12 | DIR_LIST=/tmp/hdfs-init-dirs 13 | 14 | set +x 15 | 16 | function parallel_create_dirs() { 17 | while read line; do 18 | # skip empty or started with # line 19 | [[ -z "$line" || "$line" =~ ^# ]] && continue 20 | 21 | owner=$(echo $line | jq -r '.owner') 22 | group=$(echo $line | jq -r '.group') 23 | permission=$(echo $line | jq -r '.permission') 24 | path=$(echo $line | jq -r '.path') 25 | command="$HADOOP fs -mkdir -p $path && $HADOOP fs -chmod $permission $path && $HADOOP fs -chown $owner:$group $path" 26 | echo $command 27 | done < $DIR_LIST | xargs -t -I {} -P 5 bash -c "{}" 28 | } 29 | 30 | echo > $DIR_LIST 31 | # top-level 32 | echo '{"owner": "hdfs", "group": "hdfs", "permission": "1777", "path": "/tmp"}' >> $DIR_LIST 33 | echo '{"owner": "hdfs", "group": "hdfs", "permission": "1755", "path": "/var"}' >> $DIR_LIST 34 | echo '{"owner": "hdfs", "group": "hdfs", "permission": "1755", "path": "/user"}' >> $DIR_LIST 35 | echo '{"owner": "hive", "group": "hadoop", "permission": "1777", "path": "/warehouse"}' >> $DIR_LIST 36 | echo '{"owner": "yarn", "group": "hadoop", "permission": "1777", "path": "/yarn-app-log"}' >> $DIR_LIST 37 | echo '{"owner": "mapred", "group": "mapred", "permission": "1777", "path": "/mr-staging"}' >> $DIR_LIST 38 | echo '{"owner": "mapred", "group": "hdfs", "permission": "1777", "path": "/mr-history"}' >> $DIR_LIST 39 | {% if spark_enabled %} 40 | echo '{"owner": "spark", "group": "hdfs", "permission": "1777", "path": "/spark-history"}' >> $DIR_LIST 41 | {% endif %} 42 | {% if flink_enabled %} 43 | echo '{"owner": "flink", "group": "hdfs", "permission": "1777", "path": "/flink-history"}' >> $DIR_LIST 44 | {% endif %} 45 | parallel_create_dirs 46 | 47 | echo > $DIR_LIST 48 | # user home 49 | echo '{"owner": "mapred", "group": "mapred", "permission": "755", "path": "/user/history"}' >> $DIR_LIST 50 | echo '{"owner": "yarn", "group": "hadoop", "permission": "755", "path": "/user/yarn"}' >> $DIR_LIST 51 | echo '{"owner": "hive", "group": "hadoop", "permission": "755", "path": "/user/hive"}' >> $DIR_LIST 52 | echo '{"owner": "root", "group": "hadoop", "permission": "755", "path": "/user/root"}' >> $DIR_LIST 53 | {% if spark_enabled %} 54 | echo '{"owner": "spark", "group": "hadoop", "permission": "755", "path": "/user/spark"}' >> $DIR_LIST 55 | {% endif %} 56 | {% if flink_enabled %} 57 | echo '{"owner": "flink", "group": "hadoop", "permission": "755", "path": "/user/flink"}' >> $DIR_LIST 58 | {% endif %} 59 | parallel_create_dirs 60 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/opt/service-ready.d/003-create-hdfs-ready-mark.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | create-service-ready-mark -s hdfs 4 | 
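The service-ready.d scripts above, together with entrypoint.sh, rely on the helper commands create-service-ready-mark, remove-service-ready-mark, wait-service-ready-mark, and wait-port-ready installed under /usr/local/bin, whose sources are not part of this listing. A minimal sketch of the convention they appear to implement, assuming a ready mark is simply a file named after the service in a shared directory (the /share/ready-marks path below is an assumption, not the repository's actual location), could look like this:

# Hypothetical sketch of the ready-mark helpers; not the repository's actual implementation.
MARK_DIR=/share/ready-marks

create_service_ready_mark() {   # analogous to: create-service-ready-mark -s <service>
  mkdir -p "$MARK_DIR" && touch "$MARK_DIR/$1"
}

wait_service_ready_mark() {     # analogous to: wait-service-ready-mark -s <service>
  until [ -f "$MARK_DIR/$1" ]; do sleep 1; done
}

Under such a convention, 001-wait-hdfs-ready.sh waits for the NameNode RPC port (8020), 002-create-hdfs-dirs.sh.j2 provisions the HDFS directory layout, and 003-create-hdfs-ready-mark.sh publishes the hdfs mark that the start-*.sh scripts below block on via wait-service-ready-mark.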
-------------------------------------------------------------------------------- /templates/hadoop-master/files/opt/service-ready.d/004-kinit-spark.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | {% if spark_enabled and kerberos_enabled %} 4 | kinit -kt /share/keytabs/hadoop-master1/spark.service.keytab spark/hadoop-master1.orb.local@TEST.ORG 5 | {% endif %} 6 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/opt/start-hive-server2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -exuo pipefail 4 | 5 | wait-service-ready-mark -s hdfs 6 | 7 | exec hive --service hiveserver2 -------------------------------------------------------------------------------- /templates/hadoop-master/files/opt/start-namenode.sh.j2: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -exuo pipefail 4 | 5 | {% if kerberos_enabled %} 6 | wait-service-ready-mark -s kdc 7 | {% endif %} 8 | 9 | if [ ! -d /var/lib/hadoop-hdfs/cache ]; then 10 | echo 'Y' | /opt/hadoop/bin/hdfs namenode -format 11 | fi 12 | 13 | exec hdfs namenode -------------------------------------------------------------------------------- /templates/hadoop-master/files/opt/start-resourcemanager.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -exuo pipefail 4 | 5 | wait-service-ready-mark -s hdfs 6 | 7 | exec yarn resourcemanager -------------------------------------------------------------------------------- /templates/hadoop-master/files/opt/start-spark-history-server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -exuo pipefail 4 | 5 | wait-service-ready-mark -s hdfs 6 | 7 | exec /opt/spark/sbin/start-history-server.sh 8 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/root/.ssh/config: -------------------------------------------------------------------------------- 1 | Host * 2 | StrictHostKeyChecking no 3 | 4 | Host hadoop-master1 5 | Hostname hadoop-master1.orb.local 6 | User root 7 | Port 22 8 | ForwardAgent yes 9 | IdentityFile /root/.ssh/id_rsa_hadoop_testing 10 | 11 | Host hadoop-worker1 12 | Hostname hadoop-worker1.orb.local 13 | User root 14 | Port 22 15 | ForwardAgent yes 16 | IdentityFile /root/.ssh/id_rsa_hadoop_testing 17 | 18 | Host hadoop-worker2 19 | Hostname hadoop-worker2.orb.local 20 | User root 21 | Port 22 22 | ForwardAgent yes 23 | IdentityFile /root/.ssh/id_rsa_hadoop_testing 24 | 25 | Host hadoop-worker3 26 | Hostname hadoop-worker3.orb.local 27 | User root 28 | Port 22 29 | ForwardAgent yes 30 | IdentityFile /root/.ssh/id_rsa_hadoop_testing 31 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/root/.ssh/id_rsa_hadoop_testing: -------------------------------------------------------------------------------- 1 | -----BEGIN OPENSSH PRIVATE KEY----- 2 | b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAABlwAAAAdzc2gtcn 3 | NhAAAAAwEAAQAAAYEAwL+lDkkt0e+0au1SztlN7NmEtY+Yw40xjSMC6r7FdICUQO9iUIxn 4 | RopVskWV4vaGhTJiKY1EtlXTQjL3kPaVWskiBZ32wTSDaQ6kw4CqakGnS4o5kMEp+kXPFN 5 | vlnrXGtuobn7h9KkjGOtRJCtM6stcc81m8gjmjkdVoqErzQ5Sa7/Ou+utWb3LTVS+IBuyQ 6 | oZLZgpLW95QNxdQD4EbJvs4SUiXmldFlJ1jCEuXh08ntCEQHmVBq5zbBeYG+VywE0w/G1W 7 | 
WOQfE5+C0B4JF2ofrCWZxnK8br5iUn0GxzZsQs16UYV1sFJHdniM7M6Ni6ovgHjNgZFcn0 8 | fmIJu20F6vEPnaoqGDOGerWOnIMoQ5KtXollkgBsRflEyCFDdKPFpJv5jczC9wYXwYcjJk 9 | XAF982cgJBVLoo0PwSUSe1pfg+dMtztB566vasIIVkw95vHr0YB/Pi/RBZ3SB9UZ2KEciw 10 | Oey8KFyqcylftSerLUCV8EDmlqfJIKZTM/JhrH5ZAAAFkCTsLg0k7C4NAAAAB3NzaC1yc2 11 | EAAAGBAMC/pQ5JLdHvtGrtUs7ZTezZhLWPmMONMY0jAuq+xXSAlEDvYlCMZ0aKVbJFleL2 12 | hoUyYimNRLZV00Iy95D2lVrJIgWd9sE0g2kOpMOAqmpBp0uKOZDBKfpFzxTb5Z61xrbqG5 13 | +4fSpIxjrUSQrTOrLXHPNZvII5o5HVaKhK80OUmu/zrvrrVm9y01UviAbskKGS2YKS1veU 14 | DcXUA+BGyb7OElIl5pXRZSdYwhLl4dPJ7QhEB5lQauc2wXmBvlcsBNMPxtVljkHxOfgtAe 15 | CRdqH6wlmcZyvG6+YlJ9Bsc2bELNelGFdbBSR3Z4jOzOjYuqL4B4zYGRXJ9H5iCbttBerx 16 | D52qKhgzhnq1jpyDKEOSrV6JZZIAbEX5RMghQ3SjxaSb+Y3MwvcGF8GHIyZFwBffNnICQV 17 | S6KND8ElEntaX4PnTLc7Qeeur2rCCFZMPebx69GAfz4v0QWd0gfVGdihHIsDnsvChcqnMp 18 | X7Unqy1AlfBA5panySCmUzPyYax+WQAAAAMBAAEAAAGABKHuhoD/gQzS8CF3VKmgH/XL5N 19 | 24SRXpB5h8ctYmMoI/He7q9N7lAgrj26KkwzVT3xpqhc9jhALczpnhPZqRlSKhptMoubTI 20 | hkM/9kNuxRpCGjSOpOIhn0Zjf1+4HXgGuEF4674wQu6IFiShQ0l3nxIAvAAPge0+90uy5d 21 | 2USm7lSOhZT6ZNl+The6oGac0wfjOkdukO0dk8+gDaHX8yDNwQ0GjhCX+Ef/03/+KGypNv 22 | NxxEdvy57wfvvzr9L0TrZ5X6mIPKPH8I9aG8ySz+vQi6kov4Y7EpAB8+kNfByJThz4qoXl 23 | sPLtMftSOgIQ/wcw/JWVHju0jiCstBFEhPGqR4J8+Wb9/G4x9jkxJUgyTx/aBP2MzHofAB 24 | ogfVEjncw12M+WzMCV4pFexgmNwZk8SrSIear33VHb8kY0WamRxn/0P7CXUaXLQqsTlwMV 25 | nVYeCruU2KhlFIc9ehRDQvmZwKWivLT8ZUIRjcULfuVe4R5dlNLnhrkn8oz8gzHCRBAAAA 26 | wQDP5jhHqJ7m7HZujUsz/PRNOqHPNheRaqw0m0LE5PnK+zBREBOQtS0dKWX7Cq6ttFa3Y1 27 | dEPcxqrVGqt8KhlbI08F07Cmk7jQzRAbXzmphMbGf/9Jya0enmngtAouyr7jdTA1tRNoGt 28 | ccthCja2OSDYVztCpG6f5hVz0zclA7Iuv2OxKTDOmKK5et63Xp/1odnZcR+0Y/B8K/NAsT 29 | aJI1N+tYG10MDfgO90ZX7nTVTLGIKVg9BX5bm1Vzhw3Wa0HCcAAADBANtAYdhPC2HCFmw1 30 | l2BZ2kVlpmIWDaRphh740d9J1Z2s5OkqdOwzwgL43l6CXOL+l7FijXamV7FM/UV0RCoxqN 31 | mgRjKoFaTyOL9cZ1LjK8G0PWV8NyNJ1BZClzkgScOETXJAHnW2XmPFgk5qOG2N3dq7/3pM 32 | SUv69eEwMt0RRl3r1yQr14dgMPQX2bprJURKIikOiG/BooiJQsikwIkwHbx448tdtsdxvS 33 | Ro1078OxyKHD6jhsb6TDrUCkaGUPn7mQAAAMEA4Q4UelqMf+IwisbF4BUrR2X6vw9j9w2l 34 | mu0VDyuoHKS3lFzzgF/1HdQFB2HokSBd0aEcsqDVibArYpGZWqQcNP+jMu//xl+C7191wP 35 | nKAZn93hBA4aPFzP8iFZTmGRHRq9uGvyQnJ2dvbNIxHjmR7m8/oWg0mbfEJDrwJZF34XkY 36 | mo50d6U6adp4igpVeUbwywKM6scdG7e+TLl63VxJxMMOfCWA2NwnE+Dem1EImVT3ixqoO9 37 | KeSklJHqp5BlDBAAAAE3Jvb3RAaGFkb29wLXRlc3RpbmcBAgMEBQYH 38 | -----END OPENSSH PRIVATE KEY----- 39 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/root/HELP.j2: -------------------------------------------------------------------------------- 1 | #jinja2: trim_blocks: True, lstrip_blocks: True 2 | 3 | {% if kerberos_enabled %} 4 | # Kerberos login as 'spark' 5 | kinit -kt /share/keytabs/hadoop-master1/spark.service.keytab spark/hadoop-master1.orb.local@TEST.ORG 6 | {% endif %} 7 | 8 | Run Spark Pi 9 | {% if kerberos_enabled %} 10 | spark-submit run-example --deploy-mode cluster --queue root.default --proxy-user spark SparkPi 10 11 | {% else %} 12 | spark-submit run-example --deploy-mode cluster --queue root.default SparkPi 10 13 | {% endif %} 14 | 15 | # Connect to HiveServer2 directly 16 | {% if kerberos_enabled %} 17 | kyuubi-beeline -u 'jdbc:hive2://hadoop-master1.orb.local:10000/default;principal=hive/_HOST@TEST.ORG' 18 | {% else %} 19 | kyuubi-beeline -u 'jdbc:hive2://hadoop-master1.orb.local:10000/default;user=hive' 20 | {% endif %} 21 | 22 | # Connect to Kyuubi directly 23 | {% if kerberos_enabled %} 24 | kyuubi-beeline -u 
'jdbc:kyuubi://hadoop-master1.orb.local:10009/default;kyuubiClientPrincipal=spark/_HOST@TEST.ORG;kyuubiClientKeytab=/share/keytabs/hadoop-master1/spark.service.keytab;kyuubiServerPrincipal=kyuubi/_HOST@TEST.ORG' 25 | kyuubi-beeline -u 'jdbc:kyuubi://hadoop-master1.orb.local:10009/default;principal=kyuubi/_HOST@TEST.ORG' 26 | kyuubi-beeline -u 'jdbc:kyuubi://hadoop-master1.orb.local:10009/default;principal=kyuubi/_HOST@TEST.ORG' --conf kyuubi.engine.share.level=CONNECTION 27 | {% else %} 28 | kyuubi-beeline -u 'jdbc:kyuubi://hadoop-master1.orb.local:10009/default;user=kyuubi' 29 | kyuubi-beeline -u 'jdbc:kyuubi://hadoop-master1.orb.local:10009/default;user=kyuubi' --conf kyuubi.engine.share.level=CONNECTION 30 | {% endif %} 31 | 32 | # Connect to Kyuubi HA 33 | {% if kerberos_enabled %} 34 | kyuubi-beeline -u 'jdbc:kyuubi://hadoop-master1.orb.local:2181/default;principal=kyuubi/_HOST@TEST.ORG;serviceDiscoveryMode=zookeeper;zooKeeperNamespace=kyuubi' 35 | {% else %} 36 | kyuubi-beeline -u 'jdbc:kyuubi://hadoop-master1.orb.local:2181/default;serviceDiscoveryMode=zookeeper;zooKeeperNamespace=kyuubi' 37 | {% endif %} 38 | 39 | # Submit Spark Pi using Kyuubi Batch API 40 | hadoop fs -put file:///opt/spark/examples/jars/spark-examples_{{ spark_scala_binary_version }}-{{ spark_version }}.jar hdfs://hadoop-master1.orb.local:8020/tmp/ 41 | 42 | {% if kerberos_enabled %} 43 | # TODO 44 | {% else %} 45 | kyuubi-ctl submit batch \ 46 | --hostUrl=http://hadoop-master1.orb.local:10099 \ 47 | -f kyuubi-batch-spark-pi.yaml 48 | {% endif %} 49 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/root/kyuubi-batch-spark-pi.yaml: -------------------------------------------------------------------------------- 1 | #jinja2: trim_blocks: True, lstrip_blocks: True 2 | 3 | apiVersion: v1 4 | user: hive 5 | request: 6 | batchType: Spark 7 | name: SparkPi 8 | resource: hdfs://hadoop-master1.orb.local:8020/tmp/spark-examples_{{ spark_scala_binary_version }}-{{ spark_version }}.jar 9 | className: org.apache.spark.examples.SparkPi 10 | args: 11 | - 10 12 | configs: 13 | kyuubi.batch.impl.version: 2 14 | hive.server2.proxy.user: spark 15 | spark.yarn.queue: root.default 16 | wait.completion: true 17 | options: 18 | verbose: true 19 | -------------------------------------------------------------------------------- /templates/hadoop-worker/Dockerfile.j2: -------------------------------------------------------------------------------- 1 | #jinja2: trim_blocks: True, lstrip_blocks: True 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 
13 | 14 | ARG PROJECT_VERSION 15 | FROM hadoop-testing/base-ubuntu-2004:$PROJECT_VERSION 16 | 17 | ARG HADOOP_VERSION 18 | ARG SPARK_VERSION 19 | ARG TRINO_VERSION 20 | 21 | ENV HADOOP_HOME=/opt/hadoop 22 | ENV HADOOP_CONF_DIR=/etc/hadoop/conf 23 | ENV LD_LIBRARY_PATH=${HADOOP_HOME}/lib/native 24 | {% if trino_enabled %} 25 | ENV TRINO_HOME=/opt/trino 26 | {% endif %} 27 | ENV PATH=${HADOOP_HOME}/bin:${PATH} 28 | 29 | ADD download/hadoop-${HADOOP_VERSION}.tar.gz /opt 30 | ADD download/spark-${SPARK_VERSION}-bin-{{ spark_custom_name }}/yarn /opt/spark-${SPARK_VERSION}-bin-{{ spark_custom_name }}/yarn 31 | {% if trino_enabled %} 32 | ADD download/trino-server-${TRINO_VERSION}.tar.gz /opt 33 | {% endif %} 34 | 35 | # Copy configuration files 36 | COPY ./files / 37 | 38 | RUN ln -snf /opt/hadoop-${HADOOP_VERSION} ${HADOOP_HOME} && \ 39 | ln -snf spark-${SPARK_VERSION}-bin-{{ spark_custom_name }} /opt/spark 40 | {% if trino_enabled %} 41 | RUN ln -snf /opt/trino-server-${TRINO_VERSION} ${TRINO_HOME} 42 | {% endif %} 43 | 44 | RUN chown -R root:hadoop /opt/hadoop-${HADOOP_VERSION} && \ 45 | chmod 6050 /opt/hadoop-${HADOOP_VERSION}/bin/container-executor && \ 46 | chown root:hadoop /etc/hadoop/conf/container-executor.cfg && \ 47 | chmod 0400 /etc/hadoop/conf/container-executor.cfg && \ 48 | mv -f /etc/hadoop/conf/container-executor.cfg /opt/hadoop-${HADOOP_VERSION}/etc/hadoop 49 | 50 | RUN /opt/hadoop-init.d/init-hdfs.sh 51 | {% if trino_enabled %} 52 | RUN /opt/trino-init.d/init-workdir.sh 53 | {% endif %} 54 | 55 | # HDFS ports 56 | EXPOSE 9864 9866 9867 9870 57 | 58 | # YARN ports 59 | EXPOSE 8030 8031 8032 8033 8040 8041 8042 8088 10020 19888 60 | 61 | CMD supervisord -c /etc/supervisord.conf 62 | -------------------------------------------------------------------------------- /templates/hadoop-worker/files/etc/supervisor/conf.d/hdfs-datanode.conf.j2: -------------------------------------------------------------------------------- 1 | [program:hdfs-datanode] 2 | directory=/opt/hadoop 3 | command=hdfs datanode 4 | startsecs=2 5 | stopwaitsecs=10 6 | {% if kerberos_enabled %} 7 | user=root 8 | {% else %} 9 | user=hdfs 10 | {% endif %} 11 | redirect_stderr=true 12 | stdout_logfile=/var/log/hdfs-datanode.log 13 | autostart=true 14 | -------------------------------------------------------------------------------- /templates/hadoop-worker/files/etc/supervisor/conf.d/yarn-nodemanager.conf: -------------------------------------------------------------------------------- 1 | [program:yarn-nodemanager] 2 | directory=/opt/hadoop 3 | command=yarn nodemanager 4 | startsecs=2 5 | stopwaitsecs=10 6 | user=yarn 7 | redirect_stderr=true 8 | stdout_logfile=/var/log/yarn-nodemanager.log 9 | autostart=true 10 | -------------------------------------------------------------------------------- /test-ssh.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: verify that the hosted Ansible control node can manage all hadoop-* nodes over SSH 3 | 4 | hosts: all 5 | 6 | gather_facts: True 7 | tasks: 8 | - name: ping 9 | ping: 10 | --------------------------------------------------------------------------------
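As a quick end-to-end check of the inventory, the test-ssh.yaml playbook above can be run from the repository root. A minimal invocation, assuming the repository's ansible.cfg and hosts inventory are used as-is, would be:

# Ping every host in the inventory over SSH via the test-ssh.yaml playbook.
# The -i flag is redundant if ansible.cfg already points at ./hosts.
ansible-playbook -i hosts test-ssh.yaml

A successful run reports "ping": "pong" for each hadoop-* node, confirming that the control node can reach them all over SSH.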