├── .gitignore ├── LICENSE ├── README.md ├── ansible.cfg ├── base-ubuntu-2004 └── files │ ├── etc │ ├── apt │ │ ├── sources.list.mirror-aarch64 │ │ └── sources.list.mirror-x86_64 │ ├── ssh │ │ └── sshd_config │ ├── supervisor │ │ └── conf.d │ │ │ └── sshd.conf │ └── supervisord.conf │ ├── opt │ └── dev │ │ └── site-override.xslt │ ├── root │ └── .ssh │ │ ├── authorized_keys │ │ └── id_rsa_hadoop_testing.pub │ └── usr │ ├── local │ └── bin │ │ ├── apply-all-site-xml-overrides │ │ ├── apply-site-xml-override │ │ ├── create-service-ready-mark │ │ ├── remove-service-ready-mark │ │ ├── wait-port-ready │ │ └── wait-service-ready-mark │ └── sbin │ └── install_packages ├── build.yaml ├── dev └── checkout_pr.sh ├── docs └── imgs │ ├── deployment_architecture.excalidraw │ ├── deployment_architecture.png │ ├── namenode-ui.png │ ├── switchy-omega-1.png │ ├── switchy-omega-2.png │ └── switchy-omega-3.png ├── download └── .gitkeep ├── files ├── etc │ ├── grafana │ │ └── provisioning │ │ │ ├── dashboards │ │ │ └── default.yaml │ │ │ └── datasources │ │ │ ├── loki.yaml │ │ │ └── prometheus.yaml │ ├── loki │ │ └── config.yaml │ └── prometheus │ │ └── prometheus.yml └── var │ └── lib │ └── grafana │ └── dashboards │ ├── kyuubi.json │ └── zookeeper_10465_rev4.json ├── host_vars └── local.yaml ├── hosts ├── kdc ├── Dockerfile ├── README.md └── files │ ├── etc │ ├── krb5.conf │ ├── krb5kdc │ │ ├── kadm5-other.acl │ │ ├── kadm5.acl │ │ └── kdc.conf │ ├── supervisord.conf │ └── supervisord.d │ │ └── kdc.conf │ ├── opt │ ├── entrypoint.sh │ └── service-ready.d │ │ ├── 001-wait-kdc-ready.sh │ │ ├── 002-create-service-principals.sh │ │ └── 003-create-kdc-ready-mark.sh │ └── usr │ └── local │ └── bin │ └── create_principal ├── mysql ├── Dockerfile └── files │ └── docker-entrypoint-initdb.d │ ├── hive-schema-2.3.0.mysql.sql │ ├── hive-txn-schema-2.3.0.mysql.sql │ ├── kyuubi-schema-1.8.0.mysql.sql │ └── ranger-database-init.mysql.sql ├── node-template.yaml ├── requirements.txt ├── templates ├── .env.j2 ├── base-ubuntu-2004 │ └── Dockerfile.j2 ├── build-image.sh.j2 ├── compose.yaml.j2 ├── download.sh.j2 ├── hadoop-common │ └── files │ │ ├── etc │ │ ├── hadoop │ │ │ └── conf │ │ │ │ ├── capacity-scheduler.xml │ │ │ │ ├── container-executor.cfg │ │ │ │ ├── core-site.xml.j2 │ │ │ │ ├── hadoop-env.sh.j2 │ │ │ │ ├── hdfs-site.xml.j2 │ │ │ │ ├── log4j.properties │ │ │ │ ├── mapred-site.xml.j2 │ │ │ │ └── yarn-site.xml.j2 │ │ ├── krb5.conf │ │ ├── supervisor │ │ │ └── conf.d │ │ │ │ └── trino-server.conf │ │ └── trino │ │ │ └── conf │ │ │ ├── config.properties │ │ │ ├── jvm.config │ │ │ ├── log.properties │ │ │ └── node.properties │ │ └── opt │ │ ├── hadoop-init.d │ │ └── init-hdfs.sh │ │ └── trino-init.d │ │ └── init-workdir.sh ├── hadoop-master │ ├── Dockerfile.j2 │ └── files │ │ ├── etc │ │ ├── flink │ │ │ ├── conf │ │ │ │ ├── flink-conf.yaml │ │ │ │ ├── log4j-cli.properties │ │ │ │ ├── log4j-console.properties │ │ │ │ ├── log4j-session.properties │ │ │ │ └── log4j.properties │ │ │ └── conf_history_server │ │ │ │ ├── flink-conf.yaml │ │ │ │ └── log4j-console.properties │ │ ├── hive │ │ │ └── conf │ │ │ │ ├── beeline-site.xml.j2 │ │ │ │ ├── hive-env.sh │ │ │ │ ├── hive-log4j2.properties │ │ │ │ └── hive-site.xml.j2 │ │ ├── kyuubi │ │ │ └── conf │ │ │ │ ├── kyuubi-defaults.conf.j2 │ │ │ │ ├── kyuubi-env.sh │ │ │ │ └── log4j2.xml.j2 │ │ ├── ranger │ │ │ ├── conf │ │ │ │ └── install.properties │ │ │ └── startup.sh │ │ ├── spark │ │ │ └── conf │ │ │ │ ├── log4j2.xml.j2 │ │ │ │ ├── spark-defaults.conf.j2 │ │ │ │ └── spark-env.sh 
│ │ ├── supervisor │ │ │ └── conf.d │ │ │ │ ├── flink-history-server.conf │ │ │ │ ├── hdfs-namenode.conf │ │ │ │ ├── hive-metastore.conf │ │ │ │ ├── hive-server2.conf │ │ │ │ ├── kyuubi-server.conf │ │ │ │ ├── mapred-history-server.conf │ │ │ │ ├── ranger-admin.conf │ │ │ │ ├── spark-history-server.conf │ │ │ │ ├── yarn-resourcemanager.conf │ │ │ │ ├── zeppelin-server.conf │ │ │ │ └── zookeeper.conf │ │ ├── zeppelin │ │ │ └── conf │ │ │ │ ├── configuration.xsl │ │ │ │ ├── interpreter-list.j2 │ │ │ │ ├── interpreter.json.j2 │ │ │ │ ├── log4j.properties │ │ │ │ ├── log4j2.properties │ │ │ │ ├── log4j_yarn_cluster.properties │ │ │ │ ├── shiro.ini.template │ │ │ │ ├── zeppelin-env.sh │ │ │ │ └── zeppelin-site.xml │ │ └── zookeeper │ │ │ └── conf │ │ │ ├── zoo.cfg │ │ │ └── zookeeper-env.sh │ │ ├── opt │ │ ├── entrypoint.sh │ │ ├── hadoop-init.d │ │ │ └── init-hdfs.sh │ │ ├── parquet │ │ │ └── bin │ │ │ │ └── parquet │ │ ├── service-ready.d │ │ │ ├── 001-wait-hdfs-ready.sh │ │ │ ├── 002-create-hdfs-dirs.sh.j2 │ │ │ ├── 003-create-hdfs-ready-mark.sh │ │ │ └── 004-kinit-spark.sh │ │ ├── start-hive-server2.sh │ │ ├── start-namenode.sh.j2 │ │ ├── start-resourcemanager.sh │ │ └── start-spark-history-server.sh │ │ └── root │ │ ├── .ssh │ │ ├── config │ │ └── id_rsa_hadoop_testing │ │ ├── HELP.j2 │ │ └── kyuubi-batch-spark-pi.yaml └── hadoop-worker │ ├── Dockerfile.j2 │ └── files │ └── etc │ └── supervisor │ └── conf.d │ ├── hdfs-datanode.conf.j2 │ └── yarn-nodemanager.conf └── test-ssh.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | *.iml 2 | *.ipr 3 | *.iws 4 | *.log 5 | .DS_Store 6 | .idea 7 | .python-version 8 | base-ubuntu-2004/Dockerfile 9 | depends 10 | download/ 11 | flags 12 | graphviz 13 | dependency_graph.svg 14 | *.swp 15 | build/tmp 16 | .env 17 | build-image.sh 18 | download.sh 19 | compose.yaml 20 | hadoop-master1/ 21 | hadoop-worker1/ 22 | hadoop-worker2/ 23 | hadoop-worker3/ 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /ansible.cfg: -------------------------------------------------------------------------------- 1 | [defaults] 2 | inventory=hosts 3 | module_name=shell 4 | gathering=explicit 5 | host_key_checking=False 6 | -------------------------------------------------------------------------------- /base-ubuntu-2004/files/etc/apt/sources.list.mirror-aarch64: -------------------------------------------------------------------------------- 1 | deb http://mirrors.cloud.tencent.com/ubuntu-ports/ focal main restricted universe multiverse 2 | # deb-src http://mirrors.cloud.tencent.com/ubuntu-ports/ focal main restricted universe multiverse 3 | 4 | deb http://mirrors.cloud.tencent.com/ubuntu-ports/ focal-security main restricted universe multiverse 5 | # deb-src http://mirrors.cloud.tencent.com/ubuntu-ports/ focal-security main restricted universe multiverse 6 | 7 | deb http://mirrors.cloud.tencent.com/ubuntu-ports/ focal-updates main restricted universe multiverse 8 | # deb-src http://mirrors.cloud.tencent.com/ubuntu-ports/ focal-updates main restricted universe multiverse 9 | 10 | # deb http://mirrors.cloud.tencent.com/ubuntu-ports/ focal-proposed main restricted universe multiverse 11 | # deb-src http://mirrors.cloud.tencent.com/ubuntu-ports/ focal-proposed main restricted universe multiverse 12 | 13 | deb http://mirrors.cloud.tencent.com/ubuntu-ports/ focal-backports main restricted universe multiverse 14 | # deb-src http://mirrors.cloud.tencent.com/ubuntu-ports/ focal-backports main restricted universe multiverse 15 | -------------------------------------------------------------------------------- /base-ubuntu-2004/files/etc/apt/sources.list.mirror-x86_64: -------------------------------------------------------------------------------- 1 | deb http://mirrors.cloud.tencent.com/ubuntu/ focal main restricted universe multiverse 2 | # deb-src http://mirrors.cloud.tencent.com/ubuntu/ focal main restricted universe multiverse 3 | 4 | deb http://mirrors.cloud.tencent.com/ubuntu/ focal-security main restricted universe multiverse 5 | # deb-src http://mirrors.cloud.tencent.com/ubuntu/ focal-security main restricted universe multiverse 6 | 7 | deb http://mirrors.cloud.tencent.com/ubuntu/ focal-updates main restricted universe multiverse 8 | # deb-src http://mirrors.cloud.tencent.com/ubuntu/ focal-updates main restricted universe multiverse 9 | 10 | # deb http://mirrors.cloud.tencent.com/ubuntu/ focal-proposed main restricted universe multiverse 11 | # deb-src http://mirrors.cloud.tencent.com/ubuntu/ focal-proposed main restricted universe multiverse 12 | 13 | deb http://mirrors.cloud.tencent.com/ubuntu/ focal-backports main restricted universe multiverse 14 | # deb-src http://mirrors.cloud.tencent.com/ubuntu/ focal-backports main restricted universe multiverse 15 | -------------------------------------------------------------------------------- /base-ubuntu-2004/files/etc/ssh/sshd_config: -------------------------------------------------------------------------------- 1 | Include /etc/ssh/sshd_config.d/*.conf 2 | 3 | # Change to yes to enable challenge-response passwords (beware issues with 4 | # some PAM modules and threads) 5 | KbdInteractiveAuthentication no 6 | 7 | # Set this to 'yes' to enable PAM authentication, account processing, 8 | # and session processing. If this is enabled, PAM authentication will 9 | # be allowed through the KbdInteractiveAuthentication and 10 | # PasswordAuthentication. 
Depending on your PAM configuration, 11 | # PAM authentication via KbdInteractiveAuthentication may bypass 12 | # the setting of "PermitRootLogin without-password". 13 | # If you just want the PAM account and session checks to run without 14 | # PAM authentication, then enable this but set PasswordAuthentication 15 | # and KbdInteractiveAuthentication to 'no'. 16 | UsePAM yes 17 | 18 | X11Forwarding yes 19 | PrintMotd no 20 | 21 | # Allow client to pass locale environment variables 22 | AcceptEnv LANG LC_* 23 | 24 | # override default of no subsystems 25 | Subsystem sftp /usr/lib/openssh/sftp-server 26 | 27 | RSAAuthentication yes 28 | PubkeyAuthentication yes 29 | -------------------------------------------------------------------------------- /base-ubuntu-2004/files/etc/supervisor/conf.d/sshd.conf: -------------------------------------------------------------------------------- 1 | [program:sshd] 2 | directory=/usr/sbin 3 | command=/usr/sbin/sshd -D 4 | startsecs=30 5 | stopwaitsecs=10 6 | redirect_stderr=true 7 | stdout_logfile=/var/log/sshd.log 8 | autostart=true 9 | autorestart=true -------------------------------------------------------------------------------- /base-ubuntu-2004/files/etc/supervisord.conf: -------------------------------------------------------------------------------- 1 | [supervisord] 2 | logfile=/var/log/supervisord.log 3 | logfile_maxbytes = 50MB 4 | logfile_backups=10 5 | loglevel=info 6 | pidfile=/var/run/supervisord.pid 7 | nodaemon=true 8 | directory=/tmp 9 | strip_ansi=false 10 | 11 | [unix_http_server] 12 | file=/var/run/supervisor.sock 13 | chmod=0777 14 | 15 | [rpcinterface:supervisor] 16 | supervisor.rpcinterface_factory=supervisor.rpcinterface:make_main_rpcinterface 17 | 18 | [supervisorctl] 19 | serverurl=unix:///var/run/supervisor.sock ; use a unix:// URL for a unix socket 20 | 21 | ; The [include] section can just contain the "files" setting. This 22 | ; setting can list multiple files (separated by whitespace or 23 | ; newlines). It can also contain wildcards. The filenames are 24 | ; interpreted as relative to this file. Included files *cannot* 25 | ; include files themselves. 
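; Note: in this base image the only conf.d entry is sshd.conf (the
; [program:sshd] block above); the hadoop-master and hadoop-worker images add
; further program definitions under the same directory, and the [include]
; section below is what picks them all up at supervisord startup.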
26 | 27 | [include] 28 | files=/etc/supervisor/conf.d/*.conf 29 | -------------------------------------------------------------------------------- /base-ubuntu-2004/files/opt/dev/site-override.xslt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /base-ubuntu-2004/files/root/.ssh/authorized_keys: -------------------------------------------------------------------------------- 1 | ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDAv6UOSS3R77Rq7VLO2U3s2YS1j5jDjTGNIwLqvsV0gJRA72JQjGdGilWyRZXi9oaFMmIpjUS2VdNCMveQ9pVaySIFnfbBNINpDqTDgKpqQadLijmQwSn6Rc8U2+Wetca26hufuH0qSMY61EkK0zqy1xzzWbyCOaOR1WioSvNDlJrv867661ZvctNVL4gG7JChktmCktb3lA3F1APgRsm+zhJSJeaV0WUnWMIS5eHTye0IRAeZUGrnNsF5gb5XLATTD8bVZY5B8Tn4LQHgkXah+sJZnGcrxuvmJSfQbHNmxCzXpRhXWwUkd2eIzszo2Lqi+AeM2BkVyfR+Ygm7bQXq8Q+dqioYM4Z6tY6cgyhDkq1eiWWSAGxF+UTIIUN0o8Wkm/mNzML3BhfBhyMmRcAX3zZyAkFUuijQ/BJRJ7Wl+D50y3O0Hnrq9qwghWTD3m8evRgH8+L9EFndIH1RnYoRyLA57LwoXKpzKV+1J6stQJXwQOaWp8kgplMz8mGsflk= root@hadoop-testing 2 | -------------------------------------------------------------------------------- /base-ubuntu-2004/files/root/.ssh/id_rsa_hadoop_testing.pub: -------------------------------------------------------------------------------- 1 | ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDAv6UOSS3R77Rq7VLO2U3s2YS1j5jDjTGNIwLqvsV0gJRA72JQjGdGilWyRZXi9oaFMmIpjUS2VdNCMveQ9pVaySIFnfbBNINpDqTDgKpqQadLijmQwSn6Rc8U2+Wetca26hufuH0qSMY61EkK0zqy1xzzWbyCOaOR1WioSvNDlJrv867661ZvctNVL4gG7JChktmCktb3lA3F1APgRsm+zhJSJeaV0WUnWMIS5eHTye0IRAeZUGrnNsF5gb5XLATTD8bVZY5B8Tn4LQHgkXah+sJZnGcrxuvmJSfQbHNmxCzXpRhXWwUkd2eIzszo2Lqi+AeM2BkVyfR+Ygm7bQXq8Q+dqioYM4Z6tY6cgyhDkq1eiWWSAGxF+UTIIUN0o8Wkm/mNzML3BhfBhyMmRcAX3zZyAkFUuijQ/BJRJ7Wl+D50y3O0Hnrq9qwghWTD3m8evRgH8+L9EFndIH1RnYoRyLA57LwoXKpzKV+1J6stQJXwQOaWp8kgplMz8mGsflk= root@hadoop-testing 2 | -------------------------------------------------------------------------------- /base-ubuntu-2004/files/usr/local/bin/apply-all-site-xml-overrides: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euo pipefail 4 | 5 | fail() { 6 | echo "$(basename "$0"): $*" >&2 7 | exit 1 8 | } 9 | 10 | if [ $# -ne 1 ]; then 11 | fail "Usage: $0 " >&2 12 | fi 13 | 14 | overrides_dir="$1" 15 | 16 | for file in $(find $overrides_dir -name '*.xml'); do 17 | target_filename="${file#"$overrides_dir"}" 18 | echo "Applying configuration override from $file to $target_filename" 19 | apply-site-xml-override "$target_filename" "$file" 20 | done 21 | -------------------------------------------------------------------------------- /base-ubuntu-2004/files/usr/local/bin/apply-site-xml-override: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euo pipefail 4 | 5 | fail() { 6 | echo "$(basename "$0"): $*" >&2 7 | exit 1 8 | } 9 | 10 | if [ $# -ne 2 ]; then 11 | fail "Usage: $0 " >&2 12 | fi 13 | 14 | site_xml="$1" 15 | overrides="$2" 16 | site_xml_new="$1.new" 17 | 18 | test -f "${site_xml}" || fail "${site_xml} does not exist or is not a file" 19 | test -f "${overrides}" || fail "${overrides} does not exist or is not a file" 20 | test ! 
-e "${site_xml_new}" || fail "${site_xml_new} already exists" 21 | 22 | xsltproc --param override-path "'${overrides}'" "/opt/dev/site-override.xslt" "${site_xml}" > "${site_xml_new}" 23 | cat "${site_xml_new}" > "${site_xml}" # Preserve file owner & permissions 24 | rm "${site_xml_new}" 25 | -------------------------------------------------------------------------------- /base-ubuntu-2004/files/usr/local/bin/create-service-ready-mark: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | dir=/share/marks/ready.d/ 4 | 5 | set -euo pipefail 6 | 7 | function usage() { 8 | echo "Usage: $0 [-d ] -s " >&2 9 | exit 1 10 | } 11 | 12 | while getopts "d:s:" o; do 13 | case "${o}" in 14 | d) 15 | dir="$OPTARG" 16 | ;; 17 | s) 18 | service="$OPTARG" 19 | ;; 20 | *) 21 | esac 22 | done 23 | 24 | if [[ ! -v service ]]; then 25 | usage 26 | fi 27 | 28 | mkdir -p $dir 29 | touch $dir/$service 30 | -------------------------------------------------------------------------------- /base-ubuntu-2004/files/usr/local/bin/remove-service-ready-mark: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | dir=/share/marks/ready.d/ 4 | 5 | set -euo pipefail 6 | 7 | function usage() { 8 | echo "Usage: $0 [-d ] -s " >&2 9 | exit 1 10 | } 11 | 12 | while getopts "d:s:" o; do 13 | case "${o}" in 14 | d) 15 | dir="$OPTARG" 16 | ;; 17 | s) 18 | service="$OPTARG" 19 | ;; 20 | *) 21 | esac 22 | done 23 | 24 | if [[ ! -v service ]]; then 25 | usage 26 | fi 27 | 28 | rm -f $dir/$service 29 | 30 | -------------------------------------------------------------------------------- /base-ubuntu-2004/files/usr/local/bin/wait-port-ready: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | timeout=60 4 | interval=1 5 | 6 | set -euo pipefail 7 | 8 | function usage() { 9 | echo "Usage: $0 [-t ] [-i ] -p " >&2 10 | exit 1 11 | } 12 | 13 | while getopts "t:i:p:" o; do 14 | case "${o}" in 15 | t) 16 | timeout="$OPTARG" 17 | ;; 18 | i) 19 | interval="$OPTARG" 20 | ;; 21 | p) 22 | port="$OPTARG" 23 | ;; 24 | *) 25 | esac 26 | done 27 | 28 | if [[ ! -v port ]]; then 29 | usage 30 | fi 31 | 32 | end_time=$((SECONDS + timeout)) 33 | 34 | while [ $SECONDS -lt $end_time ]; do 35 | if /opt/busybox/nc $(hostname) $port -e true; then 36 | echo "Port $port is now available" 37 | break 38 | else 39 | echo "Port $port is not available, retrying in $interval seconds..." 40 | sleep $interval 41 | fi 42 | done 43 | 44 | if [ $SECONDS -ge $end_time ]; then 45 | echo "Timeout reached. Port $port is still not available." 46 | exit 1 47 | fi -------------------------------------------------------------------------------- /base-ubuntu-2004/files/usr/local/bin/wait-service-ready-mark: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | timeout=60 4 | interval=1 5 | dir=/share/marks/ready.d/ 6 | 7 | set -euo pipefail 8 | 9 | function usage() { 10 | echo "Usage: $0 [-t ] [-i ] [-d ] -s " >&2 11 | exit 1 12 | } 13 | 14 | while getopts "t:i:d:s:" o; do 15 | case "${o}" in 16 | t) 17 | timeout="$OPTARG" 18 | ;; 19 | i) 20 | interval="$OPTARG" 21 | ;; 22 | d) 23 | dir="$OPTARG" 24 | ;; 25 | s) 26 | service="$OPTARG" 27 | ;; 28 | *) 29 | esac 30 | done 31 | 32 | if [[ ! 
-v service ]]; then 33 | usage 34 | fi 35 | 36 | end_time=$((SECONDS + timeout)) 37 | 38 | while [ $SECONDS -lt $end_time ]; do 39 | if [[ -f $dir/$service ]]; then 40 | echo "Service $service is now ready" 41 | break 42 | else 43 | echo "Service $service is not ready, retrying in $interval seconds..." 44 | sleep $interval 45 | fi 46 | done 47 | 48 | if [ $SECONDS -ge $end_time ]; then 49 | echo "Timeout reached. Service $service is still not ready." 50 | exit 1 51 | fi 52 | -------------------------------------------------------------------------------- /base-ubuntu-2004/files/usr/sbin/install_packages: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | set -u 4 | export DEBIAN_FRONTEND=noninteractive 5 | n=0 6 | max=2 7 | until [ $n -gt $max ]; do 8 | set +e 9 | ( 10 | apt-get update -qq && 11 | apt-get install -y --no-install-recommends "$@" 12 | ) 13 | CODE=$? 14 | set -e 15 | if [ $CODE -eq 0 ]; then 16 | break 17 | fi 18 | if [ $n -eq $max ]; then 19 | exit $CODE 20 | fi 21 | echo "apt failed, retrying" 22 | n=$(($n + 1)) 23 | done 24 | rm -r /var/lib/apt/lists /var/cache/apt/archives 25 | -------------------------------------------------------------------------------- /build.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: compile template files 3 | 4 | hosts: local 5 | 6 | tasks: 7 | - name: orchestrate basic files 8 | ansible.builtin.template: 9 | src: templates/{{ item.file }}.j2 10 | dest: "{{ playbook_dir }}/{{ item.file }}" 11 | mode: "{{ item.mode }}" 12 | with_items: 13 | - { file: ".env", mode: "a+x" } 14 | - { file: "build-image.sh", mode: "a+x" } 15 | - { file: "download.sh", mode: "a+x" } 16 | - { file: "base-ubuntu-2004/Dockerfile", mode: ~ } 17 | 18 | - include_tasks: node-template.yaml 19 | loop: 20 | - name: "hadoop-master1" 21 | group: "hadoop-master" 22 | - name: "hadoop-worker1" 23 | group: "hadoop-worker" 24 | - name: "hadoop-worker2" 25 | group: "hadoop-worker" 26 | - name: "hadoop-worker3" 27 | group: "hadoop-worker" 28 | loop_control: 29 | loop_var: node 30 | 31 | - name: orchestrate compose file 32 | ansible.builtin.template: 33 | src: templates/compose.yaml.j2 34 | dest: "{{ playbook_dir }}/compose.yaml" 35 | -------------------------------------------------------------------------------- /dev/checkout_pr.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
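# Fetches the head of GitHub pull request <pr-number> from the "upstream"
# remote into a local branch named PR_<pr-number> and checks it out;
# -f force-updates an existing local branch of that name.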
17 | # 18 | 19 | set -o pipefail 20 | set -e 21 | set -x 22 | 23 | function usage { 24 | echo "Usage: $(basename "${0}") [-f] " 2>&1 25 | echo ' -f force overwrite of local branch (default: fail if exists)' 26 | exit 1 27 | } 28 | 29 | if [[ ${#} -eq 0 ]]; then 30 | usage 31 | fi 32 | 33 | FORCE="" 34 | while getopts ":f" arg; do 35 | case "${arg}" in 36 | f) 37 | FORCE="--force" 38 | ;; 39 | ?) 40 | usage 41 | ;; 42 | esac 43 | done 44 | shift "$(($OPTIND -1))" 45 | 46 | PR_NUM=$1 47 | 48 | git fetch upstream pull/${PR_NUM}/head:PR_${PR_NUM} ${FORCE} 49 | git checkout PR_${PR_NUM} 50 | -------------------------------------------------------------------------------- /docs/imgs/deployment_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awesome-kyuubi/hadoop-testing/e35aa5c439064898fd4fcba3abeccdb2f623b97e/docs/imgs/deployment_architecture.png -------------------------------------------------------------------------------- /docs/imgs/namenode-ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awesome-kyuubi/hadoop-testing/e35aa5c439064898fd4fcba3abeccdb2f623b97e/docs/imgs/namenode-ui.png -------------------------------------------------------------------------------- /docs/imgs/switchy-omega-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awesome-kyuubi/hadoop-testing/e35aa5c439064898fd4fcba3abeccdb2f623b97e/docs/imgs/switchy-omega-1.png -------------------------------------------------------------------------------- /docs/imgs/switchy-omega-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awesome-kyuubi/hadoop-testing/e35aa5c439064898fd4fcba3abeccdb2f623b97e/docs/imgs/switchy-omega-2.png -------------------------------------------------------------------------------- /docs/imgs/switchy-omega-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awesome-kyuubi/hadoop-testing/e35aa5c439064898fd4fcba3abeccdb2f623b97e/docs/imgs/switchy-omega-3.png -------------------------------------------------------------------------------- /download/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awesome-kyuubi/hadoop-testing/e35aa5c439064898fd4fcba3abeccdb2f623b97e/download/.gitkeep -------------------------------------------------------------------------------- /files/etc/grafana/provisioning/dashboards/default.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | providers: 4 | - name: Default # A uniquely identifiable name for the provider 5 | folder: ~ # The folder where to place the dashboards 6 | type: file 7 | options: 8 | path: /var/lib/grafana/dashboards 9 | -------------------------------------------------------------------------------- /files/etc/grafana/provisioning/datasources/loki.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | datasources: 4 | - name: Loki 5 | type: loki 6 | access: proxy 7 | orgId: 1 8 | url: http://loki:3100 9 | basicAuth: false 10 | isDefault: false 11 | version: 1 12 | editable: false 13 | jsonData: 14 | maxLines: 100000 15 | -------------------------------------------------------------------------------- 
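Note: the Grafana/Loki/Prometheus provisioning files in this directory are consumed by containers defined in the generated compose file (rendered from templates/compose.yaml.j2, which is not shown in this listing). As a rough hand-written sketch only — the service names, image tags, and mount paths below are assumptions inferred from the datasource URLs and the versions in host_vars/local.yaml, not the repository's actual template:

services:
  grafana:
    image: grafana/grafana:11.5.2            # grafana_version in host_vars/local.yaml
    ports: ["3000:3000"]
    volumes:
      - ./files/etc/grafana/provisioning:/etc/grafana/provisioning
      - ./files/var/lib/grafana/dashboards:/var/lib/grafana/dashboards
  loki:
    image: grafana/loki:3.4.2                # loki_version
    command: ["-config.file=/etc/loki/config.yaml"]
    ports: ["3100:3100"]
    volumes:
      - ./files/etc/loki/config.yaml:/etc/loki/config.yaml
  prometheus:
    image: prom/prometheus:v2.53.3           # prometheus_version
    ports: ["9090:9090"]
    volumes:
      - ./files/etc/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml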
/files/etc/grafana/provisioning/datasources/prometheus.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | datasources: 4 | - name: Prometheus 5 | type: prometheus 6 | access: proxy 7 | orgId: 1 8 | url: http://prometheus:9090 9 | isDefault: true 10 | version: 1 11 | editable: false 12 | -------------------------------------------------------------------------------- /files/etc/loki/config.yaml: -------------------------------------------------------------------------------- 1 | auth_enabled: false 2 | 3 | server: 4 | http_listen_port: 3100 5 | grpc_listen_port: 9096 6 | grpc_server_max_recv_msg_size: 67108864 7 | grpc_server_max_send_msg_size: 67108864 8 | 9 | common: 10 | instance_addr: 0.0.0.0 11 | path_prefix: /loki 12 | storage: 13 | filesystem: 14 | chunks_directory: /loki/chunks 15 | rules_directory: /loki/rules 16 | replication_factor: 1 17 | ring: 18 | kvstore: 19 | store: inmemory 20 | 21 | query_range: 22 | results_cache: 23 | cache: 24 | embedded_cache: 25 | enabled: true 26 | max_size_mb: 100 27 | 28 | limits_config: 29 | retention_period: 7d 30 | max_entries_limit_per_query: 100000 31 | 32 | compactor: 33 | working_directory: /loki/compactor 34 | delete_request_store: filesystem 35 | # shared_store: filesystem 36 | # deletion_mode: filter-and-delete 37 | compaction_interval: 10m 38 | retention_enabled: true 39 | retention_delete_delay: 1h 40 | retention_delete_worker_count: 150 41 | 42 | schema_config: 43 | configs: 44 | - from: 2024-01-01 45 | store: tsdb 46 | object_store: filesystem 47 | schema: v13 48 | index: 49 | prefix: index_ 50 | period: 24h 51 | -------------------------------------------------------------------------------- /files/etc/prometheus/prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 3s 3 | scrape_timeout: 3s 4 | evaluation_interval: 5s 5 | 6 | scrape_configs: 7 | - job_name: "prometheus" 8 | scheme: "http" 9 | metrics_path: "/metrics" 10 | static_configs: 11 | - targets: 12 | - "prometheus:9090" 13 | 14 | - job_name: "kyuubi" 15 | scheme: "http" 16 | metrics_path: "/metrics" 17 | static_configs: 18 | - targets: 19 | - "hadoop-master1.orb.local:10019" 20 | 21 | - job_name: "zookeeper" 22 | scheme: "http" 23 | metrics_path: "/metrics" 24 | static_configs: 25 | - targets: 26 | - "hadoop-master1.orb.local:7000" 27 | -------------------------------------------------------------------------------- /host_vars/local.yaml: -------------------------------------------------------------------------------- 1 | project_version: 1-SNAPSHOT 2 | 3 | # base 4 | zulu8_version: 8.82.0.21 5 | jdk8_version: 8.0.432 6 | 7 | zulu11_version: 11.76.21 8 | jdk11_version: 11.0.25 9 | 10 | zulu17_version: 17.54.21 11 | jdk17_version: 17.0.13 12 | 13 | jdk21_enabled: false 14 | zulu21_version: 21.38.21 15 | jdk21_version: 21.0.5 16 | 17 | s3_enabled: false 18 | aws_java_sdk_version: 1.12.620 19 | 20 | # components 21 | kerberos_enabled: false 22 | 23 | hadoop_version: 3.3.6 24 | 25 | hive_version: 2.3.9 26 | 27 | zeppelin_enabled: true 28 | zeppelin_version: 0.12.0 29 | zeppelin_custom_name: all 30 | 31 | zookeeper_version: 3.8.4 32 | 33 | spark_enabled: true 34 | spark_version: 3.5.5 35 | spark_binary_version: 3.5 36 | spark_scala_binary_version: 2.12 37 | spark_hadoop_version: 3.3.4 38 | spark_custom_name: hadoop3 39 | 40 | flink_enabled: false 41 | flink_binary_version: '1.20' 42 | flink_version: 1.20.1 43 | flink_hive_version: 2.3.10 44 | 
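# Note: the component switches and versions in this file are consumed by
# build.yaml, which renders the templates/*.j2 files into .env, build-image.sh,
# download.sh, compose.yaml and the per-node hadoop-master1/hadoop-worker*
# directories.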
45 | kyuubi_enabled: true 46 | kyuubi_version: 1.10.1 47 | kyuubi_hadoop_version: 3.3.6 48 | 49 | ranger_enabled: false 50 | ranger_version: 2.4.0 51 | 52 | trino_enabled: false 53 | trino_version: 436 54 | 55 | parquet_enabled: true 56 | parquet_version: 1.15.0 57 | 58 | iceberg_enabled: true 59 | iceberg_version: 1.8.1 60 | 61 | hudi_enabled: false 62 | hudi_version: 0.14.1 63 | 64 | kafka_enabled: false 65 | kafka_version: 3.6.2 66 | kafka_ui_enabled: false 67 | kafka_ui_version: 1.1.0 68 | 69 | grafana_enabled: false 70 | grafana_version: 11.5.2 71 | 72 | prometheus_enabled: false 73 | prometheus_version: 2.53.3 74 | 75 | loki_enabled: false 76 | loki_version: 3.4.2 77 | loki_log4j2_appender_version: 0.9.32 78 | 79 | # repository 80 | repository_apache: https://mirrors.cloud.tencent.com/apache 81 | repository_maven: https://mirrors.cloud.tencent.com/maven 82 | 83 | apt_mirror_enabled: true 84 | -------------------------------------------------------------------------------- /hosts: -------------------------------------------------------------------------------- 1 | local ansible_connection=local 2 | 3 | [hadoop-master] 4 | hadoop-master1 5 | 6 | [hadoop-worker] 7 | hadoop-worker1 8 | hadoop-worker2 9 | hadoop-worker3 10 | -------------------------------------------------------------------------------- /kdc/Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | # See the License for the specific language governing permissions and 11 | # limitations under the License. 
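#
# This image runs an MIT Kerberos KDC with two realms: TEST.ORG (the default
# database) and OTHER.ORG (a second database under
# /var/lib/krb5kdc/principal-other), plus cross-realm krbtgt principals so that
# TEST.ORG can trust OTHER.ORG. supervisord keeps krb5kdc and kadmind running,
# and /opt/entrypoint.sh executes the scripts in /opt/service-ready.d to create
# the service principals under /share/keytabs and publish the "kdc" ready mark.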
12 | 13 | ARG PROJECT_VERSION 14 | FROM hadoop-testing/base-ubuntu-2004:$PROJECT_VERSION 15 | 16 | # COPY CONFIGURATION 17 | COPY ./files / 18 | 19 | # INSTALL KERBEROS 20 | RUN install_packages krb5-kdc krb5-admin-server krb5-user 21 | 22 | # CREATE KERBEROS DATABASE 23 | RUN /usr/sbin/kdb5_util create -s -P password 24 | 25 | # CREATE ANOTHER KERBEROS DATABASE 26 | RUN mkdir -p /var/lib/krb5kdc && \ 27 | /usr/sbin/kdb5_util create -s -P password -r OTHER.ORG -d /var/lib/krb5kdc/principal-other 28 | 29 | # MAKE '.TEST.ORG' TRUST 'OTHER.ORG' 30 | RUN /usr/sbin/kadmin.local -q "addprinc -pw 123456 krbtgt/.TEST.ORG@OTHER.ORG" 31 | RUN /usr/sbin/kadmin.local -q "addprinc -pw 123456 krbtgt/TEST.ORG" -r OTHER.ORG -d /var/lib/krb5kdc/principal-other 32 | 33 | # EXPOSE KERBEROS PORTS 34 | EXPOSE 88 35 | EXPOSE 89 36 | EXPOSE 749 37 | 38 | CMD ["supervisord", "-c", "/etc/supervisord.conf"] 39 | ENTRYPOINT ["/opt/entrypoint.sh"] 40 | -------------------------------------------------------------------------------- /kdc/README.md: -------------------------------------------------------------------------------- 1 | KDC 2 | === 3 | 4 | ## Commands 5 | 6 | Launch container 7 | ```shell 8 | docker run --rm -it -h kdc --name kdc hadoop-testing/kdc:${PROJECT_VERSION} 9 | ``` 10 | 11 | Login shell 12 | ```shell 13 | docker exec -it kdc bash 14 | ``` -------------------------------------------------------------------------------- /kdc/files/etc/krb5.conf: -------------------------------------------------------------------------------- 1 | [logging] 2 | default = FILE:/var/log/krb5libs.log 3 | kdc = FILE:/var/log/krb5kdc.log 4 | admin_server = FILE:/var/log/kadmind.log 5 | 6 | [libdefaults] 7 | default_realm = TEST.ORG 8 | dns_lookup_realm = false 9 | dns_lookup_kdc = false 10 | forwardable = true 11 | allow_weak_crypto = true 12 | 13 | [realms] 14 | TEST.ORG = { 15 | kdc = kdc.orb.local:88 16 | admin_server = kdc.orb.local 17 | } 18 | OTHER.ORG = { 19 | kdc = kdc.orb.local:89 20 | admin_server = kdc.orb.local 21 | } 22 | -------------------------------------------------------------------------------- /kdc/files/etc/krb5kdc/kadm5-other.acl: -------------------------------------------------------------------------------- 1 | */admin@OTHER.ORG * 2 | -------------------------------------------------------------------------------- /kdc/files/etc/krb5kdc/kadm5.acl: -------------------------------------------------------------------------------- 1 | */admin@TEST.ORG * 2 | -------------------------------------------------------------------------------- /kdc/files/etc/krb5kdc/kdc.conf: -------------------------------------------------------------------------------- 1 | [kdcdefaults] 2 | kdc_ports = 88 3 | kdc_tcp_ports = 88 4 | 5 | [realms] 6 | TEST.ORG = { 7 | acl_file = /etc/krb5kdc/kadm5.acl 8 | dict_file = /usr/share/dict/words 9 | admin_keytab = /etc/krb5kdc/kadm5.keytab 10 | } 11 | 12 | OTHER.ORG = { 13 | acl_file = /etc/krb5kdc/kadm5-other.acl 14 | dict_file = /usr/share/dict/words 15 | admin_keytab = /etc/krb5kdc/kadm5-other.keytab 16 | kdc_listen = 89 17 | kdc_tcp_listen = 89 18 | kdc_ports = 89 19 | kdc_tcp_ports = 89 20 | } 21 | 22 | -------------------------------------------------------------------------------- /kdc/files/etc/supervisord.conf: -------------------------------------------------------------------------------- 1 | [supervisord] 2 | logfile = /var/log/supervisord.log 3 | logfile_maxbytes = 50MB 4 | logfile_backups=10 5 | loglevel = info 6 | pidfile = /var/run/supervisord.pid 7 | nodaemon = 
true 8 | directory = /tmp 9 | strip_ansi = false 10 | 11 | [unix_http_server] 12 | file = /tmp/supervisor.sock 13 | 14 | [rpcinterface:supervisor] 15 | supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface 16 | 17 | [supervisorctl] 18 | serverurl = unix:///tmp/supervisor.sock 19 | 20 | [include] 21 | files = /etc/supervisord.d/*.conf 22 | -------------------------------------------------------------------------------- /kdc/files/etc/supervisord.d/kdc.conf: -------------------------------------------------------------------------------- 1 | [program:krb5kdc] 2 | command=/bin/bash -c "exec /usr/sbin/krb5kdc -P /var/run/krb5kdc.pid -n -r TEST.ORG -n -d /var/lib/krb5kdc/principal-other -r OTHER.ORG" 3 | autostart=true 4 | autorestart=true 5 | redirect_stderr=true 6 | stdout_logfile=/dev/stdout 7 | stdout_logfile_maxbytes=0 8 | 9 | [program:kadmind] 10 | command=/bin/bash -c "exec /usr/sbin/kadmind -P /var/run/kadmind.pid -nofork -r TEST.ORG" 11 | autostart=true 12 | autorestart=true 13 | redirect_stderr=true 14 | stdout_logfile=/dev/stdout 15 | stdout_logfile_maxbytes=0 16 | -------------------------------------------------------------------------------- /kdc/files/opt/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -exuo pipefail 4 | 5 | "$@" & 6 | 7 | if [[ -v POST_BOOTSTRAP_COMMAND ]]; then 8 | $POST_BOOTSTRAP_COMMAND 9 | fi 10 | 11 | if [[ -d /opt/service-ready.d ]]; then 12 | for init_script in /opt/service-ready.d/*; do 13 | bash "${init_script}" 14 | done 15 | fi 16 | 17 | wait 18 | -------------------------------------------------------------------------------- /kdc/files/opt/service-ready.d/001-wait-kdc-ready.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -exuo pipefail 4 | 5 | wait-port-ready -p 88 6 | wait-port-ready -p 89 7 | wait-port-ready -p 749 -------------------------------------------------------------------------------- /kdc/files/opt/service-ready.d/002-create-service-principals.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -exuo pipefail 4 | 5 | rm -rf /share/keytabs/* 6 | 7 | mkdir -p /share/keytabs/hadoop-master1 8 | # HDFS NameNode 9 | create_principal -p nn/hadoop-master1.orb.local -k /share/keytabs/hadoop-master1/nn.service.keytab 10 | create_principal -p host/hadoop-master1.orb.local -k /share/keytabs/hadoop-master1/nn.service.keytab 11 | create_principal -p HTTP/hadoop-master1.orb.local -k /share/keytabs/hadoop-master1/nn.service.keytab 12 | 13 | # YARN ResourceManager 14 | create_principal -p rm/hadoop-master1.orb.local -k /share/keytabs/hadoop-master1/rm.service.keytab 15 | create_principal -p host/hadoop-master1.orb.local -k /share/keytabs/hadoop-master1/rm.service.keytab 16 | 17 | # MapReduce JobHistory Server 18 | create_principal -p jhs/hadoop-master1.orb.local -k /share/keytabs/hadoop-master1/jhs.service.keytab 19 | 20 | # Hive MetaStore & Hive Server2 21 | create_principal -p hive/hadoop-master1.orb.local -k /share/keytabs/hadoop-master1/hive.service.keytab 22 | 23 | # Spark History Server 24 | create_principal -p spark/hadoop-master1.orb.local -k /share/keytabs/hadoop-master1/spark.service.keytab 25 | 26 | # Flink History Server 27 | create_principal -p flink/hadoop-master1.orb.local -k /share/keytabs/hadoop-master1/flink.service.keytab 28 | 29 | # Kyuubi Server 30 | create_principal -p 
kyuubi/hadoop-master1.orb.local -k /share/keytabs/hadoop-master1/kyuubi.service.keytab 31 | 32 | for i in {1..3}; do 33 | mkdir -p /share/keytabs/hadoop-worker$i 34 | # HDFS DataNode 35 | create_principal -p dn/hadoop-worker$i.orb.local -k /share/keytabs/hadoop-worker$i/dn.service.keytab 36 | create_principal -p host/hadoop-worker$i.orb.local -k /share/keytabs/hadoop-worker$i/dn.service.keytab 37 | # YARN NodeManger 38 | create_principal -p nm/hadoop-worker$i.orb.local -k /share/keytabs/hadoop-worker$i/nm.service.keytab 39 | create_principal -p host/hadoop-worker$i.orb.local -k /share/keytabs/hadoop-worker$i/nm.service.keytab 40 | done 41 | 42 | chmod -R a+r /share/keytabs -------------------------------------------------------------------------------- /kdc/files/opt/service-ready.d/003-create-kdc-ready-mark.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | create-service-ready-mark -s kdc 4 | -------------------------------------------------------------------------------- /kdc/files/usr/local/bin/create_principal: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euo pipefail 4 | 5 | function usage() { 6 | if [ $# -ne 2 ]; then 7 | echo "Usage: $0 [-o] -p -k " >&2 8 | echo >&2 9 | echo "-o - use non default realm" >&2 10 | exit 1 11 | fi 12 | } 13 | 14 | DEFAULT_REALM='TEST.ORG' 15 | OTHER_REALM='OTHER.ORG' 16 | OTHER_REALM_DATABASE='/var/lib/krb5kdc/principal-other' 17 | 18 | realm="$DEFAULT_REALM" 19 | kadmin_opts='' 20 | 21 | while getopts "op:k:" o; do 22 | case "${o}" in 23 | o) 24 | realm="$OTHER_REALM" 25 | kadmin_opts="-r $OTHER_REALM -d $OTHER_REALM_DATABASE" 26 | ;; 27 | p) 28 | principal="$OPTARG" 29 | ;; 30 | k) 31 | keytab="$OPTARG" 32 | ;; 33 | *) 34 | esac 35 | done 36 | 37 | if [[ ! -v principal ]]; then 38 | usage 39 | fi 40 | 41 | if [[ ! -v keytab ]]; then 42 | usage 43 | fi 44 | 45 | /usr/sbin/kadmin.local $kadmin_opts -q "addprinc -randkey $principal@$realm" 46 | /usr/sbin/kadmin.local $kadmin_opts -q "ktadd -norandkey -k $keytab $principal" 47 | -------------------------------------------------------------------------------- /mysql/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM bitnami/mysql:8.0 2 | 3 | COPY ./files / 4 | 5 | ENV MYSQL_ROOT_PASSWORD=root \ 6 | MYSQL_AUTHENTICATION_PLUGIN=mysql_native_password \ 7 | MYSQL_DATABASE=metastore 8 | -------------------------------------------------------------------------------- /mysql/files/docker-entrypoint-initdb.d/hive-txn-schema-2.3.0.mysql.sql: -------------------------------------------------------------------------------- 1 | -- Licensed to the Apache Software Foundation (ASF) under one or more 2 | -- contributor license agreements. See the NOTICE file distributed with 3 | -- this work for additional information regarding copyright ownership. 4 | -- The ASF licenses this file to You under the Apache License, Version 2.0 5 | -- (the "License"); you may not use this file except in compliance with 6 | -- the License. You may obtain a copy of the License at 7 | -- 8 | -- http://www.apache.org/licenses/LICENSE-2.0 9 | -- 10 | -- Unless required by applicable law or agreed to in writing, software 11 | -- distributed under the License is distributed on an "AS IS" BASIS, 12 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | -- See the License for the specific language governing permissions and 14 | -- limitations under the License. 15 | 16 | -- 17 | -- Tables for transaction management 18 | -- 19 | 20 | CREATE TABLE TXNS ( 21 | TXN_ID bigint PRIMARY KEY, 22 | TXN_STATE char(1) NOT NULL, 23 | TXN_STARTED bigint NOT NULL, 24 | TXN_LAST_HEARTBEAT bigint NOT NULL, 25 | TXN_USER varchar(128) NOT NULL, 26 | TXN_HOST varchar(128) NOT NULL, 27 | TXN_AGENT_INFO varchar(128), 28 | TXN_META_INFO varchar(128), 29 | TXN_HEARTBEAT_COUNT int 30 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 31 | 32 | CREATE TABLE TXN_COMPONENTS ( 33 | TC_TXNID bigint NOT NULL, 34 | TC_DATABASE varchar(128) NOT NULL, 35 | TC_TABLE varchar(128) NOT NULL, 36 | TC_PARTITION varchar(767), 37 | TC_OPERATION_TYPE char(1) NOT NULL, 38 | FOREIGN KEY (TC_TXNID) REFERENCES TXNS (TXN_ID) 39 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 40 | 41 | CREATE INDEX TC_TXNID_INDEX ON TXN_COMPONENTS (TC_TXNID); 42 | 43 | CREATE TABLE COMPLETED_TXN_COMPONENTS ( 44 | CTC_TXNID bigint NOT NULL, 45 | CTC_DATABASE varchar(128) NOT NULL, 46 | CTC_TABLE varchar(256), 47 | CTC_PARTITION varchar(767) 48 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 49 | 50 | CREATE TABLE NEXT_TXN_ID ( 51 | NTXN_NEXT bigint NOT NULL 52 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 53 | INSERT INTO NEXT_TXN_ID VALUES(1); 54 | 55 | CREATE TABLE HIVE_LOCKS ( 56 | HL_LOCK_EXT_ID bigint NOT NULL, 57 | HL_LOCK_INT_ID bigint NOT NULL, 58 | HL_TXNID bigint, 59 | HL_DB varchar(128) NOT NULL, 60 | HL_TABLE varchar(128), 61 | HL_PARTITION varchar(767), 62 | HL_LOCK_STATE char(1) not null, 63 | HL_LOCK_TYPE char(1) not null, 64 | HL_LAST_HEARTBEAT bigint NOT NULL, 65 | HL_ACQUIRED_AT bigint, 66 | HL_USER varchar(128) NOT NULL, 67 | HL_HOST varchar(128) NOT NULL, 68 | HL_HEARTBEAT_COUNT int, 69 | HL_AGENT_INFO varchar(128), 70 | HL_BLOCKEDBY_EXT_ID bigint, 71 | HL_BLOCKEDBY_INT_ID bigint, 72 | PRIMARY KEY(HL_LOCK_EXT_ID, HL_LOCK_INT_ID), 73 | KEY HIVE_LOCK_TXNID_INDEX (HL_TXNID) 74 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 75 | 76 | CREATE INDEX HL_TXNID_IDX ON HIVE_LOCKS (HL_TXNID); 77 | 78 | CREATE TABLE NEXT_LOCK_ID ( 79 | NL_NEXT bigint NOT NULL 80 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 81 | INSERT INTO NEXT_LOCK_ID VALUES(1); 82 | 83 | CREATE TABLE COMPACTION_QUEUE ( 84 | CQ_ID bigint PRIMARY KEY, 85 | CQ_DATABASE varchar(128) NOT NULL, 86 | CQ_TABLE varchar(128) NOT NULL, 87 | CQ_PARTITION varchar(767), 88 | CQ_STATE char(1) NOT NULL, 89 | CQ_TYPE char(1) NOT NULL, 90 | CQ_TBLPROPERTIES varchar(2048), 91 | CQ_WORKER_ID varchar(128), 92 | CQ_START bigint, 93 | CQ_RUN_AS varchar(128), 94 | CQ_HIGHEST_TXN_ID bigint, 95 | CQ_META_INFO varbinary(2048), 96 | CQ_HADOOP_JOB_ID varchar(32) 97 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 98 | 99 | CREATE TABLE COMPLETED_COMPACTIONS ( 100 | CC_ID bigint PRIMARY KEY, 101 | CC_DATABASE varchar(128) NOT NULL, 102 | CC_TABLE varchar(128) NOT NULL, 103 | CC_PARTITION varchar(767), 104 | CC_STATE char(1) NOT NULL, 105 | CC_TYPE char(1) NOT NULL, 106 | CC_TBLPROPERTIES varchar(2048), 107 | CC_WORKER_ID varchar(128), 108 | CC_START bigint, 109 | CC_END bigint, 110 | CC_RUN_AS varchar(128), 111 | CC_HIGHEST_TXN_ID bigint, 112 | CC_META_INFO varbinary(2048), 113 | CC_HADOOP_JOB_ID varchar(32) 114 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 115 | 116 | CREATE TABLE NEXT_COMPACTION_QUEUE_ID ( 117 | NCQ_NEXT bigint NOT NULL 118 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 119 | INSERT INTO NEXT_COMPACTION_QUEUE_ID VALUES(1); 120 | 121 | CREATE TABLE AUX_TABLE ( 122 | MT_KEY1 
varchar(128) NOT NULL, 123 | MT_KEY2 bigint NOT NULL, 124 | MT_COMMENT varchar(255), 125 | PRIMARY KEY(MT_KEY1, MT_KEY2) 126 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 127 | 128 | CREATE TABLE WRITE_SET ( 129 | WS_DATABASE varchar(128) NOT NULL, 130 | WS_TABLE varchar(128) NOT NULL, 131 | WS_PARTITION varchar(767), 132 | WS_TXNID bigint NOT NULL, 133 | WS_COMMIT_ID bigint NOT NULL, 134 | WS_OPERATION_TYPE char(1) NOT NULL 135 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 136 | -------------------------------------------------------------------------------- /mysql/files/docker-entrypoint-initdb.d/kyuubi-schema-1.8.0.mysql.sql: -------------------------------------------------------------------------------- 1 | -- the metadata table ddl 2 | 3 | CREATE DATABASE IF NOT EXISTS kyuubi; 4 | 5 | USE kyuubi; 6 | 7 | CREATE TABLE IF NOT EXISTS metadata( 8 | key_id bigint PRIMARY KEY AUTO_INCREMENT COMMENT 'the auto increment key id', 9 | identifier varchar(36) NOT NULL COMMENT 'the identifier id, which is an UUID', 10 | session_type varchar(32) NOT NULL COMMENT 'the session type, SQL or BATCH', 11 | real_user varchar(255) NOT NULL COMMENT 'the real user', 12 | user_name varchar(255) NOT NULL COMMENT 'the user name, might be a proxy user', 13 | ip_address varchar(128) COMMENT 'the client ip address', 14 | kyuubi_instance varchar(1024) COMMENT 'the kyuubi instance that creates this', 15 | state varchar(128) NOT NULL COMMENT 'the session state', 16 | resource varchar(1024) COMMENT 'the main resource', 17 | class_name varchar(1024) COMMENT 'the main class name', 18 | request_name varchar(1024) COMMENT 'the request name', 19 | request_conf mediumtext COMMENT 'the request config map', 20 | request_args mediumtext COMMENT 'the request arguments', 21 | create_time BIGINT NOT NULL COMMENT 'the metadata create time', 22 | engine_type varchar(32) NOT NULL COMMENT 'the engine type', 23 | cluster_manager varchar(128) COMMENT 'the engine cluster manager', 24 | engine_open_time bigint COMMENT 'the engine open time', 25 | engine_id varchar(128) COMMENT 'the engine application id', 26 | engine_name mediumtext COMMENT 'the engine application name', 27 | engine_url varchar(1024) COMMENT 'the engine tracking url', 28 | engine_state varchar(32) COMMENT 'the engine application state', 29 | engine_error mediumtext COMMENT 'the engine application diagnose', 30 | end_time bigint COMMENT 'the metadata end time', 31 | priority int NOT NULL DEFAULT 10 COMMENT 'the application priority, high value means high priority', 32 | peer_instance_closed boolean default '0' COMMENT 'closed by peer kyuubi instance', 33 | UNIQUE INDEX unique_identifier_index(identifier), 34 | INDEX user_name_index(user_name), 35 | INDEX engine_type_index(engine_type), 36 | INDEX create_time_index(create_time), 37 | -- See more detail about this index in ./005-KYUUBI-5327.mysql.sql 38 | INDEX priority_create_time_index(priority DESC, create_time ASC) 39 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; 40 | -------------------------------------------------------------------------------- /mysql/files/docker-entrypoint-initdb.d/ranger-database-init.mysql.sql: -------------------------------------------------------------------------------- 1 | -- Licensed to the Apache Software Foundation (ASF) under one or more 2 | -- contributor license agreements. See the NOTICE file distributed with 3 | -- this work for additional information regarding copyright ownership. 
4 | -- The ASF licenses this file to You under the Apache License, Version 2.0 5 | -- (the "License"); you may not use this file except in compliance with 6 | -- the License. You may obtain a copy of the License at 7 | -- 8 | -- http://www.apache.org/licenses/LICENSE-2.0 9 | -- 10 | -- Unless required by applicable law or agreed to in writing, software 11 | -- distributed under the License is distributed on an "AS IS" BASIS, 12 | -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | -- See the License for the specific language governing permissions and 14 | -- limitations under the License. 15 | 16 | CREATE DATABASE IF NOT EXISTS ranger; 17 | -------------------------------------------------------------------------------- /node-template.yaml: -------------------------------------------------------------------------------- 1 | # Required Parameter: node 2 | 3 | - name: orchestrate {{ node.name }} - clean 4 | ansible.builtin.file: 5 | path: "{{ node.name }}" 6 | state: absent 7 | 8 | - name: orchestrate {{ node.name }} - hadoop-common - mkdir 9 | ansible.builtin.file: 10 | path: "{{ node.name }}/{{ item.path }}" 11 | state: directory 12 | with_community.general.filetree: "templates/hadoop-common" 13 | when: item.state == "directory" 14 | 15 | - name: orchestrate {{ node.name }} - hadoop-common - template 16 | ansible.builtin.template: 17 | src: "{{ item.src }}" 18 | dest: "{{ node.name }}/{{ item.path | regex_replace('\\.j2$', '') }}" 19 | mode: "a+x" 20 | with_community.general.filetree: "templates/hadoop-common" 21 | when: item.state == "file" 22 | 23 | - name: orchestrate {{ node.name }} - {{ node.group }} - mkdir 24 | ansible.builtin.file: 25 | path: "{{ node.name }}/{{ item.path }}" 26 | state: directory 27 | with_community.general.filetree: "templates/{{ node.group }}" 28 | when: item.state == "directory" 29 | 30 | - name: orchestrate {{ node.name }} - {{ node.group }} - template 31 | ansible.builtin.template: 32 | src: "{{ item.src }}" 33 | dest: "{{ node.name }}/{{ item.path | regex_replace('\\.j2$', '') }}" 34 | mode: "a+x" 35 | with_community.general.filetree: "templates/{{ node.group }}" 36 | when: item.state == "file" 37 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ansible==8.7.0 2 | ansible-core==2.15.13 3 | cffi==1.17.1 4 | cryptography==44.0.0 5 | Jinja2==3.1.5 6 | MarkupSafe==3.0.2 7 | packaging==24.2 8 | pycparser==2.22 9 | PyYAML==6.0.2 10 | resolvelib==1.0.1 11 | -------------------------------------------------------------------------------- /templates/.env.j2: -------------------------------------------------------------------------------- 1 | PROJECT_VERSION={{ project_version }} 2 | 3 | ZULU8_VERSION={{ zulu8_version }} 4 | ZULU11_VERSION={{ zulu11_version }} 5 | ZULU17_VERSION={{ zulu17_version }} 6 | ZULU21_VERSION={{ zulu21_version }} 7 | JDK8_VERSION={{ jdk8_version }} 8 | JDK11_VERSION={{ jdk11_version }} 9 | JDK17_VERSION={{ jdk17_version }} 10 | JDK21_VERSION={{ jdk21_version }} 11 | 12 | AWS_JAVA_SDK_VERSION={{ aws_java_sdk_version }} 13 | HADOOP_VERSION={{ hadoop_version }} 14 | HIVE_VERSION={{ hive_version }} 15 | ICEBERG_VERSION={{ iceberg_version }} 16 | HUDI_VERSION={{ hudi_version }} 17 | KAFKA_VERSION={{ kafka_version }} 18 | KAFKA_UI_VERSION={{ kafka_ui_version }} 19 | KYUUBI_VERSION={{ kyuubi_version }} 20 | KYUUBI_HADOOP_VERSION={{ kyuubi_hadoop_version }} 21 | LOKI_APPENDER_VERSION={{ 
loki_log4j2_appender_version }} 22 | MYSQL_JDBC_VERSION=8.0.33 23 | SPARK_SCALA_BINARY_VERSION={{ spark_scala_binary_version }} 24 | SPARK_VERSION={{ spark_version }} 25 | SPARK_BINARY_VERSION={{ spark_binary_version }} 26 | SPARK_HADOOP_VERSION={{ spark_hadoop_version }} 27 | FLINK_BINARY_VERSION={{ flink_binary_version }} 28 | FLINK_VERSION={{ flink_version }} 29 | FLINK_HIVE_VERSION={{ flink_hive_version }} 30 | ZEPPELIN_VERSION={{ zeppelin_version }} 31 | ZOOKEEPER_VERSION={{ zookeeper_version }} 32 | RANGER_VERSION={{ ranger_version }} 33 | TRINO_VERSION={{ trino_version }} 34 | PARQUET_VERSION={{ parquet_version }} 35 | 36 | GRAFANA_VERSION={{ grafana_version }} 37 | PROMETHEUS_VERSION={{ prometheus_version }} 38 | LOKI_VERSION={{ loki_version }} 39 | 40 | # export APACHE_MIRROR=https://dlcdn.apache.org 41 | # export MAVEN_MIRROR=https://maven-central-asia.storage-download.googleapis.com/maven2 42 | APACHE_MIRROR=${APACHE_MIRROR:-{{ repository_apache }}} 43 | MAVEN_MIRROR=${MAVEN_MIRROR:-{{ repository_maven }}} 44 | -------------------------------------------------------------------------------- /templates/base-ubuntu-2004/Dockerfile.j2: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | # See the License for the specific language governing permissions and 11 | # limitations under the License. 
12 | 13 | FROM ubuntu:20.04 14 | 15 | ARG JDK8_TAR_NAME 16 | ARG JDK11_TAR_NAME 17 | ARG JDK17_TAR_NAME 18 | ARG JDK21_TAR_NAME 19 | 20 | COPY ./files / 21 | 22 | {% if apt_mirror_enabled %} 23 | RUN cat /etc/apt/sources.list.mirror-$(uname -m) > /etc/apt/sources.list 24 | {% endif %} 25 | 26 | RUN set -xeu && \ 27 | ln -snf /usr/bin/bash /usr/bin/sh && \ 28 | install_packages busybox python3-pip supervisor xsltproc curl tree jq python-is-python3 \ 29 | openssh-client openssh-server sqlite3 libcap-dev \ 30 | {% if kerberos_enabled %}krb5-user jsvc libssl1.1{% endif %} && \ 31 | mkdir /run/sshd && chmod 0755 /run/sshd && \ 32 | mkdir /opt/busybox && busybox --install /opt/busybox 33 | 34 | ADD download/${JDK8_TAR_NAME}.tar.gz /opt 35 | ADD download/${JDK11_TAR_NAME}.tar.gz /opt 36 | ADD download/${JDK17_TAR_NAME}.tar.gz /opt 37 | {% if jdk21_enabled %} 38 | ADD download/${JDK21_TAR_NAME}.tar.gz /opt 39 | {% endif %} 40 | 41 | RUN ln -snf /opt/${JDK8_TAR_NAME} /opt/openjdk-8 42 | RUN ln -snf /opt/${JDK11_TAR_NAME} /opt/openjdk-11 43 | RUN ln -snf /opt/${JDK17_TAR_NAME} /opt/openjdk-17 44 | {% if jdk21_enabled %} 45 | RUN ln -snf /opt/${JDK21_TAR_NAME} /opt/openjdk-21 46 | {% endif %} 47 | 48 | ENV JAVA_HOME=/opt/openjdk-8 49 | ENV PATH="$JAVA_HOME/bin:$PATH:/opt/busybox" 50 | ENV LC_ALL=C.UTF-8 51 | 52 | RUN groupadd hadoop --gid 6000 && \ 53 | useradd hadoop --gid 6000 --uid 6000 -m && \ 54 | groupadd hdfs --gid 6001 && \ 55 | useradd hdfs --gid 6001 --uid 6001 --groups hadoop -m && \ 56 | groupadd yarn --gid 6002 && \ 57 | useradd yarn --gid 6002 --uid 6002 --groups hadoop -m && \ 58 | groupadd mapred --gid 6003 && \ 59 | useradd mapred --gid 6003 --uid 6003 --groups hadoop -m && \ 60 | groupadd hive --gid 6004 && \ 61 | useradd hive --gid 6004 --uid 6004 -m && \ 62 | groupadd hbase --gid 6005 && \ 63 | useradd hbase --gid 6005 --uid 6005 -m && \ 64 | groupadd kafka --gid 6006 && \ 65 | useradd kafka --gid 6006 --uid 6006 -m && \ 66 | groupadd zookeeper --gid 6007 && \ 67 | useradd zookeeper --gid 6007 --uid 6007 -m && \ 68 | groupadd spark --gid 6008 && \ 69 | useradd spark --gid 6008 --uid 6008 -m && \ 70 | groupadd kyuubi --gid 6009 && \ 71 | useradd kyuubi --gid 6009 --uid 6009 -m && \ 72 | groupadd hue --gid 6010 && \ 73 | useradd hue --gid 6010 --uid 6010 -m && \ 74 | groupadd ranger --gid 6011 && \ 75 | useradd ranger --gid 6011 --uid 6011 -m && \ 76 | groupadd flink --gid 6012 && \ 77 | useradd flink --gid 6012 --uid 6012 -m && \ 78 | groupadd trino --gid 6013 && \ 79 | useradd trino --gid 6013 --uid 6013 -m && \ 80 | groupadd zeppelin --gid 6014 && \ 81 | useradd zeppelin --gid 6014 --uid 6014 -m 82 | -------------------------------------------------------------------------------- /templates/build-image.sh.j2: -------------------------------------------------------------------------------- 1 | #jinja2: trim_blocks: True, lstrip_blocks: True 2 | #!/usr/bin/env bash 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # 16 | 17 | set -xe 18 | 19 | SELF_DIR="$(cd "$(dirname "$0")"; pwd)" 20 | 21 | source "${SELF_DIR}/.env" 22 | 23 | # use fast copy (CoW) 24 | # https://github.com/pkolaczk/fclones/issues/219 25 | CP="cp --reflink=auto" 26 | if [ "$(uname)" == "Darwin" ]; then 27 | CP="cp -c" 28 | fi 29 | 30 | mkdir -p base-ubuntu-2004/download 31 | if [ $(uname -m) = "arm64" ] || [ $(uname -m) = "aarch64" ]; then JDK8_TAR_NAME=zulu${ZULU8_VERSION}-ca-jdk${JDK8_VERSION}-linux_aarch64; else JDK8_TAR_NAME=zulu${ZULU8_VERSION}-ca-jdk${JDK8_VERSION}-linux_x64; fi 32 | $CP download/${JDK8_TAR_NAME}.tar.gz base-ubuntu-2004/download/${JDK8_TAR_NAME}.tar.gz 33 | if [ $(uname -m) = "arm64" ] || [ $(uname -m) = "aarch64" ]; then JDK11_TAR_NAME=zulu${ZULU11_VERSION}-ca-jdk${JDK11_VERSION}-linux_aarch64; else JDK11_TAR_NAME=zulu${ZULU11_VERSION}-ca-jdk${JDK11_VERSION}-linux_x64; fi 34 | $CP download/${JDK11_TAR_NAME}.tar.gz base-ubuntu-2004/download/${JDK11_TAR_NAME}.tar.gz 35 | if [ $(uname -m) = "arm64" ] || [ $(uname -m) = "aarch64" ]; then JDK17_TAR_NAME=zulu${ZULU17_VERSION}-ca-jdk${JDK17_VERSION}-linux_aarch64; else JDK17_TAR_NAME=zulu${ZULU17_VERSION}-ca-jdk${JDK17_VERSION}-linux_x64; fi 36 | $CP download/${JDK17_TAR_NAME}.tar.gz base-ubuntu-2004/download/${JDK17_TAR_NAME}.tar.gz 37 | {% if jdk21_enabled %} 38 | if [ $(uname -m) = "arm64" ] || [ $(uname -m) = "aarch64" ]; then JDK21_TAR_NAME=zulu${ZULU21_VERSION}-ca-jdk${JDK21_VERSION}-linux_aarch64; else JDK21_TAR_NAME=zulu${ZULU21_VERSION}-ca-jdk${JDK21_VERSION}-linux_x64; fi 39 | $CP download/${JDK21_TAR_NAME}.tar.gz base-ubuntu-2004/download/${JDK21_TAR_NAME}.tar.gz 40 | {% endif %} 41 | 42 | docker build \ 43 | --file "${SELF_DIR}/base-ubuntu-2004/Dockerfile" \ 44 | --build-arg JDK8_TAR_NAME=${JDK8_TAR_NAME} \ 45 | --build-arg JDK11_TAR_NAME=${JDK11_TAR_NAME} \ 46 | --build-arg JDK17_TAR_NAME=${JDK17_TAR_NAME} \ 47 | --build-arg JDK21_TAR_NAME=${JDK21_TAR_NAME} \ 48 | --tag hadoop-testing/base-ubuntu-2004:${PROJECT_VERSION} \ 49 | "${SELF_DIR}/base-ubuntu-2004" $@ 50 | 51 | rm -rf base-ubuntu-2004/download/* 52 | 53 | {% if kerberos_enabled %} 54 | docker build \ 55 | --build-arg PROJECT_VERSION=${PROJECT_VERSION} \ 56 | --file "${SELF_DIR}/kdc/Dockerfile" \ 57 | --tag hadoop-testing/kdc:${PROJECT_VERSION} \ 58 | "${SELF_DIR}/kdc" $@ 59 | {% endif %} 60 | 61 | function build_hadoop_master_image() { 62 | local INDEX=$1 63 | mkdir -p hadoop-master${INDEX}/download 64 | {% if zeppelin_enabled %} 65 | $CP download/zeppelin-${ZEPPELIN_VERSION}-bin{{ '-%s' % zeppelin_custom_name if zeppelin_custom_name }}.tgz hadoop-master${INDEX}/download/zeppelin-${ZEPPELIN_VERSION}-bin{{ '-%s' % zeppelin_custom_name if zeppelin_custom_name }}.tgz 66 | {% endif %} 67 | $CP download/apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz hadoop-master${INDEX}/download/apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz 68 | if [ $(uname -m) = "arm64" ] || [ $(uname -m) = "aarch64" ]; then HADOOP_TAR_NAME=hadoop-${HADOOP_VERSION}-aarch64; else HADOOP_TAR_NAME=hadoop-${HADOOP_VERSION}; fi 69 | $CP download/${HADOOP_TAR_NAME}.tar.gz hadoop-master${INDEX}/download/hadoop-${HADOOP_VERSION}.tar.gz 70 | $CP download/apache-hive-${HIVE_VERSION}-bin.tar.gz hadoop-master${INDEX}/download/apache-hive-${HIVE_VERSION}-bin.tar.gz 71 | {% if spark_enabled %} 72 | $CP download/spark-${SPARK_VERSION}-bin-{{ spark_custom_name }}.tgz hadoop-master${INDEX}/download/spark-${SPARK_VERSION}-bin-{{ spark_custom_name }}.tgz 73 | {% endif %} 74 | {% if flink_enabled %} 75 | $CP 
download/flink-${FLINK_VERSION}-bin-scala_2.12.tgz hadoop-master${INDEX}/download/flink-${FLINK_VERSION}-bin-scala_2.12.tgz 76 | $CP download/flink-sql-connector-hive-${FLINK_HIVE_VERSION}_2.12-${FLINK_VERSION}.jar hadoop-master${INDEX}/download/flink-sql-connector-hive-${FLINK_HIVE_VERSION}_2.12-${FLINK_VERSION}.jar 77 | {% endif %} 78 | $CP download/apache-kyuubi-${KYUUBI_VERSION}-bin.tgz hadoop-master${INDEX}/download/apache-kyuubi-${KYUUBI_VERSION}-bin.tgz 79 | $CP download/kyuubi-hive-jdbc-shaded-${KYUUBI_VERSION}.jar hadoop-master${INDEX}/download/kyuubi-hive-jdbc-shaded-${KYUUBI_VERSION}.jar 80 | {% if ranger_enabled %} 81 | $CP download/ranger-${RANGER_VERSION}-admin.tar.gz hadoop-master${INDEX}/download/ranger-${RANGER_VERSION}-admin.tar.gz 82 | {% endif %} 83 | {% if spark_enabled %} 84 | $CP download/kyuubi-spark-connector-tpch_${SPARK_SCALA_BINARY_VERSION}-${KYUUBI_VERSION}.jar hadoop-master${INDEX}/download/kyuubi-spark-connector-tpch_${SPARK_SCALA_BINARY_VERSION}-${KYUUBI_VERSION}.jar 85 | $CP download/kyuubi-spark-connector-tpcds_${SPARK_SCALA_BINARY_VERSION}-${KYUUBI_VERSION}.jar hadoop-master${INDEX}/download/kyuubi-spark-connector-tpcds_${SPARK_SCALA_BINARY_VERSION}-${KYUUBI_VERSION}.jar 86 | {% endif %} 87 | $CP download/mysql-connector-j-${MYSQL_JDBC_VERSION}.jar hadoop-master${INDEX}/download/mysql-connector-j-${MYSQL_JDBC_VERSION}.jar 88 | $CP download/log4j2-appender-nodep-${LOKI_APPENDER_VERSION}.jar hadoop-master${INDEX}/download/log4j2-appender-nodep-${LOKI_APPENDER_VERSION}.jar 89 | {% if spark_enabled and iceberg_enabled %} 90 | $CP download/iceberg-spark-runtime-${SPARK_BINARY_VERSION}_${SPARK_SCALA_BINARY_VERSION}-${ICEBERG_VERSION}.jar hadoop-master${INDEX}/download/iceberg-spark-runtime-${SPARK_BINARY_VERSION}_${SPARK_SCALA_BINARY_VERSION}-${ICEBERG_VERSION}.jar 91 | {% endif %} 92 | {% if flink_enabled and iceberg_enabled %} 93 | $CP download/iceberg-flink-runtime-${FLINK_BINARY_VERSION}-${ICEBERG_VERSION}.jar hadoop-master${INDEX}/download/iceberg-flink-runtime-${FLINK_BINARY_VERSION}-${ICEBERG_VERSION}.jar 94 | {% endif %} 95 | {% if spark_enabled and hudi_enabled %} 96 | $CP download/hudi-spark${SPARK_BINARY_VERSION}-bundle_${SPARK_SCALA_BINARY_VERSION}-${HUDI_VERSION}.jar hadoop-master${INDEX}/download/hudi-spark${SPARK_BINARY_VERSION}-bundle_${SPARK_SCALA_BINARY_VERSION}-${HUDI_VERSION}.jar 97 | {% endif %} 98 | $CP download/jcl-over-slf4j-1.7.36.jar hadoop-master${INDEX}/download/jcl-over-slf4j-1.7.36.jar 99 | $CP download/log4j-layout-template-json-2.20.0.jar hadoop-master${INDEX}/download/log4j-layout-template-json-2.20.0.jar 100 | {% if trino_enabled %} 101 | $CP download/trino-server-${TRINO_VERSION}.tar.gz hadoop-master${INDEX}/download/trino-server-${TRINO_VERSION}.tar.gz 102 | $CP download/trino-cli-${TRINO_VERSION}-executable.jar hadoop-master${INDEX}/download/trino-cli-${TRINO_VERSION}-executable.jar 103 | {% endif %} 104 | {% if parquet_enabled %} 105 | $CP download/parquet-cli-${PARQUET_VERSION}-runtime.jar hadoop-master${INDEX}/download/parquet-cli-${PARQUET_VERSION}-runtime.jar 106 | {% endif %} 107 | 108 | docker build \ 109 | --build-arg PROJECT_VERSION=${PROJECT_VERSION} \ 110 | --build-arg ZEPPELIN_VERSION=${ZEPPELIN_VERSION} \ 111 | --build-arg ZOOKEEPER_VERSION=${ZOOKEEPER_VERSION} \ 112 | --build-arg HADOOP_VERSION=${HADOOP_VERSION} \ 113 | --build-arg HIVE_VERSION=${HIVE_VERSION} \ 114 | --build-arg SPARK_VERSION=${SPARK_VERSION} \ 115 | --build-arg SPARK_BINARY_VERSION=${SPARK_BINARY_VERSION} \ 116 | --build-arg 
FLINK_VERSION=${FLINK_VERSION} \ 117 | --build-arg FLINK_BINARY_VERSION=${FLINK_BINARY_VERSION} \ 118 | --build-arg FLINK_HIVE_VERSION=${FLINK_HIVE_VERSION} \ 119 | --build-arg SPARK_SCALA_BINARY_VERSION=${SPARK_SCALA_BINARY_VERSION} \ 120 | --build-arg KYUUBI_VERSION=${KYUUBI_VERSION} \ 121 | --build-arg RANGER_VERSION=${RANGER_VERSION} \ 122 | --build-arg MYSQL_JDBC_VERSION=${MYSQL_JDBC_VERSION} \ 123 | --build-arg ICEBERG_VERSION=${ICEBERG_VERSION} \ 124 | --build-arg HUDI_VERSION=${HUDI_VERSION} \ 125 | --build-arg LOKI_APPENDER_VERSION=${LOKI_APPENDER_VERSION} \ 126 | --build-arg TRINO_VERSION=${TRINO_VERSION} \ 127 | --build-arg PARQUET_VERSION=${PARQUET_VERSION} \ 128 | --file "${SELF_DIR}/hadoop-master${INDEX}/Dockerfile" \ 129 | --tag hadoop-testing/hadoop-master${INDEX}:${PROJECT_VERSION} \ 130 | "${SELF_DIR}/hadoop-master${INDEX}" $2 131 | 132 | rm -rf hadoop-master${INDEX}/download/* 133 | } 134 | 135 | build_hadoop_master_image 1 "$@" 136 | 137 | function build_hadoop_worker_image() { 138 | local INDEX=$1 139 | mkdir -p hadoop-worker${INDEX}/download 140 | if [ $(uname -m) = "arm64" ] || [ $(uname -m) = "aarch64" ]; then HADOOP_TAR_NAME=hadoop-${HADOOP_VERSION}-aarch64; else HADOOP_TAR_NAME=hadoop-${HADOOP_VERSION}; fi 141 | $CP download/${HADOOP_TAR_NAME}.tar.gz hadoop-worker${INDEX}/download/hadoop-${HADOOP_VERSION}.tar.gz 142 | {% if spark_enabled %} 143 | $CP download/spark-${SPARK_VERSION}-bin-{{ spark_custom_name }}.tgz hadoop-worker${INDEX}/download/spark-${SPARK_VERSION}-bin-{{ spark_custom_name }}.tgz 144 | {% endif %} 145 | {% if trino_enabled %} 146 | $CP download/trino-server-${TRINO_VERSION}.tar.gz hadoop-worker${INDEX}/download/trino-server-${TRINO_VERSION}.tar.gz 147 | {% endif %} 148 | {% if spark_enabled %} 149 | tar -xzf hadoop-worker${INDEX}/download/spark-${SPARK_VERSION}-bin-{{ spark_custom_name }}.tgz -C hadoop-worker${INDEX}/download spark-${SPARK_VERSION}-bin-{{ spark_custom_name }}/yarn 150 | {% endif %} 151 | docker build \ 152 | --build-arg PROJECT_VERSION=${PROJECT_VERSION} \ 153 | --build-arg HADOOP_VERSION=${HADOOP_VERSION} \ 154 | --build-arg SPARK_VERSION=${SPARK_VERSION} \ 155 | --build-arg TRINO_VERSION=${TRINO_VERSION} \ 156 | --file "${SELF_DIR}/hadoop-worker${INDEX}/Dockerfile" \ 157 | --tag hadoop-testing/hadoop-worker${INDEX}:${PROJECT_VERSION} \ 158 | "${SELF_DIR}/hadoop-worker${INDEX}" $2 159 | 160 | rm -rf hadoop-worker${INDEX}/download/* 161 | } 162 | 163 | build_hadoop_worker_image 1 "$@" 164 | build_hadoop_worker_image 2 "$@" 165 | build_hadoop_worker_image 3 "$@" 166 | -------------------------------------------------------------------------------- /templates/compose.yaml.j2: -------------------------------------------------------------------------------- 1 | #jinja2: trim_blocks: True, lstrip_blocks: True 2 | services: 3 | mysql: 4 | image: bitnami/mysql:8.0 5 | hostname: mysql.orb.local 6 | container_name: mysql 7 | networks: 8 | - hadoop_net 9 | environment: 10 | - MYSQL_ROOT_PASSWORD=root 11 | - MYSQL_AUTHENTICATION_PLUGIN=mysql_native_password 12 | - MYSQL_DATABASE=metastore 13 | volumes: 14 | - ./mysql/files/docker-entrypoint-initdb.d:/docker-entrypoint-initdb.d 15 | ports: 16 | - 3306:3306 17 | 18 | socks5: 19 | image: serjs/go-socks5-proxy 20 | hostname: socks5.orb.local 21 | container_name: socks5 22 | networks: 23 | - hadoop_net 24 | environment: 25 | - PROXY_PORT=18070 26 | ports: 27 | - 18070:18070 28 | 29 | hadoop-master1: 30 | image: hadoop-testing/hadoop-master1:${PROJECT_VERSION} 31 | hostname: 
hadoop-master1.orb.local 32 | container_name: hadoop-master1 33 | networks: 34 | - hadoop_net 35 | volumes: 36 | - share:/share 37 | ports: 38 | - 8088:8088 39 | - 9870:9870 40 | - 10009:10009 41 | - 10099:10099 42 | - 18080:18080 43 | - 19888:19888 44 | - 6080:6080 45 | depends_on: 46 | - mysql 47 | 48 | hadoop-worker1: 49 | image: hadoop-testing/hadoop-worker1:${PROJECT_VERSION} 50 | hostname: hadoop-worker1.orb.local 51 | container_name: hadoop-worker1 52 | networks: 53 | - hadoop_net 54 | privileged: true 55 | volumes: 56 | - share:/share 57 | depends_on: 58 | - hadoop-master1 59 | 60 | hadoop-worker2: 61 | image: hadoop-testing/hadoop-worker2:${PROJECT_VERSION} 62 | hostname: hadoop-worker2.orb.local 63 | container_name: hadoop-worker2 64 | networks: 65 | - hadoop_net 66 | privileged: true 67 | volumes: 68 | - share:/share 69 | depends_on: 70 | - hadoop-master1 71 | 72 | hadoop-worker3: 73 | image: hadoop-testing/hadoop-worker3:${PROJECT_VERSION} 74 | hostname: hadoop-worker3.orb.local 75 | container_name: hadoop-worker3 76 | networks: 77 | - hadoop_net 78 | privileged: true 79 | volumes: 80 | - share:/share 81 | depends_on: 82 | - hadoop-master1 83 | 84 | {% if kerberos_enabled %} 85 | kdc: 86 | image: hadoop-testing/kdc:${PROJECT_VERSION} 87 | hostname: kdc.orb.local 88 | container_name: kdc 89 | networks: 90 | - hadoop_net 91 | volumes: 92 | - share:/share 93 | ports: 94 | - 88:88 95 | - 89:89 96 | {% endif %} 97 | 98 | {% if prometheus_enabled %} 99 | prometheus: 100 | image: prom/prometheus:v${PROMETHEUS_VERSION} 101 | hostname: prometheus.orb.local 102 | container_name: prometheus 103 | networks: 104 | - hadoop_net 105 | volumes: 106 | - ./files/etc/prometheus:/etc/prometheus 107 | ports: 108 | - 9090:9090 109 | {% endif %} 110 | 111 | {% if loki_enabled %} 112 | loki: 113 | image: grafana/loki:${LOKI_VERSION} 114 | hostname: loki.orb.local 115 | container_name: loki 116 | networks: 117 | - hadoop_net 118 | volumes: 119 | - ./files/etc/loki:/etc/loki 120 | ports: 121 | - 3100:3100 122 | command: -config.file=/etc/loki/config.yaml 123 | {% endif %} 124 | 125 | {% if grafana_enabled %} 126 | grafana: 127 | image: grafana/grafana-oss:${GRAFANA_VERSION} 128 | hostname: grafana.orb.local 129 | container_name: grafana 130 | networks: 131 | - hadoop_net 132 | environment: 133 | - GF_PATHS_PROVISIONING=/etc/grafana/provisioning 134 | - GF_AUTH_ANONYMOUS_ENABLED=true 135 | - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin 136 | volumes: 137 | - ./files/etc/grafana/provisioning:/etc/grafana/provisioning 138 | - ./files/var/lib/grafana/dashboards:/var/lib/grafana/dashboards 139 | ports: 140 | - 3000:3000 141 | {% endif %} 142 | 143 | {% if kafka_enabled %} 144 | kafka: 145 | image: bitnami/kafka:${KAFKA_VERSION} 146 | hostname: kafka.orb.local 147 | container_name: kafka 148 | networks: 149 | - hadoop_net 150 | ports: 151 | - 9092:9092 152 | environment: 153 | - KAFKA_ZOOKEEPER_CONNECT=hadoop-master1.orb.local:2181 154 | - ALLOW_PLAINTEXT_LISTENER=yes 155 | depends_on: 156 | - hadoop-master1 157 | {% endif %} 158 | 159 | {% if kafka_enabled and kafka_ui_enabled %} 160 | kafka-ui: 161 | image: kafbat/kafka-ui:v${KAFKA_UI_VERSION} 162 | hostname: kafka-ui.orb.local 163 | container_name: kafka-ui 164 | networks: 165 | - hadoop_net 166 | ports: 167 | - 19092:19092 168 | environment: 169 | SERVER_PORT: 19092 170 | KAFKA_CLUSTERS_0_BOOTSTRAPSERVERS: kafka.orb.local:9092 171 | {% endif %} 172 | 173 | volumes: 174 | share: 175 | 176 | networks: 177 | hadoop_net: 178 | name: orb.local 179 | driver: bridge 
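# Usage sketch (an illustration, not part of the upstream template): once this compose template
# and .env.j2 have been rendered into the same directory, the stack is typically brought up with
# Docker Compose, which reads PROJECT_VERSION and the other variables from ./.env automatically:
#
#   docker compose up -d
#   docker compose ps
#   docker compose logs -f hadoop-master1
#
# The host port mappings above publish, for example, the YARN ResourceManager UI on 8088,
# the HDFS NameNode UI on 9870, Kyuubi on 10009, and the Spark History Server on 18080.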
-------------------------------------------------------------------------------- /templates/download.sh.j2: -------------------------------------------------------------------------------- 1 | #jinja2: trim_blocks: True, lstrip_blocks: True 2 | #!/usr/bin/env bash 3 | 4 | SELF_DIR="$(cd "$(dirname "$0")"; pwd)" 5 | 6 | source "${SELF_DIR}/.env" 7 | 8 | function download_if_not_exists() { 9 | local url=$1 10 | local filename=${url##*/} 11 | if [ ! -s "${SELF_DIR}/download/$filename" ]; then 12 | echo "delete empty $filename ..." 13 | rm -f "${SELF_DIR}/download/$filename" 14 | fi 15 | if [ ! -f "${SELF_DIR}/download/$filename" ]; then 16 | echo "downloading $filename ..." 17 | wget -O "${SELF_DIR}/download/$filename" $url 18 | else 19 | echo "skip downloading existing $filename" 20 | fi 21 | } 22 | 23 | mkdir -p "${SELF_DIR}/download" 24 | 25 | if [ $(uname -m) = "arm64" ] || [ $(uname -m) = "aarch64" ]; then JDK8_TAR_NAME=zulu${ZULU8_VERSION}-ca-jdk${JDK8_VERSION}-linux_aarch64; else JDK8_TAR_NAME=zulu${ZULU8_VERSION}-ca-jdk${JDK8_VERSION}-linux_x64; fi 26 | download_if_not_exists https://cdn.azul.com/zulu/bin/${JDK8_TAR_NAME}.tar.gz 27 | 28 | if [ $(uname -m) = "arm64" ] || [ $(uname -m) = "aarch64" ]; then JDK11_TAR_NAME=zulu${ZULU11_VERSION}-ca-jdk${JDK11_VERSION}-linux_aarch64; else JDK11_TAR_NAME=zulu${ZULU11_VERSION}-ca-jdk${JDK11_VERSION}-linux_x64; fi 29 | download_if_not_exists https://cdn.azul.com/zulu/bin/${JDK11_TAR_NAME}.tar.gz 30 | 31 | if [ $(uname -m) = "arm64" ] || [ $(uname -m) = "aarch64" ]; then JDK17_TAR_NAME=zulu${ZULU17_VERSION}-ca-jdk${JDK17_VERSION}-linux_aarch64; else JDK17_TAR_NAME=zulu${ZULU17_VERSION}-ca-jdk${JDK17_VERSION}-linux_x64; fi 32 | download_if_not_exists https://cdn.azul.com/zulu/bin/${JDK17_TAR_NAME}.tar.gz 33 | 34 | {% if jdk21_enabled %} 35 | if [ $(uname -m) = "arm64" ] || [ $(uname -m) = "aarch64" ]; then JDK21_TAR_NAME=zulu${ZULU21_VERSION}-ca-jdk${JDK21_VERSION}-linux_aarch64; else JDK21_TAR_NAME=zulu${ZULU21_VERSION}-ca-jdk${JDK21_VERSION}-linux_x64; fi 36 | download_if_not_exists https://cdn.azul.com/zulu/bin/${JDK21_TAR_NAME}.tar.gz 37 | {% endif %} 38 | 39 | {% if zeppelin_enabled %} 40 | download_if_not_exists ${APACHE_MIRROR}/zeppelin/zeppelin-${ZEPPELIN_VERSION}/zeppelin-${ZEPPELIN_VERSION}-bin{{ '-%s' % zeppelin_custom_name if zeppelin_custom_name }}.tgz 41 | {% endif %} 42 | download_if_not_exists ${APACHE_MIRROR}/zookeeper/zookeeper-${ZOOKEEPER_VERSION}/apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz 43 | if [ $(uname -m) = "arm64" ] || [ $(uname -m) = "aarch64" ]; then HADOOP_TAR_NAME=hadoop-${HADOOP_VERSION}-aarch64; else HADOOP_TAR_NAME=hadoop-${HADOOP_VERSION}; fi 44 | download_if_not_exists ${APACHE_MIRROR}/hadoop/core/hadoop-${HADOOP_VERSION}/${HADOOP_TAR_NAME}.tar.gz 45 | {# Hive 2 is EOL, only available at archive.apache.org/dist #} 46 | download_if_not_exists https://archive.apache.org/dist/hive/hive-${HIVE_VERSION}/apache-hive-${HIVE_VERSION}-bin.tar.gz 47 | {% if spark_enabled %} 48 | download_if_not_exists ${APACHE_MIRROR}/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-{{ spark_custom_name }}.tgz 49 | {% endif %} 50 | {% if flink_enabled %} 51 | download_if_not_exists ${APACHE_MIRROR}/flink/flink-${FLINK_VERSION}/flink-${FLINK_VERSION}-bin-scala_2.12.tgz 52 | download_if_not_exists ${MAVEN_MIRROR}/org/apache/flink/flink-sql-connector-hive-${FLINK_HIVE_VERSION}_2.12/${FLINK_VERSION}/flink-sql-connector-hive-${FLINK_HIVE_VERSION}_2.12-${FLINK_VERSION}.jar 53 | {% endif %} 54 | {% if kyuubi_enabled %} 55 | 
download_if_not_exists ${APACHE_MIRROR}/kyuubi/kyuubi-${KYUUBI_VERSION}/apache-kyuubi-${KYUUBI_VERSION}-bin.tgz 56 | download_if_not_exists ${MAVEN_MIRROR}/org/apache/kyuubi/kyuubi-hive-jdbc-shaded/${KYUUBI_VERSION}/kyuubi-hive-jdbc-shaded-${KYUUBI_VERSION}.jar 57 | {% endif %} 58 | {% if kafka_enabled %} 59 | download_if_not_exists ${MAVEN_MIRROR}/org/apache/kafka/kafka-clients/${KAFKA_VERSION}/kafka-clients-${KAFKA_VERSION}.jar 60 | {% endif %} 61 | {% if ranger_enabled %} 62 | download_if_not_exists https://github.com/pan3793/ranger/releases/download/release-ranger-${RANGER_VERSION}/ranger-${RANGER_VERSION}-admin.tar.gz 63 | {% endif %} 64 | {% if trino_enabled %} 65 | download_if_not_exists ${MAVEN_MIRROR}/io/trino/trino-server/${TRINO_VERSION}/trino-server-${TRINO_VERSION}.tar.gz 66 | download_if_not_exists ${MAVEN_MIRROR}/io/trino/trino-cli/${TRINO_VERSION}/trino-cli-${TRINO_VERSION}-executable.jar 67 | {% endif %} 68 | {% if parquet_enabled %} 69 | PARQUET_CLI_JAR_NAME=parquet-cli 70 | download_if_not_exists ${MAVEN_MIRROR}/org/apache/parquet/${PARQUET_CLI_JAR_NAME}/${PARQUET_VERSION}/${PARQUET_CLI_JAR_NAME}-${PARQUET_VERSION}-runtime.jar 71 | {% endif %} 72 | 73 | MYSQL_JDBC_JAR_NAME=mysql-connector-j 74 | download_if_not_exists ${MAVEN_MIRROR}/com/mysql/${MYSQL_JDBC_JAR_NAME}/${MYSQL_JDBC_VERSION}/${MYSQL_JDBC_JAR_NAME}-${MYSQL_JDBC_VERSION}.jar 75 | 76 | {% if spark_enabled and s3_enabled %} 77 | HADOOP_AWS_JAR_NAME=hadoop-aws 78 | download_if_not_exists ${MAVEN_MIRROR}/org/apache/hadoop/${HADOOP_AWS_JAR_NAME}/${SPARK_HADOOP_VERSION}/${HADOOP_AWS_JAR_NAME}-${SPARK_HADOOP_VERSION}.jar 79 | AWS_JAVA_SDK_BUNDLE_JAR_NAME=aws-java-sdk-bundle 80 | download_if_not_exists ${MAVEN_MIRROR}/com/amazonaws/${AWS_JAVA_SDK_BUNDLE_JAR_NAME}/${AWS_JAVA_SDK_VERSION}/${AWS_JAVA_SDK_BUNDLE_JAR_NAME}-${AWS_JAVA_SDK_VERSION}.jar 81 | SPARK_HADOOP_CLOUD_JAR_NAME=spark-hadoop-cloud_${SPARK_SCALA_BINARY_VERSION} 82 | download_if_not_exists ${MAVEN_MIRROR}/org/apache/spark/${SPARK_HADOOP_CLOUD_JAR_NAME}/${SPARK_VERSION}/${SPARK_HADOOP_CLOUD_JAR_NAME}-${SPARK_VERSION}.jar 83 | HADOOP_CLOUD_STORAGE_JAR_NAME=hadoop-cloud-storage 84 | download_if_not_exists ${MAVEN_MIRROR}/org/apache/hadoop/${HADOOP_CLOUD_STORAGE_JAR_NAME}/${SPARK_HADOOP_VERSION}/${HADOOP_CLOUD_STORAGE_JAR_NAME}-${SPARK_HADOOP_VERSION}.jar 85 | {% endif %} 86 | 87 | {% if spark_enabled %} 88 | TPCDS_CONNECTOR_JAR_NAME=kyuubi-spark-connector-tpcds_${SPARK_SCALA_BINARY_VERSION} 89 | download_if_not_exists ${MAVEN_MIRROR}/org/apache/kyuubi/${TPCDS_CONNECTOR_JAR_NAME}/${KYUUBI_VERSION}/${TPCDS_CONNECTOR_JAR_NAME}-${KYUUBI_VERSION}.jar 90 | TPCH_CONNECTOR_JAR_NAME=kyuubi-spark-connector-tpch_${SPARK_SCALA_BINARY_VERSION} 91 | download_if_not_exists ${MAVEN_MIRROR}/org/apache/kyuubi/${TPCH_CONNECTOR_JAR_NAME}/${KYUUBI_VERSION}/${TPCH_CONNECTOR_JAR_NAME}-${KYUUBI_VERSION}.jar 92 | {% endif %} 93 | 94 | {% if spark_enabled and iceberg_enabled %} 95 | ICEBERG_SPARK_JAR_NAME=iceberg-spark-runtime-${SPARK_BINARY_VERSION}_${SPARK_SCALA_BINARY_VERSION} 96 | download_if_not_exists ${MAVEN_MIRROR}/org/apache/iceberg/${ICEBERG_SPARK_JAR_NAME}/${ICEBERG_VERSION}/${ICEBERG_SPARK_JAR_NAME}-${ICEBERG_VERSION}.jar 97 | {% endif %} 98 | 99 | {% if flink_enabled and iceberg_enabled %} 100 | ICEBERG_FLINK_JAR_NAME=iceberg-flink-runtime-${FLINK_BINARY_VERSION} 101 | download_if_not_exists ${MAVEN_MIRROR}/org/apache/iceberg/${ICEBERG_FLINK_JAR_NAME}/${ICEBERG_VERSION}/${ICEBERG_FLINK_JAR_NAME}-${ICEBERG_VERSION}.jar 102 | {% endif %} 103 | 104 | {% if spark_enabled 
and hudi_enabled %} 105 | HUDI_SPARK_BUNDLE_JAR_NAME=hudi-spark${SPARK_BINARY_VERSION}-bundle_${SPARK_SCALA_BINARY_VERSION} 106 | download_if_not_exists https://github.com/yanghua/incubator-hudi/releases/download/release-${HUDI_VERSION}/${HUDI_SPARK_BUNDLE_JAR_NAME}-${HUDI_VERSION}.jar 107 | {% endif %} 108 | 109 | LOKI_APPENDER_JAR_NAME=log4j2-appender-nodep 110 | download_if_not_exists ${MAVEN_MIRROR}/pl/tkowalcz/tjahzi/${LOKI_APPENDER_JAR_NAME}/${LOKI_APPENDER_VERSION}/${LOKI_APPENDER_JAR_NAME}-${LOKI_APPENDER_VERSION}.jar 111 | 112 | download_if_not_exists ${MAVEN_MIRROR}/org/slf4j/jcl-over-slf4j/1.7.36/jcl-over-slf4j-1.7.36.jar 113 | 114 | # TODO: Remove after kyuubi ships that jar 115 | download_if_not_exists ${MAVEN_MIRROR}/org/apache/logging/log4j/log4j-layout-template-json/2.20.0/log4j-layout-template-json-2.20.0.jar 116 | -------------------------------------------------------------------------------- /templates/hadoop-common/files/etc/hadoop/conf/capacity-scheduler.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | yarn.scheduler.capacity.maximum-applications 5 | 10000 6 | 7 | Maximum number of applications that can be pending and running. 8 | 9 | 10 | 11 | yarn.scheduler.capacity.maximum-am-resource-percent 12 | 1 13 | 14 | Maximum percent of resources in the cluster which can be used to run 15 | application masters i.e. controls number of concurrent running 16 | applications. 17 | 18 | 19 | 20 | yarn.scheduler.capacity.root.queues 21 | default 22 | 23 | The queues at the this level (root is the root queue). 24 | 25 | 26 | 27 | yarn.scheduler.capacity.root.default.capacity 28 | 100 29 | Default queue target capacity. 30 | 31 | 32 | yarn.scheduler.capacity.root.default.maximum-capacity 33 | 100 34 | 35 | The maximum capacity of the default queue. 36 | 37 | 38 | 39 | yarn.scheduler.capacity.root.default.state 40 | RUNNING 41 | 42 | The state of the default queue. State can be one of RUNNING or STOPPED. 43 | 44 | 45 | 46 | yarn.scheduler.capacity.root.default.acl_submit_applications 47 | * 48 | 49 | The ACL of who can submit jobs to the default queue. 50 | 51 | 52 | 53 | yarn.scheduler.capacity.root.default.user-limit-factor 54 | 1 55 | 56 | Default queue user limit a percentage from 0.0 to 1.0. 57 | 58 | 59 | 60 | yarn.scheduler.capacity.root.default.acl_administer_queue 61 | * 62 | 63 | The ACL of who can administer jobs on the default queue. 64 | 65 | 66 | 67 | yarn.scheduler.capacity.node-locality-delay 68 | -1 69 | 70 | Number of missed scheduling opportunities after which the CapacityScheduler 71 | attempts to schedule rack-local containers. 72 | Typically this should be set to number of racks in the cluster, this 73 | feature is disabled by default, set to -1. 
74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /templates/hadoop-common/files/etc/hadoop/conf/container-executor.cfg: -------------------------------------------------------------------------------- 1 | yarn.nodemanager.linux-container-executor.group=hadoop#configured value of yarn.nodemanager.linux-container-executor.group 2 | banned.users=#comma separated list of users who can not run applications 3 | min.user.id=1000#Prevent other super-users 4 | allowed.system.users=##comma separated list of system users who CAN run applications 5 | feature.tc.enabled=false 6 | -------------------------------------------------------------------------------- /templates/hadoop-common/files/etc/hadoop/conf/core-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% if kerberos_enabled %} 4 | 5 | hadoop.security.authentication 6 | kerberos 7 | 8 | 9 | hadoop.security.authorization 10 | true 11 | 12 | 13 | hadoop.rpc.protection 14 | authentication 15 | 16 | 17 | hadoop.security.auth_to_local 18 | 19 | RULE:[2:$1/$2@$0]([ndj]n/.*@TEST.ORG)s/.*/hdfs/ 20 | RULE:[2:$1/$2@$0]([rn]m/.*@TEST.ORG)s/.*/yarn/ 21 | RULE:[2:$1/$2@$0](jhs/.*@TEST.ORG)s/.*/mapred/ 22 | DEFAULT RULE:[1:$1] RULE:[2:$1] 23 | 24 | 25 | {% endif %} 26 | 27 | fs.defaultFS 28 | hdfs://hadoop-master1.orb.local:8020 29 | 30 | 31 | hadoop.caller.context.enabled 32 | true 33 | 34 | 35 | hadoop.proxyuser.hadoop.hosts 36 | * 37 | 38 | 39 | hadoop.proxyuser.hadoop.groups 40 | * 41 | 42 | 43 | hadoop.proxyuser.hdfs.hosts 44 | * 45 | 46 | 47 | hadoop.proxyuser.hdfs.groups 48 | * 49 | 50 | 51 | hadoop.proxyuser.yarn.hosts 52 | * 53 | 54 | 55 | hadoop.proxyuser.yarn.groups 56 | * 57 | 58 | 59 | hadoop.proxyuser.mapred.hosts 60 | * 61 | 62 | 63 | hadoop.proxyuser.mapred.groups 64 | * 65 | 66 | 67 | hadoop.proxyuser.hive.hosts 68 | * 69 | 70 | 71 | hadoop.proxyuser.hive.groups 72 | * 73 | 74 | 75 | hadoop.proxyuser.HTTP.hosts 76 | * 77 | 78 | 79 | hadoop.proxyuser.HTTP.groups 80 | * 81 | 82 | 83 | hadoop.proxyuser.kyuubi.hosts 84 | * 85 | 86 | 87 | hadoop.proxyuser.kyuubi.groups 88 | * 89 | 90 | 91 | hadoop.proxyuser.hue.hosts 92 | * 93 | 94 | 95 | hadoop.proxyuser.hue.groups 96 | * 97 | 98 | 99 | -------------------------------------------------------------------------------- /templates/hadoop-common/files/etc/hadoop/conf/hadoop-env.sh.j2: -------------------------------------------------------------------------------- 1 | # Set Hadoop-specific environment variables here. 2 | # Forcing YARN-based mapreduce implementaion. 3 | # Make sure to comment out if you want to go back to the default or 4 | # if you want this to be tweakable on a per-user basis 5 | # export HADOOP_MAPRED_HOME=/usr/lib/hadoop-mapreduce 6 | 7 | # The maximum amount of heap to use, in MB. Default is 1000. 8 | export HADOOP_HEAPSIZE=256 9 | 10 | # Extra Java runtime options. Empty by default. 
11 | export HADOOP_NAMENODE_OPTS="$HADOOP_NAMENODE_OPTS -Xmx512m" 12 | export YARN_OPTS="$YARN_OPTS -Xmx256m" 13 | 14 | # Necessary to prevent map reduce jobs triggered by hive queries from dying with OOM error 15 | export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS -Xmx512m" 16 | 17 | export HADOOP_LOG_DIR=/var/log/hadoop-hdfs 18 | export HDFS_AUDIT_LOGGER=INFO,RFAAUDIT 19 | 20 | {% if kerberos_enabled %} 21 | export HDFS_DATANODE_SECURE_USER=hdfs 22 | export JSVC_HOME=/usr/bin 23 | {% endif %} -------------------------------------------------------------------------------- /templates/hadoop-common/files/etc/hadoop/conf/hdfs-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% if kerberos_enabled %} 4 | 5 | 6 | dfs.block.access.token.enable 7 | true 8 | 9 | 10 | dfs.namenode.kerberos.principal 11 | nn/_HOST@TEST.ORG 12 | 13 | 14 | dfs.namenode.keytab.file 15 | /share/keytabs/hadoop-master1/nn.service.keytab 16 | 17 | 18 | dfs.namenode.kerberos.internal.spnego.principal 19 | HTTP/_HOST@TEST.ORG 20 | 21 | 22 | dfs.web.authentication.kerberos.keytab 23 | /share/keytabs/hadoop-master1/nn.service.keytab 24 | 25 | 26 | 27 | 28 | dfs.datanode.data.dir.perm 29 | 700 30 | 31 | 32 | dfs.datanode.http.address 33 | 0.0.0.0:9864 34 | 35 | 36 | dfs.datanode.address 37 | 0.0.0.0:9866 38 | 39 | 40 | dfs.datanode.kerberos.principal 41 | dn/_HOST@TEST.ORG 42 | 43 | 44 | dfs.datanode.keytab.file 45 | /share/keytabs/{{ node.name }}/dn.service.keytab 46 | 47 | 48 | dfs.encrypt.data.transfer 49 | false 50 | 51 | {% endif %} 52 | 53 | 54 | dfs.namenode.http-address 55 | 0.0.0.0:9870 56 | 57 | 58 | dfs.namenode.rpc-address 59 | hadoop-master1.orb.local:8020 60 | 61 | 62 | dfs.permissions.enabled 63 | false 64 | 65 | 66 | 67 | dfs.safemode.threshold.pct 68 | 0 69 | 70 | 71 | dfs.namenode.name.dir 72 | /var/lib/hadoop-hdfs/cache/name/ 73 | 74 | 75 | dfs.datanode.data.dir 76 | /var/lib/hadoop-hdfs/cache/data/ 77 | 78 | 79 | -------------------------------------------------------------------------------- /templates/hadoop-common/files/etc/hadoop/conf/mapred-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% if kerberos_enabled %} 4 | 5 | mapreduce.jobhistory.keytab 6 | /share/keytabs/hadoop-master1/jhs.service.keytab 7 | 8 | 9 | mapreduce.jobhistory.principal 10 | jhs/_HOST@TEST.ORG 11 | 12 | {% endif %} 13 | 14 | mapreduce.jobtracker.address 15 | hadoop-master1.orb.local:8021 16 | 17 | 18 | mapreduce.framework.name 19 | yarn 20 | 21 | 22 | mapreduce.jobhistory.address 23 | hadoop-master1.orb.local:10020 24 | 25 | 26 | mapreduce.jobhistory.webapp.address 27 | hadoop-master1.orb.local:19888 28 | 29 | 30 | yarn.app.mapreduce.am.staging-dir 31 | /mr-staging 32 | hdfs 33 | 34 | 35 | mapreduce.jobhistory.intermediate-done-dir 36 | /mr-history/intermediate 37 | hdfs 38 | 39 | 40 | mapreduce.jobhistory.done-dir 41 | /mr-history/done 42 | hdfs 43 | 44 | 45 | mapreduce.task.tmp.dir 46 | /var/lib/hadoop-mapreduce/cache/${user.name}/tasks 47 | To set the value of tmp directory for map and reduce tasks. 
48 | 49 | 50 | yarn.app.mapreduce.am.env 51 | HADOOP_MAPRED_HOME=/opt/hadoop 52 | 53 | 54 | mapreduce.map.env 55 | HADOOP_MAPRED_HOME=/opt/hadoop 56 | 57 | 58 | mapreduce.reduce.env 59 | HADOOP_MAPRED_HOME=/opt/hadoop 60 | 61 | 62 | -------------------------------------------------------------------------------- /templates/hadoop-common/files/etc/hadoop/conf/yarn-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% if kerberos_enabled %} 4 | 5 | 6 | yarn.resourcemanager.principal 7 | rm/_HOST@TEST.ORG 8 | 9 | 10 | yarn.resourcemanager.keytab 11 | /share/keytabs/{{ node.name }}/rm.service.keytab 12 | 13 | 14 | 15 | 16 | yarn.nodemanager.principal 17 | nm/_HOST@TEST.ORG 18 | 19 | 20 | yarn.nodemanager.keytab 21 | /share/keytabs/{{ node.name }}/nm.service.keytab 22 | 23 | 24 | yarn.nodemanager.container-executor.class 25 | org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor 26 | 27 | 28 | yarn.nodemanager.linux-container-executor.group 29 | hadoop 30 | 31 | {% endif %} 32 | 33 | yarn.resourcemanager.address 34 | hadoop-master1.orb.local:8032 35 | 36 | 37 | yarn.nodemanager.aux-services 38 | mapreduce_shuffle,spark_shuffle 39 | 40 | 41 | yarn.nodemanager.aux-services.mapreduce_shuffle.class 42 | org.apache.hadoop.mapred.ShuffleHandler 43 | 44 | 45 | yarn.nodemanager.aux-services.spark_shuffle.classpath 46 | /opt/spark/yarn/* 47 | 48 | 49 | yarn.nodemanager.aux-services.spark_shuffle.class 50 | org.apache.spark.network.yarn.YarnShuffleService 51 | 52 | 53 | spark.shuffle.service.port 54 | 7001 55 | 56 | 57 | yarn.log-aggregation-enable 58 | true 59 | 60 | 61 | yarn.dispatcher.exit-on-error 62 | true 63 | 64 | 65 | yarn.nodemanager.local-dirs 66 | /var/lib/hadoop-yarn/cache/${user.name}/nm-local-dir 67 | local 68 | 69 | 70 | yarn.nodemanager.log-dirs 71 | /var/log/hadoop-yarn/containers 72 | local 73 | 74 | 75 | yarn.nodemanager.remote-app-log-dir 76 | /yarn-app-log 77 | hdfs 78 | 79 | 80 | yarn.application.classpath 81 | 82 | /etc/hadoop/conf, 83 | /opt/hadoop/share/hadoop/common/*, 84 | /opt/hadoop/share/hadoop/common/lib/*, 85 | /opt/hadoop/share/hadoop/hdfs/*, 86 | /opt/hadoop/share/hadoop/hdfs/lib/*, 87 | /opt/hadoop/share/hadoop/yarn/*, 88 | /opt/hadoop/share/hadoop/yarn/lib/* 89 | 90 | 91 | 92 | yarn.resourcemanager.hostname 93 | hadoop-master1.orb.local 94 | 95 | 96 | yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage 97 | 100 98 | 99 | 100 | yarn.nodemanager.pmem-check-enabled 101 | false 102 | 103 | 104 | yarn.nodemanager.vmem-check-enabled 105 | false 106 | 107 | 108 | yarn.nodemanager.resource.memory.enforced 109 | false 110 | 111 | 112 | yarn.nodemanager.elastic-memory-control.enabled 113 | false 114 | 115 | 116 | yarn.log.server.url 117 | http://hadoop-master1.orb.local:19888/jobhistory/logs 118 | 119 | 120 | yarn.scheduler.minimum-allocation-mb 121 | 256 122 | 123 | 124 | -------------------------------------------------------------------------------- /templates/hadoop-common/files/etc/krb5.conf: -------------------------------------------------------------------------------- 1 | [logging] 2 | default = FILE:/var/log/krb5libs.log 3 | kdc = FILE:/var/log/krb5kdc.log 4 | admin_server = FILE:/var/log/kadmind.log 5 | 6 | [libdefaults] 7 | default_realm = TEST.ORG 8 | dns_lookup_realm = false 9 | dns_lookup_kdc = false 10 | forwardable = true 11 | allow_weak_crypto = true 12 | 13 | [realms] 14 | TEST.ORG = { 15 | kdc = kdc.orb.local:88 16 | admin_server = kdc.orb.local 17 | } 18 | 19 | 
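# Sanity-check sketch (illustrative, not part of the upstream file; assumes kerberos_enabled is set
# and that the keytabs referenced by hdfs-site.xml.j2 are mounted under /share/keytabs):
#
#   kinit -kt /share/keytabs/hadoop-master1/nn.service.keytab nn/hadoop-master1.orb.local@TEST.ORG
#   klist
#
# Obtaining a ticket for the TEST.ORG realm confirms this krb5.conf reaches kdc.orb.local.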
-------------------------------------------------------------------------------- /templates/hadoop-common/files/etc/supervisor/conf.d/trino-server.conf: -------------------------------------------------------------------------------- 1 | [program:trino_server] 2 | directory=/opt/trino 3 | command=/opt/trino/bin/launcher run --etc-dir=/etc/trino/conf 4 | startsecs=30 5 | stopwaitsecs=10 6 | user=trino 7 | redirect_stderr=true 8 | stdout_logfile=/var/log/trino-server.log 9 | {% if trino_enabled %} 10 | autostart=true 11 | {% else %} 12 | autostart=false 13 | {% endif %} 14 | 15 | environment= 16 | PATH=/opt/openjdk-21/bin:%(ENV_PATH)s 17 | -------------------------------------------------------------------------------- /templates/hadoop-common/files/etc/trino/conf/config.properties: -------------------------------------------------------------------------------- 1 | coordinator=false 2 | http-server.http.port=18081 3 | discovery.uri=http://hadoop-master1.orb.local:18081 4 | -------------------------------------------------------------------------------- /templates/hadoop-common/files/etc/trino/conf/jvm.config: -------------------------------------------------------------------------------- 1 | -server 2 | -Xmx1G 3 | -XX:InitialRAMPercentage=80.0 4 | -XX:MaxRAMPercentage=80.0 5 | -XX:G1HeapRegionSize=32M 6 | -XX:+ExplicitGCInvokesConcurrent 7 | -XX:+ExitOnOutOfMemoryError 8 | -XX:+HeapDumpOnOutOfMemoryError 9 | -XX:-OmitStackTraceInFastThrow 10 | -XX:ReservedCodeCacheSize=512M 11 | -XX:PerMethodRecompilationCutoff=10000 12 | -XX:PerBytecodeRecompilationCutoff=10000 13 | -Djdk.attach.allowAttachSelf=true 14 | -Djdk.nio.maxCachedBufferSize=2000000 15 | -Dfile.encoding=UTF-8 16 | # Reduce starvation of threads by GClocker, recommend to set about the number of cpu cores (JDK-8192647) 17 | -XX:+UnlockDiagnosticVMOptions 18 | -XX:GCLockerRetryAllocationCount=32 19 | -------------------------------------------------------------------------------- /templates/hadoop-common/files/etc/trino/conf/log.properties: -------------------------------------------------------------------------------- 1 | io.trino=INFO 2 | -------------------------------------------------------------------------------- /templates/hadoop-common/files/etc/trino/conf/node.properties: -------------------------------------------------------------------------------- 1 | node.environment=production 2 | node.id=hadoop-worker1 3 | node.data-dir=/var/trino/data 4 | -------------------------------------------------------------------------------- /templates/hadoop-common/files/opt/hadoop-init.d/init-hdfs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | 3 | mkdir /var/lib/hadoop-hdfs 4 | chown -R hdfs:hdfs /var/lib/hadoop-hdfs 5 | 6 | mkdir /var/lib/hadoop-mapreduce 7 | chown -R mapred:mapred /var/lib/hadoop-mapreduce 8 | 9 | mkdir /var/lib/hadoop-yarn 10 | chown -R yarn:yarn /var/lib/hadoop-yarn 11 | 12 | mkdir /opt/hadoop/logs /var/log/hadoop-hdfs /var/log/hadoop-yarn 13 | chown -R hadoop:hadoop /opt/hadoop/logs 14 | chown -R hdfs:hadoop /var/log/hadoop-hdfs 15 | chown -R yarn:hadoop /var/log/hadoop-yarn 16 | chmod -R 770 /opt/hadoop/logs /var/log/hadoop-hdfs 17 | chmod 755 /var/log/hadoop-yarn 18 | 19 | # workaround for 'could not open session' bug as suggested here: 20 | # https://github.com/docker/docker/issues/7056#issuecomment-49371610 21 | rm -f /etc/security/limits.d/hdfs.conf 22 | -------------------------------------------------------------------------------- 
/templates/hadoop-common/files/opt/trino-init.d/init-workdir.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | 3 | mkdir /var/trino 4 | chown -R trino:trino /var/trino 5 | -------------------------------------------------------------------------------- /templates/hadoop-master/Dockerfile.j2: -------------------------------------------------------------------------------- 1 | #jinja2: trim_blocks: True, lstrip_blocks: True 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | ARG PROJECT_VERSION 15 | FROM hadoop-testing/base-ubuntu-2004:$PROJECT_VERSION 16 | 17 | ARG ZEPPELIN_VERSION 18 | ARG ZOOKEEPER_VERSION 19 | ARG HADOOP_VERSION 20 | ARG HIVE_VERSION 21 | ARG SPARK_VERSION 22 | ARG SPARK_BINARY_VERSION 23 | ARG FLINK_VERSION 24 | ARG FLINK_BINARY_VERSION 25 | ARG FLINK_HIVE_VERSION 26 | ARG SPARK_SCALA_BINARY_VERSION 27 | ARG KYUUBI_VERSION 28 | ARG MYSQL_JDBC_VERSION 29 | ARG LOKI_APPENDER_VERSION 30 | ARG RANGER_VERSION 31 | ARG ICEBERG_VERSION 32 | ARG HUDI_VERSION 33 | ARG TRINO_VERSION 34 | ARG PARQUET_VERSION 35 | 36 | ENV ZEPPELIN_HOME=/opt/zeppelin 37 | ENV ZEPPELIN_CONF_DIR=/etc/zeppelin/conf 38 | ENV ZOOKEEPER_HOME=/opt/zookeeper 39 | ENV ZOOCFGDIR=/etc/zookeeper/conf 40 | ENV HADOOP_HOME=/opt/hadoop 41 | ENV HADOOP_CONF_DIR=/etc/hadoop/conf 42 | ENV LD_LIBRARY_PATH=${HADOOP_HOME}/lib/native 43 | ENV HIVE_HOME=/opt/hive 44 | ENV HIVE_CONF_DIR=/etc/hive/conf 45 | {% if spark_enabled %} 46 | ENV SPARK_HOME=/opt/spark 47 | ENV SPARK_CONF_DIR=/etc/spark/conf 48 | {% endif %} 49 | {% if flink_enabled %} 50 | ENV FLINK_HOME=/opt/flink 51 | ENV FLINK_CONF_DIR=/etc/flink/conf 52 | {% endif %} 53 | ENV KYUUBI_HOME=/opt/kyuubi 54 | ENV KYUUBI_CONF_DIR=/etc/kyuubi/conf 55 | {% if ranger_enabled %} 56 | ENV RANGER_HOME=/opt/ranger 57 | {% endif %} 58 | {% if trino_enabled %} 59 | ENV TRINO_HOME=/opt/trino 60 | {% endif %} 61 | {% if parquet_enabled %} 62 | ENV PARQUET_HOME=/opt/parquet 63 | {% endif %} 64 | ENV MYSQL_JDBC_VERSION=${MYSQL_JDBC_VERSION} 65 | ENV PATH=${HIVE_HOME}/bin:${HADOOP_HOME}/bin:${ZEPPELIN_HOME}/bin:${ZOOKEEPER_HOME}/bin:${PATH} 66 | {% if spark_enabled %} 67 | ENV PATH=${SPARK_HOME}/bin:${PATH} 68 | {% endif %} 69 | {% if flink_enabled %} 70 | ENV PATH=${FLINK_HOME}/bin:${PATH} 71 | {% endif %} 72 | ENV PATH=${KYUUBI_HOME}/bin:${PATH} 73 | {% if parquet_enabled %} 74 | ENV PATH=${PARQUET_HOME}/bin:${PATH} 75 | {% endif %} 76 | 77 | {% if zeppelin_enabled %} 78 | ADD download/zeppelin-${ZEPPELIN_VERSION}-bin{{ '-%s' % zeppelin_custom_name if zeppelin_custom_name }}.tgz /opt 79 | {% endif %} 80 | ADD download/apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz /opt 81 | ADD download/hadoop-${HADOOP_VERSION}.tar.gz /opt 82 | ADD download/apache-hive-${HIVE_VERSION}-bin.tar.gz /opt 83 | {% if spark_enabled %} 84 | ADD download/spark-${SPARK_VERSION}-bin-{{ spark_custom_name }}.tgz /opt 85 | {% endif %} 86 | {% if flink_enabled %} 87 | ADD 
download/flink-${FLINK_VERSION}-bin-scala_2.12.tgz /opt 88 | {% endif %} 89 | ADD download/apache-kyuubi-${KYUUBI_VERSION}-bin.tgz /opt 90 | {% if ranger_enabled %} 91 | ADD download/ranger-${RANGER_VERSION}-admin.tar.gz /opt 92 | {% endif %} 93 | {% if trino_enabled %} 94 | ADD download/trino-server-${TRINO_VERSION}.tar.gz /opt 95 | {% endif %} 96 | 97 | # Copy configuration files 98 | COPY ./files / 99 | 100 | RUN chmod 600 /root/.ssh/id_rsa_hadoop_testing 101 | 102 | RUN ln -snf /opt/apache-zookeeper-${ZOOKEEPER_VERSION}-bin ${ZOOKEEPER_HOME} && \ 103 | ln -snf /opt/hadoop-${HADOOP_VERSION} ${HADOOP_HOME} && \ 104 | ln -snf /opt/apache-hive-${HIVE_VERSION}-bin ${HIVE_HOME} && \ 105 | ln -snf /opt/apache-kyuubi-${KYUUBI_VERSION}-bin ${KYUUBI_HOME} && \ 106 | ln -snf ${HIVE_CONF_DIR}/hive-site.xml ${KYUUBI_CONF_DIR}/hive-site.xml && \ 107 | mkdir -p /var/log/kyuubi && chmod -R 777 /var/log/kyuubi 108 | 109 | ADD download/log4j-layout-template-json-2.20.0.jar ${KYUUBI_HOME}/jars/ 110 | ADD download/mysql-connector-j-${MYSQL_JDBC_VERSION}.jar ${KYUUBI_HOME}/jars/ 111 | 112 | {% if spark_enabled %} 113 | RUN ln -snf /opt/spark-${SPARK_VERSION}-bin-{{ spark_custom_name }} ${SPARK_HOME} && \ 114 | ln -snf ${HIVE_CONF_DIR}/hive-site.xml ${SPARK_CONF_DIR}/hive-site.xml 115 | 116 | ADD download/mysql-connector-j-${MYSQL_JDBC_VERSION}.jar ${SPARK_HOME}/jars/ 117 | {% endif %} 118 | 119 | {% if flink_enabled %} 120 | RUN ln -snf /opt/flink-${FLINK_VERSION} ${FLINK_HOME} && \ 121 | ln -snf ${HIVE_CONF_DIR}/hive-site.xml ${FLINK_CONF_DIR}/hive-site.xml && \ 122 | ln -s ${HADOOP_HOME}/share/hadoop/client/hadoop-client-api-${HADOOP_VERSION}.jar ${FLINK_HOME}/lib/ && \ 123 | ln -s ${HADOOP_HOME}/share/hadoop/client/hadoop-client-runtime-${HADOOP_VERSION}.jar ${FLINK_HOME}/lib/ && \ 124 | mkdir /var/log/flink && chmod -R 777 /var/log/flink 125 | {% endif %} 126 | 127 | {% if zeppelin_enabled %} 128 | RUN ln -snf /opt/zeppelin-${ZEPPELIN_VERSION}-bin{{ '-%s' % zeppelin_custom_name if zeppelin_custom_name }} ${ZEPPELIN_HOME} && \ 129 | mkdir -p /var/log/zeppelin && \ 130 | mkdir -p /var/run/zeppelin && \ 131 | mkdir -p /var/run/zeppelin/webapps && \ 132 | mkdir -p /var/run/zeppelin/pid && \ 133 | mkdir -p /var/lib/zeppelin && \ 134 | mkdir -p /var/lib/zeppelin/notebook && \ 135 | cp -R ${ZEPPELIN_HOME}/notebook/* /var/lib/zeppelin/notebook && \ 136 | chown -R 6014:6014 /etc/zeppelin && \ 137 | chown -R 6014:6014 /var/run/zeppelin && \ 138 | chown -R 6014:6014 /var/lib/zeppelin && \ 139 | chown -R 6014:6014 /var/log/zeppelin 140 | 141 | ADD download/mysql-connector-j-${MYSQL_JDBC_VERSION}.jar ${ZEPPELIN_HOME}/interpreter/jdbc/ 142 | ADD download/kyuubi-hive-jdbc-shaded-${KYUUBI_VERSION}.jar ${ZEPPELIN_HOME}/interpreter/jdbc/ 143 | RUN ln -s ${HIVE_HOME}/jdbc/hive-jdbc-${HIVE_VERSION}-standalone.jar ${ZEPPELIN_HOME}/interpreter/jdbc/ && \ 144 | ln -s ${HADOOP_HOME}/share/hadoop/client/hadoop-client-api-${HADOOP_VERSION}.jar ${ZEPPELIN_HOME}/interpreter/jdbc/ && \ 145 | ln -s ${HADOOP_HOME}/share/hadoop/client/hadoop-client-runtime-${HADOOP_VERSION}.jar ${ZEPPELIN_HOME}/interpreter/jdbc/ 146 | {% endif %} 147 | 148 | {% if ranger_enabled %} 149 | {% endif %} 150 | {% if ranger_enabled %} 151 | RUN ln -snf /opt/ranger-${RANGER_VERSION}-admin ${RANGER_HOME} 152 | {% endif %} 153 | {% if trino_enabled %} 154 | RUN ln -snf /opt/trino-server-${TRINO_VERSION} ${TRINO_HOME} 155 | {% endif %} 156 | 157 | ADD download/mysql-connector-j-${MYSQL_JDBC_VERSION}.jar ${HIVE_HOME}/lib/ 158 | {% if ranger_enabled %} 159 | ADD 
download/mysql-connector-j-${MYSQL_JDBC_VERSION}.jar ${RANGER_HOME}/ 160 | {% endif %} 161 | {% if spark_enabled and iceberg_enabled %} 162 | ADD download/iceberg-spark-runtime-${SPARK_BINARY_VERSION}_${SPARK_SCALA_BINARY_VERSION}-${ICEBERG_VERSION}.jar ${SPARK_HOME}/jars/ 163 | {% endif %} 164 | {% if flink_enabled and iceberg_enabled %} 165 | ADD download/iceberg-flink-runtime-${FLINK_BINARY_VERSION}-${ICEBERG_VERSION}.jar ${FLINK_HOME}/lib/ 166 | {% endif %} 167 | {% if spark_enabled and hudi_enabled %} 168 | ADD download/hudi-spark${SPARK_BINARY_VERSION}-bundle_${SPARK_SCALA_BINARY_VERSION}-${HUDI_VERSION}.jar ${SPARK_HOME}/jars/ 169 | {% endif %} 170 | {% if flink_enabled %} 171 | ADD download/jcl-over-slf4j-1.7.36.jar ${FLINK_HOME}/lib/ 172 | ADD download/flink-sql-connector-hive-${FLINK_HIVE_VERSION}_2.12-${FLINK_VERSION}.jar ${FLINK_HOME}/lib/ 173 | {% endif %} 174 | {% if trino_enabled %} 175 | ADD --chmod=755 download/trino-cli-${TRINO_VERSION}-executable.jar ${TRINO_HOME}/bin/trino-cli 176 | {% endif %} 177 | {% if parquet_enabled %} 178 | ADD download/parquet-cli-${PARQUET_VERSION}-runtime.jar ${PARQUET_HOME}/jars/ 179 | {% endif %} 180 | 181 | ADD download/log4j2-appender-nodep-${LOKI_APPENDER_VERSION}.jar ${HIVE_HOME}/lib/ 182 | ADD download/log4j2-appender-nodep-${LOKI_APPENDER_VERSION}.jar ${KYUUBI_HOME}/jars/ 183 | 184 | {% if spark_enabled %} 185 | ADD download/kyuubi-spark-connector-tpch_${SPARK_SCALA_BINARY_VERSION}-${KYUUBI_VERSION}.jar ${SPARK_HOME}/jars/ 186 | ADD download/kyuubi-spark-connector-tpcds_${SPARK_SCALA_BINARY_VERSION}-${KYUUBI_VERSION}.jar ${SPARK_HOME}/jars/ 187 | ADD download/log4j2-appender-nodep-${LOKI_APPENDER_VERSION}.jar ${SPARK_HOME}/jars/ 188 | {% endif %} 189 | 190 | {% if ranger_enabled %} 191 | # chown would double the size of the image by introducing a new layer, but Ranger does not seem to work without it 192 | RUN chown -R ranger:hadoop /opt/ranger-${RANGER_VERSION}-admin 193 | {% endif %} 194 | 195 | RUN /opt/hadoop-init.d/init-hdfs.sh 196 | {% if trino_enabled %} 197 | RUN /opt/trino-init.d/init-workdir.sh 198 | {% endif %} 199 | 200 | # Zookeeper ports 201 | EXPOSE 2181 202 | 203 | # HDFS ports 204 | EXPOSE 8020 9864 9866 9867 9870 205 | 206 | # YARN ports 207 | EXPOSE 8030 8031 8032 8033 8040 8041 8042 8088 10020 19888 208 | 209 | # HIVE ports 210 | EXPOSE 9083 10000 211 | 212 | # SPARK ports 213 | EXPOSE 18080 214 | 215 | # Flink ports 216 | EXPOSE 8082 217 | 218 | {% if ranger_enabled %} 219 | EXPOSE 6080 220 | {% endif %} 221 | 222 | {% if trino_enabled %} 223 | EXPOSE 18081 224 | {% endif %} 225 | 226 | CMD ["supervisord", "-c", "/etc/supervisord.conf"] 227 | ENTRYPOINT ["/opt/entrypoint.sh"] 228 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/flink/conf/log4j-cli.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. 
You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | # Allows this configuration to be modified at runtime. The file will be checked every 30 seconds. 20 | monitorInterval=30 21 | 22 | rootLogger.level = INFO 23 | rootLogger.appenderRef.file.ref = FileAppender 24 | 25 | # Log all infos in the given file 26 | appender.file.name = FileAppender 27 | appender.file.type = FILE 28 | appender.file.append = false 29 | appender.file.fileName = ${sys:log.file} 30 | appender.file.layout.type = PatternLayout 31 | appender.file.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n 32 | 33 | # Log output from org.apache.flink.yarn to the console. This is used by the 34 | # CliFrontend class when using a per-job YARN cluster. 35 | logger.yarn.name = org.apache.flink.yarn 36 | logger.yarn.level = INFO 37 | logger.yarn.appenderRef.console.ref = ConsoleAppender 38 | logger.yarncli.name = org.apache.flink.yarn.cli.FlinkYarnSessionCli 39 | logger.yarncli.level = INFO 40 | logger.yarncli.appenderRef.console.ref = ConsoleAppender 41 | logger.hadoop.name = org.apache.hadoop 42 | logger.hadoop.level = INFO 43 | logger.hadoop.appenderRef.console.ref = ConsoleAppender 44 | 45 | # Make sure hive logs go to the file. 46 | logger.hive.name = org.apache.hadoop.hive 47 | logger.hive.level = INFO 48 | logger.hive.additivity = false 49 | logger.hive.appenderRef.file.ref = FileAppender 50 | 51 | # Log output from org.apache.flink.kubernetes to the console. 52 | logger.kubernetes.name = org.apache.flink.kubernetes 53 | logger.kubernetes.level = INFO 54 | logger.kubernetes.appenderRef.console.ref = ConsoleAppender 55 | 56 | appender.console.name = ConsoleAppender 57 | appender.console.type = CONSOLE 58 | appender.console.layout.type = PatternLayout 59 | appender.console.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n 60 | 61 | # suppress the warning that hadoop native libraries are not loaded (irrelevant for the client) 62 | logger.hadoopnative.name = org.apache.hadoop.util.NativeCodeLoader 63 | logger.hadoopnative.level = OFF 64 | 65 | # Suppress the irrelevant (wrong) warnings from the Netty channel handler 66 | logger.netty.name = org.jboss.netty.channel.DefaultChannelPipeline 67 | logger.netty.level = OFF 68 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/flink/conf/log4j-console.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. 
You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | # Allows this configuration to be modified at runtime. The file will be checked every 30 seconds. 20 | monitorInterval=30 21 | 22 | # This affects logging for both user code and Flink 23 | rootLogger.level = INFO 24 | rootLogger.appenderRef.console.ref = ConsoleAppender 25 | rootLogger.appenderRef.rolling.ref = RollingFileAppender 26 | 27 | # Uncomment this if you want to _only_ change Flink's logging 28 | #logger.flink.name = org.apache.flink 29 | #logger.flink.level = INFO 30 | 31 | # The following lines keep the log level of common libraries/connectors on 32 | # log level INFO. The root logger does not override this. You have to manually 33 | # change the log levels here. 34 | logger.pekko.name = org.apache.pekko 35 | logger.pekko.level = INFO 36 | logger.kafka.name= org.apache.kafka 37 | logger.kafka.level = INFO 38 | logger.hadoop.name = org.apache.hadoop 39 | logger.hadoop.level = INFO 40 | logger.zookeeper.name = org.apache.zookeeper 41 | logger.zookeeper.level = INFO 42 | logger.shaded_zookeeper.name = org.apache.flink.shaded.zookeeper3 43 | logger.shaded_zookeeper.level = INFO 44 | 45 | # Log all infos to the console 46 | appender.console.name = ConsoleAppender 47 | appender.console.type = CONSOLE 48 | appender.console.layout.type = PatternLayout 49 | appender.console.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n 50 | appender.console.filter.threshold.type = ThresholdFilter 51 | appender.console.filter.threshold.level = ${sys:console.log.level:-ALL} 52 | 53 | # Log all infos in the given rolling file 54 | appender.rolling.name = RollingFileAppender 55 | appender.rolling.type = RollingFile 56 | appender.rolling.append = true 57 | appender.rolling.fileName = ${sys:log.file} 58 | appender.rolling.filePattern = ${sys:log.file}.%i 59 | appender.rolling.layout.type = PatternLayout 60 | appender.rolling.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n 61 | appender.rolling.policies.type = Policies 62 | appender.rolling.policies.size.type = SizeBasedTriggeringPolicy 63 | appender.rolling.policies.size.size=100MB 64 | appender.rolling.policies.startup.type = OnStartupTriggeringPolicy 65 | appender.rolling.strategy.type = DefaultRolloverStrategy 66 | appender.rolling.strategy.max = ${env:MAX_LOG_FILE_NUMBER:-10} 67 | 68 | # Suppress the irrelevant (wrong) warnings from the Netty channel handler 69 | logger.netty.name = org.jboss.netty.channel.DefaultChannelPipeline 70 | logger.netty.level = OFF 71 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/flink/conf/log4j-session.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. 
See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | # Allows this configuration to be modified at runtime. The file will be checked every 30 seconds. 20 | monitorInterval=30 21 | 22 | rootLogger.level = INFO 23 | rootLogger.appenderRef.console.ref = ConsoleAppender 24 | 25 | appender.console.name = ConsoleAppender 26 | appender.console.type = CONSOLE 27 | appender.console.layout.type = PatternLayout 28 | appender.console.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n 29 | 30 | # Suppress the irrelevant (wrong) warnings from the Netty channel handler 31 | logger.netty.name = org.jboss.netty.channel.DefaultChannelPipeline 32 | logger.netty.level = OFF 33 | logger.zookeeper.name = org.apache.zookeeper 34 | logger.zookeeper.level = WARN 35 | logger.shaded_zookeeper.name = org.apache.flink.shaded.zookeeper3 36 | logger.shaded_zookeeper.level = WARN 37 | logger.curator.name = org.apache.flink.shaded.org.apache.curator.framework 38 | logger.curator.level = WARN 39 | logger.runtimeutils.name= org.apache.flink.runtime.util.ZooKeeperUtils 40 | logger.runtimeutils.level = WARN 41 | logger.runtimeleader.name = org.apache.flink.runtime.leaderretrieval.ZooKeeperLeaderRetrievalDriver 42 | logger.runtimeleader.level = WARN 43 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/flink/conf/log4j.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | # Allows this configuration to be modified at runtime. The file will be checked every 30 seconds. 
20 | monitorInterval=30 21 | 22 | # This affects logging for both user code and Flink 23 | rootLogger.level = INFO 24 | rootLogger.appenderRef.file.ref = MainAppender 25 | 26 | # Uncomment this if you want to _only_ change Flink's logging 27 | #logger.flink.name = org.apache.flink 28 | #logger.flink.level = INFO 29 | 30 | # The following lines keep the log level of common libraries/connectors on 31 | # log level INFO. The root logger does not override this. You have to manually 32 | # change the log levels here. 33 | logger.pekko.name = org.apache.pekko 34 | logger.pekko.level = INFO 35 | logger.kafka.name= org.apache.kafka 36 | logger.kafka.level = INFO 37 | logger.hadoop.name = org.apache.hadoop 38 | logger.hadoop.level = INFO 39 | logger.zookeeper.name = org.apache.zookeeper 40 | logger.zookeeper.level = INFO 41 | logger.shaded_zookeeper.name = org.apache.flink.shaded.zookeeper3 42 | logger.shaded_zookeeper.level = INFO 43 | 44 | # Log all infos in the given file 45 | appender.main.name = MainAppender 46 | appender.main.type = RollingFile 47 | appender.main.append = true 48 | appender.main.fileName = ${sys:log.file} 49 | appender.main.filePattern = ${sys:log.file}.%i 50 | appender.main.layout.type = PatternLayout 51 | appender.main.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n 52 | appender.main.policies.type = Policies 53 | appender.main.policies.size.type = SizeBasedTriggeringPolicy 54 | appender.main.policies.size.size = 100MB 55 | appender.main.policies.startup.type = OnStartupTriggeringPolicy 56 | appender.main.strategy.type = DefaultRolloverStrategy 57 | appender.main.strategy.max = ${env:MAX_LOG_FILE_NUMBER:-10} 58 | 59 | # Suppress the irrelevant (wrong) warnings from the Netty channel handler 60 | logger.netty.name = org.jboss.netty.channel.DefaultChannelPipeline 61 | logger.netty.level = OFF 62 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/flink/conf_history_server/flink-conf.yaml: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | #============================================================================== 20 | # HistoryServer 21 | #============================================================================== 22 | 23 | # The HistoryServer is started and stopped via bin/historyserver.sh (start|stop) 24 | 25 | # The address under which the web-based HistoryServer listens. 
26 | #historyserver.web.address: 0.0.0.0 27 | 28 | # The port under which the web-based HistoryServer listens. 29 | historyserver.web.port: 8082 30 | 31 | # Comma separated list of directories to monitor for completed jobs. 32 | historyserver.archive.fs.dir: hdfs://hadoop-master1.orb.local:8020/flink-history 33 | 34 | # Interval in milliseconds for refreshing the monitored directories. 35 | #historyserver.archive.fs.refresh-interval: 10000 36 | 37 | #============================================================================== 38 | # Flink Cluster Security Configuration 39 | #============================================================================== 40 | 41 | # Kerberos authentication for various components - Hadoop, ZooKeeper, and connectors - 42 | # may be enabled in four steps: 43 | # 1. configure the local krb5.conf file 44 | # 2. provide Kerberos credentials (either a keytab or a ticket cache w/ kinit) 45 | # 3. make the credentials available to various JAAS login contexts 46 | # 4. configure the connector to use JAAS/SASL 47 | 48 | # The below configure how Kerberos credentials are provided. A keytab will be used instead of 49 | # a ticket cache if the keytab path and principal are set. 50 | 51 | {% if kerberos_enabled %} 52 | security.kerberos.login.use-ticket-cache: true 53 | security.kerberos.login.keytab: /share/keytabs/hadoop-master1/flink.service.keytab 54 | security.kerberos.login.principal: flink/hadoop-master1.orb.local@TEST.ORG 55 | {% else %} 56 | # security.kerberos.login.use-ticket-cache: true 57 | # security.kerberos.login.keytab: /path/to/kerberos/keytab 58 | # security.kerberos.login.principal: flink-user 59 | {% endif %} 60 | 61 | # The configuration below defines which JAAS login contexts 62 | 63 | # security.kerberos.login.contexts: Client,KafkaClient 64 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/flink/conf_history_server/log4j-console.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | # Allows this configuration to be modified at runtime. The file will be checked every 30 seconds. 
20 | monitorInterval=30 21 | 22 | # This affects logging for both user code and Flink 23 | rootLogger.level = INFO 24 | rootLogger.appenderRef.console.ref = ConsoleAppender 25 | 26 | # Uncomment this if you want to _only_ change Flink's logging 27 | #logger.flink.name = org.apache.flink 28 | #logger.flink.level = INFO 29 | 30 | # The following lines keep the log level of common libraries/connectors on 31 | # log level INFO. The root logger does not override this. You have to manually 32 | # change the log levels here. 33 | logger.pekko.name = org.apache.pekko 34 | logger.pekko.level = INFO 35 | logger.kafka.name= org.apache.kafka 36 | logger.kafka.level = INFO 37 | logger.hadoop.name = org.apache.hadoop 38 | logger.hadoop.level = INFO 39 | logger.zookeeper.name = org.apache.zookeeper 40 | logger.zookeeper.level = INFO 41 | logger.shaded_zookeeper.name = org.apache.flink.shaded.zookeeper3 42 | logger.shaded_zookeeper.level = INFO 43 | 44 | # Log all infos to the console 45 | appender.console.name = ConsoleAppender 46 | appender.console.type = CONSOLE 47 | appender.console.layout.type = PatternLayout 48 | appender.console.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n 49 | appender.console.filter.threshold.type = ThresholdFilter 50 | appender.console.filter.threshold.level = ${sys:console.log.level:-ALL} 51 | 52 | # Suppress the irrelevant (wrong) warnings from the Netty channel handler 53 | logger.netty.name = org.jboss.netty.channel.DefaultChannelPipeline 54 | logger.netty.level = OFF 55 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/hive/conf/beeline-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% if kerberos_enabled %} 4 | 5 | beeline.hs2.jdbc.url.tcpUrl 6 | jdbc:hive2://hadoop-master1.orb.local:10000/default;principal=hive/_HOST@TEST.ORG 7 | 8 | {% else %} 9 | 10 | beeline.hs2.jdbc.url.tcpUrl 11 | jdbc:hive2://hadoop-master1.orb.local:10000/default;user=hive;password=hive 12 | 13 | {% endif %} 14 | 15 | beeline.hs2.jdbc.url.default 16 | tcpUrl 17 | 18 | 19 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/hive/conf/hive-env.sh: -------------------------------------------------------------------------------- 1 | export HADOOP_HEAPSIZE=256 2 | export HADOOP_CLIENT_OPTS="${HADOOP_CLIENT_OPTS} -Xmx256m -Djava.io.tmpdir=/tmp" 3 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/hive/conf/hive-log4j2.properties: -------------------------------------------------------------------------------- 1 | rootLogger.level = info 2 | rootLogger.appenderRef.stdout.ref = console 3 | 4 | appender.console.type = Console 5 | appender.console.name = console 6 | appender.console.target = SYSTEM_OUT 7 | appender.console.layout.type = PatternLayout 8 | appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} [%t]: %p %c{2}: %m%n 9 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/hive/conf/hive-site.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% if kerberos_enabled %} 4 | 5 | 6 | hive.server2.authentication 7 | kerberos 8 | 9 | 10 | hive.server2.authentication.kerberos.principal 11 | hive/_HOST@TEST.ORG 12 | 13 | 14 | hive.server2.authentication.kerberos.keytab 15 | 
/share/keytabs/hadoop-master1/hive.service.keytab 16 | 17 | 18 | 19 | 20 | hive.metastore.sasl.enabled 21 | true 22 | 23 | 24 | hive.metastore.kerberos.principal 25 | hive/_HOST@TEST.ORG 26 | 27 | 28 | hive.metastore.kerberos.keytab.file 29 | /share/keytabs/hadoop-master1/hive.service.keytab 30 | 31 | {% endif %} 32 | 33 | hive.metastore.warehouse.dir 34 | hdfs://hadoop-master1.orb.local:8020/warehouse 35 | 36 | 37 | hive.metastore.uris 38 | thrift://hadoop-master1.orb.local:9083 39 | 40 | 41 | javax.jdo.option.ConnectionURL 42 | jdbc:mysql://mysql:3306/metastore?useSSL=false 43 | 44 | 45 | javax.jdo.option.ConnectionDriverName 46 | com.mysql.cj.jdbc.Driver 47 | 48 | 49 | javax.jdo.option.ConnectionUserName 50 | root 51 | 52 | 53 | javax.jdo.option.ConnectionPassword 54 | root 55 | 56 | 57 | datanucleus.autoCreateSchema 58 | false 59 | 60 | 61 | datanucleus.fixedDatastore 62 | true 63 | 64 | 65 | datanucleus.autoStartMechanism 66 | SchemaTable 67 | 68 | 69 | hive.metastore.connect.retries 70 | 15 71 | 72 | 73 | hive.security.authorization.createtable.owner.grants 74 | ALL 75 | The set of privileges automatically granted to the owner whenever a table gets created. 76 | 77 | 78 | hive.users.in.admin.role 79 | hdfs,hive 80 | 81 | 82 | 83 | metastore.storage.schema.reader.impl 84 | org.apache.hadoop.hive.metastore.SerDeStorageSchemaReader 85 | 86 | 87 | hive.support.concurrency 88 | true 89 | 90 | 91 | hive.txn.manager 92 | org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager 93 | 94 | 95 | hive.lock.manager 96 | org.apache.hadoop.hive.ql.lockmgr.EmbeddedLockManager 97 | 98 | 99 | hive.compactor.initiator.on 100 | true 101 | 102 | 103 | hive.compactor.worker.threads 104 | 1 105 | 106 | 107 | hive.metastore.disallow.incompatible.col.type.changes 108 | false 109 | 110 | 111 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/kyuubi/conf/kyuubi-defaults.conf.j2: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | ## Kyuubi Configurations 19 | 20 | {% if kerberos_enabled %} 21 | kyuubi.authentication KERBEROS 22 | kyuubi.kinit.principal kyuubi/_HOST@TEST.ORG 23 | kyuubi.kinit.keytab /share/keytabs/hadoop-master1/kyuubi.service.keytab 24 | {% endif %} 25 | 26 | kyuubi.frontend.bind.host hadoop-master1.orb.local 27 | kyuubi.frontend.protocols THRIFT_BINARY,REST 28 | kyuubi.frontend.thrift.binary.bind.port 10009 29 | kyuubi.frontend.rest.bind.port 10099 30 | 31 | kyuubi.engine.type SPARK_SQL 32 | kyuubi.engine.share.level USER 33 | kyuubi.session.engine.initialize.timeout PT3M 34 | 35 | kyuubi.ha.addresses hadoop-master1.orb.local:2181 36 | kyuubi.ha.namespace kyuubi 37 | 38 | kyuubi.yarn.user.strategy ADMIN 39 | kyuubi.yarn.user.admin yarn 40 | 41 | kyuubi.metadata.store.jdbc.database.schema.init=false 42 | kyuubi.metadata.store.jdbc.database.type=mysql 43 | kyuubi.metadata.store.jdbc.driver=com.mysql.jdbc.Driver 44 | kyuubi.metadata.store.jdbc.url=jdbc:mysql://mysql:3306/kyuubi?useSSL=false&useUnicode=true&characterEncoding=UTF-8 45 | kyuubi.metadata.store.jdbc.user=root 46 | kyuubi.metadata.store.jdbc.password=root 47 | 48 | # spark engine 49 | kyuubi.session.engine.spark.initialize.sql \ 50 | show databases in tpcds; \ 51 | show databases in tpch 52 | 53 | spark.yarn.maxAppAttempts 1 54 | spark.submit.deployMode cluster 55 | kyuubi.session.engine.startup.waitCompletion false 56 | 57 | # hive engine 58 | kyuubi.engine.hive.extra.classpath /opt/hadoop/share/hadoop/client/*:/opt/hadoop/share/hadoop/mapreduce/* 59 | 60 | # flink engine 61 | flink.execution.target yarn-application 62 | kyuubi.engine.flink.extra.classpath /opt/hadoop/share/hadoop/client/*:/opt/hadoop/share/hadoop/mapreduce/* 63 | 64 | # Details in https://kyuubi.readthedocs.io/en/master/configuration/settings.html 65 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/kyuubi/conf/kyuubi-env.sh: -------------------------------------------------------------------------------- 1 | export KYUUBI_LOG_DIR=/var/log/kyuubi 2 | export KYUUBI_WORK_DIR_ROOT=/var/lib/kyuubi/work 3 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/kyuubi/conf/log4j2.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ${env:KYUUBI_LOG_DIR} 5 | rest-audit.log 6 | k8s-audit.log 7 | operation-audit.log 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | {% if kafka_enabled %} 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | {% endif %} 33 | 34 | 35 | 36 | 37 | {% if kafka_enabled %} 38 | 39 | {% endif %} 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | {% if kafka_enabled %} 51 | 52 | {% endif %} 53 | 54 | 55 | 56 | 57 | {% if kafka_enabled %} 58 | 59 | {% endif %} 60 | 61 | 62 | 63 | 64 | {% if kafka_enabled %} 65 | 66 | {% endif %} 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/ranger/conf/install.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 
4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # 17 | # This file provides a list of the deployment variables for the Policy Manager Web Application 18 | # 19 | 20 | #------------------------- DB CONFIG - BEGIN ---------------------------------- 21 | # Uncomment the below if the DBA steps need to be run separately 22 | setup_mode=SeparateDBA 23 | 24 | PYTHON_COMMAND_INVOKER=python3 25 | 26 | #DB_FLAVOR=MYSQL|ORACLE|POSTGRES|MSSQL|SQLA 27 | DB_FLAVOR=MYSQL 28 | # 29 | 30 | # 31 | # Location of DB client library (please check the location of the jar file) 32 | # 33 | #SQL_CONNECTOR_JAR=/usr/share/java/ojdbc6.jar 34 | #SQL_CONNECTOR_JAR=/usr/share/java/mysql-connector-java.jar 35 | #SQL_CONNECTOR_JAR=/usr/share/java/postgresql.jar 36 | #SQL_CONNECTOR_JAR=/usr/share/java/sqljdbc4.jar 37 | #SQL_CONNECTOR_JAR=/opt/sqlanywhere17/java/sajdbc4.jar 38 | SQL_CONNECTOR_JAR=/opt/ranger/mysql-connector-j-__REPLACE_MYSQL_JDBC_VERSION__.jar 39 | 40 | 41 | # 42 | # DB password for the DB admin user-id 43 | # ************************************************************************** 44 | # ** If the password is left empty or not-defined here, 45 | # ** it will try with blank password during installation process 46 | # ************************************************************************** 47 | # 48 | #db_root_user=root|SYS|postgres|sa|dba 49 | #db_host=host:port # for DB_FLAVOR=MYSQL|POSTGRES|SQLA|MSSQL #for example: db_host=localhost:3306 50 | #db_host=host:port:SID # for DB_FLAVOR=ORACLE #for SID example: db_host=localhost:1521:ORCL 51 | #db_host=host:port/ServiceName # for DB_FLAVOR=ORACLE #for Service example: db_host=localhost:1521/XE 52 | db_root_user=root 53 | db_root_password=root 54 | db_host=mysql:3306 55 | #SSL config 56 | db_ssl_enabled=false 57 | db_ssl_required=false 58 | db_ssl_verifyServerCertificate=false 59 | #db_ssl_auth_type=1-way|2-way, where 1-way represents standard one way ssl authentication and 2-way represents mutual ssl authentication 60 | db_ssl_auth_type=2-way 61 | javax_net_ssl_keyStore= 62 | javax_net_ssl_keyStorePassword= 63 | javax_net_ssl_trustStore= 64 | javax_net_ssl_trustStorePassword= 65 | javax_net_ssl_trustStore_type=jks 66 | javax_net_ssl_keyStore_type=jks 67 | 68 | # For postgresql db 69 | db_ssl_certificate_file= 70 | 71 | # 72 | # DB UserId used for the Ranger schema 73 | # 74 | db_name=ranger 75 | db_user=root 76 | db_password=root 77 | 78 | #For over-riding the jdbc url. 79 | is_override_db_connection_string=false 80 | db_override_connection_string= 81 | 82 | 83 | # change password. Password for below mentioned users can be changed only once using this property. 84 | #PLEASE NOTE :: Password should be minimum 8 characters with min one alphabet and one numeric. 85 | rangerAdmin_password=Ranger@admin123 86 | rangerTagsync_password=Ranger@admin123 87 | rangerUsersync_password=Ranger@admin123 88 | keyadmin_password=Ranger@admin123 89 | 90 | 91 | #Source for Audit Store. 
Currently solr, elasticsearch and cloudwatch logs are supported. 92 | # * audit_store is solr 93 | audit_store= 94 | 95 | # * audit_solr_url Elasticsearch Host(s). E.g. 127.0.0.1 96 | audit_elasticsearch_urls= 97 | audit_elasticsearch_port= 98 | audit_elasticsearch_protocol= 99 | audit_elasticsearch_user= 100 | audit_elasticsearch_password= 101 | audit_elasticsearch_index= 102 | audit_elasticsearch_bootstrap_enabled=false 103 | 104 | 105 | # * audit_solr_url URL to Solr. E.g. http://:6083/solr/ranger_audits 106 | audit_solr_urls= 107 | audit_solr_user= 108 | audit_solr_password= 109 | audit_solr_zookeepers= 110 | 111 | audit_solr_collection_name=ranger_audits 112 | #solr Properties for cloud mode 113 | audit_solr_config_name=ranger_audits 114 | audit_solr_configset_location= 115 | audit_solr_no_shards=1 116 | audit_solr_no_replica=1 117 | audit_solr_max_shards_per_node=1 118 | audit_solr_acl_user_list_sasl=solr,infra-solr 119 | audit_solr_bootstrap_enabled=false 120 | 121 | # * audit to amazon cloudwatch properties 122 | audit_cloudwatch_region= 123 | audit_cloudwatch_log_group= 124 | audit_cloudwatch_log_stream_prefix= 125 | 126 | #------------------------- DB CONFIG - END ---------------------------------- 127 | 128 | # 129 | # ------- PolicyManager CONFIG ---------------- 130 | # 131 | 132 | policymgr_external_url=http://localhost:6080 133 | policymgr_http_enabled=true 134 | policymgr_https_keystore_file= 135 | policymgr_https_keystore_keyalias=rangeradmin 136 | policymgr_https_keystore_password= 137 | 138 | #Add Supported Components list below separated by semi-colon, default value is empty string to support all components 139 | #Example : policymgr_supportedcomponents=hive,hbase,hdfs 140 | policymgr_supportedcomponents= 141 | 142 | # 143 | # ------- PolicyManager CONFIG - END --------------- 144 | # 145 | 146 | 147 | # 148 | # ------- UNIX User CONFIG ---------------- 149 | # 150 | unix_user=ranger 151 | unix_user_pwd=ranger@admin 152 | unix_group=ranger 153 | 154 | # 155 | # ------- UNIX User CONFIG - END ---------------- 156 | # 157 | # 158 | 159 | # 160 | # UNIX authentication service for Policy Manager 161 | # 162 | # PolicyManager can authenticate using UNIX username/password 163 | # The UNIX server specified here as authServiceHostName needs to be installed with ranger-unix-ugsync package. 164 | # Once the service is installed on authServiceHostName, the UNIX username/password from the host can be used to login into policy manager 165 | # 166 | # ** The installation of ranger-unix-ugsync package can be installed after the policymanager installation is finished. 
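# Note: with authentication_method=NONE (set just below), Ranger Admin authenticates against its internal user store, so the web UI at policymgr_external_url is reached with the built-in admin user and the rangerAdmin_password configured above.
# As a rough sketch only (not used by this template), switching to UNIX authentication would look something like:
#   authentication_method=UNIX
#   remoteLoginEnabled=true
#   authServiceHostName=<host running the ranger unixauth service>
#   authServicePort=5151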
167 | # 168 | #LDAP|ACTIVE_DIRECTORY|UNIX|NONE 169 | authentication_method=NONE 170 | remoteLoginEnabled=true 171 | authServiceHostName=localhost 172 | authServicePort=5151 173 | ranger_unixauth_keystore=keystore.jks 174 | ranger_unixauth_keystore_password=password 175 | ranger_unixauth_truststore=cacerts 176 | ranger_unixauth_truststore_password=changeit 177 | 178 | ####LDAP settings - Required only if have selected LDAP authentication #### 179 | # 180 | # Sample Settings 181 | # 182 | #xa_ldap_url=ldap://127.0.0.1:389 183 | #xa_ldap_userDNpattern=uid={0},ou=users,dc=xasecure,dc=net 184 | #xa_ldap_groupSearchBase=ou=groups,dc=xasecure,dc=net 185 | #xa_ldap_groupSearchFilter=(member=uid={0},ou=users,dc=xasecure,dc=net) 186 | #xa_ldap_groupRoleAttribute=cn 187 | #xa_ldap_base_dn=dc=xasecure,dc=net 188 | #xa_ldap_bind_dn=cn=admin,ou=users,dc=xasecure,dc=net 189 | #xa_ldap_bind_password= 190 | #xa_ldap_referral=follow|ignore 191 | #xa_ldap_userSearchFilter=(uid={0}) 192 | 193 | xa_ldap_url= 194 | xa_ldap_userDNpattern= 195 | xa_ldap_groupSearchBase= 196 | xa_ldap_groupSearchFilter= 197 | xa_ldap_groupRoleAttribute= 198 | xa_ldap_base_dn= 199 | xa_ldap_bind_dn= 200 | xa_ldap_bind_password= 201 | xa_ldap_referral= 202 | xa_ldap_userSearchFilter= 203 | ####ACTIVE_DIRECTORY settings - Required only if have selected AD authentication #### 204 | # 205 | # Sample Settings 206 | # 207 | #xa_ldap_ad_domain=xasecure.net 208 | #xa_ldap_ad_url=ldap://127.0.0.1:389 209 | #xa_ldap_ad_base_dn=dc=xasecure,dc=net 210 | #xa_ldap_ad_bind_dn=cn=administrator,ou=users,dc=xasecure,dc=net 211 | #xa_ldap_ad_bind_password= 212 | #xa_ldap_ad_referral=follow|ignore 213 | #xa_ldap_ad_userSearchFilter=(sAMAccountName={0}) 214 | 215 | xa_ldap_ad_domain= 216 | xa_ldap_ad_url= 217 | xa_ldap_ad_base_dn= 218 | xa_ldap_ad_bind_dn= 219 | xa_ldap_ad_bind_password= 220 | xa_ldap_ad_referral= 221 | xa_ldap_ad_userSearchFilter= 222 | 223 | #------------ Kerberos Config ----------------- 224 | spnego_principal= 225 | spnego_keytab= 226 | token_valid=30 227 | cookie_domain= 228 | cookie_path=/ 229 | admin_principal= 230 | admin_keytab= 231 | lookup_principal= 232 | lookup_keytab= 233 | hadoop_conf=/etc/hadoop/conf 234 | # 235 | #-------- SSO CONFIG - Start ------------------ 236 | # 237 | sso_enabled=false 238 | sso_providerurl=https://127.0.0.1:8443/gateway/knoxsso/api/v1/websso 239 | sso_publickey= 240 | 241 | # 242 | #-------- SSO CONFIG - END ------------------ 243 | 244 | # Custom log directory path 245 | RANGER_ADMIN_LOG_DIR=$PWD 246 | RANGER_ADMIN_LOGBACK_CONF_FILE= 247 | 248 | # PID file path 249 | RANGER_PID_DIR_PATH=$PWD 250 | 251 | # ################# DO NOT MODIFY ANY VARIABLES BELOW ######################### 252 | # 253 | # --- These deployment variables are not to be modified unless you understand the full impact of the changes 254 | # 255 | ################################################################################ 256 | XAPOLICYMGR_DIR=$PWD 257 | app_home=$PWD/ews/webapp 258 | TMPFILE=$PWD/.fi_tmp 259 | LOGFILE=$PWD/logfile 260 | LOGFILES="$LOGFILE" 261 | 262 | JAVA_BIN='java' 263 | JAVA_VERSION_REQUIRED='1.8' 264 | JAVA_ORACLE='Java(TM) SE Runtime Environment' 265 | 266 | ranger_admin_max_heap_size=512m 267 | #retry DB and Java patches after the given time in seconds. 
268 | PATCH_RETRY_INTERVAL=120 269 | STALE_PATCH_ENTRY_HOLD_TIME=10 270 | 271 | #mysql_create_user_file=${PWD}/db/mysql/create_dev_user.sql 272 | mysql_core_file=db/mysql/optimized/current/ranger_core_db_mysql.sql 273 | mysql_audit_file=db/mysql/xa_audit_db.sql 274 | #mysql_asset_file=${PWD}/db/mysql/reset_asset.sql 275 | 276 | #oracle_create_user_file=${PWD}/db/oracle/create_dev_user_oracle.sql 277 | oracle_core_file=db/oracle/optimized/current/ranger_core_db_oracle.sql 278 | oracle_audit_file=db/oracle/xa_audit_db_oracle.sql 279 | #oracle_asset_file=${PWD}/db/oracle/reset_asset_oracle.sql 280 | # 281 | postgres_core_file=db/postgres/optimized/current/ranger_core_db_postgres.sql 282 | postgres_audit_file=db/postgres/xa_audit_db_postgres.sql 283 | # 284 | sqlserver_core_file=db/sqlserver/optimized/current/ranger_core_db_sqlserver.sql 285 | sqlserver_audit_file=db/sqlserver/xa_audit_db_sqlserver.sql 286 | # 287 | sqlanywhere_core_file=db/sqlanywhere/optimized/current/ranger_core_db_sqlanywhere.sql 288 | sqlanywhere_audit_file=db/sqlanywhere/xa_audit_db_sqlanywhere.sql 289 | cred_keystore_filename=$app_home/WEB-INF/classes/conf/.jceks/rangeradmin.jceks 290 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/ranger/startup.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | 16 | set -xe 17 | 18 | function setup_config() { 19 | cp /etc/ranger/conf/* ${RANGER_HOME}/ 20 | sed -i 's/__REPLACE_MYSQL_JDBC_VERSION__/'"${MYSQL_JDBC_VERSION}"'/g' "${RANGER_HOME}/install.properties" 21 | } 22 | 23 | function setup_ranger() { 24 | cd ${RANGER_HOME} 25 | sh setup.sh && sh ews/ranger-admin-services.sh start 26 | } 27 | 28 | setup_config 29 | 30 | setup_ranger 31 | 32 | RANGER_ADMIN_PID=`ps -ef | grep -v grep | grep -i "org.apache.ranger.server.tomcat.EmbeddedServer" | awk '{print $2}'` 33 | 34 | tail --pid=$RANGER_ADMIN_PID -f /dev/null 35 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/spark/conf/log4j2.xml.j2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | {% if loki_enabled %} 8 | 9 | loki 10 | 3100 11 | 12 | %X{tid} [%t] %d{yyyy-MM-dd HH:mm:ss.SSS} %5p %c{1} - %m%n%exception{full} 13 | 14 | 17 | {% endif %} 18 | 19 | 20 | 21 | 22 | {% if loki_enabled %} 23 | 24 | {% endif %} 25 | 26 | 27 | 28 | {% if loki_enabled %} 29 | 30 | {% endif %} 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/spark/conf/spark-defaults.conf.j2: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. 
See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Default system properties included when running spark-submit. 19 | # This is useful for setting default environmental settings. 20 | 21 | spark.master=yarn 22 | spark.submit.deployMode=client 23 | spark.driver.memory=512m 24 | spark.executor.cores=1 25 | spark.executor.memory=512m 26 | 27 | spark.yarn.appMasterEnv.JAVA_HOME=/opt/openjdk-17 28 | spark.executorEnv.JAVA_HOME=/opt/openjdk-17 29 | 30 | spark.driver.extraLibraryPath=/opt/hadoop/lib/native 31 | spark.executor.extraLibraryPath=/opt/hadoop/lib/native 32 | 33 | spark.eventLog.enabled=true 34 | spark.eventLog.dir=hdfs://hadoop-master1.orb.local:8020/spark-history 35 | 36 | spark.history.ui.port=18080 37 | spark.history.fs.logDirectory=hdfs://hadoop-master1.orb.local:8020/spark-history 38 | {% if kerberos_enabled %} 39 | spark.history.kerberos.enabled=true 40 | spark.history.kerberos.principal=spark/hadoop-master1.orb.local@TEST.ORG 41 | spark.history.kerberos.keytab=/share/keytabs/hadoop-master1/spark.service.keytab 42 | {% endif %} 43 | spark.yarn.historyServer.address=hadoop-master1.orb.local:18080 44 | 45 | spark.shuffle.service.enabled=true 46 | 47 | spark.sql.catalog.tpcds=org.apache.kyuubi.spark.connector.tpcds.TPCDSCatalog 48 | spark.sql.catalog.tpch=org.apache.kyuubi.spark.connector.tpch.TPCHCatalog 49 | 50 | {% if iceberg_enabled %} 51 | spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions 52 | 53 | spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkSessionCatalog 54 | spark.sql.catalog.spark_catalog.type=hive 55 | {% endif %} 56 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/spark/conf/spark-env.sh: -------------------------------------------------------------------------------- 1 | export JAVA_HOME=/opt/openjdk-17 2 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/supervisor/conf.d/flink-history-server.conf: -------------------------------------------------------------------------------- 1 | [program:flink-history-server] 2 | directory=/opt/flink 3 | command=/opt/flink/bin/historyserver.sh start-foreground 4 | startsecs=30 5 | stopwaitsecs=10 6 | user=flink 7 | redirect_stderr=true 8 | stdout_logfile=/var/log/flink-history-server.log 9 | {% if flink_enabled %} 10 | autostart=true 11 | {% else %} 12 | autostart=false 13 | {% endif %} 14 | 15 | environment= 16 | JAVA_HOME=/opt/openjdk-8, 17 | FLINK_CONF_DIR=/etc/flink/conf_history_server, 18 | FLINK_NO_DAEMONIZE=1 19 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/supervisor/conf.d/hdfs-namenode.conf: -------------------------------------------------------------------------------- 1 | 
[program:hdfs-namenode] 2 | directory=/opt 3 | command=bash /opt/start-namenode.sh 4 | startsecs=30 5 | stopwaitsecs=10 6 | user=hdfs 7 | redirect_stderr=true 8 | stdout_logfile=/var/log/hdfs-namenode.log 9 | autostart=true 10 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/supervisor/conf.d/hive-metastore.conf: -------------------------------------------------------------------------------- 1 | [program:hive-metastore] 2 | directory=/opt/hive 3 | command=hive --service metastore 4 | startsecs=30 5 | stopwaitsecs=10 6 | user=hive 7 | redirect_stderr=true 8 | stdout_logfile=/var/log/hive-metastore.log 9 | autostart=true 10 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/supervisor/conf.d/hive-server2.conf: -------------------------------------------------------------------------------- 1 | [program:hive-server2] 2 | directory=/opt/hive 3 | command=/opt/start-hive-server2.sh 4 | startsecs=30 5 | stopwaitsecs=10 6 | user=hive 7 | redirect_stderr=true 8 | stdout_logfile=/var/log/hive-server2.log 9 | autostart=true 10 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/supervisor/conf.d/kyuubi-server.conf: -------------------------------------------------------------------------------- 1 | [program:kyuubi-server] 2 | directory=/opt/kyuubi 3 | command=kyuubi run 4 | startsecs=30 5 | stopwaitsecs=10 6 | user=kyuubi 7 | redirect_stderr=true 8 | stdout_logfile=/var/log/kyuubi-server.log 9 | autostart=true 10 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/supervisor/conf.d/mapred-history-server.conf: -------------------------------------------------------------------------------- 1 | [program:mapred-history-server] 2 | directory=/opt/hadoop 3 | command=/opt/hadoop/bin/mapred historyserver 4 | startsecs=30 5 | stopwaitsecs=10 6 | user=mapred 7 | redirect_stderr=true 8 | stdout_logfile=/var/log/mapred-history-server.log 9 | autostart=true 10 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/supervisor/conf.d/ranger-admin.conf: -------------------------------------------------------------------------------- 1 | [program:ranger-admin] 2 | directory=/etc/ranger 3 | command=bash startup.sh 4 | startsecs=30 5 | stopwaitsecs=10 6 | user=ranger 7 | redirect_stderr=true 8 | stdout_logfile=/var/log/ranger-admin.log 9 | {% if ranger_enabled %} 10 | autostart=true 11 | {% else %} 12 | autostart=false 13 | {% endif %} 14 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/supervisor/conf.d/spark-history-server.conf: -------------------------------------------------------------------------------- 1 | [program:spark-history-server] 2 | directory=/opt/spark 3 | command=/opt/start-spark-history-server.sh 4 | startsecs=30 5 | stopwaitsecs=10 6 | user=spark 7 | redirect_stderr=true 8 | stdout_logfile=/var/log/spark-history-server.log 9 | {% if spark_enabled %} 10 | autostart=true 11 | {% else %} 12 | autostart=false 13 | {% endif %} 14 | 15 | environment= 16 | JAVA_HOME=/opt/openjdk-17, 17 | SPARK_NO_DAEMONIZE=1 18 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/supervisor/conf.d/yarn-resourcemanager.conf: 
-------------------------------------------------------------------------------- 1 | [program:yarn-resourcemanager] 2 | directory=/opt/hadoop 3 | command=/opt/start-resourcemanager.sh 4 | startsecs=30 5 | stopwaitsecs=10 6 | user=yarn 7 | redirect_stderr=true 8 | stdout_logfile=/var/log/yarn-resourcemanager.log 9 | autostart=true 10 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/supervisor/conf.d/zeppelin-server.conf: -------------------------------------------------------------------------------- 1 | [program:zeppelin-server] 2 | directory=/opt/zeppelin 3 | command=/opt/zeppelin/bin/zeppelin.sh 4 | startsecs=30 5 | stopwaitsecs=10 6 | user=zeppelin 7 | redirect_stderr=true 8 | stdout_logfile=/var/log/zeppelin-server.log 9 | {% if zeppelin_enabled %} 10 | autostart=true 11 | {% else %} 12 | autostart=false 13 | {% endif %} 14 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/supervisor/conf.d/zookeeper.conf: -------------------------------------------------------------------------------- 1 | [program:zookeeper] 2 | directory=/opt/zookeeper 3 | command=zkServer.sh start-foreground 4 | startsecs=30 5 | stopwaitsecs=10 6 | user=zookeeper 7 | redirect_stderr=true 8 | stdout_logfile=/var/log/zookeeper.log 9 | autostart=true 10 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/zeppelin/conf/configuration.xsl: -------------------------------------------------------------------------------- 1 | 2 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 |
(configuration.xsl: XSL markup lost during extraction; the stylesheet renders a configuration XML file as an HTML table with name, value, and description columns)
42 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/zeppelin/conf/interpreter-list.j2: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | # 18 | # [name] [maven artifact] [description] 19 | 20 | alluxio org.apache.zeppelin:zeppelin-alluxio:{{ zeppelin_version }} Alluxio interpreter 21 | angular org.apache.zeppelin:zeppelin-angular:{{ zeppelin_version }} HTML and AngularJS view rendering 22 | bigquery org.apache.zeppelin:zeppelin-bigquery:{{ zeppelin_version }} BigQuery interpreter 23 | cassandra org.apache.zeppelin:zeppelin-cassandra:{{ zeppelin_version }} Cassandra interpreter 24 | elasticsearch org.apache.zeppelin:zeppelin-elasticsearch:{{ zeppelin_version }} Elasticsearch interpreter 25 | file org.apache.zeppelin:zeppelin-file:{{ zeppelin_version }} HDFS file interpreter 26 | flink org.apache.zeppelin:zeppelin-flink:{{ zeppelin_version }} Flink interpreter 27 | groovy org.apache.zeppelin:zeppelin-groovy:{{ zeppelin_version }} Groovy interpreter 28 | hbase org.apache.zeppelin:zeppelin-hbase:{{ zeppelin_version }} Hbase interpreter 29 | java org.apache.zeppelin:zeppelin-java:{{ zeppelin_version }} Java interpreter 30 | jdbc org.apache.zeppelin:zeppelin-jdbc:{{ zeppelin_version }} Jdbc interpreter 31 | livy org.apache.zeppelin:zeppelin-livy:{{ zeppelin_version }} Livy interpreter 32 | md org.apache.zeppelin:zeppelin-markdown:{{ zeppelin_version }} Markdown support 33 | neo4j org.apache.zeppelin:zeppelin-neo4j:{{ zeppelin_version }} Neo4j interpreter 34 | python org.apache.zeppelin:zeppelin-python:{{ zeppelin_version }} Python interpreter 35 | shell org.apache.zeppelin:zeppelin-shell:{{ zeppelin_version }} Shell command 36 | sparql org.apache.zeppelin:zeppelin-sparql:{{ zeppelin_version }} Sparql interpreter 37 | submarine org.apache.zeppelin:zeppelin-submarine:{{ zeppelin_version }} Submarine interpreter 38 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/zeppelin/conf/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | log4j.rootLogger = INFO, dailyfile 19 | 20 | log4j.appender.stdout = org.apache.log4j.ConsoleAppender 21 | log4j.appender.stdout.layout = org.apache.log4j.PatternLayout 22 | log4j.appender.stdout.layout.ConversionPattern=%5p [%d] ([%t] %F[%M]:%L) - %m%n 23 | 24 | log4j.appender.dailyfile.DatePattern=.yyyy-MM-dd 25 | log4j.appender.dailyfile = org.apache.log4j.DailyRollingFileAppender 26 | log4j.appender.dailyfile.File = ${zeppelin.log.file} 27 | log4j.appender.dailyfile.layout = org.apache.log4j.PatternLayout 28 | log4j.appender.dailyfile.layout.ConversionPattern=%5p [%d] ([%t] %F[%M]:%L) - %m%n -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/zeppelin/conf/log4j2.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | # This affects logging for both user code and Flink 20 | rootLogger.level = INFO 21 | rootLogger.appenderRef.file.ref = MainAppender 22 | 23 | # Uncomment this if you want to _only_ change Flink's logging 24 | #logger.flink.name = org.apache.flink 25 | #logger.flink.level = INFO 26 | 27 | # The following lines keep the log level of common libraries/connectors on 28 | # log level INFO. The root logger does not override this. You have to manually 29 | # change the log levels here. 
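# As an illustration only (not enabled in this template), pinning one more library to a quieter level follows the same pattern:
# logger.jetty.name = org.eclipse.jetty
# logger.jetty.level = WARN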
30 | logger.akka.name = akka 31 | logger.akka.level = INFO 32 | logger.kafka.name= org.apache.kafka 33 | logger.kafka.level = INFO 34 | logger.hadoop.name = org.apache.hadoop 35 | logger.hadoop.level = INFO 36 | logger.zookeeper.name = org.apache.zookeeper 37 | logger.zookeeper.level = INFO 38 | 39 | # Log all infos in the given file 40 | appender.main.name = MainAppender 41 | appender.main.type = File 42 | appender.main.append = false 43 | appender.main.fileName = ${sys:zeppelin.log.file} 44 | appender.main.layout.type = PatternLayout 45 | appender.main.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n 46 | 47 | # Suppress the irrelevant (wrong) warnings from the Netty channel handler 48 | logger.netty.name = org.apache.flink.shaded.akka.org.jboss.netty.channel.DefaultChannelPipeline 49 | logger.netty.level = OFF -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/zeppelin/conf/log4j_yarn_cluster.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | log4j.rootLogger = INFO, stdout 19 | 20 | log4j.appender.stdout = org.apache.log4j.ConsoleAppender 21 | log4j.appender.stdout.layout = org.apache.log4j.PatternLayout 22 | log4j.appender.stdout.layout.ConversionPattern=%5p [%d] ([%t] %F[%M]:%L) - %m%n 23 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/zeppelin/conf/shiro.ini.template: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | [users] 19 | # List of users with their password allowed to access Zeppelin. 20 | # To use a different strategy (LDAP / Database / ...) 
check the shiro doc at http://shiro.apache.org/configuration.html#Configuration-INISections 21 | # To enable admin user, uncomment the following line and set an appropriate password. 22 | #admin = password1, admin 23 | user1 = password2, role1, role2 24 | user2 = password3, role3 25 | user3 = password4, role2 26 | 27 | # Sample LDAP configuration, for user Authentication, currently tested for single Realm 28 | [main] 29 | ### A sample for configuring Active Directory Realm 30 | #activeDirectoryRealm = org.apache.zeppelin.realm.ActiveDirectoryGroupRealm 31 | #activeDirectoryRealm.systemUsername = userNameA 32 | 33 | #use either systemPassword or hadoopSecurityCredentialPath, more details in http://zeppelin.apache.org/docs/latest/security/shiroauthentication.html 34 | #activeDirectoryRealm.systemPassword = passwordA 35 | #activeDirectoryRealm.hadoopSecurityCredentialPath = jceks://file/user/zeppelin/zeppelin.jceks 36 | #activeDirectoryRealm.searchBase = CN=Users,DC=SOME_GROUP,DC=COMPANY,DC=COM 37 | #activeDirectoryRealm.url = ldap://ldap.test.com:389 38 | #activeDirectoryRealm.groupRolesMap = "CN=admin,OU=groups,DC=SOME_GROUP,DC=COMPANY,DC=COM":"admin","CN=finance,OU=groups,DC=SOME_GROUP,DC=COMPANY,DC=COM":"finance","CN=hr,OU=groups,DC=SOME_GROUP,DC=COMPANY,DC=COM":"hr" 39 | #activeDirectoryRealm.authorizationCachingEnabled = false 40 | 41 | ### A sample for configuring LDAP Directory Realm 42 | #ldapRealm = org.apache.zeppelin.realm.LdapGroupRealm 43 | ## search base for ldap groups (only relevant for LdapGroupRealm): 44 | #ldapRealm.contextFactory.environment[ldap.searchBase] = dc=COMPANY,dc=COM 45 | #ldapRealm.contextFactory.url = ldap://ldap.test.com:389 46 | #ldapRealm.userDnTemplate = uid={0},ou=Users,dc=COMPANY,dc=COM 47 | #ldapRealm.contextFactory.authenticationMechanism = simple 48 | 49 | ### A sample PAM configuration 50 | #pamRealm=org.apache.zeppelin.realm.PamRealm 51 | #pamRealm.service=sshd 52 | 53 | ## A same for configuring Knox SSO Realm 54 | #knoxJwtRealm = org.apache.zeppelin.realm.jwt.KnoxJwtRealm 55 | #knoxJwtRealm.providerUrl = https://domain.example.com/ 56 | #knoxJwtRealm.login = gateway/knoxsso/knoxauth/login.html 57 | #knoxJwtRealm.logout = gateway/knoxssout/api/v1/webssout 58 | #knoxJwtRealm.logoutAPI = true 59 | #knoxJwtRealm.redirectParam = originalUrl 60 | #knoxJwtRealm.cookieName = hadoop-jwt 61 | #knoxJwtRealm.publicKeyPath = /etc/zeppelin/conf/knox-sso.pem 62 | # 63 | #authc = org.apache.zeppelin.realm.jwt.KnoxAuthenticationFilter 64 | 65 | ### A sample for configuring Kerberos Realm 66 | # krbRealm = org.apache.zeppelin.realm.kerberos.KerberosRealm 67 | # krbRealm.principal = HTTP/zeppelin.fqdn.domain.com@EXAMPLE.COM 68 | # krbRealm.keytab = /etc/security/keytabs/spnego.service.keytab 69 | # krbRealm.nameRules = DEFAULT 70 | # krbRealm.signatureSecretFile = /etc/security/http_secret 71 | # krbRealm.tokenValidity = 36000 72 | # krbRealm.cookieDomain = domain.com 73 | # krbRealm.cookiePath = / 74 | # krbRealm.logout = /logout 75 | # krbRealm.logoutAPI = true 76 | # krbRealm.providerUrl = https://domain.example.com/ 77 | # krbRealm.redirectParam = originalUrl 78 | # authc = org.apache.zeppelin.realm.kerberos.KerberosAuthenticationFilter 79 | 80 | sessionManager = org.apache.shiro.web.session.mgt.DefaultWebSessionManager 81 | 82 | ### If caching of user is required then uncomment below lines 83 | #cacheManager = org.apache.shiro.cache.MemoryConstrainedCacheManager 84 | #securityManager.cacheManager = $cacheManager 85 | 86 | ### Enables 'HttpOnly' flag in 
Zeppelin cookies 87 | cookie = org.apache.shiro.web.servlet.SimpleCookie 88 | cookie.name = JSESSIONID 89 | cookie.httpOnly = true 90 | ### Uncomment the below line only when Zeppelin is running over HTTPS 91 | #cookie.secure = true 92 | sessionManager.sessionIdCookie = $cookie 93 | 94 | securityManager.sessionManager = $sessionManager 95 | # 86,400,000 milliseconds = 24 hour 96 | securityManager.sessionManager.globalSessionTimeout = 86400000 97 | shiro.loginUrl = /api/login 98 | 99 | [roles] 100 | role1 = * 101 | role2 = * 102 | role3 = * 103 | admin = * 104 | 105 | [urls] 106 | # This section is used for url-based security. For details see the shiro.ini documentation. 107 | # 108 | # You can secure interpreter, configuration and credential information by urls. 109 | # Comment or uncomment the below urls that you want to hide: 110 | # anon means the access is anonymous. 111 | # authc means form based auth Security. 112 | # 113 | # IMPORTANT: Order matters: URL path expressions are evaluated against an incoming request 114 | # in the order they are defined and the FIRST MATCH WINS. 115 | # 116 | # To allow anonymous access to all but the stated urls, 117 | # uncomment the line second last line (/** = anon) and comment the last line (/** = authc) 118 | # 119 | /api/version = anon 120 | /api/cluster/address = anon 121 | # Allow all authenticated users to restart interpreters on a notebook page. 122 | # Comment out the following line if you would like to authorize only admin users to restart interpreters. 123 | /api/interpreter/setting/restart/** = authc 124 | /api/interpreter/** = authc, roles[admin] 125 | /api/notebook-repositories/** = authc, roles[admin] 126 | /api/configurations/** = authc, roles[admin] 127 | /api/credential/** = authc, roles[admin] 128 | /api/admin/** = authc, roles[admin] 129 | #/** = anon 130 | /** = authc 131 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/zeppelin/conf/zeppelin-env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | export JAVA_HOME=/opt/openjdk-11 20 | export ZEPPELIN_LOG_DIR=/var/log/zeppelin 21 | export ZEPPELIN_PID_DIR=/var/run/zeppelin/pid 22 | 23 | # export JAVA_HOME= 24 | # export USE_HADOOP= # Whether include hadoop jars into zeppelin server process. (true or false) 25 | # export SPARK_MASTER= # Spark master url. eg. spark://master_addr:7077. Leave empty if you want to use local mode. 
26 | # export ZEPPELIN_ADDR # Bind address (default 127.0.0.1) 27 | # export ZEPPELIN_PORT # port number to listen (default 8080) 28 | # export ZEPPELIN_LOCAL_IP # Zeppelin's thrift server ip address, if not specified, one random IP address will be choosen. 29 | # export ZEPPELIN_JAVA_OPTS # Additional jvm options. for example, export ZEPPELIN_JAVA_OPTS="-Dspark.executor.memory=8g -Dspark.cores.max=16" 30 | # export ZEPPELIN_MEM # Zeppelin jvm mem options Default -Xms1024m -Xmx1024m -XX:MaxMetaspaceSize=512m 31 | # export ZEPPELIN_INTP_MEM # zeppelin interpreter process jvm mem options. Default -Xms1024m -Xmx1024m -XX:MaxMetaspaceSize=512m 32 | # export ZEPPELIN_INTP_JAVA_OPTS # zeppelin interpreter process jvm options. 33 | # export ZEPPELIN_SSL_PORT # ssl port (used when ssl environment variable is set to true) 34 | # export ZEPPELIN_JMX_ENABLE # Enable JMX feature by defining "true" 35 | # export ZEPPELIN_JMX_PORT # Port number which JMX uses. If not set, JMX won't be enabled 36 | 37 | # export ZEPPELIN_LOG_DIR # Where log files are stored. PWD by default. 38 | # export ZEPPELIN_PID_DIR # The pid files are stored. ${ZEPPELIN_HOME}/run by default. 39 | # export ZEPPELIN_WAR_TEMPDIR # The location of jetty temporary directory. 40 | # export ZEPPELIN_NOTEBOOK_DIR # Where notebook saved 41 | # export ZEPPELIN_NOTEBOOK_HOMESCREEN # Id of notebook to be displayed in homescreen. ex) 2A94M5J1Z 42 | # export ZEPPELIN_NOTEBOOK_HOMESCREEN_HIDE # hide homescreen notebook from list when this value set to "true". default "false" 43 | 44 | # export ZEPPELIN_NOTEBOOK_S3_BUCKET # Bucket where notebook saved 45 | # export ZEPPELIN_NOTEBOOK_S3_ENDPOINT # Endpoint of the bucket 46 | # export ZEPPELIN_NOTEBOOK_S3_USER # User in bucket where notebook saved. For example bucket/user/notebook/2A94M5J1Z/note.json 47 | # export ZEPPELIN_NOTEBOOK_S3_KMS_KEY_ID # AWS KMS key ID 48 | # export ZEPPELIN_NOTEBOOK_S3_KMS_KEY_REGION # AWS KMS key region 49 | # export ZEPPELIN_NOTEBOOK_S3_SSE # Server-side encryption enabled for notebooks 50 | # export ZEPPELIN_NOTEBOOK_S3_PATH_STYLE_ACCESS # Path style access for S3 bucket 51 | 52 | # export ZEPPELIN_NOTEBOOK_GCS_STORAGE_DIR # GCS "directory" (prefix) under which notebooks are saved. E.g. gs://example-bucket/path/to/dir 53 | # export GOOGLE_APPLICATION_CREDENTIALS # Provide a service account key file for GCS and BigQuery API calls (overrides application default credentials) 54 | 55 | # export ZEPPELIN_NOTEBOOK_MONGO_URI # MongoDB connection URI used to connect to a MongoDB database server. Default "mongodb://localhost" 56 | # export ZEPPELIN_NOTEBOOK_MONGO_DATABASE # Database name to store notebook. Default "zeppelin" 57 | # export ZEPPELIN_NOTEBOOK_MONGO_COLLECTION # Collection name to store notebook. Default "notes" 58 | # export ZEPPELIN_NOTEBOOK_MONGO_AUTOIMPORT # If "true" import local notes under ZEPPELIN_NOTEBOOK_DIR on startup. Default "false" 59 | 60 | # export ZEPPELIN_IDENT_STRING # A string representing this instance of zeppelin. $USER by default. 61 | # export ZEPPELIN_NICENESS # The scheduling priority for daemons. Defaults to 0. 
62 | # export ZEPPELIN_INTERPRETER_LOCALREPO # Local repository for interpreter's additional dependency loading 63 | # export ZEPPELIN_INTERPRETER_DEP_MVNREPO # Remote principal repository for interpreter's additional dependency loading 64 | # export ZEPPELIN_HELIUM_NODE_INSTALLER_URL # Remote Node installer url for Helium dependency loader 65 | # export ZEPPELIN_HELIUM_NPM_INSTALLER_URL # Remote Npm installer url for Helium dependency loader 66 | # export ZEPPELIN_HELIUM_YARNPKG_INSTALLER_URL # Remote Yarn package installer url for Helium dependency loader 67 | # export ZEPPELIN_NOTEBOOK_STORAGE # Refers to pluggable notebook storage class, can have two classes simultaneously with a sync between them (e.g. local and remote). 68 | # export ZEPPELIN_NOTEBOOK_ONE_WAY_SYNC # If there are multiple notebook storages, should we treat the first one as the only source of truth? 69 | # export ZEPPELIN_NOTEBOOK_PUBLIC # Make notebook public by default when created, private otherwise 70 | 71 | # export DOCKER_TIME_ZONE # Set to the same time zone as the zeppelin server. E.g, "America/New_York" or "Asia/Shanghai" 72 | 73 | #### Spark interpreter configuration #### 74 | 75 | ## Kerberos ticket refresh setting 76 | ## 77 | #export KINIT_FAIL_THRESHOLD # (optional) How many times should kinit retry. The default value is 5. 78 | #export KERBEROS_REFRESH_INTERVAL # (optional) The refresh interval for Kerberos ticket. The default value is 1d. 79 | 80 | ## Use provided spark installation ## 81 | ## defining SPARK_HOME makes Zeppelin run spark interpreter process using spark-submit 82 | ## 83 | # export SPARK_HOME # (required) When it is defined, load it instead of Zeppelin embedded Spark libraries 84 | # export SPARK_SUBMIT_OPTIONS # (optional) extra options to pass to spark submit. eg) "--driver-memory 512M --executor-memory 1G". 85 | # export SPARK_APP_NAME # (optional) The name of spark application. 86 | # export SPARK_CONF_DIR # (optional) In the zeppelin interpreter on docker mode, Need to set the local spark conf folder path 87 | 88 | ## Use embedded spark binaries ## 89 | ## without SPARK_HOME defined, Zeppelin still able to run spark interpreter process using embedded spark binaries. 90 | ## however, it is not encouraged when you can define SPARK_HOME 91 | ## 92 | # Options read in YARN client mode 93 | # export HADOOP_CONF_DIR # yarn-site.xml is located in configuration directory in HADOOP_CONF_DIR. 94 | # Pyspark (supported with Spark 1.2.1 and above) 95 | # To configure pyspark, you need to set spark distribution's path to 'spark.home' property in Interpreter setting screen in Zeppelin GUI 96 | # export PYSPARK_PYTHON # path to the python command. must be the same path on the driver(Zeppelin) and all workers. 97 | # export PYTHONPATH 98 | 99 | ## Spark interpreter options ## 100 | ## 101 | # export ZEPPELIN_SPARK_USEHIVECONTEXT # Use HiveContext instead of SQLContext if set true. true by default. 102 | # export ZEPPELIN_SPARK_CONCURRENTSQL # Execute multiple SQL concurrently if set true. false by default. 103 | # export ZEPPELIN_SPARK_IMPORTIMPLICIT # Import implicits, UDF collection, and sql if set true. true by default. 104 | # export ZEPPELIN_SPARK_MAXRESULT # Max number of Spark SQL result to display. 1000 by default. 105 | # export ZEPPELIN_WEBSOCKET_MAX_TEXT_MESSAGE_SIZE # Size in characters of the maximum text message to be received by websocket. 
Defaults to 1024000 106 | 107 | #### HBase interpreter configuration #### 108 | 109 | ## To connect to HBase running on a cluster, either HBASE_HOME or HBASE_CONF_DIR must be set 110 | 111 | # export HBASE_HOME= # (required) Directory under which the HBase scripts and configuration live 112 | # export HBASE_CONF_DIR= # (optional) Alternatively, the configuration directory can be set to point to the directory that has hbase-site.xml 113 | 114 | #### Zeppelin impersonation configuration 115 | # export ZEPPELIN_IMPERSONATE_CMD # Optional, when you want to run the interpreter as the end web user. eg) 'sudo -H -u ${ZEPPELIN_IMPERSONATE_USER} bash -c ' 116 | # export ZEPPELIN_IMPERSONATE_SPARK_PROXY_USER # Optional, true by default; set to false if you don't want to use the --proxy-user option with the Spark interpreter when impersonation is enabled 117 |
-------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/zeppelin/conf/zeppelin-site.xml: --------------------------------------------------------------------------------
1 | <?xml version="1.0"?> 2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> 3 | <!-- 4 | Licensed to the Apache Software Foundation (ASF) under one or more 5 | contributor license agreements. See the NOTICE file distributed with 6 | this work for additional information regarding copyright ownership. 7 | The ASF licenses this file to You under the Apache License, Version 2.0 8 | (the "License"); you may not use this file except in compliance with 9 | the License. You may obtain a copy of the License at 10 | 11 | http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | Unless required by applicable law or agreed to in writing, software 14 | distributed under the License is distributed on an "AS IS" BASIS, 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | See the License for the specific language governing permissions and 17 | limitations under the License. 18 | --> 19 | 20 | <configuration> 21 |
22 | <property> 23 | <name>zeppelin.server.addr</name> 24 | <value>hadoop-master1.orb.local</value> 25 | <description>Server binding address. If you cannot connect to your web browser on WSL or Windows, change 127.0.0.1 to 0.0.0.0. It, however, causes security issues when you open your machine to the public</description> 26 | </property> 27 |
28 | <property> 29 | <name>zeppelin.server.port</name> 30 | <value>8081</value> 31 | <description>Server port.</description> 32 | </property> 33 |
34 | <property> 35 | <name>zeppelin.cluster.addr</name> 36 | <value></value> 37 | <description>Server cluster address, e.g. 127.0.0.1:6000,127.0.0.2:6000,127.0.0.3:6000</description> 38 | </property> 39 |
40 | <property> 41 | <name>zeppelin.server.context.path</name> 42 | <value>/</value> 43 | <description>Context Path of the Web Application</description> 44 | </property> 45 |
46 | <property> 47 | <name>zeppelin.war.tempdir</name> 48 | <value>/var/run/zeppelin/webapps</value> 49 | <description>Location of jetty temporary directory</description> 50 | </property> 51 |
52 | <property> 53 | <name>zeppelin.notebook.dir</name> 54 | <value>file:///var/lib/zeppelin/notebook</value> 55 | <description>Path or URI for notebook persistence</description> 56 | </property> 57 |
58 | <property> 59 | <name>zeppelin.note.file.exclude.fields</name> 60 | <value></value> 61 | <description>Fields to be excluded from being saved in note files; a Paragraph prefix means the field belongs to Paragraph, e.g. Paragraph.results</description> 62 | </property> 63 |
64 | <property> 65 | <name>zeppelin.interpreter.include</name> 66 | <value>md,spark,jdbc</value> 67 | <description>All the interpreters that you would like to include. You can only specify either 'zeppelin.interpreter.include' or 'zeppelin.interpreter.exclude'. Specifying them together is not allowed.</description> 68 | </property> 69 |
70 | <property> 71 | <name>zeppelin.interpreter.exclude</name> 72 | <value></value> 73 | <description>All the interpreters that you would like to exclude. You can only specify either 'zeppelin.interpreter.include' or 'zeppelin.interpreter.exclude'. Specifying them together is not allowed.</description> 74 | </property> 75 |
76 | <property> 77 | <name>zeppelin.notebook.collaborative.mode.enable</name> 78 | <value>true</value> 79 | <description>Enable collaborative mode</description> 80 | </property> 81 |
82 | <property> 83 | <name>zeppelin.notebook.versioned.mode.enable</name> 84 | <value>true</value> 85 | <description>Value to enable/disable version control support in Notes</description> 86 | </property> 87 |
88 | 101 |
102 | <property> 103 | <name>zeppelin.notebook.storage</name> 104 | <value>org.apache.zeppelin.notebook.repo.FileSystemNotebookRepo</value> 105 | <description>Versioned notebook persistence layer implementation</description> 106 | </property> 107 |
108 | <property> 109 | <name>zeppelin.interpreter.dir</name> 110 | <value>interpreter</value> 111 | <description>Interpreter implementation base directory</description> 112 | </property> 113 |
114 | <property> 115 | <name>zeppelin.interpreter.localRepo</name> 116 | <value>/var/lib/zeppelin/local-repo</value> 117 | <description>Local repository for interpreter's additional dependency loading</description> 118 | </property> 119 |
120 | <property> 121 | <name>zeppelin.interpreter.dep.mvnRepo</name> 122 | <value>https://mirrors.cloud.tencent.com/maven/</value> 123 | <description>Remote principal repository for interpreter's additional dependency loading</description> 124 | </property> 125 |
126 | <property> 127 | <name>zeppelin.dep.localrepo</name> 128 | <value>/var/lib/zeppelin/local-repo</value> 129 | <description>Local repository for dependency loader</description> 130 | </property> 131 |
132 | <property> 133 | <name>zeppelin.helium.node.installer.url</name> 134 | <value>https://nodejs.org/dist/</value> 135 | <description>Remote Node installer url for Helium dependency loader</description> 136 | </property> 137 |
138 | <property> 139 | <name>zeppelin.helium.npm.installer.url</name> 140 | <value>https://registry.npmjs.org/</value> 141 | <description>Remote Npm installer url for Helium dependency loader</description> 142 | </property> 143 |
144 | <property> 145 | <name>zeppelin.helium.yarnpkg.installer.url</name> 146 | <value>https://github.com/yarnpkg/yarn/releases/download/</value> 147 | <description>Remote Yarn package installer url for Helium dependency loader</description> 148 | </property> 149 |
150 | <property> 151 | <name>zeppelin.interpreter.group.default</name> 152 | <value>spark</value> 153 | <description></description> 154 | </property> 155 |
156 | <property> 157 | <name>zeppelin.interpreter.connect.timeout</name> 158 | <value>600s</value> 159 | <description>Interpreter process connect timeout. Default time unit is msec.</description> 160 | </property> 161 |
162 | <property> 163 | <name>zeppelin.interpreter.output.limit</name> 164 | <value>102400</value> 165 | <description>Output message from interpreter exceeding the limit will be truncated</description> 166 | </property> 167 |
168 | <property> 169 | <name>zeppelin.server.allowed.origins</name> 170 | <value>*</value> 171 | <description>Allowed sources for REST and WebSocket requests (i.e. http://onehost:8080,http://otherhost.com). If you leave * you are vulnerable to https://issues.apache.org/jira/browse/ZEPPELIN-173</description> 172 | </property> 173 |
174 | <property> 175 | <name>zeppelin.username.force.lowercase</name> 176 | <value>false</value> 177 | <description>Force convert username case to lower case, useful for Active Directory/LDAP. Default is not to change case</description> 178 | </property> 179 |
180 | <property> 181 | <name>zeppelin.notebook.default.owner.username</name> 182 | <value></value> 183 | <description>Set owner role by default</description> 184 | </property> 185 |
186 | <property> 187 | <name>zeppelin.notebook.public</name> 188 | <value>true</value> 189 | <description>Make notebook public by default when created, private otherwise</description> 190 | </property> 191 |
192 | <property> 193 | <name>zeppelin.websocket.max.text.message.size</name> 194 | <value>10240000</value> 195 | <description>Size in characters of the maximum text message to be received by websocket. Defaults to 10240000</description> 196 | </property> 197 |
198 | <property> 199 | <name>zeppelin.server.default.dir.allowed</name> 200 | <value>false</value> 201 | <description>Enable directory listings on server.</description> 202 | </property> 203 |
204 | <property> 205 | <name>zeppelin.interpreter.yarn.monitor.interval_secs</name> 206 | <value>10</value> 207 | <description>Check interval in secs for yarn apps monitors</description> 208 | </property> 209 |
210 | <property> 211 | <name>zeppelin.server.jetty.name</name> 212 | <value></value> 213 | <description>Hardcoding Application Server name to Prevent Fingerprinting</description> 214 | </property> 215 |
216 | <property> 217 | <name>zeppelin.server.xframe.options</name> 218 | <value>SAMEORIGIN</value> 219 | <description>The X-Frame-Options HTTP response header can be used to indicate whether or not a browser should be allowed to render a page in a frame/iframe/object.</description> 220 | </property> 221 |
222 | <property> 223 | <name>zeppelin.server.xxss.protection</name> 224 | <value>1; mode=block</value> 225 | <description>The HTTP X-XSS-Protection response header is a feature of Internet Explorer, Chrome and Safari that stops pages from loading when they detect reflected cross-site scripting (XSS) attacks. When value is set to 1 and a cross-site scripting attack is detected, the browser will sanitize the page (remove the unsafe parts).</description> 226 | </property> 227 |
228 | <property> 229 | <name>zeppelin.server.xcontent.type.options</name> 230 | <value>nosniff</value> 231 | <description>The HTTP X-Content-Type-Options response header helps to prevent MIME type sniffing attacks. It directs the browser to honor the type specified in the Content-Type header, rather than trying to determine the type from the content itself. The default value "nosniff" is really the only meaningful value. This header is supported on all browsers except Safari and Safari on iOS.</description> 232 | </property> 233 |
234 | 246 |
247 | <property> 248 | <name>zeppelin.run.mode</name> 249 | <value>auto</value> 250 | <description>'auto|local|k8s|docker'</description> 251 | </property> 252 |
253 | <property> 254 | <name>zeppelin.search.enable</name> 255 | <value>false</value> 256 | </property> 257 |
258 | <property> 259 | <name>zeppelin.search.index.rebuild</name> 260 | <value>false</value> 261 | <description>Whether to rebuild the index when Zeppelin starts. If true, all notes are read and the index is rebuilt, which can consume a lot of memory if you have a large number of notes, so it defaults to false</description> 262 | </property> 263 |
264 | <property> 265 | <name>zeppelin.search.use.disk</name> 266 | <value>true</value> 267 | <description>Whether to use disk for storing the search index; if false, memory is used instead.</description> 268 | </property> 269 |
270 | <property> 271 | <name>zeppelin.search.index.path</name> 272 | <value>/var/run/zeppelin/search-index</value> 273 | <description>Path for storing the search index on disk.</description> 274 | </property> 275 |
276 | <property> 277 | <name>zeppelin.jobmanager.enable</name> 278 | <value>false</value> 279 | <description>The Job tab in the Zeppelin UI is not very useful and costs a lot of memory, affecting performance. 280 | Disabling it can save a lot of memory</description> 281 | </property> 282 |
283 | <property> 284 | <name>zeppelin.spark.only_yarn_cluster</name> 285 | <value>false</value> 286 | <description>Whether to only allow yarn-cluster mode</description> 287 | </property> 288 |
289 | </configuration> 290 |
-------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/zookeeper/conf/zoo.cfg: --------------------------------------------------------------------------------
1 | # The number of milliseconds of each tick 2 | tickTime=2000 3 | # The number of ticks that the initial 4 | # synchronization phase can take 5 | initLimit=10 6 | # The number of ticks that can pass between 7 | # sending a request and getting an acknowledgement 8 | syncLimit=5 9 | # the directory where the snapshot is stored. 10 | # do not use /tmp for storage, /tmp here is just 11 | # example sakes. 12 | dataDir=/var/lib/zookeeper 13 | # the port at which the clients will connect 14 | clientPort=2181 15 | # the maximum number of client connections. 16 | # increase this if you need to handle more clients 17 | #maxClientCnxns=60 18 | # 19 | # Be sure to read the maintenance section of the 20 | # administrator guide before turning on autopurge.
21 | # 22 | # https://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance 23 | # 24 | # The number of snapshots to retain in dataDir 25 | #autopurge.snapRetainCount=3 26 | # Purge task interval in hours 27 | # Set to "0" to disable auto purge feature 28 | #autopurge.purgeInterval=1 29 | 30 | ## Metrics Providers 31 | # 32 | # https://prometheus.io Metrics Exporter 33 | metricsProvider.className=org.apache.zookeeper.metrics.prometheus.PrometheusMetricsProvider 34 | metricsProvider.httpHost=0.0.0.0 35 | metricsProvider.httpPort=7000 36 | metricsProvider.exportJvmInfo=true 37 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/etc/zookeeper/conf/zookeeper-env.sh: -------------------------------------------------------------------------------- 1 | export ZK_SERVER_HEAP=384 2 | 3 | # ZOOKEEPER-1177 (3.6.0) 4 | # https://xie.infoq.cn/article/f346a8284f59e16bb7f89188e 5 | export SERVER_JVMFLAGS="-Dzookeeper.watchManagerName=org.apache.zookeeper.server.watch.WatchManagerOptimized" 6 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/opt/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -exuo pipefail 4 | 5 | remove-service-ready-mark -s hdfs 6 | 7 | "$@" & 8 | 9 | if [[ -v POST_BOOTSTRAP_COMMAND ]]; then 10 | $POST_BOOTSTRAP_COMMAND 11 | fi 12 | 13 | if [[ -d /opt/service-ready.d ]]; then 14 | for init_script in /opt/service-ready.d/*; do 15 | bash "${init_script}" 16 | done 17 | fi 18 | 19 | wait 20 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/opt/hadoop-init.d/init-hdfs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | 3 | mkdir /var/lib/zookeeper 4 | chown -R zookeeper:zookeeper /var/lib/zookeeper 5 | 6 | mkdir /var/lib/kyuubi 7 | chown -R kyuubi:root /var/lib/kyuubi 8 | 9 | mkdir /var/lib/kyuubi/work 10 | chmod -R 777 /var/lib/kyuubi/work 11 | 12 | mkdir /var/lib/hadoop-hdfs 13 | chown -R hdfs:hdfs /var/lib/hadoop-hdfs 14 | 15 | mkdir /var/lib/hadoop-mapreduce 16 | chown -R mapred:mapred /var/lib/hadoop-mapreduce 17 | 18 | mkdir /var/lib/hadoop-yarn 19 | chown -R yarn:yarn /var/lib/hadoop-yarn 20 | 21 | mkdir /opt/hadoop/logs /var/log/hadoop-hdfs /var/log/hadoop-yarn 22 | chown -R hadoop.hadoop /opt/hadoop/logs 23 | chown -R hdfs.hadoop /var/log/hadoop-hdfs 24 | chown -R yarn.hadoop /var/log/hadoop-yarn 25 | chmod -R 770 /opt/hadoop/logs /var/log/hadoop-hdfs 26 | chmod -R 755 /var/log/hadoop-yarn 27 | 28 | touch /var/log/hdfs-namenode.log 29 | chown hdfs /var/log/hdfs-namenode.log 30 | 31 | # Additional libs 32 | # cp -av /opt/hadoop/lib/native/Linux-amd64-64/* /usr/lib64/ 33 | # mkdir -v /opt/hive/auxlib || test -d /opt/hive-client/auxlib 34 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/opt/parquet/bin/parquet: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | ${JAVA_HOME}/bin/java -cp "${PARQUET_HOME}/jars/*:`${HADOOP_HOME}/bin/hadoop classpath`" org.apache.parquet.cli.Main "$@" 3 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/opt/service-ready.d/001-wait-hdfs-ready.sh: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env bash 2 | 3 | set -exuo pipefail 4 | 5 | wait-port-ready -p 8020 -t ${HDFS_READY_TIMEOUT_SEC:-180} 6 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/opt/service-ready.d/002-create-hdfs-dirs.sh.j2: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -exuo pipefail 4 | 5 | HADOOP="$HADOOP_HOME/bin/hadoop" 6 | {% if kerberos_enabled %} 7 | kinit -kt /share/keytabs/hadoop-master1/nn.service.keytab nn/hadoop-master1.orb.local@TEST.ORG 8 | {% else %} 9 | HADOOP="HADOOP_USER_NAME=hdfs $HADOOP" 10 | {% endif %} 11 | 12 | DIR_LIST=/tmp/hdfs-init-dirs 13 | 14 | set +x 15 | 16 | function parallel_create_dirs() { 17 | while read line; do 18 | # skip empty or started with # line 19 | [[ -z "$line" || "$line" =~ ^# ]] && continue 20 | 21 | owner=$(echo $line | jq -r '.owner') 22 | group=$(echo $line | jq -r '.group') 23 | permission=$(echo $line | jq -r '.permission') 24 | path=$(echo $line | jq -r '.path') 25 | command="$HADOOP fs -mkdir -p $path && $HADOOP fs -chmod $permission $path && $HADOOP fs -chown $owner:$group $path" 26 | echo $command 27 | done < $DIR_LIST | xargs -t -I {} -P 5 bash -c "{}" 28 | } 29 | 30 | echo > $DIR_LIST 31 | # top-level 32 | echo '{"owner": "hdfs", "group": "hdfs", "permission": "1777", "path": "/tmp"}' >> $DIR_LIST 33 | echo '{"owner": "hdfs", "group": "hdfs", "permission": "1755", "path": "/var"}' >> $DIR_LIST 34 | echo '{"owner": "hdfs", "group": "hdfs", "permission": "1755", "path": "/user"}' >> $DIR_LIST 35 | echo '{"owner": "hive", "group": "hadoop", "permission": "1777", "path": "/warehouse"}' >> $DIR_LIST 36 | echo '{"owner": "yarn", "group": "hadoop", "permission": "1777", "path": "/yarn-app-log"}' >> $DIR_LIST 37 | echo '{"owner": "mapred", "group": "mapred", "permission": "1777", "path": "/mr-staging"}' >> $DIR_LIST 38 | echo '{"owner": "mapred", "group": "hdfs", "permission": "1777", "path": "/mr-history"}' >> $DIR_LIST 39 | {% if spark_enabled %} 40 | echo '{"owner": "spark", "group": "hdfs", "permission": "1777", "path": "/spark-history"}' >> $DIR_LIST 41 | {% endif %} 42 | {% if flink_enabled %} 43 | echo '{"owner": "flink", "group": "hdfs", "permission": "1777", "path": "/flink-history"}' >> $DIR_LIST 44 | {% endif %} 45 | parallel_create_dirs 46 | 47 | echo > $DIR_LIST 48 | # user home 49 | echo '{"owner": "mapred", "group": "mapred", "permission": "755", "path": "/user/history"}' >> $DIR_LIST 50 | echo '{"owner": "yarn", "group": "hadoop", "permission": "755", "path": "/user/yarn"}' >> $DIR_LIST 51 | echo '{"owner": "hive", "group": "hadoop", "permission": "755", "path": "/user/hive"}' >> $DIR_LIST 52 | echo '{"owner": "root", "group": "hadoop", "permission": "755", "path": "/user/root"}' >> $DIR_LIST 53 | {% if spark_enabled %} 54 | echo '{"owner": "spark", "group": "hadoop", "permission": "755", "path": "/user/spark"}' >> $DIR_LIST 55 | {% endif %} 56 | {% if flink_enabled %} 57 | echo '{"owner": "flink", "group": "hadoop", "permission": "755", "path": "/user/flink"}' >> $DIR_LIST 58 | {% endif %} 59 | parallel_create_dirs 60 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/opt/service-ready.d/003-create-hdfs-ready-mark.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | create-service-ready-mark -s hdfs 4 | 
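The service-ready.d scripts above, together with entrypoint.sh, rely on the helper commands create-service-ready-mark, remove-service-ready-mark, wait-service-ready-mark, and wait-port-ready installed under /usr/local/bin, whose sources are not part of this listing. A minimal sketch of the convention they appear to implement, assuming a ready mark is simply a file named after the service in a shared directory (the /share/ready-marks path below is an assumption, not the repository's actual location), could look like this:

# Hypothetical sketch of the ready-mark helpers; not the repository's actual implementation.
MARK_DIR=/share/ready-marks

create_service_ready_mark() {   # analogous to: create-service-ready-mark -s <service>
  mkdir -p "$MARK_DIR" && touch "$MARK_DIR/$1"
}

wait_service_ready_mark() {     # analogous to: wait-service-ready-mark -s <service>
  until [ -f "$MARK_DIR/$1" ]; do sleep 1; done
}

Under such a convention, 001-wait-hdfs-ready.sh waits for the NameNode RPC port (8020), 002-create-hdfs-dirs.sh.j2 provisions the HDFS directory layout, and 003-create-hdfs-ready-mark.sh publishes the hdfs mark that the start-*.sh scripts below block on via wait-service-ready-mark.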
-------------------------------------------------------------------------------- /templates/hadoop-master/files/opt/service-ready.d/004-kinit-spark.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | {% if spark_enabled and kerberos_enabled %} 4 | kinit -kt /share/keytabs/hadoop-master1/spark.service.keytab spark/hadoop-master1.orb.local@TEST.ORG 5 | {% endif %} 6 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/opt/start-hive-server2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -exuo pipefail 4 | 5 | wait-service-ready-mark -s hdfs 6 | 7 | exec hive --service hiveserver2 -------------------------------------------------------------------------------- /templates/hadoop-master/files/opt/start-namenode.sh.j2: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -exuo pipefail 4 | 5 | {% if kerberos_enabled %} 6 | wait-service-ready-mark -s kdc 7 | {% endif %} 8 | 9 | if [ ! -d /var/lib/hadoop-hdfs/cache ]; then 10 | echo 'Y' | /opt/hadoop/bin/hdfs namenode -format 11 | fi 12 | 13 | exec hdfs namenode -------------------------------------------------------------------------------- /templates/hadoop-master/files/opt/start-resourcemanager.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -exuo pipefail 4 | 5 | wait-service-ready-mark -s hdfs 6 | 7 | exec yarn resourcemanager -------------------------------------------------------------------------------- /templates/hadoop-master/files/opt/start-spark-history-server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -exuo pipefail 4 | 5 | wait-service-ready-mark -s hdfs 6 | 7 | exec /opt/spark/sbin/start-history-server.sh 8 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/root/.ssh/config: -------------------------------------------------------------------------------- 1 | Host * 2 | StrictHostKeyChecking no 3 | 4 | Host hadoop-master1 5 | Hostname hadoop-master1.orb.local 6 | User root 7 | Port 22 8 | ForwardAgent yes 9 | IdentityFile /root/.ssh/id_rsa_hadoop_testing 10 | 11 | Host hadoop-worker1 12 | Hostname hadoop-worker1.orb.local 13 | User root 14 | Port 22 15 | ForwardAgent yes 16 | IdentityFile /root/.ssh/id_rsa_hadoop_testing 17 | 18 | Host hadoop-worker2 19 | Hostname hadoop-worker2.orb.local 20 | User root 21 | Port 22 22 | ForwardAgent yes 23 | IdentityFile /root/.ssh/id_rsa_hadoop_testing 24 | 25 | Host hadoop-worker3 26 | Hostname hadoop-worker3.orb.local 27 | User root 28 | Port 22 29 | ForwardAgent yes 30 | IdentityFile /root/.ssh/id_rsa_hadoop_testing 31 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/root/.ssh/id_rsa_hadoop_testing: -------------------------------------------------------------------------------- 1 | -----BEGIN OPENSSH PRIVATE KEY----- 2 | b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAABlwAAAAdzc2gtcn 3 | NhAAAAAwEAAQAAAYEAwL+lDkkt0e+0au1SztlN7NmEtY+Yw40xjSMC6r7FdICUQO9iUIxn 4 | RopVskWV4vaGhTJiKY1EtlXTQjL3kPaVWskiBZ32wTSDaQ6kw4CqakGnS4o5kMEp+kXPFN 5 | vlnrXGtuobn7h9KkjGOtRJCtM6stcc81m8gjmjkdVoqErzQ5Sa7/Ou+utWb3LTVS+IBuyQ 6 | oZLZgpLW95QNxdQD4EbJvs4SUiXmldFlJ1jCEuXh08ntCEQHmVBq5zbBeYG+VywE0w/G1W 7 | 
WOQfE5+C0B4JF2ofrCWZxnK8br5iUn0GxzZsQs16UYV1sFJHdniM7M6Ni6ovgHjNgZFcn0 8 | fmIJu20F6vEPnaoqGDOGerWOnIMoQ5KtXollkgBsRflEyCFDdKPFpJv5jczC9wYXwYcjJk 9 | XAF982cgJBVLoo0PwSUSe1pfg+dMtztB566vasIIVkw95vHr0YB/Pi/RBZ3SB9UZ2KEciw 10 | Oey8KFyqcylftSerLUCV8EDmlqfJIKZTM/JhrH5ZAAAFkCTsLg0k7C4NAAAAB3NzaC1yc2 11 | EAAAGBAMC/pQ5JLdHvtGrtUs7ZTezZhLWPmMONMY0jAuq+xXSAlEDvYlCMZ0aKVbJFleL2 12 | hoUyYimNRLZV00Iy95D2lVrJIgWd9sE0g2kOpMOAqmpBp0uKOZDBKfpFzxTb5Z61xrbqG5 13 | +4fSpIxjrUSQrTOrLXHPNZvII5o5HVaKhK80OUmu/zrvrrVm9y01UviAbskKGS2YKS1veU 14 | DcXUA+BGyb7OElIl5pXRZSdYwhLl4dPJ7QhEB5lQauc2wXmBvlcsBNMPxtVljkHxOfgtAe 15 | CRdqH6wlmcZyvG6+YlJ9Bsc2bELNelGFdbBSR3Z4jOzOjYuqL4B4zYGRXJ9H5iCbttBerx 16 | D52qKhgzhnq1jpyDKEOSrV6JZZIAbEX5RMghQ3SjxaSb+Y3MwvcGF8GHIyZFwBffNnICQV 17 | S6KND8ElEntaX4PnTLc7Qeeur2rCCFZMPebx69GAfz4v0QWd0gfVGdihHIsDnsvChcqnMp 18 | X7Unqy1AlfBA5panySCmUzPyYax+WQAAAAMBAAEAAAGABKHuhoD/gQzS8CF3VKmgH/XL5N 19 | 24SRXpB5h8ctYmMoI/He7q9N7lAgrj26KkwzVT3xpqhc9jhALczpnhPZqRlSKhptMoubTI 20 | hkM/9kNuxRpCGjSOpOIhn0Zjf1+4HXgGuEF4674wQu6IFiShQ0l3nxIAvAAPge0+90uy5d 21 | 2USm7lSOhZT6ZNl+The6oGac0wfjOkdukO0dk8+gDaHX8yDNwQ0GjhCX+Ef/03/+KGypNv 22 | NxxEdvy57wfvvzr9L0TrZ5X6mIPKPH8I9aG8ySz+vQi6kov4Y7EpAB8+kNfByJThz4qoXl 23 | sPLtMftSOgIQ/wcw/JWVHju0jiCstBFEhPGqR4J8+Wb9/G4x9jkxJUgyTx/aBP2MzHofAB 24 | ogfVEjncw12M+WzMCV4pFexgmNwZk8SrSIear33VHb8kY0WamRxn/0P7CXUaXLQqsTlwMV 25 | nVYeCruU2KhlFIc9ehRDQvmZwKWivLT8ZUIRjcULfuVe4R5dlNLnhrkn8oz8gzHCRBAAAA 26 | wQDP5jhHqJ7m7HZujUsz/PRNOqHPNheRaqw0m0LE5PnK+zBREBOQtS0dKWX7Cq6ttFa3Y1 27 | dEPcxqrVGqt8KhlbI08F07Cmk7jQzRAbXzmphMbGf/9Jya0enmngtAouyr7jdTA1tRNoGt 28 | ccthCja2OSDYVztCpG6f5hVz0zclA7Iuv2OxKTDOmKK5et63Xp/1odnZcR+0Y/B8K/NAsT 29 | aJI1N+tYG10MDfgO90ZX7nTVTLGIKVg9BX5bm1Vzhw3Wa0HCcAAADBANtAYdhPC2HCFmw1 30 | l2BZ2kVlpmIWDaRphh740d9J1Z2s5OkqdOwzwgL43l6CXOL+l7FijXamV7FM/UV0RCoxqN 31 | mgRjKoFaTyOL9cZ1LjK8G0PWV8NyNJ1BZClzkgScOETXJAHnW2XmPFgk5qOG2N3dq7/3pM 32 | SUv69eEwMt0RRl3r1yQr14dgMPQX2bprJURKIikOiG/BooiJQsikwIkwHbx448tdtsdxvS 33 | Ro1078OxyKHD6jhsb6TDrUCkaGUPn7mQAAAMEA4Q4UelqMf+IwisbF4BUrR2X6vw9j9w2l 34 | mu0VDyuoHKS3lFzzgF/1HdQFB2HokSBd0aEcsqDVibArYpGZWqQcNP+jMu//xl+C7191wP 35 | nKAZn93hBA4aPFzP8iFZTmGRHRq9uGvyQnJ2dvbNIxHjmR7m8/oWg0mbfEJDrwJZF34XkY 36 | mo50d6U6adp4igpVeUbwywKM6scdG7e+TLl63VxJxMMOfCWA2NwnE+Dem1EImVT3ixqoO9 37 | KeSklJHqp5BlDBAAAAE3Jvb3RAaGFkb29wLXRlc3RpbmcBAgMEBQYH 38 | -----END OPENSSH PRIVATE KEY----- 39 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/root/HELP.j2: -------------------------------------------------------------------------------- 1 | #jinja2: trim_blocks: True, lstrip_blocks: True 2 | 3 | {% if kerberos_enabled %} 4 | # Kerberos login as 'spark' 5 | kinit -kt /share/keytabs/hadoop-master1/spark.service.keytab spark/hadoop-master1.orb.local@TEST.ORG 6 | {% endif %} 7 | 8 | Run Spark Pi 9 | {% if kerberos_enabled %} 10 | spark-submit run-example --deploy-mode cluster --queue root.default --proxy-user spark SparkPi 10 11 | {% else %} 12 | spark-submit run-example --deploy-mode cluster --queue root.default SparkPi 10 13 | {% endif %} 14 | 15 | # Connect to HiveServer2 directly 16 | {% if kerberos_enabled %} 17 | kyuubi-beeline -u 'jdbc:hive2://hadoop-master1.orb.local:10000/default;principal=hive/_HOST@TEST.ORG' 18 | {% else %} 19 | kyuubi-beeline -u 'jdbc:hive2://hadoop-master1.orb.local:10000/default;user=hive' 20 | {% endif %} 21 | 22 | # Connect to Kyuubi directly 23 | {% if kerberos_enabled %} 24 | kyuubi-beeline -u 
'jdbc:kyuubi://hadoop-master1.orb.local:10009/default;kyuubiClientPrincipal=spark/_HOST@TEST.ORG;kyuubiClientKeytab=/share/keytabs/hadoop-master1/spark.service.keytab;kyuubiServerPrincipal=kyuubi/_HOST@TEST.ORG' 25 | kyuubi-beeline -u 'jdbc:kyuubi://hadoop-master1.orb.local:10009/default;principal=kyuubi/_HOST@TEST.ORG' 26 | kyuubi-beeline -u 'jdbc:kyuubi://hadoop-master1.orb.local:10009/default;principal=kyuubi/_HOST@TEST.ORG' --conf kyuubi.engine.share.level=CONNECTION 27 | {% else %} 28 | kyuubi-beeline -u 'jdbc:kyuubi://hadoop-master1.orb.local:10009/default;user=kyuubi' 29 | kyuubi-beeline -u 'jdbc:kyuubi://hadoop-master1.orb.local:10009/default;user=kyuubi' --conf kyuubi.engine.share.level=CONNECTION 30 | {% endif %} 31 | 32 | # Connect to Kyuubi HA 33 | {% if kerberos_enabled %} 34 | kyuubi-beeline -u 'jdbc:kyuubi://hadoop-master1.orb.local:2181/default;principal=kyuubi/_HOST@TEST.ORG;serviceDiscoveryMode=zookeeper;zooKeeperNamespace=kyuubi' 35 | {% else %} 36 | kyuubi-beeline -u 'jdbc:kyuubi://hadoop-master1.orb.local:2181/default;serviceDiscoveryMode=zookeeper;zooKeeperNamespace=kyuubi' 37 | {% endif %} 38 | 39 | # Submit Spark Pi using Kyuubi Batch API 40 | hadoop fs -put file:///opt/spark/examples/jars/spark-examples_{{ spark_scala_binary_version }}-{{ spark_version }}.jar hdfs://hadoop-master1.orb.local:8020/tmp/ 41 | 42 | {% if kerberos_enabled %} 43 | # TODO 44 | {% else %} 45 | kyuubi-ctl submit batch \ 46 | --hostUrl=http://hadoop-master1.orb.local:10099 \ 47 | -f kyuubi-batch-spark-pi.yaml 48 | {% endif %} 49 | -------------------------------------------------------------------------------- /templates/hadoop-master/files/root/kyuubi-batch-spark-pi.yaml: -------------------------------------------------------------------------------- 1 | #jinja2: trim_blocks: True, lstrip_blocks: True 2 | 3 | apiVersion: v1 4 | user: hive 5 | request: 6 | batchType: Spark 7 | name: SparkPi 8 | resource: hdfs://hadoop-master1.orb.local:8020/tmp/spark-examples_{{ spark_scala_binary_version }}-{{ spark_version }}.jar 9 | className: org.apache.spark.examples.SparkPi 10 | args: 11 | - 10 12 | configs: 13 | kyuubi.batch.impl.version: 2 14 | hive.server2.proxy.user: spark 15 | spark.yarn.queue: root.default 16 | wait.completion: true 17 | options: 18 | verbose: true 19 | -------------------------------------------------------------------------------- /templates/hadoop-worker/Dockerfile.j2: -------------------------------------------------------------------------------- 1 | #jinja2: trim_blocks: True, lstrip_blocks: True 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 
13 | 14 | ARG PROJECT_VERSION 15 | FROM hadoop-testing/base-ubuntu-2004:$PROJECT_VERSION 16 | 17 | ARG HADOOP_VERSION 18 | ARG SPARK_VERSION 19 | ARG TRINO_VERSION 20 | 21 | ENV HADOOP_HOME=/opt/hadoop 22 | ENV HADOOP_CONF_DIR=/etc/hadoop/conf 23 | ENV LD_LIBRARY_PATH=${HADOOP_HOME}/lib/native 24 | {% if trino_enabled %} 25 | ENV TRINO_HOME=/opt/trino 26 | {% endif %} 27 | ENV PATH=${HADOOP_HOME}/bin:${PATH} 28 | 29 | ADD download/hadoop-${HADOOP_VERSION}.tar.gz /opt 30 | ADD download/spark-${SPARK_VERSION}-bin-{{ spark_custom_name }}/yarn /opt/spark-${SPARK_VERSION}-bin-{{ spark_custom_name }}/yarn 31 | {% if trino_enabled %} 32 | ADD download/trino-server-${TRINO_VERSION}.tar.gz /opt 33 | {% endif %} 34 | 35 | # Copy configuration files 36 | COPY ./files / 37 | 38 | RUN ln -snf /opt/hadoop-${HADOOP_VERSION} ${HADOOP_HOME} && \ 39 | ln -snf spark-${SPARK_VERSION}-bin-{{ spark_custom_name }} /opt/spark 40 | {% if trino_enabled %} 41 | RUN ln -snf /opt/trino-server-${TRINO_VERSION} ${TRINO_HOME} 42 | {% endif %} 43 | 44 | RUN chown -R root:hadoop /opt/hadoop-${HADOOP_VERSION} && \ 45 | chmod 6050 /opt/hadoop-${HADOOP_VERSION}/bin/container-executor && \ 46 | chown root:hadoop /etc/hadoop/conf/container-executor.cfg && \ 47 | chmod 0400 /etc/hadoop/conf/container-executor.cfg && \ 48 | mv -f /etc/hadoop/conf/container-executor.cfg /opt/hadoop-${HADOOP_VERSION}/etc/hadoop 49 | 50 | RUN /opt/hadoop-init.d/init-hdfs.sh 51 | {% if trino_enabled %} 52 | RUN /opt/trino-init.d/init-workdir.sh 53 | {% endif %} 54 | 55 | # HDFS ports 56 | EXPOSE 9864 9866 9867 9870 57 | 58 | # YARN ports 59 | EXPOSE 8030 8031 8032 8033 8040 8041 8042 8088 10020 19888 60 | 61 | CMD supervisord -c /etc/supervisord.conf 62 | -------------------------------------------------------------------------------- /templates/hadoop-worker/files/etc/supervisor/conf.d/hdfs-datanode.conf.j2: -------------------------------------------------------------------------------- 1 | [program:hdfs-datanode] 2 | directory=/opt/hadoop 3 | command=hdfs datanode 4 | startsecs=2 5 | stopwaitsecs=10 6 | {% if kerberos_enabled %} 7 | user=root 8 | {% else %} 9 | user=hdfs 10 | {% endif %} 11 | redirect_stderr=true 12 | stdout_logfile=/var/log/hdfs-datanode.log 13 | autostart=true 14 | -------------------------------------------------------------------------------- /templates/hadoop-worker/files/etc/supervisor/conf.d/yarn-nodemanager.conf: -------------------------------------------------------------------------------- 1 | [program:yarn-nodemanager] 2 | directory=/opt/hadoop 3 | command=yarn nodemanager 4 | startsecs=2 5 | stopwaitsecs=10 6 | user=yarn 7 | redirect_stderr=true 8 | stdout_logfile=/var/log/yarn-nodemanager.log 9 | autostart=true 10 | -------------------------------------------------------------------------------- /test-ssh.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: verify that the hosted Ansible control node can manage all hadoop-* nodes over SSH 3 | 4 | hosts: all 5 | 6 | gather_facts: True 7 | tasks: 8 | - name: ping 9 | ping: 10 | --------------------------------------------------------------------------------
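As a quick end-to-end check of the inventory, the test-ssh.yaml playbook above can be run from the repository root. A minimal invocation, assuming the repository's ansible.cfg and hosts inventory are used as-is, would be:

# Ping every host in the inventory over SSH via the test-ssh.yaml playbook.
# The -i flag is redundant if ansible.cfg already points at ./hosts.
ansible-playbook -i hosts test-ssh.yaml

A successful run reports "ping": "pong" for each hadoop-* node, confirming that the control node can reach them all over SSH.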