├── kylin.yml
├── scala.yml
├── roles
│   ├── kylin
│   │   ├── templates
│   │   │   ├── kylin_env.sh
│   │   │   ├── kylin_job_conf_inmem.xml
│   │   │   └── kylin.properties
│   │   └── tasks
│   │       └── main.yml
│   ├── scala
│   │   ├── templates
│   │   │   └── scala_env.sh
│   │   └── tasks
│   │       └── main.yml
│   ├── ssh
│   │   ├── defaults
│   │   │   └── main.yml
│   │   └── tasks
│   │       └── main.yml
│   ├── jdk
│   │   ├── files
│   │   │   └── mysql-connector-java-5.1.46.jar
│   │   ├── templates
│   │   │   └── cdh_java_home.sh
│   │   ├── defaults
│   │   │   └── main.yml
│   │   └── tasks
│   │       └── main.yml
│   ├── cdh
│   │   ├── defaults
│   │   │   └── main.yml
│   │   └── tasks
│   │       └── main.yml
│   ├── cm
│   │   ├── defaults
│   │   │   └── main.yml
│   │   ├── tasks
│   │   │   └── main.yml
│   │   └── templates
│   │       └── config.ini
│   └── common
│       ├── defaults
│       │   └── main.yml
│       ├── files
│       │   ├── CentOS-7-163.repo
│       │   ├── ntp.conf
│       │   └── CentOS-7-aliyun.repo
│       └── tasks
│           └── main.yml
├── images
│   ├── cm_install_step_01.png
│   ├── cm_install_step_02.png
│   ├── cm_install_step_03.png
│   ├── cm_install_step_04.png
│   ├── cm_install_step_05.png
│   ├── cm_install_step_06.png
│   ├── cm_install_step_07.png
│   ├── cm_install_step_08.png
│   ├── cm_install_step_09.png
│   ├── cm_install_step_10.png
│   ├── cm_install_step_11.png
│   ├── cm_install_step_12.png
│   ├── cm_install_step_13.png
│   ├── cm_install_step_14.png
│   ├── cm_install_step_15.png
│   ├── cm_install_step_16.png
│   ├── cm_install_step_17.png
│   └── cm_install_step_18.png
├── inventory
│   ├── uat_cdh6.yml
│   ├── dev_cdh6.ini
│   └── uat_cdh6.ini
├── 01.cdh.yml
├── shell
│   └── cleanLog.sh
├── 99.clean_all.yml
└── README.md

--------------------------------------------------------------------------------
/kylin.yml:
--------------------------------------------------------------------------------
- hosts:
    - kylin
  roles:
    - kylin

--------------------------------------------------------------------------------
/scala.yml:
--------------------------------------------------------------------------------
- hosts:
    - spark
  roles:
    - scala

--------------------------------------------------------------------------------
/roles/kylin/templates/kylin_env.sh:
--------------------------------------------------------------------------------
export KYLIN_HOME={{ kylin_path }}
export PATH=$KYLIN_HOME/bin:$PATH

--------------------------------------------------------------------------------
/roles/scala/templates/scala_env.sh:
--------------------------------------------------------------------------------
export SCALA_HOME={{ scala_path }}
export PATH=$SCALA_HOME/bin:$PATH

--------------------------------------------------------------------------------
/images/cm_install_step_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_01.png

--------------------------------------------------------------------------------
/images/cm_install_step_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_02.png

--------------------------------------------------------------------------------
/images/cm_install_step_03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_03.png

--------------------------------------------------------------------------------
/images/cm_install_step_04.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_04.png
--------------------------------------------------------------------------------
/images/cm_install_step_05.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_05.png

--------------------------------------------------------------------------------
/images/cm_install_step_06.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_06.png

--------------------------------------------------------------------------------
/images/cm_install_step_07.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_07.png

--------------------------------------------------------------------------------
/images/cm_install_step_08.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_08.png

--------------------------------------------------------------------------------
/images/cm_install_step_09.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_09.png

--------------------------------------------------------------------------------
/images/cm_install_step_10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_10.png

--------------------------------------------------------------------------------
/images/cm_install_step_11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_11.png

--------------------------------------------------------------------------------
/images/cm_install_step_12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_12.png

--------------------------------------------------------------------------------
/images/cm_install_step_13.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_13.png

--------------------------------------------------------------------------------
/images/cm_install_step_14.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_14.png

--------------------------------------------------------------------------------
/images/cm_install_step_15.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_15.png

--------------------------------------------------------------------------------
/images/cm_install_step_16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_16.png
--------------------------------------------------------------------------------
/images/cm_install_step_17.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_17.png

--------------------------------------------------------------------------------
/images/cm_install_step_18.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_18.png

--------------------------------------------------------------------------------
/roles/ssh/defaults/main.yml:
--------------------------------------------------------------------------------
# SSH user
v_ssh_user: "root"

# SSH user's group
v_ssh_group: "root"

# SSH user's home directory
v_ssh_user_path: "/root"

--------------------------------------------------------------------------------
/roles/jdk/files/mysql-connector-java-5.1.46.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/roles/jdk/files/mysql-connector-java-5.1.46.jar

--------------------------------------------------------------------------------
/roles/jdk/templates/cdh_java_home.sh:
--------------------------------------------------------------------------------
export JAVA_HOME=/usr/java/jdk{{ v_jdk_version }}-cloudera
export CLASSPATH=.:$CLASSPATH:$JAVA_HOME/lib
export JRE_HOME=$JAVA_HOME/jre
export PATH=$PATH:$JAVA_HOME/bin

--------------------------------------------------------------------------------
/roles/jdk/defaults/main.yml:
--------------------------------------------------------------------------------
# Download URL for the JDK build recommended by CDH
v_cdh_oracle_j2sdk_download_url: "{{ v_cdh_download_server }}/cm6/{{ v_cdh_version }}/redhat7/yum/RPMS/x86_64/oracle-j2sdk1.8-1.8.0+update141-1.x86_64.rpm"

# Whether to install the JDK
v_jdk_install: "false"

# JDK version to install
v_jdk_version: "1.8.0_141"

--------------------------------------------------------------------------------
/inventory/uat_cdh6.yml:
--------------------------------------------------------------------------------
# Custom hosts entries
v_hosts: {
  "bjds-kubernetes-node-pre-10-240-114-34-vm.belle.lan": "10.240.114.34",
  "bjds-kubernetes-node-pre-10-240-114-38-vm.belle.lan": "10.240.114.38",
  "bjds-kubernetes-node-pre-10-240-114-65-vm.belle.lan": "10.240.114.65",
  "bjds-kubernetes-node-pre-10-240-114-67-vm.belle.lan": "10.240.114.67"
}

--------------------------------------------------------------------------------
/roles/cdh/defaults/main.yml:
--------------------------------------------------------------------------------
v_cdh_download_url: "{{v_cdh_download_server}}/cdh6/{{v_cdh_version}}/parcels/CDH-{{v_cdh_version}}-1.cdh{{v_cdh_version}}.p{{v_cdh_version_p}}-el7.parcel"

v_cdh_sha_download_url: "{{v_cdh_download_server}}/cdh6/{{v_cdh_version}}/parcels/CDH-{{v_cdh_version}}-1.cdh{{v_cdh_version}}.p{{v_cdh_version_p}}-el7.parcel.sha256"

v_cdh_manifest_download_url: "{{v_cdh_download_server}}/cdh6/{{v_cdh_version}}/parcels/manifest.json"

--------------------------------------------------------------------------------
/01.cdh.yml:
--------------------------------------------------------------------------------
- hosts:
    - cdh-cluster
  roles:
    - common
  tags: "common"

- hosts:
    - cdh-cluster
  roles:
    - jdk
  tags: "jdk"

- hosts:
    - cdh-server
  roles:
    - ssh
  tags: "ssh"

- hosts:
    - cdh-agent
  roles:
    - ssh
  tags: "ssh"

- hosts:
    - cdh-cluster
  roles:
    - cm
  tags: "cm"

- hosts:
    - cdh-server
  roles:
    - cdh
  tags: "cdh"
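A minimal usage sketch, based on the invocation referenced in the inventory comments
further down (inventory and vars-file names are the ones shipped in this repo; adjust
to your environment):

    # Full run against the dev inventory, injecting the custom v_hosts vars file via -e
    ansible-playbook -i inventory/dev_cdh6.ini -e @inventory/uat_cdh6.yml 01.cdh.yml

    # Each play above is tagged, so a single stage can be re-run in isolation
    ansible-playbook -i inventory/dev_cdh6.ini -e @inventory/uat_cdh6.yml 01.cdh.yml -t cm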
--------------------------------------------------------------------------------
/roles/cm/defaults/main.yml:
--------------------------------------------------------------------------------
# Download URL for the cm yum repository file
v_yum_repo_url: "{{ v_cdh_download_server }}/cm6/{{ v_cdh_version }}/redhat7/yum/cloudera-manager.repo"

# Download URL for the cm repository's GPG signing key
v_yum_repo_gpgcheck_url: "{{ v_cdh_download_server }}/cm6/{{ v_cdh_version }}/redhat7/yum/RPM-GPG-KEY-cloudera"

# scm_db_type
v_cm_db_type: "mysql"

# scm_db_host
v_cm_db_host: ""

# scm_db_name
v_cm_db_name: "scm"

# scm_db_user
v_cm_db_user: "root"

# scm_db_password
v_cm_db_password: ""

# scm_db_port
v_cm_db_port: 3306

--------------------------------------------------------------------------------
/roles/common/defaults/main.yml:
--------------------------------------------------------------------------------
# Whether to switch yum to the 163 mirror
v_update_yum_with_163: "false"

# Whether to switch yum to the aliyun mirror
v_update_yum_with_aliyun: "false"

# Whether to update the kernel
v_update_kernel: "false"

# Whether to install the required base packages
v_yum_install: "false"

# Whether to set up time synchronization
v_ntpdate_install: "false"

# Time synchronization (NTP) server address
v_ntpdate_address: "ntp1.aliyun.com"

# Whether the play may set the machine's hostname
v_enable_set_hostname: "false"

# Whether to update the OS hosts file
v_update_hosts: "false"

# Custom hosts entries
v_hosts: {
  "localhost localhost.localdomain localhost4 localhost4.localdomain4": "127.0.0.1",
  "localhost localhost.localdomain localhost6 localhost6.localdomain6": "::1"
}

# Percentage threshold at which the server starts using swap
v_vm_swappiness: 10

--------------------------------------------------------------------------------
/roles/ssh/tasks/main.yml:
--------------------------------------------------------------------------------
- name: Check whether an ssh public key has already been generated for the user
  shell: "ls {{ v_ssh_user_path }}/.ssh|grep '.pub' |wc -l"
  register: key_exist
  ignore_errors: true

- name: Generate an ssh key pair
  user:
    name: "{{ v_ssh_user }}"
    generate_ssh_key: yes
    ssh_key_bits: 2048
    ssh_key_file: .ssh/id_rsa
  when: "key_exist.stdout == '0'"

- name: Fetch the user's ssh public key to the control machine
  fetch:
    src: "{{ v_ssh_user_path }}/.ssh/id_rsa.pub"
    dest: "/tmp/id_{{ ansible_host }}_{{ v_ssh_user }}.pub"
    flat: yes

# Read the public key content from the locally fetched .pub file
- name: Authorize the server's ssh public key so it can log in to the agents without a password
  authorized_key:
    user: "{{ v_ssh_user }}"
    key: "{{ lookup('file', '/tmp/id_{{ v_server_ip }}_{{ v_ssh_user }}.pub') }}"
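A quick way to confirm the key exchange worked once the ssh plays have run against both
cdh-server and cdh-agent (the target IP here is one of the cdh-agent entries from
inventory/dev_cdh6.ini; substitute your own hosts):

    # Run on the cdh-server node; BatchMode makes ssh fail instead of prompting,
    # so a hostname printout proves passwordless login is in place
    ssh -o BatchMode=yes root@10.0.42.140 hostname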
--------------------------------------------------------------------------------
/roles/kylin/tasks/main.yml:
--------------------------------------------------------------------------------
- name: Prepare the kylin working directory
  file: name={{ item }} state=directory owner={{ hadoop_user }} group={{ hadoop_group }} mode=0755
  with_items:
    - "{{ kylin_work_path }}"

- name: Download the kylin package
  get_url: url={{ kylin_download_url }} dest=/tmp owner={{ hadoop_user }} group={{ hadoop_group }} mode=644

- name: Unpack the kylin package
  unarchive:
    # src can also be given a URL directly, in which case it downloads and unpacks in one step
    src: "/tmp/apache-kylin-{{kylin_version}}-bin-{{kylin_env}}.tar.gz"
    copy: no
    dest: "{{ kylin_work_path }}"
    owner: "{{ hadoop_user }}"
    group: "{{ hadoop_group }}"

- name: Fix ownership of the kylin directory
  file: name={{ kylin_path }} state=directory recurse=yes owner={{ hadoop_user }} group={{ hadoop_group }}

- name: Install the kylin environment variables
  template: src=kylin_env.sh dest=/etc/profile.d

- name: Apply the kylin environment variables
  shell: "source /etc/profile.d/kylin_env.sh"

--------------------------------------------------------------------------------
/roles/scala/tasks/main.yml:
--------------------------------------------------------------------------------
- name: Prepare the scala working directory
  file: name={{ item }} state=directory owner={{ hadoop_user }} group={{ hadoop_group }} mode=0755
  with_items:
    - "{{ scala_work_path }}"

- name: Download the scala package
  get_url: url={{ scala_download_url }} dest=/tmp owner={{ hadoop_user }} group={{ hadoop_group }} mode=644

- name: Unpack the scala package
  unarchive:
    # src can also be given a URL directly, in which case it downloads and unpacks in one step
    src: "/tmp/scala-{{ scala_version }}.tgz"
    copy: no
    dest: "{{ scala_work_path }}"
    owner: "{{ hadoop_user }}"
    group: "{{ hadoop_group }}"

- name: Fix ownership of the scala directory
  file: name={{ scala_path }} state=directory recurse=yes owner={{ hadoop_user }} group={{ hadoop_group }}

- name: Install the scala environment variables
  template: src=scala_env.sh dest=/etc/profile.d

- name: Apply the scala environment variables
  shell: "source /etc/profile.d/scala_env.sh"

--------------------------------------------------------------------------------
/roles/cdh/tasks/main.yml:
--------------------------------------------------------------------------------
- name: Download the cdh parcel files
  get_url: url={{ item }} dest=/opt/cloudera/parcel-repo owner=cloudera-scm group=cloudera-scm mode=644
  with_items:
    - "{{ v_cdh_manifest_download_url }}"
    - "{{ v_cdh_download_url }}"

- name: Download parcel.sha256 and save it as parcel.sha
  get_url: url={{ item }} dest="/opt/cloudera/parcel-repo/CDH-{{v_cdh_version}}-1.cdh{{v_cdh_version}}.p{{v_cdh_version_p}}-el7.parcel.sha" owner=cloudera-scm group=cloudera-scm mode=644
  with_items:
    - "{{ v_cdh_sha_download_url }}"

- name: Write the hash for this parcel version, looked up in manifest.json, into the .sha file
  shell: 'echo "2e650f1f1ea020a3efc98a231b85c2df1a50b030" > "/opt/cloudera/parcel-repo/CDH-{{v_cdh_version}}-1.cdh{{v_cdh_version}}.p{{v_cdh_version_p}}-el7.parcel.sha"'

- name: Restart the cloudera-scm-server service and enable it at boot
  systemd:
    name: cloudera-scm-server
    daemon_reload: yes
    state: restarted
    enabled: yes
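Note that the echoed hash above is hard-coded for the pinned CDH 6.0.1 parcel
(v_cdh_version / v_cdh_version_p in the inventories); when bumping the version it has
to be replaced with the hash listed for the new parcel in manifest.json. A minimal
sketch for checking the parcel repo on the server node, assuming the 6.0.1 filenames:

    cd /opt/cloudera/parcel-repo
    # The .sha file holds the expected SHA-1 of the parcel
    cat CDH-6.0.1-1.cdh6.0.1.p0.590678-el7.parcel.sha
    # Compare it against the actual checksum of the download
    sha1sum CDH-6.0.1-1.cdh6.0.1.p0.590678-el7.parcel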
--------------------------------------------------------------------------------
/shell/cleanLog.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#

# clear cloudera manager monitor log
rm -rf /var/lib/cloudera-host-monitor/ts/*/partition*/*
rm -rf /var/lib/cloudera-service-monitor/ts/*/partition*/*

# clear cdh log
rm -rf /var/log/cloudera-scm-eventserver/*.out.*
rm -rf /var/log/cloudera-scm-firehose/*.out.*
rm -rf /var/log/cloudera-scm-agent/*.log.*
rm -rf /var/log/cloudera-scm-agent/*.out.*
rm -rf /var/log/cloudera-scm-server/*.out.*
rm -rf /var/log/cloudera-scm-server/*.log.*

rm -rf /var/log/hadoop-hdfs/*.out.*
rm -rf /var/log/hadoop-hdfs/*.log.*
rm -rf /var/log/hadoop-httpfs/*.out.*
rm -rf /var/log/hadoop-kms/*.out.*
rm -rf /var/log/hadoop-mapreduce/*.out.*
rm -rf /var/log/hadoop-yarn/*.out.*
rm -rf /var/log/hadoop-yarn/*.audit.*
rm -rf /var/log/hive/*.out.*
rm -rf /var/log/oozie/*.out.*
rm -rf /var/log/oozie/*.log.*

rm -rf /var/log/zookeeper/*.log.*

--------------------------------------------------------------------------------
/roles/jdk/tasks/main.yml:
--------------------------------------------------------------------------------
- name: Remove the JDKs bundled with the system
  yum: name={{ item }} state=absent
  with_items:
    - "java*"
    - "jdk*"
    - "oracle-j2sdk*"
  when: v_jdk_install=="true"

- name: Download the JDK
  get_url: url={{ item }} dest=/tmp mode=644
  with_items:
    - "{{ v_cdh_oracle_j2sdk_download_url }}"
  when: v_jdk_install=="true"

- name: Install the JDK
  yum: name={{ item }} state=present
  with_items:
    - "/tmp/oracle-j2sdk1.8-1.8.0+update141-1.x86_64.rpm"
  when: v_jdk_install=="true"

- name: Configure JAVA_HOME
  template: src=cdh_java_home.sh dest=/etc/profile.d
  when: v_jdk_install=="true"

- name: Apply JAVA_HOME
  shell: "source /etc/profile.d/cdh_java_home.sh"
  when: v_jdk_install=="true"

- name: Prepare the shared java directory
  file: name={{ item }} state=directory mode=0755
  with_items:
    - "/usr/share/java"

- name: Copy the mysql-connector jar
  copy: src=mysql-connector-java-5.1.46.jar dest=/usr/share/java/mysql-connector-java.jar mode=644

--------------------------------------------------------------------------------
/roles/cm/tasks/main.yml:
--------------------------------------------------------------------------------
- name: Download the cm yum repository file
  # debug: msg={{ groups['cdh-server'] }}
  get_url: url={{ v_yum_repo_url }} dest=/etc/yum.repos.d/

- name: Import the repository's GPG signing key
  shell: "rpm --import {{ v_yum_repo_gpgcheck_url }}"

- name: Install cloudera-manager-daemons and cloudera-manager-agent
  yum: update_cache=yes name={{ item }} state=present
  with_items:
    - cloudera-manager-daemons
    - cloudera-manager-agent

- name: Install cloudera-manager-server
  yum: update_cache=yes name={{ item }} state=present
  with_items:
    - cloudera-manager-server
  when: "'cdh-server' in group_names"

- name: Copy the cm agent configuration file
  template: src={{ item }} dest=/etc/cloudera-scm-agent/config.ini mode=644
  with_items:
    - config.ini

- name: Restart the cloudera-scm-agent service and enable it at boot
  systemd:
    name: cloudera-scm-agent
    daemon_reload: yes
    state: restarted
    enabled: yes

- name: Initialize the scm database
  shell: "/opt/cloudera/cm/schema/scm_prepare_database.sh mysql -h {{ v_cm_db_host }} -P {{ v_cm_db_port }} --scm-host {{ v_server_ip }} {{ v_cm_db_name }} {{ v_cm_db_user }} {{ v_cm_db_password }} "
  when: "'cdh-server' in group_names"
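A minimal post-run check, assuming the dev inventory (10.0.42.182 is v_server_ip
there) and Cloudera Manager's default web UI port of 7180; adjust both to your
environment:

    # On the cdh-server node: both services should be active after the plays finish
    systemctl status cloudera-scm-server cloudera-scm-agent
    # The CM web UI answers once the server has finished starting up
    curl -sI http://10.0.42.182:7180 | head -n 1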
--------------------------------------------------------------------------------
/roles/common/files/CentOS-7-163.repo:
--------------------------------------------------------------------------------
# CentOS-Base.repo
#
# The mirror system uses the connecting IP address of the client and the
# update status of each mirror to pick mirrors that are updated to and
# geographically close to the client. You should use this for CentOS updates
# unless you are manually picking other mirrors.
#
# If the mirrorlist= does not work for you, as a fall back you can try the
# remarked out baseurl= line instead.
#
#
[base]
name=CentOS-$releasever - Base - 163.com
#mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=os
baseurl=http://mirrors.163.com/centos/$releasever/os/$basearch/
gpgcheck=1
gpgkey=http://mirrors.163.com/centos/RPM-GPG-KEY-CentOS-7

#released updates
[updates]
name=CentOS-$releasever - Updates - 163.com
#mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=updates
baseurl=http://mirrors.163.com/centos/$releasever/updates/$basearch/
gpgcheck=1
gpgkey=http://mirrors.163.com/centos/RPM-GPG-KEY-CentOS-7

#additional packages that may be useful
[extras]
name=CentOS-$releasever - Extras - 163.com
#mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=extras
baseurl=http://mirrors.163.com/centos/$releasever/extras/$basearch/
gpgcheck=1
gpgkey=http://mirrors.163.com/centos/RPM-GPG-KEY-CentOS-7

#additional packages that extend functionality of existing packages
[centosplus]
name=CentOS-$releasever - Plus - 163.com
baseurl=http://mirrors.163.com/centos/$releasever/centosplus/$basearch/
gpgcheck=1
enabled=0
gpgkey=http://mirrors.163.com/centos/RPM-GPG-KEY-CentOS-7

--------------------------------------------------------------------------------
/inventory/dev_cdh6.ini:
--------------------------------------------------------------------------------
[cdh-server]
10.0.42.182 node_name="sz19f-scm-lmp-test-10-0-42-182-vm.belle.lan" node_ip="10.0.42.182"

[cdh-agent]
10.0.42.140 node_name="sz19f-scm-lmp-test-10-0-42-140-vm.belle.lan" node_ip="10.0.42.140"
10.0.42.184 node_name="sz19f-scm-lmp-test-10-0-42-184-vm.belle.lan" node_ip="10.0.42.184"
10.0.42.179 node_name="sz19f-scm-lmp-test-10-0-42-179-vm.belle.lan" node_ip="10.0.42.179"

[cdh-cluster:children]
cdh-server
cdh-agent

[kylin]
172.20.32.125

[sqoop]
172.20.32.125

[all:vars]
; # Whether to switch yum to the aliyun mirror
; v_update_yum_with_aliyun="true"

# Whether to install the required base packages
v_yum_install="true"

# Whether to set up time synchronization
v_ntpdate_install="true"

; # Time synchronization (NTP) server address
; v_ntpdate_address="ntp1.aliyun.com"

; # Whether the play may set the machine's hostname
; v_enable_set_hostname="true"

# Whether to update the OS hosts file.
# Define the custom hosts in a vars file and pass it in with -e.
# Reference command: ansible-playbook -i inventory/uat_cdh6.ini -e @inventory/uat_cdh6.yml 01.cdh.yml
; v_update_hosts="true"

# The server ip, declared here so the roles can reference it
v_server_ip="10.0.42.182"

# Download server address for the cdh packages
; v_cdh_download_server="https://archive.cloudera.com"
v_cdh_download_server="http://10.0.43.24:8066"

# cdh major version
v_cdh_version="6.0.1"

# cdh minor (parcel patch) version
v_cdh_version_p="0.590678"

# Whether to install the JDK
v_jdk_install="true"

# scm_db_host
v_cm_db_host="10.0.30.39"

# scm_db_name
v_cm_db_name="db_cdh6_scm"

# scm_db_user
v_cm_db_user="user_cdh6"

# scm_db_password
v_cm_db_password="123456"

# scm_db_port
v_cm_db_port=3306

; hadoop_user="root"
; hadoop_group="root"
; kylin_version="2.4.0"
; kylin_work_path="/home/cdh"
; kylin_path="{{kylin_work_path}}/apache-kylin-{{kylin_version}}-bin-{{kylin_env}}"
; kylin_config_path="{{kylin_path}}/conf"
; kylin_env="cdh57"
; kylin_download_url="http://172.20.32.36/package/kylin/apache-kylin-{{kylin_version}}-bin-{{kylin_env}}.tar.gz"
; #kylin_download_url="http://mirrors.hust.edu.cn/apache/kylin/apache-kylin-{{kylin_version}}/apache-kylin-{{kylin_version}}-bin-{{kylin_env}}.tar.gz"
--------------------------------------------------------------------------------
/roles/common/files/ntp.conf:
--------------------------------------------------------------------------------
# For more information about this file, see the man pages
# ntp.conf(5), ntp_acc(5), ntp_auth(5), ntp_clock(5), ntp_misc(5), ntp_mon(5).

driftfile /var/lib/ntp/drift

# Permit time synchronization with our time source, but do not
# permit the source to query or modify the service on this system.
restrict default nomodify notrap nopeer noquery

# Permit all access over the loopback interface. This could
# be tightened as well, but to do so would effect some of
# the administrative functions.
restrict 127.0.0.1
restrict ::1

# Hosts on local network are less restricted.
#restrict 192.168.1.0 mask 255.255.255.0 nomodify notrap

# Use public servers from the pool.ntp.org project.
# Please consider joining the pool (http://www.pool.ntp.org/join.html).
server ntp1.aliyun.com prefer
server ntp3.aliyun.com
server ntp5.aliyun.com
server ntp7.aliyun.com

#broadcast 192.168.1.255 autokey        # broadcast server
#broadcastclient                        # broadcast client
#broadcast 224.0.1.1 autokey            # multicast server
#multicastclient 224.0.1.1              # multicast client
#manycastserver 239.255.254.254         # manycast server
#manycastclient 239.255.254.254 autokey # manycast client

# Enable public key cryptography.
#crypto

includefile /etc/ntp/crypto/pw

# Key file containing the keys and key identifiers used when operating
# with symmetric key cryptography.
keys /etc/ntp/keys

# Specify the key identifiers which are trusted.
#trustedkey 4 8 42

# Specify the key identifier to use with the ntpdc utility.
#requestkey 8

# Specify the key identifier to use with the ntpq utility.
#controlkey 8

# Enable writing of statistics records.
#statistics clockstats cryptostats loopstats peerstats

# Disable the monitoring facility to prevent amplification attacks using ntpdc
# monlist command when default restrict does not include the noquery flag. See
# CVE-2013-5211 for more details.
# Note: Monitoring will not be disabled with the limited restriction flag.
disable monitor
--------------------------------------------------------------------------------
/inventory/uat_cdh6.ini:
--------------------------------------------------------------------------------
[cdh-server]
10.240.114.34 node_name="bjds-kubernetes-node-pre-10-240-114-34-vm.belle.lan" node_ip="10.240.114.34"

[cdh-agent]
10.240.114.38 node_name="bjds-kubernetes-node-pre-10-240-114-38-vm.belle.lan" node_ip="10.240.114.38"
10.240.114.65 node_name="bjds-kubernetes-node-pre-10-240-114-65-vm.belle.lan" node_ip="10.240.114.65"
10.240.114.67 node_name="bjds-kubernetes-node-pre-10-240-114-67-vm.belle.lan" node_ip="10.240.114.67"

[cdh-cluster:children]
cdh-server
cdh-agent

[kylin]
172.20.32.125

[sqoop]
172.20.32.125

[all:vars]
; # Whether to switch yum to the aliyun mirror
; v_update_yum_with_aliyun="true"

# Whether to install the required base packages
v_yum_install="true"

; # Whether to set up time synchronization
; v_ntpdate_install="true"
;
; # Time synchronization (NTP) server address
; v_ntpdate_address="ntp1.aliyun.com"

; # Whether the play may set the machine's hostname
; v_enable_set_hostname="true"

# Whether to update the OS hosts file.
# Define the custom hosts in a vars file and pass it in with -e.
# Reference command: ansible-playbook -i inventory/uat_cdh6.ini -e @inventory/uat_cdh6.yml 01.cdh.yml
v_update_hosts="true"

# The server ip, declared here so the roles can reference it
v_server_ip="10.240.114.34"

# Download server address for the cdh packages
; v_cdh_download_server="https://archive.cloudera.com"
v_cdh_download_server="http://10.240.114.45:8066"
; v_cdh_download_server="http://10.0.43.24:8066"

# cdh major version
v_cdh_version="6.0.1"

# cdh minor (parcel patch) version
v_cdh_version_p="0.590678"

# Whether to install the JDK
v_jdk_install="false"

# scm_db_host
v_cm_db_host="10.240.114.54"

# scm_db_name
v_cm_db_name="db_cdh6_scm"

# scm_db_user
v_cm_db_user="user_cdh6"

# scm_db_password
v_cm_db_password="123456"

# scm_db_port
v_cm_db_port=3306

; hadoop_user="root"
; hadoop_group="root"
; kylin_version="2.4.0"
; kylin_work_path="/home/cdh"
; kylin_path="{{kylin_work_path}}/apache-kylin-{{kylin_version}}-bin-{{kylin_env}}"
; kylin_config_path="{{kylin_path}}/conf"
; kylin_env="cdh57"
; kylin_download_url="http://172.20.32.36/package/kylin/apache-kylin-{{kylin_version}}-bin-{{kylin_env}}.tar.gz"
; #kylin_download_url="http://mirrors.hust.edu.cn/apache/kylin/apache-kylin-{{kylin_version}}/apache-kylin-{{kylin_version}}-bin-{{kylin_env}}.tar.gz"

--------------------------------------------------------------------------------
/99.clean_all.yml:
--------------------------------------------------------------------------------
- hosts:
    - cdh-cluster
  tasks:
    - name: Stop cloudera-scm-agent
      systemd:
        name: cloudera-scm-agent
        state: stopped
      ignore_errors: true

- hosts:
    - cdh-server
  tasks:
    - name: Stop cloudera-scm-server
      systemd:
        name: cloudera-scm-server
        state: stopped
      ignore_errors: true

- hosts:
    - cdh-cluster
  tasks:
    - name: Wait for the services to stop
      shell: "sleep 20"
    - name: umount the directories mounted by cm
      shell: "for cm_mount_path in $(mount | grep 'cloudera-scm'| awk '{print $3}'); do umount $cm_mount_path; done"
      ignore_errors: true
    - name: Uninstall cloudera-manager-daemons and cloudera-manager-agent
      yum: name={{ item }} state=absent
      with_items:
        - cloudera-manager-daemons
        - cloudera-manager-agent
      ignore_errors: true
    - name: Uninstall cloudera-manager-server
      yum: name={{ item }} state=absent
      with_items:
        - cloudera-manager-server
      when: "'cdh-server' in group_names"
      ignore_errors: true

- hosts:
    - cdh-cluster
  tasks:
    - name: Clean up directories and files
      shell: "rm -rf /usr/share/cmf /var/lib/cloudera* /var/log/cloudera* /var/run/cloudera* /var/run/hdfs-sockets && \
              rm -rf /tmp/.scmpreparenode.lock /usr/lib/hue && \
              rm -rf /var/lib/flume-ng /var/lib/hadoop* /var/lib/hue /var/lib/navigator /var/lib/oozie /var/lib/solr && \
              rm -rf /var/lib/zookeeper /var/lib/kudu /var/lib/kafka /var/lib/impala /var/lib/sqoop* && \
              rm -rf /usr/bin/hadoop* /usr/bin/zookeeper* /usr/bin/hbase* /usr/bin/hive* /usr/bin/hdfs /usr/bin/mapred && \
              rm -rf /usr/bin/yarn /usr/bin/sqoop* /usr/bin/oozie /usr/bin/impala /usr/bin/spark* && \
              rm -rf /etc/hadoop* /etc/zookeeper* /etc/hive* /etc/hue /etc/impala /etc/sqoop* /etc/oozie && \
              rm -rf /etc/hbase* /etc/hcatalog /etc/spark /etc/solr /etc/cloudera* && \
              rm -rf /opt/cloudera && \
              rm -rf /data/kudu /data/dfs /data/yarn /data/mapred"
      ignore_errors: true
    - name: Clean up the alternatives symlinks
      # Note: ls -l here must not be shortened to ll, or the command will fail.
      shell: "for alternatives in $(ls -l /etc/alternatives | grep CDH-{{ v_cdh_version }} | awk '{print $9}'); do rm -rf /etc/alternatives/$alternatives; done"
      ignore_errors: true

# Kill the related processes:
# for u in hdfs mapred cloudera-scm hbase hue zookeeper oozie hive impala flume; do sudo kill $(ps -u $u -o pid=); done

--------------------------------------------------------------------------------
/roles/common/files/CentOS-7-aliyun.repo:
--------------------------------------------------------------------------------
# wget http://mirrors.aliyun.com/repo/Centos-7.repo

# CentOS-Base.repo
#
# The mirror system uses the connecting IP address of the client and the
# update status of each mirror to pick mirrors that are updated to and
# geographically close to the client. You should use this for CentOS updates
# unless you are manually picking other mirrors.
#
# If the mirrorlist= does not work for you, as a fall back you can try the
# remarked out baseurl= line instead.
#
#

[base]
name=CentOS-$releasever - Base - mirrors.aliyun.com
failovermethod=priority
baseurl=http://mirrors.aliyun.com/centos/$releasever/os/$basearch/
        http://mirrors.aliyuncs.com/centos/$releasever/os/$basearch/
        http://mirrors.cloud.aliyuncs.com/centos/$releasever/os/$basearch/
gpgcheck=1
gpgkey=http://mirrors.aliyun.com/centos/RPM-GPG-KEY-CentOS-7

#released updates
[updates]
name=CentOS-$releasever - Updates - mirrors.aliyun.com
failovermethod=priority
baseurl=http://mirrors.aliyun.com/centos/$releasever/updates/$basearch/
        http://mirrors.aliyuncs.com/centos/$releasever/updates/$basearch/
        http://mirrors.cloud.aliyuncs.com/centos/$releasever/updates/$basearch/
gpgcheck=1
gpgkey=http://mirrors.aliyun.com/centos/RPM-GPG-KEY-CentOS-7

#additional packages that may be useful
[extras]
name=CentOS-$releasever - Extras - mirrors.aliyun.com
failovermethod=priority
baseurl=http://mirrors.aliyun.com/centos/$releasever/extras/$basearch/
        http://mirrors.aliyuncs.com/centos/$releasever/extras/$basearch/
        http://mirrors.cloud.aliyuncs.com/centos/$releasever/extras/$basearch/
gpgcheck=1
gpgkey=http://mirrors.aliyun.com/centos/RPM-GPG-KEY-CentOS-7

#additional packages that extend functionality of existing packages
[centosplus]
name=CentOS-$releasever - Plus - mirrors.aliyun.com
failovermethod=priority
baseurl=http://mirrors.aliyun.com/centos/$releasever/centosplus/$basearch/
        http://mirrors.aliyuncs.com/centos/$releasever/centosplus/$basearch/
        http://mirrors.cloud.aliyuncs.com/centos/$releasever/centosplus/$basearch/
gpgcheck=1
enabled=0
gpgkey=http://mirrors.aliyun.com/centos/RPM-GPG-KEY-CentOS-7

#contrib - packages by Centos Users
[contrib]
name=CentOS-$releasever - Contrib - mirrors.aliyun.com
failovermethod=priority
baseurl=http://mirrors.aliyun.com/centos/$releasever/contrib/$basearch/
        http://mirrors.aliyuncs.com/centos/$releasever/contrib/$basearch/
        http://mirrors.cloud.aliyuncs.com/centos/$releasever/contrib/$basearch/
gpgcheck=1
enabled=0
gpgkey=http://mirrors.aliyun.com/centos/RPM-GPG-KEY-CentOS-7

--------------------------------------------------------------------------------
/roles/kylin/templates/kylin_job_conf_inmem.xml:
--------------------------------------------------------------------------------
<?xml version="1.0"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements. See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License. You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->

<configuration>

    <property>
        <name>mapreduce.job.split.metainfo.maxsize</name>
        <value>-1</value>
        <description>The maximum permissible size of the split metainfo file.
            The JobTracker won't attempt to read split metainfo files bigger than
            the configured value. No limits if set to -1.
        </description>
    </property>

    <property>
        <name>mapreduce.map.output.compress</name>
        <value>true</value>
        <description>Compress map outputs</description>
    </property>

    <property>
        <name>mapreduce.output.fileoutputformat.compress</name>
        <value>true</value>
        <description>Compress the output of a MapReduce job</description>
    </property>

    <property>
        <name>mapreduce.output.fileoutputformat.compress.type</name>
        <value>BLOCK</value>
        <description>The compression type to use for job outputs</description>
    </property>

    <property>
        <name>mapreduce.job.max.split.locations</name>
        <value>2000</value>
        <description>No description</description>
    </property>

    <property>
        <name>dfs.replication</name>
        <value>2</value>
        <description>Block replication</description>
    </property>

    <property>
        <name>mapreduce.task.timeout</name>
        <value>7200000</value>
        <description>Set task timeout to 2 hours</description>
    </property>

    <property>
        <name>mapreduce.map.memory.mb</name>
        <value>3072</value>
    </property>

    <property>
        <name>mapreduce.map.java.opts</name>
        <value>-Xmx2700m -XX:OnOutOfMemoryError='kill -9 %p'</value>
    </property>

    <property>
        <name>mapreduce.task.io.sort.mb</name>
        <value>200</value>
    </property>

</configuration>

--------------------------------------------------------------------------------
/roles/common/tasks/main.yml:
--------------------------------------------------------------------------------
# Gather each node's ansible setup facts up front as a cache; otherwise later `when` checks may fail
- name: Cache the ansible setup facts
  setup: gather_subset=min

- name: Switch yum to the 163 mirror
  copy: src=CentOS-7-163.repo dest=/etc/yum.repos.d/CentOS-Base.repo
  when: v_update_yum_with_163=="true"

- name: Switch yum to the aliyun mirror
  copy: src=CentOS-7-aliyun.repo dest=/etc/yum.repos.d/CentOS-Base.repo
  when: v_update_yum_with_aliyun=="true"

# For centos systems older than 7.5, upgrade the kernel first
- name: Upgrade the system kernel
  shell: "yum -y update"
  when: v_update_kernel=="true" and ansible_distribution_major_version|int == 7 and ansible_distribution_version < "7.5"

- name: Install the required base packages
  yum: update_cache=yes name={{ item }} state=present
  with_items:
    - wget
    - ntp
    - ntpdate
    - git
    - tar
    - rpcbind
    - telnet
    - vim
    - net-tools
    - bind-utils
    - lrzsz
    - epel-release
    - bash-completion
    - python-pip
  when: v_yum_install=="true"

- name: Upgrade Psycopg2
  shell: "pip install --upgrade psycopg2"

- name: Restart the rpcbind service and enable it at boot
  systemd:
    name: rpcbind
    daemon_reload: yes
    state: restarted
    enabled: yes

# Commands to check synchronization status:
# ntpq -p
# ntpstat
- name: Restart the ntpd service and enable it at boot
  systemd:
    name: ntpd
    daemon_reload: yes
    state: restarted
    enabled: yes
  when: v_ntpdate_install=="true"

- name: Update the ntp configuration file
  copy: src=ntp.conf dest=/etc/ntp.conf
  when: v_ntpdate_install=="true"

- name: Restart the ntpd service and enable it at boot
  systemd:
    name: ntpd
    daemon_reload: yes
    state: restarted
    enabled: yes
  when: v_ntpdate_install=="true"

- name: Synchronize the time immediately
  command: ntpdate -u {{ v_ntpdate_address }}
  when: v_ntpdate_install=="true"

# List cron jobs: crontab -l
# Cron file location: cat /var/spool/cron/root
# centos7 read the time: timedatectl
# centos7 set the time: timedatectl set-ntp no && timedatectl set-time "1982-01-01 00:00:00" && timedatectl set-ntp yes
# Check the job execution logs:
# tail -n 500 /var/log/cron
# tail -n 500 /var/spool/mail/root
- name: Set up a cron job for time synchronization
  cron:
    name: "time synchronization"
    minute: "*/30"
    user: root
    # hwclock -w: write the hardware clock so it stays in sync with the NTP system time
    job: "/sbin/ntpdate -u {{ v_ntpdate_address }}; /sbin/hwclock -w"
  when: v_ntpdate_install=="true"

- name: Enable NTP time synchronization
  shell: "timedatectl set-ntp yes"
  when: v_ntpdate_install=="true"

- name: Set the machine hostname
  hostname: name={{ node_name }}
  when: v_enable_set_hostname=="true"
- name: Configure the FQDN
  lineinfile:
    dest: /etc/sysconfig/network
    regexp: 'HOSTNAME'
    line: 'HOSTNAME={{ ansible_hostname }}'

# Remove default installs
- name: Remove the CentOS firewall
  yum: name={{ item }} state=absent
  with_items:
    - firewalld
    - firewalld-filesystem
    - python-firewall
  when: ansible_distribution == "CentOS"

- name: Disable selinux
  selinux: state=disabled

- name: Update the cluster hosts file
  lineinfile:
    dest: /etc/hosts
    regexp: '{{item.key}}'
    line: '{{item.value}} {{item.key}}'
  with_dict: '{{ v_hosts }}'
  when: v_update_hosts=="true"

# - name: Update the cluster hosts file
#   lineinfile:
#     dest: /etc/hosts
#     regexp: "{{ item }}"
#     line: "{{ item }}"
#   with_items: "{{ groups['cdh-cluster'] }}"
#   when: v_update_hosts=="true"

# Cloudera recommends setting /proc/sys/vm/swappiness to at most 10; servers default to 30.
# Change it at runtime with sysctl and edit /etc/sysctl.conf so the setting survives reboots.
# You can proceed with the installation anyway, but Cloudera Manager may report the hosts
# as unhealthy because they are swapping.
- name: Lower the linux swappiness to reduce swapping to disk
  lineinfile:
    dest: /etc/sysctl.conf
    regexp: "vm.swappiness"
    line: "vm.swappiness={{ v_vm_swappiness }}"

- name: Apply the swappiness setting
  shell: "sysctl -p /etc/sysctl.conf"

# Transparent hugepage compaction is enabled by default and can cause major performance problems.
# Run "echo never > /sys/kernel/mm/transparent_hugepage/defrag"
# and "echo never > /sys/kernel/mm/transparent_hugepage/enabled" to disable it,
# then add the same commands to an init script such as /etc/rc.local so they are
# applied again on reboot.
# https://blog.csdn.net/csfreebird/article/details/49307935
- name: Disable transparent hugepage compaction
  shell: "echo never > /sys/kernel/mm/transparent_hugepage/defrag && \
          echo never > /sys/kernel/mm/transparent_hugepage/enabled"

- name: Permanently disable transparent hugepage compaction
  lineinfile:
    dest: /etc/rc.local
    regexp: "transparent_hugepage"
    line: "echo never > /sys/kernel/mm/transparent_hugepage/defrag && echo never > /sys/kernel/mm/transparent_hugepage/enabled"

--------------------------------------------------------------------------------
/roles/cm/templates/config.ini:
--------------------------------------------------------------------------------
[General]
# Hostname of the CM server.
server_host={{ v_server_ip }}

# Port that the CM server is listening on.
server_port=7182

## It should not normally be necessary to modify these.
# Port that the CM agent should listen on.
# listening_port=9000

# IP Address that the CM agent should listen on.
# listening_ip=

# Hostname that the CM agent reports as its hostname. If unset, will be
# obtained in code through something like this:
#
# python -c 'import socket; \
#            print socket.getfqdn(), \
#            socket.gethostbyname(socket.getfqdn())'
#
# listening_hostname=

# An alternate hostname to report as the hostname for this host in CM.
# Useful when this agent is behind a load balancer or proxy and all
# inbound communication must connect through that proxy.
# reported_hostname=

# Port that supervisord should listen on.
# NB: This only takes effect if supervisord is restarted.
# supervisord_port=19001
# Log file. The supervisord log file will be placed into
# the same directory. Note that if the agent is being started via the
# init.d script, /var/log/cloudera-scm-agent/cloudera-scm-agent.out will
# also have a small amount of output (from before logging is initialized).
# log_file=/var/log/cloudera-scm-agent/cloudera-scm-agent.log

# Persistent state directory. Directory to store CM agent state that
# persists across instances of the agent process and system reboots.
# Particularly, the agent's UUID is stored here.
# lib_dir=/var/lib/cloudera-scm-agent

# Parcel directory. Unpacked parcels will be stored in this directory.
# Downloaded parcels will be stored in <parcel_dir>/../parcel-cache
# parcel_dir=/opt/cloudera/parcels

# Enable supervisord event monitoring. Used in eager heartbeating, amongst
# other things.
# enable_supervisord_events=true

# Maximum time to wait (in seconds) for all metric collectors to finish
# collecting data.
max_collection_wait_seconds=10.0

# Maximum time to wait (in seconds) when connecting to a local role's
# webserver to fetch metrics.
metrics_url_timeout_seconds=30.0

# Maximum time to wait (in seconds) when connecting to a local TaskTracker
# to fetch task attempt data.
task_metrics_timeout_seconds=5.0

# The list of non-device (nodev) filesystem types which will be monitored.
monitored_nodev_filesystem_types=nfs,nfs4,tmpfs

# The list of filesystem types which are considered local for monitoring purposes.
# These filesystems are combined with the other local filesystem types found in
# /proc/filesystems
local_filesystem_whitelist=ext2,ext3,ext4,xfs

# The largest size impala profile log bundle that this agent will serve to the
# CM server. If the CM server requests more than this amount, the bundle will
# be limited to this size. All instances of this limit being hit are logged to
# the agent log.
impala_profile_bundle_max_bytes=1073741824

# The largest size stacks log bundle that this agent will serve to the CM
# server. If the CM server requests more than this amount, the bundle will be
# limited to this size. All instances of this limit being hit are logged to the
# agent log.
stacks_log_bundle_max_bytes=1073741824

# The size to which the uncompressed portion of a stacks log can grow before it
# is rotated. The log will then be compressed during rotation.
stacks_log_max_uncompressed_file_size_bytes=5242880

# The orphan process directory staleness threshold. If a directory is more stale
# than this amount of seconds, CM agent will remove it.
orphan_process_dir_staleness_threshold=5184000

# The orphan process directory refresh interval. The CM agent will check the
# staleness of the orphan processes config directory every this amount of
# seconds.
orphan_process_dir_refresh_interval=3600

# A knob to control the agent logging level. The options are listed as follows:
# 1) DEBUG (set the agent logging level to 'logging.DEBUG')
# 2) INFO (set the agent logging level to 'logging.INFO')
scm_debug=INFO

# The DNS resolution collection interval in seconds. A java base test program
# will be executed with at most this frequency to collect java DNS resolution
# metrics. The test program is only executed if the associated health test,
# Host DNS Resolution, is enabled.
dns_resolution_collection_interval_seconds=60

# The maximum time to wait (in seconds) for the java test program to collect
# java DNS resolution metrics.
dns_resolution_collection_timeout_seconds=30

# The directory location in which the agent-wide kerberos credential cache
# will be created.
# agent_wide_credential_cache_location=/var/run/cloudera-scm-agent

[Security]
# Use TLS and certificate validation when connecting to the CM server.
use_tls=0

# The maximum allowed depth of the certificate chain returned by the peer.
# The default value of 9 matches the default specified in openssl's
# SSL_CTX_set_verify.
max_cert_depth=9

# A file of CA certificates in PEM format. The file can contain several CA
# certificates identified by
#
# -----BEGIN CERTIFICATE-----
# ... (CA certificate in base64 encoding) ...
# -----END CERTIFICATE-----
#
# sequences. Before, between, and after the certificates text is allowed which
# can be used e.g. for descriptions of the certificates.
#
# The file is loaded once, the first time an HTTPS connection is attempted. A
# restart of the agent is required to pick up changes to the file.
#
# Note that if neither verify_cert_file or verify_cert_dir is set, certificate
# verification will not be performed.
# verify_cert_file=

# Directory containing CA certificates in PEM format. The files each contain one
# CA certificate. The files are looked up by the CA subject name hash value,
# which must hence be available. If more than one CA certificate with the same
# name hash value exist, the extension must be different (e.g. 9d66eef0.0,
# 9d66eef0.1 etc). The search is performed in the ordering of the extension
# number, regardless of other properties of the certificates. Use the c_rehash
# utility to create the necessary links.
#
# The certificates in the directory are only looked up when required, e.g. when
# building the certificate chain or when actually performing the verification
# of a peer certificate. The contents of the directory can thus be changed
# without an agent restart.
#
# When looking up CA certificates, the verify_cert_file is first searched, then
# those in the directory. Certificate matching is done based on the subject name,
# the key identifier (if present), and the serial number as taken from the
# certificate to be verified. If these data do not match, the next certificate
# will be tried. If a first certificate matching the parameters is found, the
# verification process will be performed; no other certificates for the same
# parameters will be searched in case of failure.
#
# Note that if neither verify_cert_file or verify_cert_dir is set, certificate
# verification will not be performed.
# verify_cert_dir=

# PEM file containing client private key.
# client_key_file=

# A command to run which returns the client private key password on stdout
# client_keypw_cmd=

# If client_keypw_cmd isn't specified, instead a text file containing
# the client private key password can be used.
# client_keypw_file=

# PEM file containing client certificate.
# client_cert_file=
## Location of Hadoop files. These are the CDH locations when installed by
## packages. Unused when CDH is installed by parcels.
[Hadoop]
#cdh_crunch_home=/usr/lib/crunch
#cdh_flume_home=/usr/lib/flume-ng
#cdh_hadoop_bin=/usr/bin/hadoop
#cdh_hadoop_home=/usr/lib/hadoop
#cdh_hbase_home=/usr/lib/hbase
#cdh_hbase_indexer_home=/usr/lib/hbase-solr
#cdh_hcat_home=/usr/lib/hive-hcatalog
#cdh_hdfs_home=/usr/lib/hadoop-hdfs
#cdh_hive_home=/usr/lib/hive
#cdh_httpfs_home=/usr/lib/hadoop-httpfs
#cdh_hue_home=/usr/share/hue
#cdh_hue_plugins_home=/usr/lib/hadoop
#cdh_impala_home=/usr/lib/impala
#cdh_kudu_home=/usr/lib/kudu
#cdh_llama_home=/usr/lib/llama
#cdh_mr1_home=/usr/lib/hadoop-0.20-mapreduce
#cdh_mr2_home=/usr/lib/hadoop-mapreduce
#cdh_oozie_home=/usr/lib/oozie
#cdh_parquet_home=/usr/lib/parquet
#cdh_pig_home=/usr/lib/pig
#cdh_solr_home=/usr/lib/solr
#cdh_spark_home=/usr/lib/spark
#cdh_sqoop_home=/usr/lib/sqoop
#cdh_sqoop2_home=/usr/lib/sqoop2
#cdh_yarn_home=/usr/lib/hadoop-yarn
#cdh_zookeeper_home=/usr/lib/zookeeper
#hive_default_xml=/etc/hive/conf.dist/hive-default.xml
#webhcat_default_xml=/etc/hive-webhcat/conf.dist/webhcat-default.xml
#jsvc_home=/usr/libexec/bigtop-utils
#tomcat_home=/usr/lib/bigtop-tomcat
#oracle_home=/usr/share/oracle/instantclient

## Location of Cloudera Management Services files.
[Cloudera]
#mgmt_home=/usr/share/cmf

## Location of JDBC Drivers.
[JDBC]
#cloudera_mysql_connector_jar=/usr/share/java/mysql-connector-java.jar
#cloudera_oracle_connector_jar=/usr/share/java/oracle-connector-java.jar
#By default, postgres jar is found dynamically in $MGMT_HOME/lib
#cloudera_postgresql_jdbc_jar=

--------------------------------------------------------------------------------
/roles/kylin/templates/kylin.properties:
--------------------------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


# The below commented values will effect as default settings
# Uncomment and override them if necessary


#
#### METADATA | ENV ###
#
## The metadata store in hbase
#kylin.metadata.url=kylin_metadata@hbase
#
## metadata cache sync retry times
#kylin.metadata.sync-retries=3
#
## Working folder in HDFS, better be qualified absolute path, make sure user has the right permission to this directory
#kylin.env.hdfs-working-dir=/kylin
#
## DEV|QA|PROD. DEV will turn on some dev features, QA and PROD has no difference in terms of functions.
#kylin.env=QA
#
## kylin zk base path
#kylin.env.zookeeper-base-path=/kylin
#
#### SERVER | WEB | RESTCLIENT ###
#
## Kylin server mode, valid value [all, query, job]
#kylin.server.mode=all
#
## List of web servers in use, this enables one web server instance to sync up with other servers.
#kylin.server.cluster-servers=localhost:7070
#
## Display timezone on UI, format like [GMT+N or GMT-N]
#kylin.web.timezone=GMT+8
#
## Timeout value for the queries submitted through the Web UI, in milliseconds
#kylin.web.query-timeout=300000
#
#kylin.web.cross-domain-enabled=true
#
## allow user to export query result
#kylin.web.export-allow-admin=true
#kylin.web.export-allow-other=true
#
## Hide measures in measure list of cube designer, separate by comma
#kylin.web.hide-measures=RAW
#
## max connections of one route
#kylin.restclient.connection.default-max-per-route=20
#
## max connections of one rest-client
#kylin.restclient.connection.max-total=200
#
#### PUBLIC CONFIG ###
#kylin.engine.default=2
#kylin.storage.default=2
#kylin.web.hive-limit=20
#kylin.web.help.length=4
#kylin.web.help.0=start|Getting Started|http://kylin.apache.org/docs21/tutorial/kylin_sample.html
#kylin.web.help.1=odbc|ODBC Driver|http://kylin.apache.org/docs21/tutorial/odbc.html
#kylin.web.help.2=tableau|Tableau Guide|http://kylin.apache.org/docs21/tutorial/tableau_91.html
#kylin.web.help.3=onboard|Cube Design Tutorial|http://kylin.apache.org/docs21/howto/howto_optimize_cubes.html
#kylin.web.link-streaming-guide=http://kylin.apache.org/
#kylin.htrace.show-gui-trace-toggle=false
#kylin.web.link-hadoop=
#kylin.web.link-diagnostic=
#kylin.web.contact-mail=
#kylin.server.external-acl-provider=
#
#### SOURCE ###
#
## Hive client, valid value [cli, beeline]
#kylin.source.hive.client=cli
#
## Absolute path to beeline shell, can be set to spark beeline instead of the default hive beeline on PATH
#kylin.source.hive.beeline-shell=beeline
#
## Parameters for beeline client, only necessary if hive client is beeline
##kylin.source.hive.beeline-params=-n root --hiveconf hive.security.authorization.sqlstd.confwhitelist.append='mapreduce.job.*|dfs.*' -u jdbc:hive2://localhost:10000
#
## While hive client uses above settings to read hive table metadata,
## table operations can go through a separate SparkSQL command line, given SparkSQL connects to the same Hive metastore.
#kylin.source.hive.enable-sparksql-for-table-ops=false
##kylin.source.hive.sparksql-beeline-shell=/path/to/spark-client/bin/beeline
##kylin.source.hive.sparksql-beeline-params=-n root --hiveconf hive.security.authorization.sqlstd.confwhitelist.append='mapreduce.job.*|dfs.*' -u jdbc:hive2://localhost:10000
#
#kylin.source.hive.keep-flat-table=false
#
## Hive database name for putting the intermediate flat tables
#kylin.source.hive.database-for-flat-table=default
#
## Whether redistribute the intermediate flat table before building
#kylin.source.hive.redistribute-flat-table=true
#
#
#### STORAGE ###
#
## The storage for final cube file in hbase
#kylin.storage.url=hbase
#
## The prefix of hbase table
#kylin.storage.hbase.table-name-prefix=KYLIN_
#
## The namespace for hbase storage
#kylin.storage.hbase.namespace=default
#
## Compression codec for htable, valid value [none, snappy, lzo, gzip, lz4]
#kylin.storage.hbase.compression-codec=none
#
## HBase Cluster FileSystem, which serving hbase, format as hdfs://hbase-cluster:8020
## Leave empty if hbase running on same cluster with hive and mapreduce
##kylin.storage.hbase.cluster-fs=
#
## The cut size for hbase region, in GB.
#kylin.storage.hbase.region-cut-gb=5
#
## The hfile size of GB, smaller hfile leading to the converting hfile MR has more reducers and be faster.
## Set 0 to disable this optimization.
#kylin.storage.hbase.hfile-size-gb=2
#
#kylin.storage.hbase.min-region-count=1
#kylin.storage.hbase.max-region-count=500
#
## Optional information for the owner of kylin platform, it can be your team's email
## Currently it will be attached to each kylin's htable attribute
#kylin.storage.hbase.owner-tag=whoami@kylin.apache.org
#
#kylin.storage.hbase.coprocessor-mem-gb=3
#
## By default kylin can spill query's intermediate results to disks when it's consuming too much memory.
## Set it to false if you want query to abort immediately in such condition.
#kylin.storage.partition.aggr-spill-enabled=true
#
## The maximum number of bytes each coprocessor is allowed to scan.
## To allow arbitrary large scan, you can set it to 0.
#kylin.storage.partition.max-scan-bytes=3221225472
#
## The default coprocessor timeout is (hbase.rpc.timeout * 0.9) / 1000 seconds,
## You can set it to a smaller value. 0 means use default.
## kylin.storage.hbase.coprocessor-timeout-seconds=0
#
#
#### JOB ###
#
## Max job retries on error; the default 0 means no retry
#kylin.job.retry=0
#
## Max number of concurrent running jobs
#kylin.job.max-concurrent-jobs=10
#
## The sampling percentage, default 100%
#kylin.job.sampling-percentage=100
#
## If true, send an email notification when a job completes
##kylin.job.notification-enabled=true
##kylin.job.notification-mail-enable-starttls=true
##kylin.job.notification-mail-host=smtp.office365.com
##kylin.job.notification-mail-port=587
##kylin.job.notification-mail-username=kylin@example.com
##kylin.job.notification-mail-password=mypassword
##kylin.job.notification-mail-sender=kylin@example.com
#
#
#### ENGINE ###
#
## Interval in seconds between hadoop job status checks
#kylin.engine.mr.yarn-check-interval-seconds=10
#
#kylin.engine.mr.reduce-input-mb=500
#
#kylin.engine.mr.max-reducer-number=500
#
#kylin.engine.mr.mapper-input-rows=1000000
#
## Enable dictionary building in the MR reducer
#kylin.engine.mr.build-dict-in-reducer=true
#
## Number of reducers for fetching UHC column distinct values
#kylin.engine.mr.uhc-reducer-count=1
#
## Whether to use an additional step to build the UHC dictionary
#kylin.engine.mr.build-uhc-dict-in-additional-step=false
#
#
#### CUBE | DICTIONARY ###
#
#kylin.cube.cuboid-scheduler=org.apache.kylin.cube.cuboid.DefaultCuboidScheduler
#kylin.cube.segment-advisor=org.apache.kylin.cube.CubeSegmentAdvisor
#
## 'auto', 'inmem', 'layer' or 'random' for testing
#kylin.cube.algorithm=layer
#
## A smaller threshold prefers layer; a larger threshold prefers in-mem
#kylin.cube.algorithm.layer-or-inmem-threshold=7
#
#kylin.cube.aggrgroup.max-combination=4096
#
#kylin.snapshot.max-mb=300
#
#kylin.cube.cubeplanner.enabled=false
#kylin.cube.cubeplanner.enabled-for-existing-cube=false
#kylin.cube.cubeplanner.expansion-threshold=15.0
#kylin.cube.cubeplanner.recommend-cache-max-size=200
#kylin.cube.cubeplanner.mandatory-rollup-threshold=1000
#kylin.cube.cubeplanner.algorithm-threshold-greedy=10
#kylin.cube.cubeplanner.algorithm-threshold-genetic=23
#
#
#### QUERY ###
#
## Controls the maximum number of bytes a query is allowed to scan from storage.
## The default value 0 means no limit.
## The counterpart kylin.storage.partition.max-scan-bytes sets the maximum per coprocessor.
#kylin.query.max-scan-bytes=0
#
#kylin.query.cache-enabled=true
#
## TABLE ACL
#kylin.query.security.table-acl-enabled=true
#
## Usually should not be modified
#kylin.query.interceptors=org.apache.kylin.rest.security.TableInterceptor
#
#kylin.query.escape-default-keyword=false
#
## Usually should not be modified
#kylin.query.transformers=org.apache.kylin.query.util.DefaultQueryTransformer,org.apache.kylin.query.util.KeywordDefaultDirtyHack
#
#### SECURITY ###
#
## Spring security profile, options: testing, ldap, saml
## With the "testing" profile, users can log in with pre-defined credentials such as ADMIN/KYLIN
#kylin.security.profile=testing
#
## Admin roles in LDAP, for ldap and saml
#kylin.security.acl.admin-role=admin
#
## LDAP authentication configuration
#kylin.security.ldap.connection-server=ldap://ldap_server:389
#kylin.security.ldap.connection-username=
#kylin.security.ldap.connection-password=
#
## LDAP user account directory
#kylin.security.ldap.user-search-base=
#kylin.security.ldap.user-search-pattern=
#kylin.security.ldap.user-group-search-base=
#kylin.security.ldap.user-group-search-filter=(|(member={0})(memberUid={1}))
#
## LDAP service account directory
#kylin.security.ldap.service-search-base=
#kylin.security.ldap.service-search-pattern=
#kylin.security.ldap.service-group-search-base=
#
### SAML configurations for SSO
## SAML IDP metadata file location
#kylin.security.saml.metadata-file=classpath:sso_metadata.xml
#kylin.security.saml.metadata-entity-base-url=https://hostname/kylin
#kylin.security.saml.keystore-file=classpath:samlKeystore.jks
#kylin.security.saml.context-scheme=https
#kylin.security.saml.context-server-name=hostname
#kylin.security.saml.context-server-port=443
#kylin.security.saml.context-path=/kylin
#
#### SPARK ENGINE CONFIGS ###
#
## Hadoop conf folder; exported as HADOOP_CONF_DIR when running spark-submit
## It must contain the site xmls of core, yarn, hive, and hbase in one folder
##kylin.env.hadoop-conf-dir=/etc/hadoop/conf
#
## Used to estimate the number of RDD partitions
#kylin.engine.spark.rdd-partition-cut-mb=10
#
## Minimum number of RDD partitions
#kylin.engine.spark.min-partition=1
#
## Maximum number of RDD partitions
#kylin.engine.spark.max-partition=5000
#
## Spark conf (the default is in spark/conf/spark-defaults.conf)
#kylin.engine.spark-conf.spark.master=yarn
##kylin.engine.spark-conf.spark.submit.deployMode=cluster
#kylin.engine.spark-conf.spark.yarn.queue=default
#kylin.engine.spark-conf.spark.executor.memory=1G
#kylin.engine.spark-conf.spark.executor.cores=2
#kylin.engine.spark-conf.spark.executor.instances=1
#kylin.engine.spark-conf.spark.eventLog.enabled=true
#kylin.engine.spark-conf.spark.eventLog.dir=hdfs\:///kylin/spark-history
#kylin.engine.spark-conf.spark.history.fs.logDirectory=hdfs\:///kylin/spark-history
#kylin.engine.spark-conf.spark.hadoop.yarn.timeline-service.enabled=false
#
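## For reference: each kylin.engine.spark-conf.* entry above is handed to
## spark-submit as a --conf flag, e.g. kylin.engine.spark-conf.spark.executor.memory=1G
## reaches spark-submit as: --conf spark.executor.memory=1G
#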
## Manually uploading the spark-assembly jar to HDFS and setting this property avoids repeatedly uploading the jar at runtime
##kylin.engine.spark-conf.spark.yarn.archive=hdfs://namenode:8020/kylin/spark/spark-libs.jar
##kylin.engine.spark-conf.spark.io.compression.codec=org.apache.spark.io.SnappyCompressionCodec
#
## uncomment for HDP
##kylin.engine.spark-conf.spark.driver.extraJavaOptions=-Dhdp.version=current
##kylin.engine.spark-conf.spark.yarn.am.extraJavaOptions=-Dhdp.version=current
##kylin.engine.spark-conf.spark.executor.extraJavaOptions=-Dhdp.version=current
#
#
#### QUERY PUSH DOWN ###
#
##kylin.query.pushdown.runner-class-name=org.apache.kylin.query.adhoc.PushDownRunnerJdbcImpl
#
##kylin.query.pushdown.update-enabled=false
##kylin.query.pushdown.jdbc.url=jdbc:hive2://sandbox:10000/default
##kylin.query.pushdown.jdbc.driver=org.apache.hive.jdbc.HiveDriver
##kylin.query.pushdown.jdbc.username=hive
##kylin.query.pushdown.jdbc.password=
#
##kylin.query.pushdown.jdbc.pool-max-total=8
##kylin.query.pushdown.jdbc.pool-max-idle=8
##kylin.query.pushdown.jdbc.pool-min-idle=0
#
#### JDBC Data Source
##kylin.source.jdbc.connection-url=
##kylin.source.jdbc.driver=
##kylin.source.jdbc.dialect=
##kylin.source.jdbc.user=
##kylin.source.jdbc.pass=
##kylin.source.jdbc.sqoop-home=
##kylin.source.jdbc.filed-delimiter=|
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

# CDH6

CDH (Cloudera's Distribution, including Apache Hadoop) is a Hadoop distribution maintained by Cloudera. It is built on stable Apache Hadoop releases with many patches applied on top, and can be used directly in production.

Because the installation spans multiple servers, this project packages the whole procedure in a playbook-driven form, making CDH6 easier to install and less error-prone.

**Note: this project targets CentOS 7.5 + CDH 6.0.1.**

Running this project requires basic knowledge of:

- linux
- ansible
- docker

## Installation

Official installation guide: [Cloudera Enterprise 6.0.x Installation Guide](https://www.cloudera.com/documentation/enterprise/6/6.0/topics/installation.html)

### Preparation

#### Prepare the servers for CDH6

Hardware and software requirements: [Cloudera Enterprise 6 Requirements and Supported Versions](https://www.cloudera.com/documentation/enterprise/6/release-notes/topics/rg_requirements_supported_versions.html)

IP | HostName | OS | Cores | Memory | Disk (GB) | Remark
--------------|-----------------------------------------------------|------------|-------|--------|-----------|---------------
10.240.114.34 | bjds-kubernetes-node-pre-10-240-114-34-vm.belle.lan | CentOS 7.5 | 8 | 16G | 250 | Server & Agent
10.240.114.38 | bjds-kubernetes-node-pre-10-240-114-38-vm.belle.lan | CentOS 7.5 | 8 | 16G | 250 | Agent
10.240.114.65 | bjds-kubernetes-node-pre-10-240-114-65-vm.belle.lan | CentOS 7.5 | 8 | 16G | 250 | Agent
10.240.114.67 | bjds-kubernetes-node-pre-10-240-114-67-vm.belle.lan | CentOS 7.5 | 8 | 16G | 250 | Agent
10.240.114.54 | bjds-kubernetes-node-pre-10-240-114-54-vm.belle.lan | CentOS 7.5 | 8 | 16G | 250 | MySQL 5.7.24
10.240.114.45 | bjds-kubernetes-node-pre-10-240-114-45-vm.belle.lan | CentOS 7.5 | 8 | 16G | 250 | Download server

#### Prepare the download server

Cloudera provides official download locations for CDH6:

- [cm6](https://archive.cloudera.com/cm6/6.0.1/redhat7/yum/RPMS/x86_64/)
- [cdh6](https://archive.cloudera.com/cdh6/6.0.1/parcels/)

Servers in mainland China can only reach these mirrors through a proxy, and the packages are large, so the best approach is to stand up a similar download server on the internal network and mirror the packages there; this greatly speeds up the whole installation.

To keep things simple, the download server runs in docker.

First, [install docker + docker-compose](https://www.zorin.xin/docker-manual/install/Centos7.html).
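Before moving on, it is worth confirming the runtime actually works (a quick check; it only assumes `docker` and `docker-compose` ended up on the PATH):

```sh
# The daemon should answer; if this fails, docker is not running
docker info

# Print the compose version to confirm the binary is installed
docker-compose --version
```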
Then initialize the download server on that machine:

```sh
# sfds stands for "static file download service"

# Create the sfds config directory
mkdir -p /data/docker_volumn/sfds

# Create the data directory
mkdir -p /data/sfds

# Create the compose-file directory
mkdir -p /data/docker_compose

# Write the sfds config file
tee /data/docker_volumn/sfds/nginx.conf <<-'EOF'
worker_processes 1;
pid /var/run/nginx.pid;
events {
    worker_connections 1024;
}
http {
    include /etc/nginx/mime.types;
    default_type application/octet-stream;
    sendfile on;
    keepalive_timeout 65;
    server
    {
        listen 9000;                 # port
        server_name localhost;       # server name
        root /usr/share/nginx/html;  # root directory to index
        autoindex on;                # enable directory listing
        autoindex_exact_size off;    # show approximate sizes (KB/MB/GB) instead of exact bytes
        autoindex_localtime on;      # show local time instead of GMT
    }
}
EOF

# Write the compose file
tee /data/docker_compose/docker-compose.yml <<-'EOF'
version: "3"
services:
  # file download server
  sfds:
    image: bjddd192/nginx:1.10.1
    container_name: sfds
    restart: always
    ports:
      - "8066:9000"
    environment:
      - TZ=Asia/Shanghai
    volumes:
      - /data/docker_volumn/sfds/nginx.conf:/etc/nginx/nginx.conf
      - /data/sfds:/usr/share/nginx/html
    network_mode: bridge
EOF

# Start the download server
docker-compose -f /data/docker_compose/docker-compose.yml up -d
```

Once the download server is up, open `http://serverIP:8066`; if the page loads, the download server is deployed correctly.

#### Download the packages

[Cloudera Manager 6 Version and Download Information](https://www.cloudera.com/documentation/enterprise/6/release-notes/topics/rg_cm_6_version_download.html)

Create local directories matching the official download paths:

```sh
mkdir -p /data/sfds/cdh6/6.0.1/parcels
mkdir -p /data/sfds/cm6/6.0.1/redhat7/yum/RPMS/x86_64
```

Then download the official packages into the matching directories; the final layout looks like this:

```cmd
$ tree /data/sfds/cdh6/6.0.1/parcels
/data/sfds/cdh6/6.0.1/parcels
|-- CDH-6.0.1-1.cdh6.0.1.p0.590678-el7.parcel
|-- CDH-6.0.1-1.cdh6.0.1.p0.590678-el7.parcel.sha256
`-- manifest.json

$ tree /data/sfds/cm6/6.0.1/redhat7/yum/RPMS/x86_64
/data/sfds/cm6/6.0.1/redhat7/yum/RPMS/x86_64
|-- cloudera-manager-agent-6.0.1-610811.el7.x86_64.rpm
|-- cloudera-manager-daemons-6.0.1-610811.el7.x86_64.rpm
|-- cloudera-manager-server-6.0.1-610811.el7.x86_64.rpm
|-- cloudera-manager-server-db-2-6.0.1-610811.el7.x86_64.rpm
`-- oracle-j2sdk1.8-1.8.0+update141-1.x86_64.rpm
```

#### Build a local YUM repository

```sh
yum -y install createrepo
cd /data/sfds/cm6/6.0.1/redhat7/yum
createrepo .
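
# createrepo writes its metadata into ./repodata; a quick sanity check
# (assumes the createrepo run above exited cleanly):
ls repodata/repomd.xml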

# Write the repo file
tee /data/sfds/cm6/6.0.1/redhat7/yum/cloudera-manager.repo <<-'EOF'
[cloudera-manager]
name=Cloudera Manager 6.0.1
baseurl=http://10.240.114.45:8066/cm6/6.0.1/redhat7/yum/
gpgcheck=false
enabled=true
EOF
```

Verify the repository:

```sh
wget http://10.240.114.45:8066/cm6/6.0.1/redhat7/yum/cloudera-manager.repo -P /etc/yum.repos.d/
rpm --import http://10.240.114.45:8066/cm6/6.0.1/redhat7/yum/RPM-GPG-KEY-cloudera
yum makecache
yum search cloudera
yum search cloudera-manager-daemons cloudera-manager-agent cloudera-manager-server
```

If the packages can be found, the local YUM repository works.

#### Database preparation

It is best to use MySQL 5.5.45+, 5.6.26+, or 5.7.6+; this environment uses 5.7.24.

```sql
-- Drop the databases (use when redeploying)
-- drop database db_cdh6_scm;
-- drop database db_cdh6_amon;
-- drop database db_cdh6_rmon;
-- drop database db_cdh6_hue;
-- drop database db_cdh6_metastore;
-- drop database db_cdh6_sentry;
-- drop database db_cdh6_nav;
-- drop database db_cdh6_navms;
-- drop database db_cdh6_oozie;

-- Create the databases
create database db_cdh6_scm default character set utf8 default collate utf8_general_ci;
create database db_cdh6_amon default character set utf8 default collate utf8_general_ci;
create database db_cdh6_rmon default character set utf8 default collate utf8_general_ci;
create database db_cdh6_hue default character set utf8 default collate utf8_general_ci;
create database db_cdh6_metastore default character set utf8 default collate utf8_general_ci;
create database db_cdh6_sentry default character set utf8 default collate utf8_general_ci;
create database db_cdh6_nav default character set utf8 default collate utf8_general_ci;
create database db_cdh6_navms default character set utf8 default collate utf8_general_ci;
create database db_cdh6_oozie default character set utf8 default collate utf8_general_ci;

-- For this simple exercise the same database user is reused everywhere;
-- in production it is better to use a separate user per database.
grant all on db_cdh6_scm.* to 'user_cdh6'@'%' identified by '123456';
grant all on db_cdh6_amon.* to 'user_cdh6'@'%' identified by '123456';
grant all on db_cdh6_rmon.* to 'user_cdh6'@'%' identified by '123456';
grant all on db_cdh6_hue.* to 'user_cdh6'@'%' identified by '123456';
grant all on db_cdh6_metastore.* to 'user_cdh6'@'%' identified by '123456';
grant all on db_cdh6_sentry.* to 'user_cdh6'@'%' identified by '123456';
grant all on db_cdh6_nav.* to 'user_cdh6'@'%' identified by '123456';
grant all on db_cdh6_navms.* to 'user_cdh6'@'%' identified by '123456';
grant all on db_cdh6_oozie.* to 'user_cdh6'@'%' identified by '123456';

-- Reload privileges
flush privileges;
```

#### ansible configuration

[Installing and configuring ansible](https://www.zorin.xin/2018/08/05/ansible-install-and-config/)

I installed ansible on a Mac and use it as the control node.

```sh
# Set up key-based trust to the servers
ssh-copy-id -p 60777 root@10.240.114.34
ssh-copy-id -p 60777 root@10.240.114.38
ssh-copy-id -p 60777 root@10.240.114.65
ssh-copy-id -p 60777 root@10.240.114.67

# Test connectivity
ansible cdh-cluster -i inventory/uat_cdh6.ini -m ping
```

### Deploy CDH

#### Install CM and CDH

```sh
cd /Users/yanglei/01_git/github_me/ansible-playbooks-cdh6

# Test connectivity
ansible cdh-cluster -i inventory/uat_cdh6.ini -m ping
ansible cdh-cluster -i inventory/uat_cdh6.ini -m command -a "date"
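
# If a single host fails the checks above, you can re-run against just that
# host with --limit (illustrative example; substitute any host from the inventory):
ansible cdh-cluster -i inventory/uat_cdh6.ini -m ping --limit 10.240.114.38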

# Install the common components
ansible-playbook -t common -i inventory/uat_cdh6.ini -e @inventory/uat_cdh6.yml 01.cdh.yml

# Install the JDK
ansible-playbook -t jdk -i inventory/uat_cdh6.ini -e @inventory/uat_cdh6.yml 01.cdh.yml

# Set up passwordless SSH from the server to the agents
ansible-playbook -t ssh -i inventory/uat_cdh6.ini -e @inventory/uat_cdh6.yml 01.cdh.yml

# Install scm
ansible-playbook -t cm -i inventory/uat_cdh6.ini -e @inventory/uat_cdh6.yml 01.cdh.yml
# If database initialization fails with: java.sql.SQLException: Statement violates GTID consistency: CREATE TABLE ... SELECT.
# temporarily disable MySQL's GTID mode:
# set global gtid_mode=on_permissive;
# set global gtid_mode=off_permissive;
# set global gtid_mode=off;
# set global enforce_gtid_consistency=off;
# and restore it once finished:
# set global enforce_gtid_consistency=on;
# set global gtid_mode=off_permissive;
# set global gtid_mode=on_permissive;
# set global gtid_mode=on;

# Put the CDH offline packages in place
ansible-playbook -t cdh -i inventory/uat_cdh6.ini -e @inventory/uat_cdh6.yml 01.cdh.yml

# On the cdh-server node, check the service status,
# and check that the scm database schema was created automatically.
# If both look good, the scm installation is complete.
systemctl status cloudera-scm-agent.service
systemctl status cloudera-scm-server.service
# Tail the log
tail -f /var/log/cloudera-scm-server/cloudera-scm-server.log
```

#### Cluster configuration

![cm_install_step_01.png](/images/cm_install_step_01.png)
Open the web console to configure the cluster, e.g. http://10.240.114.34:7180/cmf/login ; the default username and password are both admin.

![cm_install_step_02.png](/images/cm_install_step_02.png)
Click "Continue".

![cm_install_step_03.png](/images/cm_install_step_03.png)
Accept the license.

![cm_install_step_04.png](/images/cm_install_step_04.png)
The free edition is chosen here; pick a paid edition if you need one.

![cm_install_step_05.png](/images/cm_install_step_05.png)
Click "Continue".

![cm_install_step_06.png](/images/cm_install_step_06.png)
Select "Currently Managed Hosts".

![cm_install_step_07.png](/images/cm_install_step_07.png)
Once the CDH-6.0.1 version shows up as selectable, click "Continue".

![cm_install_step_08.png](/images/cm_install_step_08.png)
Wait for the CDH parcels to finish installing, then click "Continue".

![cm_install_step_09.png](/images/cm_install_step_09.png)

![cm_install_step_10.png](/images/cm_install_step_10.png)
Click "Finish".

![cm_install_step_11.png](/images/cm_install_step_11.png)
Select services according to your needs.

![cm_install_step_12.png](/images/cm_install_step_12.png)
Customize the role assignments.

![cm_install_step_13.png](/images/cm_install_step_13.png)
Database settings.

![cm_install_step_14.png](/images/cm_install_step_14.png)
Review the changes; specific directories or parameters can be corrected here.

![cm_install_step_15.png](/images/cm_install_step_15.png)
Wait for the first run to complete.

![cm_install_step_16.png](/images/cm_install_step_16.png)

![cm_install_step_17.png](/images/cm_install_step_17.png)

![cm_install_step_18.png](/images/cm_install_step_18.png)
You land in the admin console; the deployment is essentially complete.

### Deploy Kylin

```sh
cd /Users/yanglei/01_git/oschina/ansible/big_data

ansible kylin -i inventory/uat_cdh6.ini -m ping

# Install kylin
ansible-playbook -i inventory/uat_cdh6.ini kylin.yml

# Add a symlink for spark's jars directory.
# Note: the ansible file module does not expand remote environment variables such
# as $SPARK_HOME, so the literal parcel path is used for the link location here.
ansible kylin -i inventory/uat_cdh6.ini -m file -a 'src=/opt/cloudera/parcels/CDH/jars dest=/opt/cloudera/parcels/CDH/lib/spark/jars state=link'
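
# Optionally confirm the link landed on every node (a quick check; adjust the
# path if your parcel layout differs):
ansible kylin -i inventory/uat_cdh6.ini -m command -a 'ls -ld /opt/cloudera/parcels/CDH/lib/spark/jars'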

# Use a symlink to shorten the HIVE_LIB path, so kylin startup does not fail
# with "argument list too long" (again using the literal parcel path instead of $HIVE_HOME)
ansible kylin -i inventory/uat_cdh6.ini -m file -a 'src=/opt/cloudera/parcels/CDH/lib/hive/lib dest=/hivelib state=link'

# Check the environment (as the hdfs user)
su - hdfs
# hdfs dfs -chmod -R 777 /
$KYLIN_HOME/bin/check-env.sh
$KYLIN_HOME/bin/find-hive-dependency.sh
$KYLIN_HOME/bin/find-hbase-dependency.sh
$KYLIN_HOME/bin/find-spark-dependency.sh

# Start
$KYLIN_HOME/bin/kylin.sh start

# Stop
$KYLIN_HOME/bin/kylin.sh stop

# Verify via the web UI:
# http://172.20.32.131:7070/kylin
# The initial username/password is ADMIN/KYLIN

# Test kylin with the sample cube
$KYLIN_HOME/bin/sample.sh
```
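Once the web UI answers, Kylin's REST API can be probed from the shell as well (a minimal sketch, assuming the host/port above and the default ADMIN/KYLIN account; adjust both to your deployment):

```sh
# List projects via the REST API using HTTP basic auth; a JSON array in the
# response means Kylin is serving requests
curl -s -u ADMIN:KYLIN http://172.20.32.131:7070/kylin/api/projects
```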
### Uninstall CDH

```sh
ansible-playbook -i inventory/uat_cdh6.ini 99.clean_all.yml

# Then drop the databases that were created earlier
```

### CDH configuration

#### Directory locations

Path | Description
----------------------------------------|------------------------------
/var/lib/cloudera-scm-server | CM Server working directory
/var/log/cloudera-scm-* | CM log directories
/opt/cloudera/parcels/ | Installation directory for Hadoop-related services
/opt/cloudera/parcel-repo/ | Downloaded service packages (parcels)
/opt/cloudera/parcel-cache | Cache of downloaded service packages
/opt/cloudera/parcels/CDH/jars | Directory holding all CDH jars
/etc/cloudera-scm-agent/config.ini | CM Agent configuration file
/etc/cloudera-scm-server/ | CM Server configuration directory
/etc/cloudera-scm-server/db.properties | CM Server database configuration
/etc/hadoop/* | Hadoop client configuration directories
/etc/hive/ | Hive configuration directory
... |

#### Environment variables

CDH ships its own environment variable script:

```sh
cat /opt/cloudera/parcels/CDH/meta/cdh_env.sh
#!/bin/bash
CDH_DIRNAME=${PARCEL_DIRNAME:-"CDH-6.0.1-1.cdh6.0.1.p0.590678"}
export CDH_HADOOP_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hadoop
export CDH_MR1_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hadoop-0.20-mapreduce
export CDH_HDFS_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hadoop-hdfs
export CDH_HTTPFS_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hadoop-httpfs
export CDH_MR2_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hadoop-mapreduce
export CDH_YARN_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hadoop-yarn
export CDH_HBASE_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hbase
export CDH_ZOOKEEPER_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/zookeeper
export CDH_HIVE_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hive
export CDH_HUE_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hue
export CDH_OOZIE_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/oozie
export CDH_HUE_PLUGINS_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hadoop
export CDH_FLUME_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/flume-ng
export CDH_PIG_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/pig
export CDH_HCAT_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hive-hcatalog
export CDH_SENTRY_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/sentry
export JSVC_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/bigtop-utils
export CDH_HADOOP_BIN=$CDH_HADOOP_HOME/bin/hadoop
export CDH_IMPALA_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/impala
export CDH_SOLR_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/solr
export CDH_HBASE_INDEXER_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hbase-solr
export SEARCH_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/search
export CDH_SPARK_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/spark
export WEBHCAT_DEFAULT_XML=$PARCELS_ROOT/$CDH_DIRNAME/etc/hive-webhcat/conf.dist/webhcat-default.xml
export CDH_KMS_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hadoop-kms
export CDH_PARQUET_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/parquet
export CDH_AVRO_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/avro
export CDH_KAFKA_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/kafka
export CDH_KUDU_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/kudu
```

#### Other tips

From the Cloudera Manager UI you can add or remove hosts, add services to the cluster, and so on.

The Cloudera Manager UI ships with google-analytics enabled; since it is slow to reach from mainland China, you can turn it off:

Administration -> Settings -> Other -> uncheck "Allow Usage Data Collection".

### References

#### Deployment

[CentOS 7 NTP server configuration](https://www.cnblogs.com/harrymore/p/9566229.html)

[Configuring an NTP time server on CentOS 7](https://blog.csdn.net/zzy5066/article/details/79036674)

[Time synchronization with NTP on CentOS 7](http://www.cnblogs.com/yangxiansen/p/7860008.html)

[How to assign roles in a hadoop cluster](https://blog.csdn.net/chenguangchun1993/article/details/79164857)

[Cloudera Manager and CDH 6.0.1 installation and uninstallation, with screenshots of each step](https://blog.csdn.net/tototuzuoquan/article/details/85111018)

[Installing and deploying CDH6 on CentOS 7.5](https://blog.csdn.net/TXBSW/article/details/84648269)

[CDH 6.0.1 installation explained in detail](https://blog.csdn.net/u010003835/article/details/85007946)

[Cloudera Manager and CDH 6.0.1 installation on CentOS 7, explained in detail](https://www.cnblogs.com/wzlinux/p/10183357.html)

[CDH 5.15 uninstall guide](https://blog.csdn.net/weixin_35852328/article/details/81774627)

#### Configuration

[CDH5 quick start guide](https://www.jianshu.com/p/72dc1c591647)
--------------------------------------------------------------------------------