├── kylin.yml
├── scala.yml
├── roles
│   ├── kylin
│   │   ├── templates
│   │   │   ├── kylin_env.sh
│   │   │   ├── kylin_job_conf_inmem.xml
│   │   │   └── kylin.properties
│   │   └── tasks
│   │       └── main.yml
│   ├── scala
│   │   ├── templates
│   │   │   └── scala_env.sh
│   │   └── tasks
│   │       └── main.yml
│   ├── ssh
│   │   ├── defaults
│   │   │   └── main.yml
│   │   └── tasks
│   │       └── main.yml
│   ├── jdk
│   │   ├── files
│   │   │   └── mysql-connector-java-5.1.46.jar
│   │   ├── templates
│   │   │   └── cdh_java_home.sh
│   │   ├── defaults
│   │   │   └── main.yml
│   │   └── tasks
│   │       └── main.yml
│   ├── cdh
│   │   ├── defaults
│   │   │   └── main.yml
│   │   └── tasks
│   │       └── main.yml
│   ├── cm
│   │   ├── defaults
│   │   │   └── main.yml
│   │   ├── tasks
│   │   │   └── main.yml
│   │   └── templates
│   │       └── config.ini
│   └── common
│       ├── defaults
│       │   └── main.yml
│       ├── files
│       │   ├── CentOS-7-163.repo
│       │   ├── ntp.conf
│       │   └── CentOS-7-aliyun.repo
│       └── tasks
│           └── main.yml
├── images
│   ├── cm_install_step_01.png
│   ├── cm_install_step_02.png
│   ├── cm_install_step_03.png
│   ├── cm_install_step_04.png
│   ├── cm_install_step_05.png
│   ├── cm_install_step_06.png
│   ├── cm_install_step_07.png
│   ├── cm_install_step_08.png
│   ├── cm_install_step_09.png
│   ├── cm_install_step_10.png
│   ├── cm_install_step_11.png
│   ├── cm_install_step_12.png
│   ├── cm_install_step_13.png
│   ├── cm_install_step_14.png
│   ├── cm_install_step_15.png
│   ├── cm_install_step_16.png
│   ├── cm_install_step_17.png
│   └── cm_install_step_18.png
├── inventory
│   ├── uat_cdh6.yml
│   ├── dev_cdh6.ini
│   └── uat_cdh6.ini
├── 01.cdh.yml
├── shell
│   └── cleanLog.sh
├── 99.clean_all.yml
└── README.md
/kylin.yml:
--------------------------------------------------------------------------------
1 | - hosts:
2 | - kylin
3 | roles:
4 | - kylin
5 |
--------------------------------------------------------------------------------
/scala.yml:
--------------------------------------------------------------------------------
1 | - hosts:
2 | - spark
3 | roles:
4 | - scala
5 |
--------------------------------------------------------------------------------
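Both playbooks simply map a host group to a role. A minimal invocation sketch (it assumes the role variables such as kylin_download_url are defined, e.g. by uncommenting the kylin block in inventory/dev_cdh6.ini; note that scala.yml targets a spark group, which the sample inventories do not define, so it would need to be added):

  ansible-playbook -i inventory/dev_cdh6.ini kylin.yml
  ansible-playbook -i inventory/dev_cdh6.ini scala.yml
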
/roles/kylin/templates/kylin_env.sh:
--------------------------------------------------------------------------------
1 | export KYLIN_HOME={{ kylin_path }}
2 | export PATH=$KYLIN_HOME/bin:$PATH
3 |
--------------------------------------------------------------------------------
/roles/scala/templates/scala_env.sh:
--------------------------------------------------------------------------------
1 | export SCALA_HOME={{ scala_path }}
2 | export PATH=$SCALA_HOME/bin:$PATH
3 |
--------------------------------------------------------------------------------
/images/cm_install_step_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_01.png
--------------------------------------------------------------------------------
/images/cm_install_step_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_02.png
--------------------------------------------------------------------------------
/images/cm_install_step_03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_03.png
--------------------------------------------------------------------------------
/images/cm_install_step_04.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_04.png
--------------------------------------------------------------------------------
/images/cm_install_step_05.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_05.png
--------------------------------------------------------------------------------
/images/cm_install_step_06.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_06.png
--------------------------------------------------------------------------------
/images/cm_install_step_07.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_07.png
--------------------------------------------------------------------------------
/images/cm_install_step_08.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_08.png
--------------------------------------------------------------------------------
/images/cm_install_step_09.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_09.png
--------------------------------------------------------------------------------
/images/cm_install_step_10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_10.png
--------------------------------------------------------------------------------
/images/cm_install_step_11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_11.png
--------------------------------------------------------------------------------
/images/cm_install_step_12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_12.png
--------------------------------------------------------------------------------
/images/cm_install_step_13.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_13.png
--------------------------------------------------------------------------------
/images/cm_install_step_14.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_14.png
--------------------------------------------------------------------------------
/images/cm_install_step_15.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_15.png
--------------------------------------------------------------------------------
/images/cm_install_step_16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_16.png
--------------------------------------------------------------------------------
/images/cm_install_step_17.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_17.png
--------------------------------------------------------------------------------
/images/cm_install_step_18.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_18.png
--------------------------------------------------------------------------------
/roles/ssh/defaults/main.yml:
--------------------------------------------------------------------------------
1 | # SSH user
2 | v_ssh_user: "root"
3 |
4 | # SSH group
5 | v_ssh_group: "root"
6 |
7 | # SSH user home directory
8 | v_ssh_user_path: "/root"
9 |
--------------------------------------------------------------------------------
/roles/jdk/files/mysql-connector-java-5.1.46.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/roles/jdk/files/mysql-connector-java-5.1.46.jar
--------------------------------------------------------------------------------
/roles/jdk/templates/cdh_java_home.sh:
--------------------------------------------------------------------------------
1 | export JAVA_HOME=/usr/java/jdk{{ v_jdk_version }}-cloudera
2 | export CLASSPATH=.:$CLASSPATH:$JAVA_HOME/lib
3 | export JRE_HOME=$JAVA_HOME/jre
4 | export PATH=$PATH:$JAVA_HOME/bin
5 |
--------------------------------------------------------------------------------
/roles/jdk/defaults/main.yml:
--------------------------------------------------------------------------------
1 | # Download URL for the JDK build recommended by cdh
2 | v_cdh_oracle_j2sdk_download_url: "{{ v_cdh_download_server }}/cm6/{{ v_cdh_version }}/redhat7/yum/RPMS/x86_64/oracle-j2sdk1.8-1.8.0+update141-1.x86_64.rpm"
3 |
4 | # Whether to install the jdk
5 | v_jdk_install: "false"
6 |
7 | # jdk version to install
8 | v_jdk_version: "1.8.0_141"
9 |
--------------------------------------------------------------------------------
/inventory/uat_cdh6.yml:
--------------------------------------------------------------------------------
1 | # Custom hosts entries
2 | v_hosts: {
3 |   "bjds-kubernetes-node-pre-10-240-114-34-vm.belle.lan": "10.240.114.34 ",
4 |   "bjds-kubernetes-node-pre-10-240-114-38-vm.belle.lan": "10.240.114.38 ",
5 |   "bjds-kubernetes-node-pre-10-240-114-65-vm.belle.lan": "10.240.114.65 ",
6 |   "bjds-kubernetes-node-pre-10-240-114-67-vm.belle.lan": "10.240.114.67 "
7 | }
--------------------------------------------------------------------------------
/roles/cdh/defaults/main.yml:
--------------------------------------------------------------------------------
1 | v_cdh_download_url: "{{v_cdh_download_server}}/cdh6/{{v_cdh_version}}/parcels/CDH-{{v_cdh_version}}-1.cdh{{v_cdh_version}}.p{{v_cdh_version_p}}-el7.parcel"
2 |
3 | v_cdh_sha_download_url: "{{v_cdh_download_server}}/cdh6/{{v_cdh_version}}/parcels/CDH-{{v_cdh_version}}-1.cdh{{v_cdh_version}}.p{{v_cdh_version_p}}-el7.parcel.sha256"
4 |
5 | v_cdh_manifest_download_url: "{{v_cdh_download_server}}/cdh6/{{v_cdh_version}}/parcels/manifest.json"
6 |
--------------------------------------------------------------------------------
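With the sample inventory values (v_cdh_download_server=https://archive.cloudera.com, v_cdh_version=6.0.1, v_cdh_version_p=0.590678), v_cdh_download_url renders to:

  https://archive.cloudera.com/cdh6/6.0.1/parcels/CDH-6.0.1-1.cdh6.0.1.p0.590678-el7.parcel
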
/01.cdh.yml:
--------------------------------------------------------------------------------
1 | - hosts:
2 | - cdh-cluster
3 | roles:
4 | - common
5 | tags: "common"
6 |
7 | - hosts:
8 | - cdh-cluster
9 | roles:
10 | - jdk
11 | tags: "jdk"
12 |
13 | - hosts:
14 | - cdh-server
15 | roles:
16 | - ssh
17 | tags: "ssh"
18 |
19 | - hosts:
20 | - cdh-agent
21 | roles:
22 | - ssh
23 | tags: "ssh"
24 |
25 | - hosts:
26 | - cdh-cluster
27 | roles:
28 | - cm
29 | tags: "cm"
30 |
31 | - hosts:
32 | - cdh-server
33 | roles:
34 | - cdh
35 | tags: "cdh"
--------------------------------------------------------------------------------
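Note the inventory comments still refer to the playbook as cdh.yml; the file here is 01.cdh.yml. A sketch of a full run and of re-running a single role via its tag:

  ansible-playbook -i inventory/uat_cdh6.ini -e @inventory/uat_cdh6.yml 01.cdh.yml
  ansible-playbook -i inventory/uat_cdh6.ini -e @inventory/uat_cdh6.yml 01.cdh.yml -t jdk
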
/roles/cm/defaults/main.yml:
--------------------------------------------------------------------------------
1 | # Download URL of the cm yum repository file
2 | v_yum_repo_url: "{{ v_cdh_download_server }}/cm6/{{ v_cdh_version }}/redhat7/yum/cloudera-manager.repo"
3 |
4 | # Download URL of the GPG signing key for the cm repository
5 | v_yum_repo_gpgcheck_url: "{{ v_cdh_download_server }}/cm6/{{ v_cdh_version }}/redhat7/yum/RPM-GPG-KEY-cloudera"
6 |
7 | # scm_db_type
8 | v_cm_db_type: "mysql"
9 |
10 | # scm_db_host
11 | v_cm_db_host: ""
12 |
13 | # scm_db_name
14 | v_cm_db_name: "scm"
15 |
16 | # scm_db_user
17 | v_cm_db_user: "root"
18 |
19 | # scm_db_password
20 | v_cm_db_password: ""
21 |
22 | # scm_db_port
23 | v_cm_db_port: 3306
24 |
--------------------------------------------------------------------------------
/roles/common/defaults/main.yml:
--------------------------------------------------------------------------------
1 | # Whether to switch to the 163 yum mirror
2 | v_update_yum_with_163: "false"
3 |
4 | # Whether to switch to the aliyun yum mirror
5 | v_update_yum_with_aliyun: "false"
6 |
7 | # Whether to upgrade the kernel
8 | v_update_kernel: "false"
9 |
10 | # Whether to install required packages
11 | v_yum_install: "false"
12 |
13 | # Whether to set up time synchronization
14 | v_ntpdate_install: "false"
15 |
16 | # NTP server address
17 | v_ntpdate_address: "ntp1.aliyun.com"
18 |
19 | # Whether to set the hosts' hostnames
20 | v_enable_set_hostname: "false"
21 |
22 | # Whether to update the OS hosts file
23 | v_update_hosts: "false"
24 |
25 | # Custom hosts entries
26 | v_hosts: {
27 |   "localhost localhost.localdomain localhost4 localhost4.localdomain4": "127.0.0.1 ",
28 |   "localhost localhost.localdomain localhost6 localhost6.localdomain6": "::1 "
29 | }
30 |
31 | # How aggressively the server uses swap (vm.swappiness; Cloudera recommends at most 10)
32 | v_vm_swappiness: 10
33 |
34 |
--------------------------------------------------------------------------------
/roles/ssh/tasks/main.yml:
--------------------------------------------------------------------------------
1 | - name: Check whether the user's ssh public key already exists
2 | shell: "ls {{ v_ssh_user_path }}/.ssh|grep '.pub' |wc -l"
3 | register: key_exist
4 | ignore_errors: true
5 |
6 | - name: Generate an ssh key pair
7 | user:
8 | name: "{{ v_ssh_user }}"
9 | generate_ssh_key: yes
10 | ssh_key_bits: 2048
11 | ssh_key_file: .ssh/id_rsa
12 | when: "key_exist.stdout == '0'"
13 |
14 | - name: Fetch the user's ssh public key to the control node
15 | fetch:
16 | src: "{{ v_ssh_user_path }}/.ssh/id_rsa.pub"
17 | dest: "/tmp/id_{{ ansible_host }}_{{ v_ssh_user }}.pub"
18 | flat: yes
19 |
20 | # Read the public key fetched to the control node and append it to authorized_keys
21 | - name: Install the server's public key so it can log in to the agents without a password
22 | authorized_key:
23 | user: "{{ v_ssh_user }}"
24 | key: "{{ lookup('file', '/tmp/id_{{ v_server_ip }}_{{ v_ssh_user }}.pub') }}"
25 |
--------------------------------------------------------------------------------
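The net effect: each host in the play gets a key pair if it lacks one, every public key lands in /tmp on the control node, and the key fetched from v_server_ip is appended to authorized_keys on all hosts in the play. A manual spot check from the cdh-server host (agent IP taken from the uat inventory; a sketch):

  ssh root@10.240.114.38 hostname   # should return the hostname without a password prompt
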
/roles/kylin/tasks/main.yml:
--------------------------------------------------------------------------------
1 | - name: Prepare the kylin working directory
2 | file: name={{ item }} state=directory owner={{ hadoop_user }} group={{ hadoop_group }} mode=0755
3 | with_items:
4 | - "{{ kylin_work_path }}"
5 |
6 | - name: Download the kylin package
7 | get_url: url={{ kylin_download_url }} dest=/tmp owner={{ hadoop_user }} group={{ hadoop_group }} mode=644
8 |
9 | - name: Unpack the kylin package
10 | unarchive:
11 | # src can also be a URL, which unarchive downloads and unpacks directly
12 | src: "/tmp/apache-kylin-{{kylin_version}}-bin-{{kylin_env}}.tar.gz"
13 | copy: no
14 | dest: "{{ kylin_work_path }}"
15 | owner: "{{ hadoop_user }}"
16 | group: "{{ hadoop_group }}"
17 |
18 | - name: Fix ownership of the kylin directory
19 | file: name={{ kylin_path }} state=directory recurse=yes owner={{ hadoop_user }} group={{ hadoop_group }}
20 |
21 | - name: Install the kylin environment script
22 | template: src=kylin_env.sh dest=/etc/profile.d
23 |
24 | - name: Source the kylin environment script
25 | shell: "source /etc/profile.d/kylin_env.sh"
26 |
--------------------------------------------------------------------------------
/roles/scala/tasks/main.yml:
--------------------------------------------------------------------------------
1 | - name: Prepare the scala working directory
2 | file: name={{ item }} state=directory owner={{ hadoop_user }} group={{ hadoop_group }} mode=0755
3 | with_items:
4 | - "{{ scala_work_path }}"
5 |
6 | - name: Download the scala package
7 | get_url: url={{ scala_download_url }} dest=/tmp owner={{ hadoop_user }} group={{ hadoop_group }} mode=644
8 |
9 | - name: Unpack the scala package
10 | unarchive:
11 | # src can also be a URL, which unarchive downloads and unpacks directly
12 | src: "/tmp/scala-{{ scala_version }}.tgz"
13 | copy: no
14 | dest: "{{ scala_work_path }}"
15 | owner: "{{ hadoop_user }}"
16 | group: "{{ hadoop_group }}"
17 |
18 | - name: Fix ownership of the scala directory
19 | file: name={{ scala_path }} state=directory recurse=yes owner={{ hadoop_user }} group={{ hadoop_group }}
20 |
21 | - name: Install the scala environment script
22 | template: src=scala_env.sh dest=/etc/profile.d
23 |
24 | - name: Source the scala environment script
25 | shell: "source /etc/profile.d/scala_env.sh"
26 |
27 |
28 |
29 |
30 |
--------------------------------------------------------------------------------
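In both the kylin and scala roles, the final source task only affects that task's own shell; the variables actually take effect because the script sits in /etc/profile.d and is sourced by later login shells. A verification sketch in a fresh login shell (check-env.sh ships in the Kylin binary package's bin directory):

  bash -l -c 'scala -version'
  bash -l -c '$KYLIN_HOME/bin/check-env.sh'
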
/roles/cdh/tasks/main.yml:
--------------------------------------------------------------------------------
1 | - name: Download the cdh parcels
2 | get_url: url={{ item }} dest=/opt/cloudera/parcel-repo owner=cloudera-scm group=cloudera-scm mode=644
3 | with_items:
4 | - "{{ v_cdh_manifest_download_url }}"
5 | - "{{ v_cdh_download_url }}"
6 |
7 | - name: Download the cdh parcel.sha256 and save it as parcel.sha
8 | get_url: url={{ item }} dest="/opt/cloudera/parcel-repo/CDH-{{v_cdh_version}}-1.cdh{{v_cdh_version}}.p{{v_cdh_version_p}}-el7.parcel.sha" owner=cloudera-scm group=cloudera-scm mode=644
9 | with_items:
10 | - "{{ v_cdh_sha_download_url }}"
11 |
12 | - name: Write the parcel hash for this version (looked up in manifest.json; hardcoded here for 6.0.1) into the .sha file
13 | shell: 'echo "2e650f1f1ea020a3efc98a231b85c2df1a50b030" > "/opt/cloudera/parcel-repo/CDH-{{v_cdh_version}}-1.cdh{{v_cdh_version}}.p{{v_cdh_version_p}}-el7.parcel.sha"'
14 |
15 | - name: Restart the cloudera-scm-server service and enable it at boot
16 | systemd:
17 | name: cloudera-scm-server
18 | daemon_reload: yes
19 | state: restarted
20 | enabled: yes
21 |
--------------------------------------------------------------------------------
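The hash echoed into the .parcel.sha file is hardcoded for the 6.0.1 el7 parcel. For any other version it can be read from the manifest.json downloaded in the first task; a sketch, assuming the usual Cloudera manifest layout (a top-level "parcels" list whose entries carry "parcelName" and "hash"):

  python -c 'import json
  m = json.load(open("/opt/cloudera/parcel-repo/manifest.json"))
  for p in m["parcels"]:
      if p["parcelName"].endswith("el7.parcel"):
          print(p["parcelName"] + " " + p["hash"])'
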
/shell/cleanLog.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 |
4 | # clear cloudera manager monitor log
5 | rm -rf /var/lib/cloudera-host-monitor/ts/*/partition*/*
6 | rm -rf /var/lib/cloudera-service-monitor/ts/*/partition*/*
7 |
8 | # clear cdh log
9 | rm -rf /var/log/cloudera-scm-eventserver/*.out.*
10 | rm -rf /var/log/cloudera-scm-firehose/*.out.*
11 | rm -rf /var/log/cloudera-scm-agent/*.log.*
12 | rm -rf /var/log/cloudera-scm-agent/*.out.*
13 | rm -rf /var/log/cloudera-scm-server/*.out.*
14 | rm -rf /var/log/cloudera-scm-server/*.log.*
15 |
16 | rm -rf /var/log/hadoop-hdfs/*.out.*
17 | rm -rf /var/log/hadoop-hdfs/*.log.*
18 | rm -rf /var/log/hadoop-httpfs/*.out.*
19 | rm -rf /var/log/hadoop-kms/*.out.*
20 | rm -rf /var/log/hadoop-mapreduce/*.out.*
21 | rm -rf /var/log/hadoop-yarn/*.out.*
22 | rm -rf /var/log/hadoop-yarn/*.audit.*
23 | rm -rf /var/log/hive/*.out.*
24 | rm -rf /var/log/oozie/*.out.*
25 | rm -rf /var/log/oozie/*.log.*
26 |
27 | rm -rf /var/log/zookeeper/*.log.*
28 |
--------------------------------------------------------------------------------
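The script is not referenced by any playbook; one way to run it across the cluster is Ansible's script module (a sketch):

  ansible -i inventory/uat_cdh6.ini cdh-cluster -m script -a shell/cleanLog.sh
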
/roles/jdk/tasks/main.yml:
--------------------------------------------------------------------------------
1 | - name: Remove the system's bundled JDK
2 | yum: name={{ item }} state=absent
3 | with_items:
4 | - "java*"
5 | - "jdk*"
6 | - "oracle-j2sdk*"
7 | when: v_jdk_install=="true"
8 |
9 | - name: Download the JDK
10 | get_url: url={{ item }} dest=/tmp mode=644
11 | with_items:
12 | - "{{ v_cdh_oracle_j2sdk_download_url }}"
13 | when: v_jdk_install=="true"
14 |
15 | - name: Install the JDK
16 | yum: name={{ item }} state=present
17 | with_items:
18 | - "/tmp/oracle-j2sdk1.8-1.8.0+update141-1.x86_64.rpm"
19 | when: v_jdk_install=="true"
20 |
21 | - name: Configure JAVA_HOME
22 | template: src=cdh_java_home.sh dest=/etc/profile.d
23 | when: v_jdk_install=="true"
24 |
25 | - name: Source the JAVA_HOME script
26 | shell: "source /etc/profile.d/cdh_java_home.sh"
27 | when: v_jdk_install=="true"
28 |
29 | - name: Prepare the shared java directory
30 | file: name={{ item }} state=directory mode=0755
31 | with_items:
32 | - "/usr/share/java"
33 |
34 | - name: Copy the mysql-connector jar
35 | copy: src=mysql-connector-java-5.1.46.jar dest=/usr/share/java/mysql-connector-java.jar mode=644
36 |
--------------------------------------------------------------------------------
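As in the kylin and scala roles, the source task here only affects its own shell; new login shells pick up /etc/profile.d/cdh_java_home.sh. A quick check on a target host (a sketch):

  bash -l -c 'echo $JAVA_HOME && java -version'
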
/roles/cm/tasks/main.yml:
--------------------------------------------------------------------------------
1 | - name: Download the cm yum repository file
2 | # debug: msg={{ groups['cdh-server'] }}
3 | get_url: url={{ v_yum_repo_url }} dest=/etc/yum.repos.d/
4 |
5 | - name: Import the repository's GPG signing key
6 | shell: "rpm --import {{ v_yum_repo_gpgcheck_url }}"
7 |
8 | - name: Install cloudera-manager-daemons and cloudera-manager-agent
9 | yum: update_cache=yes name={{ item }} state=present
10 | with_items:
11 | - cloudera-manager-daemons
12 | - cloudera-manager-agent
13 |
14 | - name: Install cloudera-manager-server
15 | yum: update_cache=yes name={{ item }} state=present
16 | with_items:
17 | - cloudera-manager-server
18 | when: "'cdh-server' in group_names"
19 |
20 | - name: Deploy the cm agent config file
21 | template: src={{ item }} dest=/etc/cloudera-scm-agent/config.ini mode=644
22 | with_items:
23 | - config.ini
24 |
25 | - name: Restart the cloudera-scm-agent service and enable it at boot
26 | systemd:
27 | name: cloudera-scm-agent
28 | daemon_reload: yes
29 | state: restarted
30 | enabled: yes
31 |
32 | - name: Initialize the scm database
33 | shell: "/opt/cloudera/cm/schema/scm_prepare_database.sh mysql -h {{ v_cm_db_host }} -P {{ v_cm_db_port }} --scm-host {{ v_server_ip }} {{ v_cm_db_name }} {{ v_cm_db_user }} {{ v_cm_db_password }} "
34 | when: "'cdh-server' in group_names"
35 |
--------------------------------------------------------------------------------
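Once scm_prepare_database.sh has succeeded and the cdh role has restarted cloudera-scm-server, the Cloudera Manager console listens on port 7180 of the server host. A quick sanity check (server IP taken from the uat inventory; a sketch):

  systemctl status cloudera-scm-server
  curl -sI http://10.240.114.34:7180/   # expect an HTTP response once startup completes
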
/roles/common/files/CentOS-7-163.repo:
--------------------------------------------------------------------------------
1 | # CentOS-Base.repo
2 | #
3 | # The mirror system uses the connecting IP address of the client and the
4 | # update status of each mirror to pick mirrors that are updated to and
5 | # geographically close to the client. You should use this for CentOS updates
6 | # unless you are manually picking other mirrors.
7 | #
8 | # If the mirrorlist= does not work for you, as a fall back you can try the
9 | # remarked out baseurl= line instead.
10 | #
11 | #
12 | [base]
13 | name=CentOS-$releasever - Base - 163.com
14 | #mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=os
15 | baseurl=http://mirrors.163.com/centos/$releasever/os/$basearch/
16 | gpgcheck=1
17 | gpgkey=http://mirrors.163.com/centos/RPM-GPG-KEY-CentOS-7
18 |
19 | #released updates
20 | [updates]
21 | name=CentOS-$releasever - Updates - 163.com
22 | #mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=updates
23 | baseurl=http://mirrors.163.com/centos/$releasever/updates/$basearch/
24 | gpgcheck=1
25 | gpgkey=http://mirrors.163.com/centos/RPM-GPG-KEY-CentOS-7
26 |
27 | #additional packages that may be useful
28 | [extras]
29 | name=CentOS-$releasever - Extras - 163.com
30 | #mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=extras
31 | baseurl=http://mirrors.163.com/centos/$releasever/extras/$basearch/
32 | gpgcheck=1
33 | gpgkey=http://mirrors.163.com/centos/RPM-GPG-KEY-CentOS-7
34 |
35 | #additional packages that extend functionality of existing packages
36 | [centosplus]
37 | name=CentOS-$releasever - Plus - 163.com
38 | baseurl=http://mirrors.163.com/centos/$releasever/centosplus/$basearch/
39 | gpgcheck=1
40 | enabled=0
41 | gpgkey=http://mirrors.163.com/centos/RPM-GPG-KEY-CentOS-7
42 |
--------------------------------------------------------------------------------
/inventory/dev_cdh6.ini:
--------------------------------------------------------------------------------
1 | [cdh-server]
2 | 10.0.42.182 node_name="sz19f-scm-lmp-test-10-0-42-182-vm.belle.lan" node_ip="10.0.42.182"
3 |
4 | [cdh-agent]
5 | 10.0.42.140 node_name="sz19f-scm-lmp-test-10-0-42-140-vm.belle.lan" node_ip="10.0.42.140"
6 | 10.0.42.184 node_name="sz19f-scm-lmp-test-10-0-42-184-vm.belle.lan" node_ip="10.0.42.184"
7 | 10.0.42.179 node_name="sz19f-scm-lmp-test-10-0-42-179-vm.belle.lan" node_ip="10.0.42.179"
8 |
9 | [cdh-cluster:children]
10 | cdh-server
11 | cdh-agent
12 |
13 | [kylin]
14 | 172.20.32.125
15 |
16 | [sqoop]
17 | 172.20.32.125
18 |
19 | [all:vars]
20 | ; # Whether to switch to the aliyun yum mirror
21 | ; v_update_yum_with_aliyun="true"
22 |
23 | # Whether to install required packages
24 | v_yum_install="true"
25 |
26 | # Whether to set up time synchronization
27 | v_ntpdate_install="true"
28 |
29 | ; # NTP server address
30 | ; v_ntpdate_address="ntp1.aliyun.com"
31 |
32 | ; # Whether to set the hosts' hostnames
33 | ; v_enable_set_hostname="true"
34 |
35 | # Whether to update the OS hosts file
36 | # Define custom hosts in a variables file and pass it in with -e.
37 | # Example command: ansible-playbook -i inventory/uat_cdh6.ini -e @inventory/uat_cdh6.yml cdh.yml
38 | ; v_update_hosts="true"
39 |
40 | # Server IP, for easy reference inside the code
41 | v_server_ip="10.0.42.182"
42 |
43 | # Download server for the cdh packages
44 | ; v_cdh_download_server="https://archive.cloudera.com"
45 | v_cdh_download_server="http://10.0.43.24:8066"
46 |
47 | # cdh major version
48 | v_cdh_version="6.0.1"
49 |
50 | # cdh parcel patch version
51 | v_cdh_version_p="0.590678"
52 |
53 | # Whether to install the jdk
54 | v_jdk_install="true"
55 |
56 | # scm_db_host
57 | v_cm_db_host="10.0.30.39"
58 |
59 | # scm_db_name
60 | v_cm_db_name="db_cdh6_scm"
61 |
62 | # scm_db_user
63 | v_cm_db_user="user_cdh6"
64 |
65 | # scm_db_password
66 | v_cm_db_password="123456"
67 |
68 | # scm_db_port
69 | v_cm_db_port=3306
70 |
71 | ; hadoop_user="root"
72 | ; hadoop_group="root"
73 | ; kylin_version="2.4.0"
74 | ; kylin_work_path="/home/cdh"
75 | ; kylin_path="{{kylin_work_path}}/apache-kylin-{{kylin_version}}-bin-{{kylin_env}}"
76 | ; kylin_config_path="{{kylin_path}}/conf"
77 | ; kylin_env="cdh57"
78 | ; kylin_download_url="http://172.20.32.36/package/kylin/apache-kylin-{{kylin_version}}-bin-{{kylin_env}}.tar.gz"
79 | ; #kylin_download_url="http://mirrors.hust.edu.cn/apache/kylin/apache-kylin-{{kylin_version}}/apache-kylin-{{kylin_version}}-bin-{{kylin_env}}.tar.gz"
80 |
--------------------------------------------------------------------------------
/roles/common/files/ntp.conf:
--------------------------------------------------------------------------------
1 | # For more information about this file, see the man pages
2 | # ntp.conf(5), ntp_acc(5), ntp_auth(5), ntp_clock(5), ntp_misc(5), ntp_mon(5).
3 |
4 | driftfile /var/lib/ntp/drift
5 |
6 | # Permit time synchronization with our time source, but do not
7 | # permit the source to query or modify the service on this system.
8 | restrict default nomodify notrap nopeer noquery
9 |
10 | # Permit all access over the loopback interface. This could
11 | # be tightened as well, but to do so would effect some of
12 | # the administrative functions.
13 | restrict 127.0.0.1
14 | restrict ::1
15 |
16 | # Hosts on local network are less restricted.
17 | #restrict 192.168.1.0 mask 255.255.255.0 nomodify notrap
18 |
19 | # Use public servers from the pool.ntp.org project.
20 | # Please consider joining the pool (http://www.pool.ntp.org/join.html).
21 | server ntp1.aliyun.com prefer
22 | server ntp3.aliyun.com
23 | server ntp5.aliyun.com
24 | server ntp7.aliyun.com
25 |
26 | #broadcast 192.168.1.255 autokey # broadcast server
27 | #broadcastclient # broadcast client
28 | #broadcast 224.0.1.1 autokey # multicast server
29 | #multicastclient 224.0.1.1 # multicast client
30 | #manycastserver 239.255.254.254 # manycast server
31 | #manycastclient 239.255.254.254 autokey # manycast client
32 |
33 | # Enable public key cryptography.
34 | #crypto
35 |
36 | includefile /etc/ntp/crypto/pw
37 |
38 | # Key file containing the keys and key identifiers used when operating
39 | # with symmetric key cryptography.
40 | keys /etc/ntp/keys
41 |
42 | # Specify the key identifiers which are trusted.
43 | #trustedkey 4 8 42
44 |
45 | # Specify the key identifier to use with the ntpdc utility.
46 | #requestkey 8
47 |
48 | # Specify the key identifier to use with the ntpq utility.
49 | #controlkey 8
50 |
51 | # Enable writing of statistics records.
52 | #statistics clockstats cryptostats loopstats peerstats
53 |
54 | # Disable the monitoring facility to prevent amplification attacks using ntpdc
55 | # monlist command when default restrict does not include the noquery flag. See
56 | # CVE-2013-5211 for more details.
57 | # Note: Monitoring will not be disabled with the limited restriction flag.
58 | disable monitor
59 |
--------------------------------------------------------------------------------
/inventory/uat_cdh6.ini:
--------------------------------------------------------------------------------
1 | [cdh-server]
2 | 10.240.114.34 node_name="bjds-kubernetes-node-pre-10-240-114-34-vm.belle.lan" node_ip="10.240.114.34"
3 |
4 | [cdh-agent]
5 | 10.240.114.38 node_name="bjds-kubernetes-node-pre-10-240-114-38-vm.belle.lan" node_ip="10.240.114.38"
6 | 10.240.114.65 node_name="bjds-kubernetes-node-pre-10-240-114-65-vm.belle.lan" node_ip="10.240.114.65"
7 | 10.240.114.67 node_name="bjds-kubernetes-node-pre-10-240-114-67-vm.belle.lan" node_ip="10.240.114.67"
8 |
9 | [cdh-cluster:children]
10 | cdh-server
11 | cdh-agent
12 |
13 | [kylin]
14 | 172.20.32.125
15 |
16 | [sqoop]
17 | 172.20.32.125
18 |
19 | [all:vars]
20 | ; # Whether to switch to the aliyun yum mirror
21 | ; v_update_yum_with_aliyun="true"
22 |
23 | # Whether to install required packages
24 | v_yum_install="true"
25 |
26 | ; # Whether to set up time synchronization
27 | ; v_ntpdate_install="true"
28 | ;
29 | ; # NTP server address
30 | ; v_ntpdate_address="ntp1.aliyun.com"
31 |
32 | ; # Whether to set the hosts' hostnames
33 | ; v_enable_set_hostname="true"
34 |
35 | # Whether to update the OS hosts file
36 | # Define custom hosts in a variables file and pass it in with -e.
37 | # Example command: ansible-playbook -i inventory/uat_cdh6.ini -e @inventory/uat_cdh6.yml cdh.yml
38 | v_update_hosts="true"
39 |
40 | # Server IP, for easy reference inside the code
41 | v_server_ip="10.240.114.34"
42 |
43 | # Download server for the cdh packages
44 | ; v_cdh_download_server="https://archive.cloudera.com"
45 | v_cdh_download_server="http://10.240.114.45:8066"
46 | ; v_cdh_download_server="http://10.0.43.24:8066"
47 |
48 | # cdh major version
49 | v_cdh_version="6.0.1"
50 |
51 | # cdh parcel patch version
52 | v_cdh_version_p="0.590678"
53 |
54 | # Whether to install the jdk
55 | v_jdk_install="false"
56 |
57 | # scm_db_host
58 | v_cm_db_host="10.240.114.54"
59 |
60 | # scm_db_name
61 | v_cm_db_name="db_cdh6_scm"
62 |
63 | # scm_db_user
64 | v_cm_db_user="user_cdh6"
65 |
66 | # scm_db_password
67 | v_cm_db_password="123456"
68 |
69 | # scm_db_port
70 | v_cm_db_port=3306
71 |
72 | ; hadoop_user="root"
73 | ; hadoop_group="root"
74 | ; kylin_version="2.4.0"
75 | ; kylin_work_path="/home/cdh"
76 | ; kylin_path="{{kylin_work_path}}/apache-kylin-{{kylin_version}}-bin-{{kylin_env}}"
77 | ; kylin_config_path="{{kylin_path}}/conf"
78 | ; kylin_env="cdh57"
79 | ; kylin_download_url="http://172.20.32.36/package/kylin/apache-kylin-{{kylin_version}}-bin-{{kylin_env}}.tar.gz"
80 | ; #kylin_download_url="http://mirrors.hust.edu.cn/apache/kylin/apache-kylin-{{kylin_version}}/apache-kylin-{{kylin_version}}-bin-{{kylin_env}}.tar.gz"
81 |
--------------------------------------------------------------------------------
/99.clean_all.yml:
--------------------------------------------------------------------------------
1 | - hosts:
2 | - cdh-cluster
3 | tasks:
4 | - name: Stop cloudera-scm-agent
5 | systemd:
6 | name: cloudera-scm-agent
7 | state: stopped
8 | ignore_errors: true
9 |
10 | - hosts:
11 | - cdh-server
12 | tasks:
13 | - name: Stop cloudera-scm-server
14 | systemd:
15 | name: cloudera-scm-server
16 | state: stopped
17 | ignore_errors: true
18 |
19 | - hosts:
20 | - cdh-cluster
21 | tasks:
22 | - name: Wait for the services to stop
23 | shell: "sleep 20"
24 | - name: Unmount directories mounted by cm
25 | shell: "for cm_mount_path in $(mount | grep 'cloudera-scm'| awk '{print $3}'); do umount $cm_mount_path; done"
26 | ignore_errors: true
27 | - name: Remove cloudera-manager-daemons and cloudera-manager-agent
28 | yum: name={{ item }} state=absent
29 | with_items:
30 | - cloudera-manager-daemons
31 | - cloudera-manager-agent
32 | ignore_errors: true
33 | - name: Remove cloudera-manager-server
34 | yum: name={{ item }} state=absent
35 | with_items:
36 | - cloudera-manager-server
37 | when: "'cdh-server' in group_names"
38 | ignore_errors: true
39 |
40 | - hosts:
41 | - cdh-cluster
42 | tasks:
43 | - name: Clean up directories and files
44 | shell: "rm -rf /usr/share/cmf /var/lib/cloudera* /var/log/cloudera* /var/run/cloudera* /var/run/hdfs-sockets && \
45 | rm -rf /tmp/.scmpreparenode.lock /usr/lib/hue && \
46 | rm -rf /var/lib/flume-ng /var/lib/hadoop* /var/lib/hue /var/lib/navigator /var/lib/oozie /var/lib/solr && \
47 | rm -rf /var/lib/zookeeper /var/lib/kudu /var/lib/kafka /var/lib/impala /var/lib/sqoop* && \
48 | rm -rf /usr/bin/hadoop* /usr/bin/zookeeper* /usr/bin/hbase* /usr/bin/hive* /usr/bin/hdfs /usr/bin/mapred && \
49 | rm -rf /usr/bin/yarn /usr/bin/sqoop* /usr/bin/oozie /usr/bin/impala /usr/bin/spark* && \
50 | rm -rf /etc/hadoop* /etc/zookeeper* /etc/hive* /etc/hue /etc/impala /etc/sqoop* /etc/oozie && \
51 | rm -rf /etc/hbase* /etc/hcatalog /etc/spark /etc/solr /etc/cloudera* && \
52 | rm -rf /opt/cloudera && \
53 | rm -rf /data/kudu /data/dfs /data/yarn /data/mapred"
54 | ignore_errors: true
55 | - name: Clean up alternatives symlinks
56 | # Note: use ls -l here, not the ll alias, or the command fails.
57 | shell: "for alternatives in $(ls -l /etc/alternatives | grep CDH-{{ v_cdh_version }} | awk '{print $9}'); do rm -rf /etc/alternatives/$alternatives; done"
58 | ignore_errors: true
59 |
60 | # Kill related processes:
61 | # for u in hdfs mapred cloudera-scm hbase hue zookeeper oozie hive impala flume; do sudo kill $(ps -u $u -o pid=); done
62 |
--------------------------------------------------------------------------------
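A teardown run sketch (the alternatives cleanup uses v_cdh_version, so the same inventory is passed):

  ansible-playbook -i inventory/uat_cdh6.ini 99.clean_all.yml
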
/roles/common/files/CentOS-7-aliyun.repo:
--------------------------------------------------------------------------------
1 | # wget http://mirrors.aliyun.com/repo/Centos-7.repo
2 |
3 | # CentOS-Base.repo
4 | #
5 | # The mirror system uses the connecting IP address of the client and the
6 | # update status of each mirror to pick mirrors that are updated to and
7 | # geographically close to the client. You should use this for CentOS updates
8 | # unless you are manually picking other mirrors.
9 | #
10 | # If the mirrorlist= does not work for you, as a fall back you can try the
11 | # remarked out baseurl= line instead.
12 | #
13 | #
14 |
15 | [base]
16 | name=CentOS-$releasever - Base - mirrors.aliyun.com
17 | failovermethod=priority
18 | baseurl=http://mirrors.aliyun.com/centos/$releasever/os/$basearch/
19 | http://mirrors.aliyuncs.com/centos/$releasever/os/$basearch/
20 | http://mirrors.cloud.aliyuncs.com/centos/$releasever/os/$basearch/
21 | gpgcheck=1
22 | gpgkey=http://mirrors.aliyun.com/centos/RPM-GPG-KEY-CentOS-7
23 |
24 | #released updates
25 | [updates]
26 | name=CentOS-$releasever - Updates - mirrors.aliyun.com
27 | failovermethod=priority
28 | baseurl=http://mirrors.aliyun.com/centos/$releasever/updates/$basearch/
29 | http://mirrors.aliyuncs.com/centos/$releasever/updates/$basearch/
30 | http://mirrors.cloud.aliyuncs.com/centos/$releasever/updates/$basearch/
31 | gpgcheck=1
32 | gpgkey=http://mirrors.aliyun.com/centos/RPM-GPG-KEY-CentOS-7
33 |
34 | #additional packages that may be useful
35 | [extras]
36 | name=CentOS-$releasever - Extras - mirrors.aliyun.com
37 | failovermethod=priority
38 | baseurl=http://mirrors.aliyun.com/centos/$releasever/extras/$basearch/
39 | http://mirrors.aliyuncs.com/centos/$releasever/extras/$basearch/
40 | http://mirrors.cloud.aliyuncs.com/centos/$releasever/extras/$basearch/
41 | gpgcheck=1
42 | gpgkey=http://mirrors.aliyun.com/centos/RPM-GPG-KEY-CentOS-7
43 |
44 | #additional packages that extend functionality of existing packages
45 | [centosplus]
46 | name=CentOS-$releasever - Plus - mirrors.aliyun.com
47 | failovermethod=priority
48 | baseurl=http://mirrors.aliyun.com/centos/$releasever/centosplus/$basearch/
49 | http://mirrors.aliyuncs.com/centos/$releasever/centosplus/$basearch/
50 | http://mirrors.cloud.aliyuncs.com/centos/$releasever/centosplus/$basearch/
51 | gpgcheck=1
52 | enabled=0
53 | gpgkey=http://mirrors.aliyun.com/centos/RPM-GPG-KEY-CentOS-7
54 |
55 | #contrib - packages by Centos Users
56 | [contrib]
57 | name=CentOS-$releasever - Contrib - mirrors.aliyun.com
58 | failovermethod=priority
59 | baseurl=http://mirrors.aliyun.com/centos/$releasever/contrib/$basearch/
60 | http://mirrors.aliyuncs.com/centos/$releasever/contrib/$basearch/
61 | http://mirrors.cloud.aliyuncs.com/centos/$releasever/contrib/$basearch/
62 | gpgcheck=1
63 | enabled=0
64 | gpgkey=http://mirrors.aliyun.com/centos/RPM-GPG-KEY-CentOS-7
--------------------------------------------------------------------------------
/roles/kylin/templates/kylin_job_conf_inmem.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <configuration>
3 |
4 |   <property>
5 |     <name>mapreduce.job.split.metainfo.maxsize</name>
6 |     <value>-1</value>
7 |     <description>The maximum permissible size of the split metainfo file.
8 |     The JobTracker won't attempt to read split metainfo files bigger than
9 |     the configured value. No limits if set to -1.
10 |     </description>
11 |   </property>
12 |
13 |   <property>
14 |     <name>mapreduce.map.output.compress</name>
15 |     <value>true</value>
16 |     <description>Compress map outputs</description>
17 |   </property>
18 |
19 |   <property>
20 |     <name>mapreduce.output.fileoutputformat.compress</name>
21 |     <value>true</value>
22 |     <description>Compress the output of a MapReduce job</description>
23 |   </property>
24 |
25 |   <property>
26 |     <name>mapreduce.output.fileoutputformat.compress.type</name>
27 |     <value>BLOCK</value>
28 |     <description>The compression type to use for job outputs</description>
29 |   </property>
30 |
31 |   <property>
32 |     <name>mapreduce.job.max.split.locations</name>
33 |     <value>2000</value>
34 |     <description>No description</description>
35 |   </property>
36 |
37 |   <property>
38 |     <name>dfs.replication</name>
39 |     <value>2</value>
40 |     <description>Block replication</description>
41 |   </property>
42 |
43 |   <property>
44 |     <name>mapreduce.task.timeout</name>
45 |     <value>7200000</value>
46 |     <description>Set task timeout to 2 hours</description>
47 |   </property>
48 |
49 |   <property>
50 |     <name>mapreduce.map.memory.mb</name>
51 |     <value>3072</value>
52 |     <description></description>
53 |   </property>
54 |
55 |   <property>
56 |     <name>mapreduce.map.java.opts</name>
57 |     <value>-Xmx2700m -XX:OnOutOfMemoryError='kill -9 %p'</value>
58 |     <description></description>
59 |   </property>
60 |
61 |   <property>
62 |     <name>mapreduce.task.io.sort.mb</name>
63 |     <value>200</value>
64 |     <description></description>
65 |   </property>
66 |
67 | </configuration>
--------------------------------------------------------------------------------
/roles/common/tasks/main.yml:
--------------------------------------------------------------------------------
1 | # Gather the nodes' ansible setup facts up front as a cache; otherwise later when checks may fail
2 | - name: Cache ansible setup facts
3 | setup: gather_subset=min
4 |
5 | - name: Switch yum to the 163 mirror
6 | copy: src=CentOS-7-163.repo dest=/etc/yum.repos.d/CentOS-Base.repo
7 | when: v_update_yum_with_163=="true"
8 |
9 | - name: Switch yum to the aliyun mirror
10 | copy: src=CentOS-7-aliyun.repo dest=/etc/yum.repos.d/CentOS-Base.repo
11 | when: v_update_yum_with_aliyun=="true"
12 |
13 | # For CentOS releases below 7.5, run a full yum update (kernel included) first
14 | - name: Upgrade the system kernel
15 | shell: "yum -y update"
16 | when: v_update_kernel=="true" and ansible_distribution_major_version|int == 7 and ansible_distribution_version < "7.5"
17 |
18 | - name: Install required packages
19 | yum: update_cache=yes name={{ item }} state=present
20 | with_items:
21 | - wget
22 | - ntp
23 | - ntpdate
24 | - git
25 | - tar
26 | - rpcbind
27 | - telnet
28 | - vim
29 | - net-tools
30 | - bind-utils
31 | - lrzsz
32 | - epel-release
33 | - bash-completion
34 | - python-pip
35 | when: v_yum_install=="true"
36 |
37 | - name: Upgrade Psycopg2
38 | shell: "pip install --upgrade psycopg2"
39 |
40 | - name: Restart the rpcbind service and enable it at boot
41 | systemd:
42 | name: rpcbind
43 | daemon_reload: yes
44 | state: restarted
45 | enabled: yes
46 |
47 | # Commands to check sync status:
48 | # ntpq -p
49 | # ntpstat
50 | - name: Restart the ntpd service and enable it at boot
51 | systemd:
52 | name: ntpd
53 | daemon_reload: yes
54 | state: restarted
55 | enabled: yes
56 | when: v_ntpdate_install=="true"
57 |
58 | - name: Update the ntp configuration file
59 | copy: src=ntp.conf dest=/etc/ntp.conf
60 | when: v_ntpdate_install=="true"
61 |
62 | - name: Restart the ntpd service again to pick up the new configuration
63 | systemd:
64 | name: ntpd
65 | daemon_reload: yes
66 | state: restarted
67 | enabled: yes
68 | when: v_ntpdate_install=="true"
69 |
70 | - name: Synchronize the time immediately
71 | command: ntpdate -u {{ v_ntpdate_address }}
72 | when: v_ntpdate_install=="true"
73 |
74 | # List cron jobs: crontab -l
75 | # Cron file location: cat /var/spool/cron/root
76 | # centos7, read the time: timedatectl
77 | # centos7, set the time: timedatectl set-ntp no && timedatectl set-time "1982-01-01 00:00:00" && timedatectl set-ntp yes
78 | # Check the job's execution logs:
79 | # tail -n 500 /var/log/cron
80 | # tail -n 500 /var/spool/mail/root
81 | - name: Set up a time-sync cron job
82 | cron:
83 | name: "time sync"
84 | minute: "*/30"
85 | user: root
86 | # hwclock -w: write the NTP-synced system time to the hardware clock
87 | job: "/sbin/ntpdate -u {{ v_ntpdate_address }}; /sbin/hwclock -w"
88 | when: v_ntpdate_install=="true"
89 |
90 | - name: Enable ntp time synchronization
91 | shell: "timedatectl set-ntp yes"
92 | when: v_ntpdate_install=="true"
93 |
94 | - name: Set the hostname
95 | hostname: name={{ node_name }}
96 | when: v_enable_set_hostname=="true"
97 |
98 | - name: Configure the FQDN
99 | lineinfile:
100 | dest: /etc/sysconfig/network
101 | regexp: 'HOSTNAME'
102 | line: 'HOSTNAME={{ ansible_hostname }}'
103 |
104 | # Remove packages installed by default
105 | - name: Remove the CentOS firewall
106 | yum: name={{ item }} state=absent
107 | with_items:
108 | - firewalld
109 | - firewalld-filesystem
110 | - python-firewall
111 | when: ansible_distribution == "CentOS"
112 |
113 | - name: Disable selinux
114 | selinux: state=disabled
115 |
116 | - name: Update the cluster hosts file
117 | lineinfile:
118 | dest: /etc/hosts
119 | regexp: '{{item.key}}'
120 | line: '{{item.value}} {{item.key}}'
121 | with_dict: '{{ v_hosts }}'
122 | when: v_update_hosts=="true"
123 |
124 | # - name: Update the cluster hosts file
125 | # lineinfile:
126 | # dest: /etc/hosts
127 | # regexp: "{{ item }}"
128 | # line: "{{ item }}"
129 | # with_items: "{{ groups['cdh-cluster'] }}"
130 | # when: v_update_hosts=="true"
131 |
132 | # Cloudera recommends setting /proc/sys/vm/swappiness to at most 10; these servers default to 30.
133 | # Use the sysctl command to change the setting at runtime, and edit /etc/sysctl.conf so it survives reboots.
134 | # You can proceed with the installation anyway, but Cloudera Manager may report the hosts as unhealthy due to swapping.
135 | - name: Lower vm.swappiness to reduce swapping to disk
136 | lineinfile:
137 | dest: /etc/sysctl.conf
138 | regexp: "vm.swappiness"
139 | line: "vm.swappiness={{ v_vm_swappiness }}"
140 |
141 | - name: Apply the swappiness setting
142 | shell: "sysctl -p /etc/sysctl.conf"
143 |
144 | # Transparent hugepage defrag is enabled, which can cause serious performance problems.
145 | # Run "echo never > /sys/kernel/mm/transparent_hugepage/defrag"
146 | # and "echo never > /sys/kernel/mm/transparent_hugepage/enabled" to disable it,
147 | # then add the same commands to an init script such as /etc/rc.local so they are reapplied after reboot.
148 | # https://blog.csdn.net/csfreebird/article/details/49307935
149 | - name: Disable transparent hugepage defrag
150 | shell: "echo never > /sys/kernel/mm/transparent_hugepage/defrag && \
151 | echo never > /sys/kernel/mm/transparent_hugepage/enabled"
152 |
153 | - name: Permanently disable transparent hugepage defrag
154 | lineinfile:
155 | dest: /etc/rc.local
156 | regexp: "transparent_hugepage"
157 | line: "echo never > /sys/kernel/mm/transparent_hugepage/defrag && echo never > /sys/kernel/mm/transparent_hugepage/enabled"
158 |
--------------------------------------------------------------------------------
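A quick way to confirm the kernel tweaks after the role has run (a sketch):

  cat /proc/sys/vm/swappiness                        # expect 10
  cat /sys/kernel/mm/transparent_hugepage/enabled    # expect: always madvise [never]
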
/roles/cm/templates/config.ini:
--------------------------------------------------------------------------------
1 | [General]
2 | # Hostname of the CM server.
3 | server_host={{ v_server_ip }}
4 |
5 | # Port that the CM server is listening on.
6 | server_port=7182
7 |
8 | ## It should not normally be necessary to modify these.
9 | # Port that the CM agent should listen on.
10 | # listening_port=9000
11 |
12 | # IP Address that the CM agent should listen on.
13 | # listening_ip=
14 |
15 | # Hostname that the CM agent reports as its hostname. If unset, will be
16 | # obtained in code through something like this:
17 | #
18 | # python -c 'import socket; \
19 | # print socket.getfqdn(), \
20 | # socket.gethostbyname(socket.getfqdn())'
21 | #
22 | # listening_hostname=
23 |
24 | # An alternate hostname to report as the hostname for this host in CM.
25 | # Useful when this agent is behind a load balancer or proxy and all
26 | # inbound communication must connect through that proxy.
27 | # reported_hostname=
28 |
29 | # Port that supervisord should listen on.
30 | # NB: This only takes effect if supervisord is restarted.
31 | # supervisord_port=19001
32 |
33 | # Log file. The supervisord log file will be placed into
34 | # the same directory. Note that if the agent is being started via the
35 | # init.d script, /var/log/cloudera-scm-agent/cloudera-scm-agent.out will
36 | # also have a small amount of output (from before logging is initialized).
37 | # log_file=/var/log/cloudera-scm-agent/cloudera-scm-agent.log
38 |
39 | # Persistent state directory. Directory to store CM agent state that
40 | # persists across instances of the agent process and system reboots.
41 | # Particularly, the agent's UUID is stored here.
42 | # lib_dir=/var/lib/cloudera-scm-agent
43 |
44 | # Parcel directory. Unpacked parcels will be stored in this directory.
45 | # Downloaded parcels will be stored in /../parcel-cache
46 | # parcel_dir=/opt/cloudera/parcels
47 |
48 | # Enable supervisord event monitoring. Used in eager heartbeating, amongst
49 | # other things.
50 | # enable_supervisord_events=true
51 |
52 | # Maximum time to wait (in seconds) for all metric collectors to finish
53 | # collecting data.
54 | max_collection_wait_seconds=10.0
55 |
56 | # Maximum time to wait (in seconds) when connecting to a local role's
57 | # webserver to fetch metrics.
58 | metrics_url_timeout_seconds=30.0
59 |
60 | # Maximum time to wait (in seconds) when connecting to a local TaskTracker
61 | # to fetch task attempt data.
62 | task_metrics_timeout_seconds=5.0
63 |
64 | # The list of non-device (nodev) filesystem types which will be monitored.
65 | monitored_nodev_filesystem_types=nfs,nfs4,tmpfs
66 |
67 | # The list of filesystem types which are considered local for monitoring purposes.
68 | # These filesystems are combined with the other local filesystem types found in
69 | # /proc/filesystems
70 | local_filesystem_whitelist=ext2,ext3,ext4,xfs
71 |
72 | # The largest size impala profile log bundle that this agent will serve to the
73 | # CM server. If the CM server requests more than this amount, the bundle will
74 | # be limited to this size. All instances of this limit being hit are logged to
75 | # the agent log.
76 | impala_profile_bundle_max_bytes=1073741824
77 |
78 | # The largest size stacks log bundle that this agent will serve to the CM
79 | # server. If the CM server requests more than this amount, the bundle will be
80 | # limited to this size. All instances of this limit being hit are logged to the
81 | # agent log.
82 | stacks_log_bundle_max_bytes=1073741824
83 |
84 | # The size to which the uncompressed portion of a stacks log can grow before it
85 | # is rotated. The log will then be compressed during rotation.
86 | stacks_log_max_uncompressed_file_size_bytes=5242880
87 |
88 | # The orphan process directory staleness threshold. If a directory is more stale
89 | # than this amount of seconds, CM agent will remove it.
90 | orphan_process_dir_staleness_threshold=5184000
91 |
92 | # The orphan process directory refresh interval. The CM agent will check the
93 | # staleness of the orphan processes config directory every this amount of
94 | # seconds.
95 | orphan_process_dir_refresh_interval=3600
96 |
97 | # A knob to control the agent logging level. The options are listed as follows:
98 | # 1) DEBUG (set the agent logging level to 'logging.DEBUG')
99 | # 2) INFO (set the agent logging level to 'logging.INFO')
100 | scm_debug=INFO
101 |
102 | # The DNS resolution collection interval in seconds. A java-based test program
103 | # will be executed with at most this frequency to collect java DNS resolution
104 | # metrics. The test program is only executed if the associated health test,
105 | # Host DNS Resolution, is enabled.
106 | dns_resolution_collection_interval_seconds=60
107 |
108 | # The maximum time to wait (in seconds) for the java test program to collect
109 | # java DNS resolution metrics.
110 | dns_resolution_collection_timeout_seconds=30
111 |
112 | # The directory location in which the agent-wide kerberos credential cache
113 | # will be created.
114 | # agent_wide_credential_cache_location=/var/run/cloudera-scm-agent
115 |
116 | [Security]
117 | # Use TLS and certificate validation when connecting to the CM server.
118 | use_tls=0
119 |
120 | # The maximum allowed depth of the certificate chain returned by the peer.
121 | # The default value of 9 matches the default specified in openssl's
122 | # SSL_CTX_set_verify.
123 | max_cert_depth=9
124 |
125 | # A file of CA certificates in PEM format. The file can contain several CA
126 | # certificates identified by
127 | #
128 | # -----BEGIN CERTIFICATE-----
129 | # ... (CA certificate in base64 encoding) ...
130 | # -----END CERTIFICATE-----
131 | #
132 | # sequences. Before, between, and after the certificates text is allowed which
133 | # can be used e.g. for descriptions of the certificates.
134 | #
135 | # The file is loaded once, the first time an HTTPS connection is attempted. A
136 | # restart of the agent is required to pick up changes to the file.
137 | #
138 | # Note that if neither verify_cert_file or verify_cert_dir is set, certificate
139 | # verification will not be performed.
140 | # verify_cert_file=
141 |
142 | # Directory containing CA certificates in PEM format. The files each contain one
143 | # CA certificate. The files are looked up by the CA subject name hash value,
144 | # which must hence be available. If more than one CA certificate with the same
145 | # name hash value exist, the extension must be different (e.g. 9d66eef0.0,
146 | # 9d66eef0.1 etc). The search is performed in the ordering of the extension
147 | # number, regardless of other properties of the certificates. Use the c_rehash
148 | # utility to create the necessary links.
149 | #
150 | # The certificates in the directory are only looked up when required, e.g. when
151 | # building the certificate chain or when actually performing the verification
152 | # of a peer certificate. The contents of the directory can thus be changed
153 | # without an agent restart.
154 | #
155 | # When looking up CA certificates, the verify_cert_file is first searched, then
156 | # those in the directory. Certificate matching is done based on the subject name,
157 | # the key identifier (if present), and the serial number as taken from the
158 | # certificate to be verified. If these data do not match, the next certificate
159 | # will be tried. If a first certificate matching the parameters is found, the
160 | # verification process will be performed; no other certificates for the same
161 | # parameters will be searched in case of failure.
162 | #
163 | # Note that if neither verify_cert_file or verify_cert_dir is set, certificate
164 | # verification will not be performed.
165 | # verify_cert_dir=
166 |
167 | # PEM file containing client private key.
168 | # client_key_file=
169 |
170 | # A command to run which returns the client private key password on stdout
171 | # client_keypw_cmd=
172 |
173 | # If client_keypw_cmd isn't specified, instead a text file containing
174 | # the client private key password can be used.
175 | # client_keypw_file=
176 |
177 | # PEM file containing client certificate.
178 | # client_cert_file=
179 |
180 | ## Location of Hadoop files. These are the CDH locations when installed by
181 | ## packages. Unused when CDH is installed by parcels.
182 | [Hadoop]
183 | #cdh_crunch_home=/usr/lib/crunch
184 | #cdh_flume_home=/usr/lib/flume-ng
185 | #cdh_hadoop_bin=/usr/bin/hadoop
186 | #cdh_hadoop_home=/usr/lib/hadoop
187 | #cdh_hbase_home=/usr/lib/hbase
188 | #cdh_hbase_indexer_home=/usr/lib/hbase-solr
189 | #cdh_hcat_home=/usr/lib/hive-hcatalog
190 | #cdh_hdfs_home=/usr/lib/hadoop-hdfs
191 | #cdh_hive_home=/usr/lib/hive
192 | #cdh_httpfs_home=/usr/lib/hadoop-httpfs
193 | #cdh_hue_home=/usr/share/hue
194 | #cdh_hue_plugins_home=/usr/lib/hadoop
195 | #cdh_impala_home=/usr/lib/impala
196 | #cdh_kudu_home=/usr/lib/kudu
197 | #cdh_llama_home=/usr/lib/llama
198 | #cdh_mr1_home=/usr/lib/hadoop-0.20-mapreduce
199 | #cdh_mr2_home=/usr/lib/hadoop-mapreduce
200 | #cdh_oozie_home=/usr/lib/oozie
201 | #cdh_parquet_home=/usr/lib/parquet
202 | #cdh_pig_home=/usr/lib/pig
203 | #cdh_solr_home=/usr/lib/solr
204 | #cdh_spark_home=/usr/lib/spark
205 | #cdh_sqoop_home=/usr/lib/sqoop
206 | #cdh_sqoop2_home=/usr/lib/sqoop2
207 | #cdh_yarn_home=/usr/lib/hadoop-yarn
208 | #cdh_zookeeper_home=/usr/lib/zookeeper
209 | #hive_default_xml=/etc/hive/conf.dist/hive-default.xml
210 | #webhcat_default_xml=/etc/hive-webhcat/conf.dist/webhcat-default.xml
211 | #jsvc_home=/usr/libexec/bigtop-utils
212 | #tomcat_home=/usr/lib/bigtop-tomcat
213 | #oracle_home=/usr/share/oracle/instantclient
214 |
215 | ## Location of Cloudera Management Services files.
216 | [Cloudera]
217 | #mgmt_home=/usr/share/cmf
218 |
219 | ## Location of JDBC Drivers.
220 | [JDBC]
221 | #cloudera_mysql_connector_jar=/usr/share/java/mysql-connector-java.jar
222 | #cloudera_oracle_connector_jar=/usr/share/java/oracle-connector-java.jar
223 | #By default, postgres jar is found dynamically in $MGMT_HOME/lib
224 | #cloudera_postgresql_jdbc_jar=
--------------------------------------------------------------------------------
/roles/kylin/templates/kylin.properties:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed to the Apache Software Foundation (ASF) under one or more
3 | # contributor license agreements. See the NOTICE file distributed with
4 | # this work for additional information regarding copyright ownership.
5 | # The ASF licenses this file to You under the Apache License, Version 2.0
6 | # (the "License"); you may not use this file except in compliance with
7 | # the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 |
18 |
19 |
20 |
21 | # The commented values below take effect as the default settings
22 | # Uncomment and override them if necessary
23 |
24 |
25 |
26 | #
27 | #### METADATA | ENV ###
28 | #
29 | ## The metadata store in hbase
30 | #kylin.metadata.url=kylin_metadata@hbase
31 | #
32 | ## metadata cache sync retry times
33 | #kylin.metadata.sync-retries=3
34 | #
35 | ## Working folder in HDFS, better be qualified absolute path, make sure user has the right permission to this directory
36 | #kylin.env.hdfs-working-dir=/kylin
37 | #
38 | ## DEV|QA|PROD. DEV will turn on some dev features, QA and PROD has no difference in terms of functions.
39 | #kylin.env=QA
40 | #
41 | ## kylin zk base path
42 | #kylin.env.zookeeper-base-path=/kylin
43 | #
44 | #### SERVER | WEB | RESTCLIENT ###
45 | #
46 | ## Kylin server mode, valid value [all, query, job]
47 | #kylin.server.mode=all
48 | #
49 | ## List of web servers in use, this enables one web server instance to sync up with other servers.
50 | #kylin.server.cluster-servers=localhost:7070
51 | #
52 | ## Display timezone on UI, format like [GMT+N or GMT-N]
53 | #kylin.web.timezone=GMT+8
54 | #
55 | ## Timeout value for the queries submitted through the Web UI, in milliseconds
56 | #kylin.web.query-timeout=300000
57 | #
58 | #kylin.web.cross-domain-enabled=true
59 | #
60 | ##allow user to export query result
61 | #kylin.web.export-allow-admin=true
62 | #kylin.web.export-allow-other=true
63 | #
64 | ## Hide measures in measure list of cube designer, separate by comma
65 | #kylin.web.hide-measures=RAW
66 | #
67 | ##max connections of one route
68 | #kylin.restclient.connection.default-max-per-route=20
69 | #
70 | ##max connections of one rest-client
71 | #kylin.restclient.connection.max-total=200
72 | #
73 | #### PUBLIC CONFIG ###
74 | #kylin.engine.default=2
75 | #kylin.storage.default=2
76 | #kylin.web.hive-limit=20
77 | #kylin.web.help.length=4
78 | #kylin.web.help.0=start|Getting Started|http://kylin.apache.org/docs21/tutorial/kylin_sample.html
79 | #kylin.web.help.1=odbc|ODBC Driver|http://kylin.apache.org/docs21/tutorial/odbc.html
80 | #kylin.web.help.2=tableau|Tableau Guide|http://kylin.apache.org/docs21/tutorial/tableau_91.html
81 | #kylin.web.help.3=onboard|Cube Design Tutorial|http://kylin.apache.org/docs21/howto/howto_optimize_cubes.html
82 | #kylin.web.link-streaming-guide=http://kylin.apache.org/
83 | #kylin.htrace.show-gui-trace-toggle=false
84 | #kylin.web.link-hadoop=
85 | #kylin.web.link-diagnostic=
86 | #kylin.web.contact-mail=
87 | #kylin.server.external-acl-provider=
88 | #
89 | #### SOURCE ###
90 | #
91 | ## Hive client, valid value [cli, beeline]
92 | #kylin.source.hive.client=cli
93 | #
94 | ## Absolute path to beeline shell, can be set to spark beeline instead of the default hive beeline on PATH
95 | #kylin.source.hive.beeline-shell=beeline
96 | #
97 | ## Parameters for beeline client, only necessary if hive client is beeline
98 | ##kylin.source.hive.beeline-params=-n root --hiveconf hive.security.authorization.sqlstd.confwhitelist.append='mapreduce.job.*|dfs.*' -u jdbc:hive2://localhost:10000
99 | #
100 | ## While hive client uses above settings to read hive table metadata,
101 | ## table operations can go through a separate SparkSQL command line, given SparkSQL connects to the same Hive metastore.
102 | #kylin.source.hive.enable-sparksql-for-table-ops=false
103 | ##kylin.source.hive.sparksql-beeline-shell=/path/to/spark-client/bin/beeline
104 | ##kylin.source.hive.sparksql-beeline-params=-n root --hiveconf hive.security.authorization.sqlstd.confwhitelist.append='mapreduce.job.*|dfs.*' -u jdbc:hive2://localhost:10000
105 | #
106 | #kylin.source.hive.keep-flat-table=false
107 | #
108 | ## Hive database name for putting the intermediate flat tables
109 | #kylin.source.hive.database-for-flat-table=default
110 | #
111 | ## Whether redistribute the intermediate flat table before building
112 | #kylin.source.hive.redistribute-flat-table=true
113 | #
114 | #
115 | #### STORAGE ###
116 | #
117 | ## The storage for final cube file in hbase
118 | #kylin.storage.url=hbase
119 | #
120 | ## The prefix for HBase table names
121 | #kylin.storage.hbase.table-name-prefix=KYLIN_
122 | #
123 | ## The namespace for hbase storage
124 | #kylin.storage.hbase.namespace=default
125 | #
126 | ## Compression codec for htable, valid value [none, snappy, lzo, gzip, lz4]
127 | #kylin.storage.hbase.compression-codec=none
128 | #
129 | ## The filesystem serving the HBase cluster, in the format hdfs://hbase-cluster:8020
130 | ## Leave empty if HBase runs on the same cluster as Hive and MapReduce
131 | ##kylin.storage.hbase.cluster-fs=
132 | #
133 | ## The cut size for hbase region, in GB.
134 | #kylin.storage.hbase.region-cut-gb=5
135 | #
136 | ## The hfile size in GB; a smaller hfile gives the hfile-converting MR job more reducers, making it faster.
137 | ## Set 0 to disable this optimization.
138 | #kylin.storage.hbase.hfile-size-gb=2
139 | #
140 | #kylin.storage.hbase.min-region-count=1
141 | #kylin.storage.hbase.max-region-count=500
142 | #
143 | ## Optional owner information for the Kylin platform, e.g. your team's email
144 | ## Currently it is attached to each Kylin htable as an attribute
145 | #kylin.storage.hbase.owner-tag=whoami@kylin.apache.org
146 | #
147 | #kylin.storage.hbase.coprocessor-mem-gb=3
148 | #
149 | ## By default Kylin spills a query's intermediate results to disk when it consumes too much memory.
150 | ## Set it to false if you want the query to abort immediately in that condition.
151 | #kylin.storage.partition.aggr-spill-enabled=true
152 | #
153 | ## The maximum number of bytes each coprocessor is allowed to scan.
154 | ## To allow arbitrarily large scans, set it to 0.
155 | #kylin.storage.partition.max-scan-bytes=3221225472
156 | #
157 | ## The default coprocessor timeout is (hbase.rpc.timeout * 0.9) / 1000 seconds,
158 | ## You can set it to a smaller value. 0 means use default.
159 | ## kylin.storage.hbase.coprocessor-timeout-seconds=0
160 | #
161 | #
162 | #### JOB ###
163 | #
164 | ## Max job retry on error, default 0: no retry
165 | #kylin.job.retry=0
166 | #
167 | ## Max count of concurrent jobs running
168 | #kylin.job.max-concurrent-jobs=10
169 | #
170 | ## The sampling percentage, default 100%
171 | #kylin.job.sampling-percentage=100
172 | #
173 | ## If true, will send email notification on job complete
174 | ##kylin.job.notification-enabled=true
175 | ##kylin.job.notification-mail-enable-starttls=true
176 | ##kylin.job.notification-mail-host=smtp.office365.com
177 | ##kylin.job.notification-mail-port=587
178 | ##kylin.job.notification-mail-username=kylin@example.com
179 | ##kylin.job.notification-mail-password=mypassword
180 | ##kylin.job.notification-mail-sender=kylin@example.com
181 | #
182 | #
183 | #### ENGINE ###
184 | #
185 | ## Time interval to check hadoop job status
186 | #kylin.engine.mr.yarn-check-interval-seconds=10
187 | #
188 | #kylin.engine.mr.reduce-input-mb=500
189 | #
190 | #kylin.engine.mr.max-reducer-number=500
191 | #
192 | #kylin.engine.mr.mapper-input-rows=1000000
193 | #
194 | ## Enable dictionary building in MR reducer
195 | #kylin.engine.mr.build-dict-in-reducer=true
196 | #
197 | ## Number of reducers for fetching UHC column distinct values
198 | #kylin.engine.mr.uhc-reducer-count=1
199 | #
200 | ## Whether to use an additional step to build the UHC dictionary
201 | #kylin.engine.mr.build-uhc-dict-in-additional-step=false
202 | #
203 | #
204 | #### CUBE | DICTIONARY ###
205 | #
206 | #kylin.cube.cuboid-scheduler=org.apache.kylin.cube.cuboid.DefaultCuboidScheduler
207 | #kylin.cube.segment-advisor=org.apache.kylin.cube.CubeSegmentAdvisor
208 | #
209 | ## 'auto', 'inmem', 'layer' or 'random' for testing
210 | #kylin.cube.algorithm=layer
211 | #
212 | ## A smaller threshold prefers layer, a larger threshold prefers in-mem
213 | #kylin.cube.algorithm.layer-or-inmem-threshold=7
214 | #
215 | #kylin.cube.aggrgroup.max-combination=4096
216 | #
217 | #kylin.snapshot.max-mb=300
218 | #
219 | #kylin.cube.cubeplanner.enabled=false
220 | #kylin.cube.cubeplanner.enabled-for-existing-cube=false
221 | #kylin.cube.cubeplanner.expansion-threshold=15.0
222 | #kylin.cube.cubeplanner.recommend-cache-max-size=200
223 | #kylin.cube.cubeplanner.mandatory-rollup-threshold=1000
224 | #kylin.cube.cubeplanner.algorithm-threshold-greedy=10
225 | #kylin.cube.cubeplanner.algorithm-threshold-genetic=23
226 | #
227 | #
228 | #### QUERY ###
229 | #
230 | ## Controls the maximum number of bytes a query is allowed to scan from storage.
231 | ## The default value 0 means no limit.
232 | ## The counterpart kylin.storage.partition.max-scan-bytes sets the maximum per coprocessor.
233 | #kylin.query.max-scan-bytes=0
234 | #
235 | #kylin.query.cache-enabled=true
236 | #
237 | ## TABLE ACL
238 | #kylin.query.security.table-acl-enabled=true
239 | #
240 | ## Usually should not modify this
241 | #kylin.query.interceptors=org.apache.kylin.rest.security.TableInterceptor
242 | #
243 | #kylin.query.escape-default-keyword=false
244 | #
245 | ## Usually should not modify this
246 | #kylin.query.transformers=org.apache.kylin.query.util.DefaultQueryTransformer,org.apache.kylin.query.util.KeywordDefaultDirtyHack
247 | #
248 | #### SECURITY ###
249 | #
250 | ## Spring security profile, options: testing, ldap, saml
251 | ## With the "testing" profile, users can log in with a predefined name/password such as ADMIN/KYLIN
252 | #kylin.security.profile=testing
253 | #
254 | ## Admin roles in LDAP, for ldap and saml
255 | #kylin.security.acl.admin-role=admin
256 | #
257 | ## LDAP authentication configuration
258 | #kylin.security.ldap.connection-server=ldap://ldap_server:389
259 | #kylin.security.ldap.connection-username=
260 | #kylin.security.ldap.connection-password=
261 | #
262 | ## LDAP user account directory
263 | #kylin.security.ldap.user-search-base=
264 | #kylin.security.ldap.user-search-pattern=
265 | #kylin.security.ldap.user-group-search-base=
266 | #kylin.security.ldap.user-group-search-filter=(|(member={0})(memberUid={1}))
267 | #
268 | ## LDAP service account directory
269 | #kylin.security.ldap.service-search-base=
270 | #kylin.security.ldap.service-search-pattern=
271 | #kylin.security.ldap.service-group-search-base=
272 | #
273 | ### SAML configurations for SSO
274 | ## SAML IDP metadata file location
275 | #kylin.security.saml.metadata-file=classpath:sso_metadata.xml
276 | #kylin.security.saml.metadata-entity-base-url=https://hostname/kylin
277 | #kylin.security.saml.keystore-file=classpath:samlKeystore.jks
278 | #kylin.security.saml.context-scheme=https
279 | #kylin.security.saml.context-server-name=hostname
280 | #kylin.security.saml.context-server-port=443
281 | #kylin.security.saml.context-path=/kylin
282 | #
283 | #### SPARK ENGINE CONFIGS ###
284 | #
285 | ## Hadoop conf folder; exported as "HADOOP_CONF_DIR" when running spark-submit
286 | ## It must contain the core, yarn, hive, and hbase site xmls in one folder
287 | ##kylin.env.hadoop-conf-dir=/etc/hadoop/conf
288 | #
289 | ## Used to estimate the number of RDD partitions
290 | #kylin.engine.spark.rdd-partition-cut-mb=10
291 | #
292 | ## Minimum number of RDD partitions
293 | #kylin.engine.spark.min-partition=1
294 | #
295 | ## Maximum number of RDD partitions
296 | #kylin.engine.spark.max-partition=5000
297 | #
298 | ## Spark conf (default is in spark/conf/spark-defaults.conf)
299 | #kylin.engine.spark-conf.spark.master=yarn
300 | ##kylin.engine.spark-conf.spark.submit.deployMode=cluster
301 | #kylin.engine.spark-conf.spark.yarn.queue=default
302 | #kylin.engine.spark-conf.spark.executor.memory=1G
303 | #kylin.engine.spark-conf.spark.executor.cores=2
304 | #kylin.engine.spark-conf.spark.executor.instances=1
305 | #kylin.engine.spark-conf.spark.eventLog.enabled=true
306 | #kylin.engine.spark-conf.spark.eventLog.dir=hdfs\:///kylin/spark-history
307 | #kylin.engine.spark-conf.spark.history.fs.logDirectory=hdfs\:///kylin/spark-history
308 | #kylin.engine.spark-conf.spark.hadoop.yarn.timeline-service.enabled=false
309 | #
310 | ## Manually upload the spark-assembly jar to HDFS and set this property to avoid re-uploading the jar at runtime
311 | ##kylin.engine.spark-conf.spark.yarn.archive=hdfs://namenode:8020/kylin/spark/spark-libs.jar
312 | ##kylin.engine.spark-conf.spark.io.compression.codec=org.apache.spark.io.SnappyCompressionCodec
313 | #
314 | ## uncomment for HDP
315 | ##kylin.engine.spark-conf.spark.driver.extraJavaOptions=-Dhdp.version=current
316 | ##kylin.engine.spark-conf.spark.yarn.am.extraJavaOptions=-Dhdp.version=current
317 | ##kylin.engine.spark-conf.spark.executor.extraJavaOptions=-Dhdp.version=current
318 | #
319 | #
320 | #### QUERY PUSH DOWN ###
321 | #
322 | ##kylin.query.pushdown.runner-class-name=org.apache.kylin.query.adhoc.PushDownRunnerJdbcImpl
323 | #
324 | ##kylin.query.pushdown.update-enabled=false
325 | ##kylin.query.pushdown.jdbc.url=jdbc:hive2://sandbox:10000/default
326 | ##kylin.query.pushdown.jdbc.driver=org.apache.hive.jdbc.HiveDriver
327 | ##kylin.query.pushdown.jdbc.username=hive
328 | ##kylin.query.pushdown.jdbc.password=
329 | #
330 | ##kylin.query.pushdown.jdbc.pool-max-total=8
331 | ##kylin.query.pushdown.jdbc.pool-max-idle=8
332 | ##kylin.query.pushdown.jdbc.pool-min-idle=0
333 | #
334 | #### JDBC Data Source
335 | ##kylin.source.jdbc.connection-url=
336 | ##kylin.source.jdbc.driver=
337 | ##kylin.source.jdbc.dialect=
338 | ##kylin.source.jdbc.user=
339 | ##kylin.source.jdbc.pass=
340 | ##kylin.source.jdbc.sqoop-home=
341 | ##kylin.source.jdbc.filed-delimiter=|
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CDH6
2 |
3 | CDH (Cloudera's Distribution Including Apache Hadoop) is a Hadoop distribution maintained by Cloudera. It is built on stable Apache Hadoop releases, incorporates many patches, and can be used directly in production.
4 |
5 | Because the installation involves multiple servers, the whole process is packaged in a project-driven form to make installing CDH6 easier and less error-prone.
6 |
7 | **Note: this project targets CentOS 7.5 + CDH 6.0.1.**
8 |
9 | Running this project assumes basic familiarity with:
10 |
11 | - linux
12 | - ansible
13 | - docker
14 |
15 | ## Installation
16 |
17 | Official installation guide: [Cloudera Enterprise 6.0.x Installation Guide](https://www.cloudera.com/documentation/enterprise/6/6.0/topics/installation.html)
18 |
19 | ### Preparation
20 |
21 | #### Prepare the servers for CDH6
22 |
23 | Hardware and software requirements: [Cloudera Enterprise 6 Requirements and Supported Versions](https://www.cloudera.com/documentation/enterprise/6/release-notes/topics/rg_requirements_supported_versions.html)
24 |
25 | IP | HostName | OS | Cores | Memory | Disk (GB) | Remark
26 | --------------|-----------------------------------------------------|------------|-------|--------|------|---------------
27 | 10.240.114.34 | bjds-kubernetes-node-pre-10-240-114-34-vm.belle.lan | CentOS 7.5 | 8 | 16G | 250 | Server & Agent
28 | 10.240.114.38 | bjds-kubernetes-node-pre-10-240-114-38-vm.belle.lan | CentOS 7.5 | 8 | 16G | 250 | Agent
29 | 10.240.114.65 | bjds-kubernetes-node-pre-10-240-114-65-vm.belle.lan | CentOS 7.5 | 8 | 16G | 250 | Agent
30 | 10.240.114.67 | bjds-kubernetes-node-pre-10-240-114-67-vm.belle.lan | CentOS 7.5 | 8 | 16G | 250 | Agent
31 | 10.240.114.54 | bjds-kubernetes-node-pre-10-240-114-54-vm.belle.lan | CentOS 7.5 | 8 | 16G | 250 | MySQL 5.7.24
32 | 10.240.114.45 | bjds-kubernetes-node-pre-10-240-114-45-vm.belle.lan | CentOS 7.5 | 8 | 16G | 250 | Download server
33 |
34 | #### Prepare the download server
35 |
36 | The Cloudera archive provides official download locations:
37 |
38 | - [cm6](https://archive.cloudera.com/cm6/6.0.1/redhat7/yum/RPMS/x86_64/)
39 | - [cdh6](https://archive.cloudera.com/cdh6/6.0.1/parcels/)
40 |
41 | These archives are slow to reach from mainland China and the packages are large, so the best approach is to stand up a similar download server on the intranet and fetch the packages once, which greatly speeds up the whole installation.
42 |
43 | To keep things simple, the download server runs in docker.
44 |
45 | First, [install docker + docker-compose](https://www.zorin.xin/docker-manual/install/Centos7.html).
46 |
47 | Then initialize the download server on that machine:
48 |
49 | ```sh
50 | # sfds = static file download service
51 |
52 | # Create the sfds config directory
53 | mkdir -p /data/docker_volumn/sfds
54 |
55 | # Create the data directory
56 | mkdir -p /data/sfds
57 |
58 | # Create the compose file directory
59 | mkdir -p /data/docker_compose
60 |
61 | # Write the sfds (nginx) config file
62 | tee /data/docker_volumn/sfds/nginx.conf <<-'EOF'
63 | worker_processes 1;
64 | pid /var/run/nginx.pid;
65 | events {
66 | worker_connections 1024;
67 | }
68 | http {
69 | include /etc/nginx/mime.types;
70 | default_type application/octet-stream;
71 | sendfile on;
72 | keepalive_timeout 65;
73 | server
74 | {
75 | listen 9000; # port
76 | server_name localhost; # server name
77 | root /usr/share/nginx/html; # root directory served by the index
78 | autoindex on; # enable directory listing
79 | autoindex_exact_size off; # show approximate sizes (KB/MB/GB) instead of exact bytes
80 | autoindex_localtime on; # show local time instead of GMT
81 | }
82 | }
83 | EOF
84 |
85 | # Write the compose file
86 | tee /data/docker_compose/docker-compose.yml <<-'EOF'
87 | version: "3"
88 | services:
89 | # file download server
90 | sfds:
91 | image: bjddd192/nginx:1.10.1
92 | container_name: sfds
93 | restart: always
94 | ports:
95 | - "8066:9000"
96 | environment:
97 | - TZ=Asia/Shanghai
98 | volumes:
99 | - /data/docker_volumn/sfds/nginx.conf:/etc/nginx/nginx.conf
100 | - /data/sfds:/usr/share/nginx/html
101 | network_mode: bridge
102 | EOF
103 |
104 | # Start the download server
105 | docker-compose -f /data/docker_compose/docker-compose.yml up -d
106 | ```
107 |
108 | Once the download server is up, open `http://serverIP:8066`; if the page loads, the download server was deployed successfully.
109 |
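You can also check from the command line (a quick sketch; 10.240.114.45 is the download server from the table above):

```sh
# The sfds container should be running, and the root listing should return HTTP 200
docker ps --filter name=sfds
curl -I http://10.240.114.45:8066/
```
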
110 | #### Download the packages
111 |
112 | [Cloudera Manager 6 Version and Download Information](https://www.cloudera.com/documentation/enterprise/6/release-notes/topics/rg_cm_6_version_download.html)
113 |
114 | Create local directories mirroring the official download paths:
115 |
116 | ```sh
117 | mkdir -p /data/sfds/cdh6/6.0.1/parcels
118 | mkdir -p /data/sfds/cm6/6.0.1/redhat7/yum/RPMS/x86_64
119 | ```
120 |
121 | Then download the official packages into the matching directories (a download sketch follows the tree below); the final layout looks like this:
122 |
123 | ```cmd
124 | $ tree /data/sfds/cdh6/6.0.1/parcels
125 | /data/sfds/cdh6/6.0.1/parcels
126 | |-- CDH-6.0.1-1.cdh6.0.1.p0.590678-el7.parcel
127 | |-- CDH-6.0.1-1.cdh6.0.1.p0.590678-el7.parcel.sha256
128 | `-- manifest.json
129 |
130 | $ tree /data/sfds/cm6/6.0.1/redhat7/yum/RPMS/x86_64
131 | /data/sfds/cm6/6.0.1/redhat7/yum/RPMS/x86_64
132 | |-- cloudera-manager-agent-6.0.1-610811.el7.x86_64.rpm
133 | |-- cloudera-manager-daemons-6.0.1-610811.el7.x86_64.rpm
134 | |-- cloudera-manager-server-6.0.1-610811.el7.x86_64.rpm
135 | |-- cloudera-manager-server-db-2-6.0.1-610811.el7.x86_64.rpm
136 | `-- oracle-j2sdk1.8-1.8.0+update141-1.x86_64.rpm
137 | ```
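
As a download sketch (assuming the official archive URLs above are reachable; otherwise fetch the files elsewhere and copy them over):

```sh
base_cdh=https://archive.cloudera.com/cdh6/6.0.1/parcels
base_cm=https://archive.cloudera.com/cm6/6.0.1/redhat7/yum/RPMS/x86_64

# CDH parcel, checksum, and manifest
cd /data/sfds/cdh6/6.0.1/parcels
wget $base_cdh/CDH-6.0.1-1.cdh6.0.1.p0.590678-el7.parcel
wget $base_cdh/CDH-6.0.1-1.cdh6.0.1.p0.590678-el7.parcel.sha256
wget $base_cdh/manifest.json

# Cloudera Manager RPMs
cd /data/sfds/cm6/6.0.1/redhat7/yum/RPMS/x86_64
wget $base_cm/cloudera-manager-agent-6.0.1-610811.el7.x86_64.rpm
wget $base_cm/cloudera-manager-daemons-6.0.1-610811.el7.x86_64.rpm
wget $base_cm/cloudera-manager-server-6.0.1-610811.el7.x86_64.rpm
wget $base_cm/cloudera-manager-server-db-2-6.0.1-610811.el7.x86_64.rpm
wget $base_cm/oracle-j2sdk1.8-1.8.0+update141-1.x86_64.rpm
```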
138 |
139 | #### Build a local YUM repository
140 |
141 | ```sh
142 | yum -y install createrepo
143 | cd /data/sfds/cm6/6.0.1/redhat7/yum
144 | createrepo .
145 |
146 | # Write the repo file
147 | tee /data/sfds/cm6/6.0.1/redhat7/yum/cloudera-manager.repo <<-'EOF'
148 | [cloudera-manager]
149 | name=Cloudera Manager 6.0.1
150 | baseurl=http://10.240.114.45:8066/cm6/6.0.1/redhat7/yum/
151 | gpgcheck=false
152 | enabled=true
153 | EOF
154 | ```
155 |
156 | Verify the repository:
157 |
158 | ```sh
159 | wget http://10.240.114.45:8066/cm6/6.0.1/redhat7/yum/cloudera-manager.repo -P /etc/yum.repos.d/
160 | rpm --import http://10.240.114.45:8066/cm6/6.0.1/redhat7/yum/RPM-GPG-KEY-cloudera
161 | yum makecache
162 | yum search cloudera
163 | yum search cloudera-manager-daemons cloudera-manager-agent cloudera-manager-server
164 | ```
165 |
166 | If the packages can be found, the local YUM repository is working.
167 |
168 | #### Database preparation
169 |
170 | Use MySQL 5.5.45+, 5.6.26+, or 5.7.6+; this environment uses 5.7.24.
171 |
172 | ```sql
173 | -- Drop the databases (only needed when redeploying)
174 | -- drop database db_cdh6_scm;
175 | -- drop database db_cdh6_amon;
176 | -- drop database db_cdh6_rmon;
177 | -- drop database db_cdh6_hue;
178 | -- drop database db_cdh6_metastore;
179 | -- drop database db_cdh6_sentry;
180 | -- drop database db_cdh6_nav;
181 | -- drop database db_cdh6_navms;
182 | -- drop database db_cdh6_oozie;
183 |
184 | -- Create the databases
185 | create database db_cdh6_scm default character set utf8 default collate utf8_general_ci;
186 | create database db_cdh6_amon default character set utf8 default collate utf8_general_ci;
187 | create database db_cdh6_rmon default character set utf8 default collate utf8_general_ci;
188 | create database db_cdh6_hue default character set utf8 default collate utf8_general_ci;
189 | create database db_cdh6_metastore default character set utf8 default collate utf8_general_ci;
190 | create database db_cdh6_sentry default character set utf8 default collate utf8_general_ci;
191 | create database db_cdh6_nav default character set utf8 default collate utf8_general_ci;
192 | create database db_cdh6_navms default character set utf8 default collate utf8_general_ci;
193 | create database db_cdh6_oozie default character set utf8 default collate utf8_general_ci;
194 |
195 | -- For simplicity this exercise uses one shared database user; in production, give each database its own user.
196 | grant all on db_cdh6_scm.* to 'user_cdh6'@'%' identified by '123456';
197 | grant all on db_cdh6_amon.* to 'user_cdh6'@'%' identified by '123456';
198 | grant all on db_cdh6_rmon.* to 'user_cdh6'@'%' identified by '123456';
199 | grant all on db_cdh6_hue.* to 'user_cdh6'@'%' identified by '123456';
200 | grant all on db_cdh6_metastore.* to 'user_cdh6'@'%' identified by '123456';
201 | grant all on db_cdh6_sentry.* to 'user_cdh6'@'%' identified by '123456';
202 | grant all on db_cdh6_nav.* to 'user_cdh6'@'%' identified by '123456';
203 | grant all on db_cdh6_navms.* to 'user_cdh6'@'%' identified by '123456';
204 | grant all on db_cdh6_oozie.* to 'user_cdh6'@'%' identified by '123456';
205 |
206 | -- Reload privileges
207 | flush privileges;
208 | ```
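
A quick connectivity check from a cluster node (a minimal sketch; 10.240.114.54 is the MySQL host from the server table, using the account granted above):

```sh
# Should list the nine db_cdh6_* databases
mysql -h 10.240.114.54 -u user_cdh6 -p123456 -e 'show databases;'
```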
209 |
210 | #### Ansible configuration
211 |
212 | [Ansible installation and configuration](https://www.zorin.xin/2018/08/05/ansible-install-and-config/)
213 |
214 | Here, Ansible is installed on a Mac as the control node.
215 |
216 | ```sh
217 | # Set up key-based SSH trust to the servers
218 | ssh-copy-id -p 60777 root@10.240.114.34
219 | ssh-copy-id -p 60777 root@10.240.114.38
220 | ssh-copy-id -p 60777 root@10.240.114.65
221 | ssh-copy-id -p 60777 root@10.240.114.67
222 |
223 | # Test connectivity
224 | ansible cdh-cluster -i inventory/uat_cdh6.ini -m ping
225 | ```
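
Purely as an illustration, a minimal sketch of the shape `inventory/uat_cdh6.ini` takes (the group name and SSH port come from the commands above; the host-to-group mapping here is an assumption, and the file shipped in the repo is authoritative):

```ini
# hypothetical sketch - see inventory/uat_cdh6.ini in the repo for the real layout
[cdh-cluster]
10.240.114.34 ansible_port=60777
10.240.114.38 ansible_port=60777
10.240.114.65 ansible_port=60777
10.240.114.67 ansible_port=60777
```

Groups such as `kylin`, used later in this README, are defined the same way.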
226 |
227 | ### Deploy CDH
228 |
229 | #### Install CM and CDH
230 |
231 | ```sh
232 | cd /Users/yanglei/01_git/github_me/ansible-playbooks-cdh6
233 |
234 | # Test connectivity
235 | ansible cdh-cluster -i inventory/uat_cdh6.ini -m ping
236 | ansible cdh-cluster -i inventory/uat_cdh6.ini -m command -a "date"
237 |
238 | # Install the common components
239 | ansible-playbook -t common -i inventory/uat_cdh6.ini -e @inventory/uat_cdh6.yml 01.cdh.yml
240 |
241 | # Install the JDK
242 | ansible-playbook -t jdk -i inventory/uat_cdh6.ini -e @inventory/uat_cdh6.yml 01.cdh.yml
243 |
244 | # Set up passwordless SSH from the server to the agents
245 | ansible-playbook -t ssh -i inventory/uat_cdh6.ini -e @inventory/uat_cdh6.yml 01.cdh.yml
246 |
247 | # Install SCM
248 | ansible-playbook -t cm -i inventory/uat_cdh6.ini -e @inventory/uat_cdh6.yml 01.cdh.yml
249 | # If the database setup step fails with: java.sql.SQLException: Statement violates GTID consistency: CREATE TABLE ... SELECT,
250 | # temporarily disable MySQL's GTID mode:
251 | # set global gtid_mode=on_permissive;
252 | # set global gtid_mode=off_permissive;
253 | # set global gtid_mode=off;
254 | # set global enforce_gtid_consistency=off;
255 | # and restore it when done:
256 | # set global enforce_gtid_consistency=on;
257 | # set global gtid_mode=off_permissive;
258 | # set global gtid_mode=on_permissive;
259 | # set global gtid_mode=on;
260 |
261 | # Stage the CDH offline packages
262 | ansible-playbook -t cdh -i inventory/uat_cdh6.ini -e @inventory/uat_cdh6.yml 01.cdh.yml
263 |
264 | # On the cdh-server node, check the service status
265 | # and verify that the scm database schema was created automatically.
266 | # If both look good, the SCM installation is complete.
267 | systemctl status cloudera-scm-agent.service
268 | systemctl status cloudera-scm-server.service
269 | # Tail the server log
270 | tail -f /var/log/cloudera-scm-server/cloudera-scm-server.log
271 | ```
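
Before switching to the web console, a quick check that CM is up and listening (the login URL is the one used in the next section):

```sh
# Expect HTTP 200 from the login page once the server has finished starting
curl -s -o /dev/null -w '%{http_code}\n' http://10.240.114.34:7180/cmf/login
```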
272 |
273 | #### Cluster configuration
274 |
275 | 
276 | Open the web console to configure the cluster, e.g. http://10.240.114.34:7180/cmf/login (default username and password: admin).
277 |
278 | 
279 | Click "Continue".
280 |
281 | 
282 | Accept the license agreement.
283 |
284 | 
285 | The free edition is chosen here; pick a paid edition if you need one.
286 |
287 | 
288 | Click "Continue".
289 |
290 | 
291 | Select "Currently Managed Hosts".
292 |
293 | 
294 | Once CDH-6.0.1 shows up as a selectable version, click "Continue".
295 |
296 | 
297 | Wait for the CDH parcel installation to finish, then click "Continue".
298 |
299 | 
300 |
301 | 
302 | Click "Finish".
303 |
304 | 
305 | Select services according to your needs.
306 |
307 | 
308 | Customize the role assignments.
309 |
310 | 
311 | Configure the databases.
312 |
313 | 
314 | Review the changes; any specific directory or parameter settings can be corrected here.
315 |
316 | 
317 | Wait for the first run to complete.
318 |
319 | 
320 |
321 | 
322 |
323 | 
324 | Once you land in the admin console, the deployment is essentially complete.
325 |
326 | ### Deploy Kylin
327 |
328 | ```sh
329 | cd /Users/yanglei/01_git/oschina/ansible/big_data
330 |
331 | ansible kylin -i inventory/uat_cdh6.ini -m ping
332 |
333 | # Install Kylin
334 | ansible-playbook -i inventory/uat_cdh6.ini kylin.yml
335 |
336 | # Symlink the CDH jars directory into Spark (note: the ansible file module does not expand $SPARK_HOME on the remote host, so replace it with the actual Spark path)
337 | ansible kylin -i inventory/uat_cdh6.ini -m file -a 'src=/opt/cloudera/parcels/CDH/jars dest=$SPARK_HOME/jars state=link'
338 |
339 | # Shorten the HIVE_LIB path with a symlink so Kylin does not fail at startup with "argument list too long" (the same note applies to $HIVE_HOME)
340 | ansible kylin -i inventory/uat_cdh6.ini -m file -a 'src=$HIVE_HOME/lib dest=/hivelib state=link'
341 |
342 | # Check the environment
343 | su - hdfs
344 | # hdfs dfs -chmod -R 777 /
345 | $KYLIN_HOME/bin/check-env.sh
346 | $KYLIN_HOME/bin/find-hive-dependency.sh
347 | $KYLIN_HOME/bin/find-hbase-dependency.sh
348 | $KYLIN_HOME/bin/find-spark-dependency.sh
349 |
350 | # Start Kylin
351 | $KYLIN_HOME/bin/kylin.sh start
352 |
353 | # Stop Kylin
354 | $KYLIN_HOME/bin/kylin.sh stop
355 |
356 | # Verify via the web UI
357 | http://172.20.32.131:7070/kylin
358 | # The initial username/password is ADMIN/KYLIN
359 |
360 | # Build the sample cube to test Kylin
361 | $KYLIN_HOME/bin/sample.sh
362 | ```
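
A scripted version of the web check (same URL as above; Kylin can take a while to start, so retry until this returns 200):

```sh
curl -s -o /dev/null -w '%{http_code}\n' http://172.20.32.131:7070/kylin
```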
363 |
364 | ### Uninstall CDH
365 |
366 | ```sh
367 | ansible-playbook -i inventory/uat_cdh6.ini 99.clean_all.yml
368 |
369 | # Then drop the databases that were created
370 | ```
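
For the database cleanup, the drop statements from the preparation section can be replayed (a sketch; it assumes an account allowed to drop the db_cdh6_* databases on the MySQL host 10.240.114.54):

```sh
mysql -h 10.240.114.54 -u root -p <<'EOF'
drop database db_cdh6_scm;
drop database db_cdh6_amon;
drop database db_cdh6_rmon;
drop database db_cdh6_hue;
drop database db_cdh6_metastore;
drop database db_cdh6_sentry;
drop database db_cdh6_nav;
drop database db_cdh6_navms;
drop database db_cdh6_oozie;
EOF
```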
371 |
372 | ### CDH configuration
373 |
374 | #### Directory locations
375 |
376 | Path | Description
377 | ---------------------------------------|------------------------------
378 | /var/lib/cloudera-scm-server | Server data directory
379 | /var/log/cloudera-scm-* | CM log directories
380 | /opt/cloudera/parcels/ | Install directory for Hadoop-related services
381 | /opt/cloudera/parcel-repo/ | Downloaded service packages (parcels)
382 | /opt/cloudera/parcel-cache | Parcel download cache
383 | /opt/cloudera/parcels/CDH/jars | All CDH jar files
384 | /etc/cloudera-scm-agent/config.ini | CM Agent configuration file
385 | /etc/cloudera-scm-server/ | CM Server configuration directory
386 | /etc/cloudera-scm-server/db.properties | CM Server database configuration
387 | /etc/hadoop/* | Hadoop client configuration
388 | /etc/hive/ | Hive configuration directory
389 | ... |
390 |
391 | #### Environment variables
392 |
393 | CDH ships its own environment variable script:
394 |
395 | ```sh
396 | cat /opt/cloudera/parcels/CDH/meta/cdh_env.sh
397 | #!/bin/bash
398 | CDH_DIRNAME=${PARCEL_DIRNAME:-"CDH-6.0.1-1.cdh6.0.1.p0.590678"}
399 | export CDH_HADOOP_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hadoop
400 | export CDH_MR1_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hadoop-0.20-mapreduce
401 | export CDH_HDFS_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hadoop-hdfs
402 | export CDH_HTTPFS_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hadoop-httpfs
403 | export CDH_MR2_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hadoop-mapreduce
404 | export CDH_YARN_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hadoop-yarn
405 | export CDH_HBASE_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hbase
406 | export CDH_ZOOKEEPER_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/zookeeper
407 | export CDH_HIVE_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hive
408 | export CDH_HUE_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hue
409 | export CDH_OOZIE_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/oozie
410 | export CDH_HUE_PLUGINS_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hadoop
411 | export CDH_FLUME_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/flume-ng
412 | export CDH_PIG_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/pig
413 | export CDH_HCAT_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hive-hcatalog
414 | export CDH_SENTRY_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/sentry
415 | export JSVC_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/bigtop-utils
416 | export CDH_HADOOP_BIN=$CDH_HADOOP_HOME/bin/hadoop
417 | export CDH_IMPALA_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/impala
418 | export CDH_SOLR_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/solr
419 | export CDH_HBASE_INDEXER_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hbase-solr
420 | export SEARCH_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/search
421 | export CDH_SPARK_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/spark
422 | export WEBHCAT_DEFAULT_XML=$PARCELS_ROOT/$CDH_DIRNAME/etc/hive-webhcat/conf.dist/webhcat-default.xml
423 | export CDH_KMS_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hadoop-kms
424 | export CDH_PARQUET_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/parquet
425 | export CDH_AVRO_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/avro
426 | export CDH_KAFKA_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/kafka
427 | export CDH_KUDU_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/kudu
428 | ```
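
The script resolves everything relative to PARCELS_ROOT (and PARCEL_DIRNAME), which the CM agent normally provides; to use it from an interactive shell, export PARCELS_ROOT first (a minimal sketch, assuming the default parcel root from the directory table above):

```sh
# /opt/cloudera/parcels is the default parcel root; CDH is a symlink to the versioned parcel
export PARCELS_ROOT=/opt/cloudera/parcels
source /opt/cloudera/parcels/CDH/meta/cdh_env.sh
echo "$CDH_SPARK_HOME" "$CDH_HIVE_HOME"
```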
429 |
430 | #### Other tips
431 |
432 | From the Cloudera Manager UI you can add or remove cluster hosts, add services to the cluster, and so on.
433 |
434 | The Cloudera Manager UI loads google-analytics, which is very slow from mainland China; it can be disabled under:
435 |
436 | Administration -> Settings -> Other -> uncheck "Allow Usage Data Collection"
437 |
438 | ### References
439 |
440 | #### Deployment
441 |
442 | [Configuring an NTP server on CentOS 7](https://www.cnblogs.com/harrymore/p/9566229.html)
443 |
444 | [Setting up an NTP time server on CentOS 7](https://blog.csdn.net/zzy5066/article/details/79036674)
445 |
446 | [Time synchronization with NTP on CentOS 7](http://www.cnblogs.com/yangxiansen/p/7860008.html)
447 |
448 | [How to assign roles in a Hadoop cluster](https://blog.csdn.net/chenguangchun1993/article/details/79164857)
449 |
450 | [Cloudera Manager and CDH 6.0.1 installation and uninstallation, with step-by-step screenshots](https://blog.csdn.net/tototuzuoquan/article/details/85111018)
451 |
452 | [Installing and deploying CDH6 on CentOS 7.5](https://blog.csdn.net/TXBSW/article/details/84648269)
453 |
454 | [A detailed guide to installing the latest CDH 6.0.1](https://blog.csdn.net/u010003835/article/details/85007946)
455 |
456 | [Detailed walkthrough of installing Cloudera Manager and CDH 6.0.1 on CentOS 7](https://www.cnblogs.com/wzlinux/p/10183357.html)
457 |
458 | [CDH 5.15 uninstallation guide](https://blog.csdn.net/weixin_35852328/article/details/81774627)
459 |
460 | #### Configuration
461 |
462 | [CDH5 quick start guide](https://www.jianshu.com/p/72dc1c591647)
463 |
--------------------------------------------------------------------------------