├── kylin.yml
├── scala.yml
├── roles
│   ├── kylin
│   │   ├── templates
│   │   │   ├── kylin_env.sh
│   │   │   ├── kylin_job_conf_inmem.xml
│   │   │   └── kylin.properties
│   │   └── tasks
│   │       └── main.yml
│   ├── scala
│   │   ├── templates
│   │   │   └── scala_env.sh
│   │   └── tasks
│   │       └── main.yml
│   ├── ssh
│   │   ├── defaults
│   │   │   └── main.yml
│   │   └── tasks
│   │       └── main.yml
│   ├── jdk
│   │   ├── files
│   │   │   └── mysql-connector-java-5.1.46.jar
│   │   ├── templates
│   │   │   └── cdh_java_home.sh
│   │   ├── defaults
│   │   │   └── main.yml
│   │   └── tasks
│   │       └── main.yml
│   ├── cdh
│   │   ├── defaults
│   │   │   └── main.yml
│   │   └── tasks
│   │       └── main.yml
│   ├── cm
│   │   ├── defaults
│   │   │   └── main.yml
│   │   ├── tasks
│   │   │   └── main.yml
│   │   └── templates
│   │       └── config.ini
│   └── common
│       ├── defaults
│       │   └── main.yml
│       ├── files
│       │   ├── CentOS-7-163.repo
│       │   ├── ntp.conf
│       │   └── CentOS-7-aliyun.repo
│       └── tasks
│           └── main.yml
├── images
│   ├── cm_install_step_01.png
│   ├── cm_install_step_02.png
│   ├── cm_install_step_03.png
│   ├── cm_install_step_04.png
│   ├── cm_install_step_05.png
│   ├── cm_install_step_06.png
│   ├── cm_install_step_07.png
│   ├── cm_install_step_08.png
│   ├── cm_install_step_09.png
│   ├── cm_install_step_10.png
│   ├── cm_install_step_11.png
│   ├── cm_install_step_12.png
│   ├── cm_install_step_13.png
│   ├── cm_install_step_14.png
│   ├── cm_install_step_15.png
│   ├── cm_install_step_16.png
│   ├── cm_install_step_17.png
│   └── cm_install_step_18.png
├── inventory
│   ├── uat_cdh6.yml
│   ├── dev_cdh6.ini
│   └── uat_cdh6.ini
├── 01.cdh.yml
├── shell
│   └── cleanLog.sh
├── 99.clean_all.yml
└── README.md
/kylin.yml:
--------------------------------------------------------------------------------
1 | - hosts:
2 | - kylin
3 | roles:
4 | - kylin
5 |
--------------------------------------------------------------------------------
/scala.yml:
--------------------------------------------------------------------------------
1 | - hosts:
2 | - spark
3 | roles:
4 | - scala
5 |
--------------------------------------------------------------------------------
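Both playbooks simply map a host group to a role. A minimal invocation sketch (it assumes the role variables such as kylin_download_url are defined, e.g. by uncommenting the kylin block in inventory/dev_cdh6.ini; note that scala.yml targets a spark group, which the sample inventories do not define, so it would need to be added):

  ansible-playbook -i inventory/dev_cdh6.ini kylin.yml
  ansible-playbook -i inventory/dev_cdh6.ini scala.yml
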
/roles/kylin/templates/kylin_env.sh:
--------------------------------------------------------------------------------
1 | export KYLIN_HOME={{ kylin_path }}
2 | export PATH=$KYLIN_HOME/bin:$PATH
3 |
--------------------------------------------------------------------------------
/roles/scala/templates/scala_env.sh:
--------------------------------------------------------------------------------
1 | export SCALA_HOME={{ scala_path }}
2 | export PATH=$SCALA_HOME/bin:$PATH
3 |
--------------------------------------------------------------------------------
/images/cm_install_step_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_01.png
--------------------------------------------------------------------------------
/images/cm_install_step_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_02.png
--------------------------------------------------------------------------------
/images/cm_install_step_03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_03.png
--------------------------------------------------------------------------------
/images/cm_install_step_04.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_04.png
--------------------------------------------------------------------------------
/images/cm_install_step_05.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_05.png
--------------------------------------------------------------------------------
/images/cm_install_step_06.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_06.png
--------------------------------------------------------------------------------
/images/cm_install_step_07.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_07.png
--------------------------------------------------------------------------------
/images/cm_install_step_08.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_08.png
--------------------------------------------------------------------------------
/images/cm_install_step_09.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_09.png
--------------------------------------------------------------------------------
/images/cm_install_step_10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_10.png
--------------------------------------------------------------------------------
/images/cm_install_step_11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_11.png
--------------------------------------------------------------------------------
/images/cm_install_step_12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_12.png
--------------------------------------------------------------------------------
/images/cm_install_step_13.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_13.png
--------------------------------------------------------------------------------
/images/cm_install_step_14.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_14.png
--------------------------------------------------------------------------------
/images/cm_install_step_15.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_15.png
--------------------------------------------------------------------------------
/images/cm_install_step_16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_16.png
--------------------------------------------------------------------------------
/images/cm_install_step_17.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_17.png
--------------------------------------------------------------------------------
/images/cm_install_step_18.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/images/cm_install_step_18.png
--------------------------------------------------------------------------------
/roles/ssh/defaults/main.yml:
--------------------------------------------------------------------------------
1 | # SSH user
2 | v_ssh_user: "root"
3 |
4 | # SSH group
5 | v_ssh_group: "root"
6 |
7 | # SSH user home directory
8 | v_ssh_user_path: "/root"
9 |
--------------------------------------------------------------------------------
/roles/jdk/files/mysql-connector-java-5.1.46.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bjddd192/ansible-playbooks-cdh6/HEAD/roles/jdk/files/mysql-connector-java-5.1.46.jar
--------------------------------------------------------------------------------
/roles/jdk/templates/cdh_java_home.sh:
--------------------------------------------------------------------------------
1 | export JAVA_HOME=/usr/java/jdk{{ v_jdk_version }}-cloudera
2 | export CLASSPATH=.:$CLASSPATH:$JAVA_HOME/lib
3 | export JRE_HOME=$JAVA_HOME/jre
4 | export PATH=$PATH:$JAVA_HOME/bin
5 |
--------------------------------------------------------------------------------
/roles/jdk/defaults/main.yml:
--------------------------------------------------------------------------------
1 | # Download URL for the JDK build recommended by cdh
2 | v_cdh_oracle_j2sdk_download_url: "{{ v_cdh_download_server }}/cm6/{{ v_cdh_version }}/redhat7/yum/RPMS/x86_64/oracle-j2sdk1.8-1.8.0+update141-1.x86_64.rpm"
3 |
4 | # Whether to install the jdk
5 | v_jdk_install: "false"
6 |
7 | # jdk version to install
8 | v_jdk_version: "1.8.0_141"
9 |
--------------------------------------------------------------------------------
/inventory/uat_cdh6.yml:
--------------------------------------------------------------------------------
1 | # Custom hosts entries
2 | v_hosts: {
3 |   "bjds-kubernetes-node-pre-10-240-114-34-vm.belle.lan": "10.240.114.34 ",
4 |   "bjds-kubernetes-node-pre-10-240-114-38-vm.belle.lan": "10.240.114.38 ",
5 |   "bjds-kubernetes-node-pre-10-240-114-65-vm.belle.lan": "10.240.114.65 ",
6 |   "bjds-kubernetes-node-pre-10-240-114-67-vm.belle.lan": "10.240.114.67 "
7 | }
--------------------------------------------------------------------------------
/roles/cdh/defaults/main.yml:
--------------------------------------------------------------------------------
1 | v_cdh_download_url: "{{v_cdh_download_server}}/cdh6/{{v_cdh_version}}/parcels/CDH-{{v_cdh_version}}-1.cdh{{v_cdh_version}}.p{{v_cdh_version_p}}-el7.parcel"
2 |
3 | v_cdh_sha_download_url: "{{v_cdh_download_server}}/cdh6/{{v_cdh_version}}/parcels/CDH-{{v_cdh_version}}-1.cdh{{v_cdh_version}}.p{{v_cdh_version_p}}-el7.parcel.sha256"
4 |
5 | v_cdh_manifest_download_url: "{{v_cdh_download_server}}/cdh6/{{v_cdh_version}}/parcels/manifest.json"
6 |
--------------------------------------------------------------------------------
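With the sample inventory values (v_cdh_download_server=https://archive.cloudera.com, v_cdh_version=6.0.1, v_cdh_version_p=0.590678), v_cdh_download_url renders to:

  https://archive.cloudera.com/cdh6/6.0.1/parcels/CDH-6.0.1-1.cdh6.0.1.p0.590678-el7.parcel
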
/01.cdh.yml:
--------------------------------------------------------------------------------
1 | - hosts:
2 | - cdh-cluster
3 | roles:
4 | - common
5 | tags: "common"
6 |
7 | - hosts:
8 | - cdh-cluster
9 | roles:
10 | - jdk
11 | tags: "jdk"
12 |
13 | - hosts:
14 | - cdh-server
15 | roles:
16 | - ssh
17 | tags: "ssh"
18 |
19 | - hosts:
20 | - cdh-agent
21 | roles:
22 | - ssh
23 | tags: "ssh"
24 |
25 | - hosts:
26 | - cdh-cluster
27 | roles:
28 | - cm
29 | tags: "cm"
30 |
31 | - hosts:
32 | - cdh-server
33 | roles:
34 | - cdh
35 | tags: "cdh"
--------------------------------------------------------------------------------
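Note the inventory comments still refer to the playbook as cdh.yml; the file here is 01.cdh.yml. A sketch of a full run and of re-running a single role via its tag:

  ansible-playbook -i inventory/uat_cdh6.ini -e @inventory/uat_cdh6.yml 01.cdh.yml
  ansible-playbook -i inventory/uat_cdh6.ini -e @inventory/uat_cdh6.yml 01.cdh.yml -t jdk
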
/roles/cm/defaults/main.yml:
--------------------------------------------------------------------------------
1 | # Download URL of the cm yum repository file
2 | v_yum_repo_url: "{{ v_cdh_download_server }}/cm6/{{ v_cdh_version }}/redhat7/yum/cloudera-manager.repo"
3 |
4 | # Download URL of the GPG signing key for the cm repository
5 | v_yum_repo_gpgcheck_url: "{{ v_cdh_download_server }}/cm6/{{ v_cdh_version }}/redhat7/yum/RPM-GPG-KEY-cloudera"
6 |
7 | # scm_db_type
8 | v_cm_db_type: "mysql"
9 |
10 | # scm_db_host
11 | v_cm_db_host: ""
12 |
13 | # scm_db_name
14 | v_cm_db_name: "scm"
15 |
16 | # scm_db_user
17 | v_cm_db_user: "root"
18 |
19 | # scm_db_password
20 | v_cm_db_password: ""
21 |
22 | # scm_db_port
23 | v_cm_db_port: 3306
24 |
--------------------------------------------------------------------------------
/roles/common/defaults/main.yml:
--------------------------------------------------------------------------------
1 | # Whether to switch to the 163 yum mirror
2 | v_update_yum_with_163: "false"
3 |
4 | # Whether to switch to the aliyun yum mirror
5 | v_update_yum_with_aliyun: "false"
6 |
7 | # Whether to upgrade the kernel
8 | v_update_kernel: "false"
9 |
10 | # Whether to install required packages
11 | v_yum_install: "false"
12 |
13 | # Whether to set up time synchronization
14 | v_ntpdate_install: "false"
15 |
16 | # NTP server address
17 | v_ntpdate_address: "ntp1.aliyun.com"
18 |
19 | # Whether to set the hosts' hostnames
20 | v_enable_set_hostname: "false"
21 |
22 | # Whether to update the OS hosts file
23 | v_update_hosts: "false"
24 |
25 | # Custom hosts entries
26 | v_hosts: {
27 |   "localhost localhost.localdomain localhost4 localhost4.localdomain4": "127.0.0.1 ",
28 |   "localhost localhost.localdomain localhost6 localhost6.localdomain6": "::1 "
29 | }
30 |
31 | # How aggressively the server uses swap (vm.swappiness; Cloudera recommends at most 10)
32 | v_vm_swappiness: 10
33 |
34 |
--------------------------------------------------------------------------------
/roles/ssh/tasks/main.yml:
--------------------------------------------------------------------------------
1 | - name: Check whether the user's ssh public key already exists
2 | shell: "ls {{ v_ssh_user_path }}/.ssh|grep '.pub' |wc -l"
3 | register: key_exist
4 | ignore_errors: true
5 |
6 | - name: Generate an ssh key pair
7 | user:
8 | name: "{{ v_ssh_user }}"
9 | generate_ssh_key: yes
10 | ssh_key_bits: 2048
11 | ssh_key_file: .ssh/id_rsa
12 | when: "key_exist.stdout == '0'"
13 |
14 | - name: Fetch the user's ssh public key to the control node
15 | fetch:
16 | src: "{{ v_ssh_user_path }}/.ssh/id_rsa.pub"
17 | dest: "/tmp/id_{{ ansible_host }}_{{ v_ssh_user }}.pub"
18 | flat: yes
19 |
20 | # Read the public key fetched to the control node and append it to authorized_keys
21 | - name: Install the server's public key so it can log in to the agents without a password
22 | authorized_key:
23 | user: "{{ v_ssh_user }}"
24 | key: "{{ lookup('file', '/tmp/id_{{ v_server_ip }}_{{ v_ssh_user }}.pub') }}"
25 |
--------------------------------------------------------------------------------
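The net effect: each host in the play gets a key pair if it lacks one, every public key lands in /tmp on the control node, and the key fetched from v_server_ip is appended to authorized_keys on all hosts in the play. A manual spot check from the cdh-server host (agent IP taken from the uat inventory; a sketch):

  ssh root@10.240.114.38 hostname   # should return the hostname without a password prompt
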
/roles/kylin/tasks/main.yml:
--------------------------------------------------------------------------------
1 | - name: Prepare the kylin working directory
2 | file: name={{ item }} state=directory owner={{ hadoop_user }} group={{ hadoop_group }} mode=0755
3 | with_items:
4 | - "{{ kylin_work_path }}"
5 |
6 | - name: Download the kylin package
7 | get_url: url={{ kylin_download_url }} dest=/tmp owner={{ hadoop_user }} group={{ hadoop_group }} mode=644
8 |
9 | - name: Unpack the kylin package
10 | unarchive:
11 | # src can also be a URL, which unarchive downloads and unpacks directly
12 | src: "/tmp/apache-kylin-{{kylin_version}}-bin-{{kylin_env}}.tar.gz"
13 | copy: no
14 | dest: "{{ kylin_work_path }}"
15 | owner: "{{ hadoop_user }}"
16 | group: "{{ hadoop_group }}"
17 |
18 | - name: Fix ownership of the kylin directory
19 | file: name={{ kylin_path }} state=directory recurse=yes owner={{ hadoop_user }} group={{ hadoop_group }}
20 |
21 | - name: Install the kylin environment script
22 | template: src=kylin_env.sh dest=/etc/profile.d
23 |
24 | - name: Source the kylin environment script
25 | shell: "source /etc/profile.d/kylin_env.sh"
26 |
--------------------------------------------------------------------------------
/roles/scala/tasks/main.yml:
--------------------------------------------------------------------------------
1 | - name: Prepare the scala working directory
2 | file: name={{ item }} state=directory owner={{ hadoop_user }} group={{ hadoop_group }} mode=0755
3 | with_items:
4 | - "{{ scala_work_path }}"
5 |
6 | - name: Download the scala package
7 | get_url: url={{ scala_download_url }} dest=/tmp owner={{ hadoop_user }} group={{ hadoop_group }} mode=644
8 |
9 | - name: Unpack the scala package
10 | unarchive:
11 | # src can also be a URL, which unarchive downloads and unpacks directly
12 | src: "/tmp/scala-{{ scala_version }}.tgz"
13 | copy: no
14 | dest: "{{ scala_work_path }}"
15 | owner: "{{ hadoop_user }}"
16 | group: "{{ hadoop_group }}"
17 |
18 | - name: Fix ownership of the scala directory
19 | file: name={{ scala_path }} state=directory recurse=yes owner={{ hadoop_user }} group={{ hadoop_group }}
20 |
21 | - name: Install the scala environment script
22 | template: src=scala_env.sh dest=/etc/profile.d
23 |
24 | - name: Source the scala environment script
25 | shell: "source /etc/profile.d/scala_env.sh"
26 |
27 |
28 |
29 |
30 |
--------------------------------------------------------------------------------
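In both the kylin and scala roles, the final source task only affects that task's own shell; the variables actually take effect because the script sits in /etc/profile.d and is sourced by later login shells. A verification sketch in a fresh login shell (check-env.sh ships in the Kylin binary package's bin directory):

  bash -l -c 'scala -version'
  bash -l -c '$KYLIN_HOME/bin/check-env.sh'
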
/roles/cdh/tasks/main.yml:
--------------------------------------------------------------------------------
1 | - name: Download the cdh parcels
2 | get_url: url={{ item }} dest=/opt/cloudera/parcel-repo owner=cloudera-scm group=cloudera-scm mode=644
3 | with_items:
4 | - "{{ v_cdh_manifest_download_url }}"
5 | - "{{ v_cdh_download_url }}"
6 |
7 | - name: Download the cdh parcel.sha256 and save it as parcel.sha
8 | get_url: url={{ item }} dest="/opt/cloudera/parcel-repo/CDH-{{v_cdh_version}}-1.cdh{{v_cdh_version}}.p{{v_cdh_version_p}}-el7.parcel.sha" owner=cloudera-scm group=cloudera-scm mode=644
9 | with_items:
10 | - "{{ v_cdh_sha_download_url }}"
11 |
12 | - name: Write the parcel hash for this version (looked up in manifest.json; hardcoded here for 6.0.1) into the .sha file
13 | shell: 'echo "2e650f1f1ea020a3efc98a231b85c2df1a50b030" > "/opt/cloudera/parcel-repo/CDH-{{v_cdh_version}}-1.cdh{{v_cdh_version}}.p{{v_cdh_version_p}}-el7.parcel.sha"'
14 |
15 | - name: Restart the cloudera-scm-server service and enable it at boot
16 | systemd:
17 | name: cloudera-scm-server
18 | daemon_reload: yes
19 | state: restarted
20 | enabled: yes
21 |
--------------------------------------------------------------------------------
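The hash echoed into the .parcel.sha file is hardcoded for the 6.0.1 el7 parcel. For any other version it can be read from the manifest.json downloaded in the first task; a sketch, assuming the usual Cloudera manifest layout (a top-level "parcels" list whose entries carry "parcelName" and "hash"):

  python -c 'import json
  m = json.load(open("/opt/cloudera/parcel-repo/manifest.json"))
  for p in m["parcels"]:
      if p["parcelName"].endswith("el7.parcel"):
          print(p["parcelName"] + " " + p["hash"])'
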
/shell/cleanLog.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 |
4 | # clear cloudera manager monitor log
5 | rm -rf /var/lib/cloudera-host-monitor/ts/*/partition*/*
6 | rm -rf /var/lib/cloudera-service-monitor/ts/*/partition*/*
7 |
8 | # clear cdh log
9 | rm -rf /var/log/cloudera-scm-eventserver/*.out.*
10 | rm -rf /var/log/cloudera-scm-firehose/*.out.*
11 | rm -rf /var/log/cloudera-scm-agent/*.log.*
12 | rm -rf /var/log/cloudera-scm-agent/*.out.*
13 | rm -rf /var/log/cloudera-scm-server/*.out.*
14 | rm -rf /var/log/cloudera-scm-server/*.log.*
15 |
16 | rm -rf /var/log/hadoop-hdfs/*.out.*
17 | rm -rf /var/log/hadoop-hdfs/*.log.*
18 | rm -rf /var/log/hadoop-httpfs/*.out.*
19 | rm -rf /var/log/hadoop-kms/*.out.*
20 | rm -rf /var/log/hadoop-mapreduce/*.out.*
21 | rm -rf /var/log/hadoop-yarn/*.out.*
22 | rm -rf /var/log/hadoop-yarn/*.audit.*
23 | rm -rf /var/log/hive/*.out.*
24 | rm -rf /var/log/oozie/*.out.*
25 | rm -rf /var/log/oozie/*.log.*
26 |
27 | rm -rf /var/log/zookeeper/*.log.*
28 |
--------------------------------------------------------------------------------
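The script is not referenced by any playbook; one way to run it across the cluster is Ansible's script module (a sketch):

  ansible -i inventory/uat_cdh6.ini cdh-cluster -m script -a shell/cleanLog.sh
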
/roles/jdk/tasks/main.yml:
--------------------------------------------------------------------------------
1 | - name: Remove the system's bundled JDK
2 | yum: name={{ item }} state=absent
3 | with_items:
4 | - "java*"
5 | - "jdk*"
6 | - "oracle-j2sdk*"
7 | when: v_jdk_install=="true"
8 |
9 | - name: Download the JDK
10 | get_url: url={{ item }} dest=/tmp mode=644
11 | with_items:
12 | - "{{ v_cdh_oracle_j2sdk_download_url }}"
13 | when: v_jdk_install=="true"
14 |
15 | - name: Install the JDK
16 | yum: name={{ item }} state=present
17 | with_items:
18 | - "/tmp/oracle-j2sdk1.8-1.8.0+update141-1.x86_64.rpm"
19 | when: v_jdk_install=="true"
20 |
21 | - name: Configure JAVA_HOME
22 | template: src=cdh_java_home.sh dest=/etc/profile.d
23 | when: v_jdk_install=="true"
24 |
25 | - name: Source the JAVA_HOME script
26 | shell: "source /etc/profile.d/cdh_java_home.sh"
27 | when: v_jdk_install=="true"
28 |
29 | - name: Prepare the shared java directory
30 | file: name={{ item }} state=directory mode=0755
31 | with_items:
32 | - "/usr/share/java"
33 |
34 | - name: Copy the mysql-connector jar
35 | copy: src=mysql-connector-java-5.1.46.jar dest=/usr/share/java/mysql-connector-java.jar mode=644
36 |
--------------------------------------------------------------------------------
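As in the kylin and scala roles, the source task here only affects its own shell; new login shells pick up /etc/profile.d/cdh_java_home.sh. A quick check on a target host (a sketch):

  bash -l -c 'echo $JAVA_HOME && java -version'
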
/roles/cm/tasks/main.yml:
--------------------------------------------------------------------------------
1 | - name: Download the cm yum repository file
2 | # debug: msg={{ groups['cdh-server'] }}
3 | get_url: url={{ v_yum_repo_url }} dest=/etc/yum.repos.d/
4 |
5 | - name: Import the repository's GPG signing key
6 | shell: "rpm --import {{ v_yum_repo_gpgcheck_url }}"
7 |
8 | - name: Install cloudera-manager-daemons and cloudera-manager-agent
9 | yum: update_cache=yes name={{ item }} state=present
10 | with_items:
11 | - cloudera-manager-daemons
12 | - cloudera-manager-agent
13 |
14 | - name: Install cloudera-manager-server
15 | yum: update_cache=yes name={{ item }} state=present
16 | with_items:
17 | - cloudera-manager-server
18 | when: "'cdh-server' in group_names"
19 |
20 | - name: Deploy the cm agent config file
21 | template: src={{ item }} dest=/etc/cloudera-scm-agent/config.ini mode=644
22 | with_items:
23 | - config.ini
24 |
25 | - name: Restart the cloudera-scm-agent service and enable it at boot
26 | systemd:
27 | name: cloudera-scm-agent
28 | daemon_reload: yes
29 | state: restarted
30 | enabled: yes
31 |
32 | - name: Initialize the scm database
33 | shell: "/opt/cloudera/cm/schema/scm_prepare_database.sh mysql -h {{ v_cm_db_host }} -P {{ v_cm_db_port }} --scm-host {{ v_server_ip }} {{ v_cm_db_name }} {{ v_cm_db_user }} {{ v_cm_db_password }} "
34 | when: "'cdh-server' in group_names"
35 |
--------------------------------------------------------------------------------
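Once scm_prepare_database.sh has succeeded and the cdh role has restarted cloudera-scm-server, the Cloudera Manager console listens on port 7180 of the server host. A quick sanity check (server IP taken from the uat inventory; a sketch):

  systemctl status cloudera-scm-server
  curl -sI http://10.240.114.34:7180/   # expect an HTTP response once startup completes
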
/roles/common/files/CentOS-7-163.repo:
--------------------------------------------------------------------------------
1 | # CentOS-Base.repo
2 | #
3 | # The mirror system uses the connecting IP address of the client and the
4 | # update status of each mirror to pick mirrors that are updated to and
5 | # geographically close to the client. You should use this for CentOS updates
6 | # unless you are manually picking other mirrors.
7 | #
8 | # If the mirrorlist= does not work for you, as a fall back you can try the
9 | # remarked out baseurl= line instead.
10 | #
11 | #
12 | [base]
13 | name=CentOS-$releasever - Base - 163.com
14 | #mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=os
15 | baseurl=http://mirrors.163.com/centos/$releasever/os/$basearch/
16 | gpgcheck=1
17 | gpgkey=http://mirrors.163.com/centos/RPM-GPG-KEY-CentOS-7
18 |
19 | #released updates
20 | [updates]
21 | name=CentOS-$releasever - Updates - 163.com
22 | #mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=updates
23 | baseurl=http://mirrors.163.com/centos/$releasever/updates/$basearch/
24 | gpgcheck=1
25 | gpgkey=http://mirrors.163.com/centos/RPM-GPG-KEY-CentOS-7
26 |
27 | #additional packages that may be useful
28 | [extras]
29 | name=CentOS-$releasever - Extras - 163.com
30 | #mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=extras
31 | baseurl=http://mirrors.163.com/centos/$releasever/extras/$basearch/
32 | gpgcheck=1
33 | gpgkey=http://mirrors.163.com/centos/RPM-GPG-KEY-CentOS-7
34 |
35 | #additional packages that extend functionality of existing packages
36 | [centosplus]
37 | name=CentOS-$releasever - Plus - 163.com
38 | baseurl=http://mirrors.163.com/centos/$releasever/centosplus/$basearch/
39 | gpgcheck=1
40 | enabled=0
41 | gpgkey=http://mirrors.163.com/centos/RPM-GPG-KEY-CentOS-7
42 |
--------------------------------------------------------------------------------
/inventory/dev_cdh6.ini:
--------------------------------------------------------------------------------
1 | [cdh-server]
2 | 10.0.42.182 node_name="sz19f-scm-lmp-test-10-0-42-182-vm.belle.lan" node_ip="10.0.42.182"
3 |
4 | [cdh-agent]
5 | 10.0.42.140 node_name="sz19f-scm-lmp-test-10-0-42-140-vm.belle.lan" node_ip="10.0.42.140"
6 | 10.0.42.184 node_name="sz19f-scm-lmp-test-10-0-42-184-vm.belle.lan" node_ip="10.0.42.184"
7 | 10.0.42.179 node_name="sz19f-scm-lmp-test-10-0-42-179-vm.belle.lan" node_ip="10.0.42.179"
8 |
9 | [cdh-cluster:children]
10 | cdh-server
11 | cdh-agent
12 |
13 | [kylin]
14 | 172.20.32.125
15 |
16 | [sqoop]
17 | 172.20.32.125
18 |
19 | [all:vars]
20 | ; # Whether to switch to the aliyun yum mirror
21 | ; v_update_yum_with_aliyun="true"
22 |
23 | # Whether to install required packages
24 | v_yum_install="true"
25 |
26 | # Whether to set up time synchronization
27 | v_ntpdate_install="true"
28 |
29 | ; # NTP server address
30 | ; v_ntpdate_address="ntp1.aliyun.com"
31 |
32 | ; # Whether to set the hosts' hostnames
33 | ; v_enable_set_hostname="true"
34 |
35 | # Whether to update the OS hosts file
36 | # Define custom hosts in a variables file and pass it in with -e.
37 | # Example command: ansible-playbook -i inventory/uat_cdh6.ini -e @inventory/uat_cdh6.yml cdh.yml
38 | ; v_update_hosts="true"
39 |
40 | # Server IP, for easy reference inside the code
41 | v_server_ip="10.0.42.182"
42 |
43 | # Download server for the cdh packages
44 | ; v_cdh_download_server="https://archive.cloudera.com"
45 | v_cdh_download_server="http://10.0.43.24:8066"
46 |
47 | # cdh major version
48 | v_cdh_version="6.0.1"
49 |
50 | # cdh parcel patch version
51 | v_cdh_version_p="0.590678"
52 |
53 | # Whether to install the jdk
54 | v_jdk_install="true"
55 |
56 | # scm_db_host
57 | v_cm_db_host="10.0.30.39"
58 |
59 | # scm_db_name
60 | v_cm_db_name="db_cdh6_scm"
61 |
62 | # scm_db_user
63 | v_cm_db_user="user_cdh6"
64 |
65 | # scm_db_password
66 | v_cm_db_password="123456"
67 |
68 | # scm_db_port
69 | v_cm_db_port=3306
70 |
71 | ; hadoop_user="root"
72 | ; hadoop_group="root"
73 | ; kylin_version="2.4.0"
74 | ; kylin_work_path="/home/cdh"
75 | ; kylin_path="{{kylin_work_path}}/apache-kylin-{{kylin_version}}-bin-{{kylin_env}}"
76 | ; kylin_config_path="{{kylin_path}}/conf"
77 | ; kylin_env="cdh57"
78 | ; kylin_download_url="http://172.20.32.36/package/kylin/apache-kylin-{{kylin_version}}-bin-{{kylin_env}}.tar.gz"
79 | ; #kylin_download_url="http://mirrors.hust.edu.cn/apache/kylin/apache-kylin-{{kylin_version}}/apache-kylin-{{kylin_version}}-bin-{{kylin_env}}.tar.gz"
80 |
--------------------------------------------------------------------------------
/roles/common/files/ntp.conf:
--------------------------------------------------------------------------------
1 | # For more information about this file, see the man pages
2 | # ntp.conf(5), ntp_acc(5), ntp_auth(5), ntp_clock(5), ntp_misc(5), ntp_mon(5).
3 |
4 | driftfile /var/lib/ntp/drift
5 |
6 | # Permit time synchronization with our time source, but do not
7 | # permit the source to query or modify the service on this system.
8 | restrict default nomodify notrap nopeer noquery
9 |
10 | # Permit all access over the loopback interface. This could
11 | # be tightened as well, but to do so would effect some of
12 | # the administrative functions.
13 | restrict 127.0.0.1
14 | restrict ::1
15 |
16 | # Hosts on local network are less restricted.
17 | #restrict 192.168.1.0 mask 255.255.255.0 nomodify notrap
18 |
19 | # Use public servers from the pool.ntp.org project.
20 | # Please consider joining the pool (http://www.pool.ntp.org/join.html).
21 | server ntp1.aliyun.com prefer
22 | server ntp3.aliyun.com
23 | server ntp5.aliyun.com
24 | server ntp7.aliyun.com
25 |
26 | #broadcast 192.168.1.255 autokey # broadcast server
27 | #broadcastclient # broadcast client
28 | #broadcast 224.0.1.1 autokey # multicast server
29 | #multicastclient 224.0.1.1 # multicast client
30 | #manycastserver 239.255.254.254 # manycast server
31 | #manycastclient 239.255.254.254 autokey # manycast client
32 |
33 | # Enable public key cryptography.
34 | #crypto
35 |
36 | includefile /etc/ntp/crypto/pw
37 |
38 | # Key file containing the keys and key identifiers used when operating
39 | # with symmetric key cryptography.
40 | keys /etc/ntp/keys
41 |
42 | # Specify the key identifiers which are trusted.
43 | #trustedkey 4 8 42
44 |
45 | # Specify the key identifier to use with the ntpdc utility.
46 | #requestkey 8
47 |
48 | # Specify the key identifier to use with the ntpq utility.
49 | #controlkey 8
50 |
51 | # Enable writing of statistics records.
52 | #statistics clockstats cryptostats loopstats peerstats
53 |
54 | # Disable the monitoring facility to prevent amplification attacks using ntpdc
55 | # monlist command when default restrict does not include the noquery flag. See
56 | # CVE-2013-5211 for more details.
57 | # Note: Monitoring will not be disabled with the limited restriction flag.
58 | disable monitor
59 |
--------------------------------------------------------------------------------
/inventory/uat_cdh6.ini:
--------------------------------------------------------------------------------
1 | [cdh-server]
2 | 10.240.114.34 node_name="bjds-kubernetes-node-pre-10-240-114-34-vm.belle.lan" node_ip="10.240.114.34"
3 |
4 | [cdh-agent]
5 | 10.240.114.38 node_name="bjds-kubernetes-node-pre-10-240-114-38-vm.belle.lan" node_ip="10.240.114.38"
6 | 10.240.114.65 node_name="bjds-kubernetes-node-pre-10-240-114-65-vm.belle.lan" node_ip="10.240.114.65"
7 | 10.240.114.67 node_name="bjds-kubernetes-node-pre-10-240-114-67-vm.belle.lan" node_ip="10.240.114.67"
8 |
9 | [cdh-cluster:children]
10 | cdh-server
11 | cdh-agent
12 |
13 | [kylin]
14 | 172.20.32.125
15 |
16 | [sqoop]
17 | 172.20.32.125
18 |
19 | [all:vars]
20 | ; # Whether to switch to the aliyun yum mirror
21 | ; v_update_yum_with_aliyun="true"
22 |
23 | # Whether to install required packages
24 | v_yum_install="true"
25 |
26 | ; # Whether to set up time synchronization
27 | ; v_ntpdate_install="true"
28 | ;
29 | ; # NTP server address
30 | ; v_ntpdate_address="ntp1.aliyun.com"
31 |
32 | ; # Whether to set the hosts' hostnames
33 | ; v_enable_set_hostname="true"
34 |
35 | # Whether to update the OS hosts file
36 | # Define custom hosts in a variables file and pass it in with -e.
37 | # Example command: ansible-playbook -i inventory/uat_cdh6.ini -e @inventory/uat_cdh6.yml cdh.yml
38 | v_update_hosts="true"
39 |
40 | # Server IP, for easy reference inside the code
41 | v_server_ip="10.240.114.34"
42 |
43 | # Download server for the cdh packages
44 | ; v_cdh_download_server="https://archive.cloudera.com"
45 | v_cdh_download_server="http://10.240.114.45:8066"
46 | ; v_cdh_download_server="http://10.0.43.24:8066"
47 |
48 | # cdh major version
49 | v_cdh_version="6.0.1"
50 |
51 | # cdh parcel patch version
52 | v_cdh_version_p="0.590678"
53 |
54 | # Whether to install the jdk
55 | v_jdk_install="false"
56 |
57 | # scm_db_host
58 | v_cm_db_host="10.240.114.54"
59 |
60 | # scm_db_name
61 | v_cm_db_name="db_cdh6_scm"
62 |
63 | # scm_db_user
64 | v_cm_db_user="user_cdh6"
65 |
66 | # scm_db_password
67 | v_cm_db_password="123456"
68 |
69 | # scm_db_port
70 | v_cm_db_port=3306
71 |
72 | ; hadoop_user="root"
73 | ; hadoop_group="root"
74 | ; kylin_version="2.4.0"
75 | ; kylin_work_path="/home/cdh"
76 | ; kylin_path="{{kylin_work_path}}/apache-kylin-{{kylin_version}}-bin-{{kylin_env}}"
77 | ; kylin_config_path="{{kylin_path}}/conf"
78 | ; kylin_env="cdh57"
79 | ; kylin_download_url="http://172.20.32.36/package/kylin/apache-kylin-{{kylin_version}}-bin-{{kylin_env}}.tar.gz"
80 | ; #kylin_download_url="http://mirrors.hust.edu.cn/apache/kylin/apache-kylin-{{kylin_version}}/apache-kylin-{{kylin_version}}-bin-{{kylin_env}}.tar.gz"
81 |
--------------------------------------------------------------------------------
/99.clean_all.yml:
--------------------------------------------------------------------------------
1 | - hosts:
2 | - cdh-cluster
3 | tasks:
4 | - name: Stop cloudera-scm-agent
5 | systemd:
6 | name: cloudera-scm-agent
7 | state: stopped
8 | ignore_errors: true
9 |
10 | - hosts:
11 | - cdh-server
12 | tasks:
13 | - name: Stop cloudera-scm-server
14 | systemd:
15 | name: cloudera-scm-server
16 | state: stopped
17 | ignore_errors: true
18 |
19 | - hosts:
20 | - cdh-cluster
21 | tasks:
22 | - name: Wait for the services to stop
23 | shell: "sleep 20"
24 | - name: Unmount directories mounted by cm
25 | shell: "for cm_mount_path in $(mount | grep 'cloudera-scm'| awk '{print $3}'); do umount $cm_mount_path; done"
26 | ignore_errors: true
27 | - name: Remove cloudera-manager-daemons and cloudera-manager-agent
28 | yum: name={{ item }} state=absent
29 | with_items:
30 | - cloudera-manager-daemons
31 | - cloudera-manager-agent
32 | ignore_errors: true
33 | - name: Remove cloudera-manager-server
34 | yum: name={{ item }} state=absent
35 | with_items:
36 | - cloudera-manager-server
37 | when: "'cdh-server' in group_names"
38 | ignore_errors: true
39 |
40 | - hosts:
41 | - cdh-cluster
42 | tasks:
43 | - name: Clean up directories and files
44 | shell: "rm -rf /usr/share/cmf /var/lib/cloudera* /var/log/cloudera* /var/run/cloudera* /var/run/hdfs-sockets && \
45 | rm -rf /tmp/.scmpreparenode.lock /usr/lib/hue && \
46 | rm -rf /var/lib/flume-ng /var/lib/hadoop* /var/lib/hue /var/lib/navigator /var/lib/oozie /var/lib/solr && \
47 | rm -rf /var/lib/zookeeper /var/lib/kudu /var/lib/kafka /var/lib/impala /var/lib/sqoop* && \
48 | rm -rf /usr/bin/hadoop* /usr/bin/zookeeper* /usr/bin/hbase* /usr/bin/hive* /usr/bin/hdfs /usr/bin/mapred && \
49 | rm -rf /usr/bin/yarn /usr/bin/sqoop* /usr/bin/oozie /usr/bin/impala /usr/bin/spark* && \
50 | rm -rf /etc/hadoop* /etc/zookeeper* /etc/hive* /etc/hue /etc/impala /etc/sqoop* /etc/oozie && \
51 | rm -rf /etc/hbase* /etc/hcatalog /etc/spark /etc/solr /etc/cloudera* && \
52 | rm -rf /opt/cloudera && \
53 | rm -rf /data/kudu /data/dfs /data/yarn /data/mapred"
54 | ignore_errors: true
55 | - name: Clean up alternatives symlinks
56 | # Note: use ls -l here, not the ll alias, or the command fails.
57 | shell: "for alternatives in $(ls -l /etc/alternatives | grep CDH-{{ v_cdh_version }} | awk '{print $9}'); do rm -rf /etc/alternatives/$alternatives; done"
58 | ignore_errors: true
59 |
60 | # Kill related processes:
61 | # for u in hdfs mapred cloudera-scm hbase hue zookeeper oozie hive impala flume; do sudo kill $(ps -u $u -o pid=); done
62 |
--------------------------------------------------------------------------------
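A teardown run sketch (the alternatives cleanup uses v_cdh_version, so the same inventory is passed):

  ansible-playbook -i inventory/uat_cdh6.ini 99.clean_all.yml
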
/roles/common/files/CentOS-7-aliyun.repo:
--------------------------------------------------------------------------------
1 | # wget http://mirrors.aliyun.com/repo/Centos-7.repo
2 |
3 | # CentOS-Base.repo
4 | #
5 | # The mirror system uses the connecting IP address of the client and the
6 | # update status of each mirror to pick mirrors that are updated to and
7 | # geographically close to the client. You should use this for CentOS updates
8 | # unless you are manually picking other mirrors.
9 | #
10 | # If the mirrorlist= does not work for you, as a fall back you can try the
11 | # remarked out baseurl= line instead.
12 | #
13 | #
14 |
15 | [base]
16 | name=CentOS-$releasever - Base - mirrors.aliyun.com
17 | failovermethod=priority
18 | baseurl=http://mirrors.aliyun.com/centos/$releasever/os/$basearch/
19 | http://mirrors.aliyuncs.com/centos/$releasever/os/$basearch/
20 | http://mirrors.cloud.aliyuncs.com/centos/$releasever/os/$basearch/
21 | gpgcheck=1
22 | gpgkey=http://mirrors.aliyun.com/centos/RPM-GPG-KEY-CentOS-7
23 |
24 | #released updates
25 | [updates]
26 | name=CentOS-$releasever - Updates - mirrors.aliyun.com
27 | failovermethod=priority
28 | baseurl=http://mirrors.aliyun.com/centos/$releasever/updates/$basearch/
29 | http://mirrors.aliyuncs.com/centos/$releasever/updates/$basearch/
30 | http://mirrors.cloud.aliyuncs.com/centos/$releasever/updates/$basearch/
31 | gpgcheck=1
32 | gpgkey=http://mirrors.aliyun.com/centos/RPM-GPG-KEY-CentOS-7
33 |
34 | #additional packages that may be useful
35 | [extras]
36 | name=CentOS-$releasever - Extras - mirrors.aliyun.com
37 | failovermethod=priority
38 | baseurl=http://mirrors.aliyun.com/centos/$releasever/extras/$basearch/
39 | http://mirrors.aliyuncs.com/centos/$releasever/extras/$basearch/
40 | http://mirrors.cloud.aliyuncs.com/centos/$releasever/extras/$basearch/
41 | gpgcheck=1
42 | gpgkey=http://mirrors.aliyun.com/centos/RPM-GPG-KEY-CentOS-7
43 |
44 | #additional packages that extend functionality of existing packages
45 | [centosplus]
46 | name=CentOS-$releasever - Plus - mirrors.aliyun.com
47 | failovermethod=priority
48 | baseurl=http://mirrors.aliyun.com/centos/$releasever/centosplus/$basearch/
49 | http://mirrors.aliyuncs.com/centos/$releasever/centosplus/$basearch/
50 | http://mirrors.cloud.aliyuncs.com/centos/$releasever/centosplus/$basearch/
51 | gpgcheck=1
52 | enabled=0
53 | gpgkey=http://mirrors.aliyun.com/centos/RPM-GPG-KEY-CentOS-7
54 |
55 | #contrib - packages by Centos Users
56 | [contrib]
57 | name=CentOS-$releasever - Contrib - mirrors.aliyun.com
58 | failovermethod=priority
59 | baseurl=http://mirrors.aliyun.com/centos/$releasever/contrib/$basearch/
60 | http://mirrors.aliyuncs.com/centos/$releasever/contrib/$basearch/
61 | http://mirrors.cloud.aliyuncs.com/centos/$releasever/contrib/$basearch/
62 | gpgcheck=1
63 | enabled=0
64 | gpgkey=http://mirrors.aliyun.com/centos/RPM-GPG-KEY-CentOS-7
--------------------------------------------------------------------------------
/roles/kylin/templates/kylin_job_conf_inmem.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <configuration>
3 |
4 |   <property>
5 |     <name>mapreduce.job.split.metainfo.maxsize</name>
6 |     <value>-1</value>
7 |     <description>The maximum permissible size of the split metainfo file.
8 |     The JobTracker won't attempt to read split metainfo files bigger than
9 |     the configured value. No limits if set to -1.
10 |     </description>
11 |   </property>
12 |
13 |   <property>
14 |     <name>mapreduce.map.output.compress</name>
15 |     <value>true</value>
16 |     <description>Compress map outputs</description>
17 |   </property>
18 |
19 |   <property>
20 |     <name>mapreduce.output.fileoutputformat.compress</name>
21 |     <value>true</value>
22 |     <description>Compress the output of a MapReduce job</description>
23 |   </property>
24 |
25 |   <property>
26 |     <name>mapreduce.output.fileoutputformat.compress.type</name>
27 |     <value>BLOCK</value>
28 |     <description>The compression type to use for job outputs</description>
29 |   </property>
30 |
31 |   <property>
32 |     <name>mapreduce.job.max.split.locations</name>
33 |     <value>2000</value>
34 |     <description>No description</description>
35 |   </property>
36 |
37 |   <property>
38 |     <name>dfs.replication</name>
39 |     <value>2</value>
40 |     <description>Block replication</description>
41 |   </property>
42 |
43 |   <property>
44 |     <name>mapreduce.task.timeout</name>
45 |     <value>7200000</value>
46 |     <description>Set task timeout to 2 hours</description>
47 |   </property>
48 |
49 |   <property>
50 |     <name>mapreduce.map.memory.mb</name>
51 |     <value>3072</value>
52 |     <description></description>
53 |   </property>
54 |
55 |   <property>
56 |     <name>mapreduce.map.java.opts</name>
57 |     <value>-Xmx2700m -XX:OnOutOfMemoryError='kill -9 %p'</value>
58 |     <description></description>
59 |   </property>
60 |
61 |   <property>
62 |     <name>mapreduce.task.io.sort.mb</name>
63 |     <value>200</value>
64 |     <description></description>
65 |   </property>
66 |
67 | </configuration>
--------------------------------------------------------------------------------
/roles/common/tasks/main.yml:
--------------------------------------------------------------------------------
1 | # Gather the nodes' ansible setup facts up front as a cache; otherwise later when checks may fail
2 | - name: Cache ansible setup facts
3 | setup: gather_subset=min
4 |
5 | - name: Switch yum to the 163 mirror
6 | copy: src=CentOS-7-163.repo dest=/etc/yum.repos.d/CentOS-Base.repo
7 | when: v_update_yum_with_163=="true"
8 |
9 | - name: Switch yum to the aliyun mirror
10 | copy: src=CentOS-7-aliyun.repo dest=/etc/yum.repos.d/CentOS-Base.repo
11 | when: v_update_yum_with_aliyun=="true"
12 |
13 | # For CentOS releases below 7.5, run a full yum update (kernel included) first
14 | - name: Upgrade the system kernel
15 | shell: "yum -y update"
16 | when: v_update_kernel=="true" and ansible_distribution_major_version|int == 7 and ansible_distribution_version < "7.5"
17 |
18 | - name: Install required packages
19 | yum: update_cache=yes name={{ item }} state=present
20 | with_items:
21 | - wget
22 | - ntp
23 | - ntpdate
24 | - git
25 | - tar
26 | - rpcbind
27 | - telnet
28 | - vim
29 | - net-tools
30 | - bind-utils
31 | - lrzsz
32 | - epel-release
33 | - bash-completion
34 | - python-pip
35 | when: v_yum_install=="true"
36 |
37 | - name: Upgrade Psycopg2
38 | shell: "pip install --upgrade psycopg2"
39 |
40 | - name: Restart the rpcbind service and enable it at boot
41 | systemd:
42 | name: rpcbind
43 | daemon_reload: yes
44 | state: restarted
45 | enabled: yes
46 |
47 | # Commands to check sync status:
48 | # ntpq -p
49 | # ntpstat
50 | - name: Restart the ntpd service and enable it at boot
51 | systemd:
52 | name: ntpd
53 | daemon_reload: yes
54 | state: restarted
55 | enabled: yes
56 | when: v_ntpdate_install=="true"
57 |
58 | - name: Update the ntp configuration file
59 | copy: src=ntp.conf dest=/etc/ntp.conf
60 | when: v_ntpdate_install=="true"
61 |
62 | - name: Restart the ntpd service again to pick up the new configuration
63 | systemd:
64 | name: ntpd
65 | daemon_reload: yes
66 | state: restarted
67 | enabled: yes
68 | when: v_ntpdate_install=="true"
69 |
70 | - name: Synchronize the time immediately
71 | command: ntpdate -u {{ v_ntpdate_address }}
72 | when: v_ntpdate_install=="true"
73 |
74 | # List cron jobs: crontab -l
75 | # Cron file location: cat /var/spool/cron/root
76 | # centos7, read the time: timedatectl
77 | # centos7, set the time: timedatectl set-ntp no && timedatectl set-time "1982-01-01 00:00:00" && timedatectl set-ntp yes
78 | # Check the job's execution logs:
79 | # tail -n 500 /var/log/cron
80 | # tail -n 500 /var/spool/mail/root
81 | - name: Set up a time-sync cron job
82 | cron:
83 | name: "time sync"
84 | minute: "*/30"
85 | user: root
86 | # hwclock -w: write the NTP-synced system time to the hardware clock
87 | job: "/sbin/ntpdate -u {{ v_ntpdate_address }}; /sbin/hwclock -w"
88 | when: v_ntpdate_install=="true"
89 |
90 | - name: Enable ntp time synchronization
91 | shell: "timedatectl set-ntp yes"
92 | when: v_ntpdate_install=="true"
93 |
94 | - name: Set the hostname
95 | hostname: name={{ node_name }}
96 | when: v_enable_set_hostname=="true"
97 |
98 | - name: Configure the FQDN
99 | lineinfile:
100 | dest: /etc/sysconfig/network
101 | regexp: 'HOSTNAME'
102 | line: 'HOSTNAME={{ ansible_hostname }}'
103 |
104 | # Remove packages installed by default
105 | - name: Remove the CentOS firewall
106 | yum: name={{ item }} state=absent
107 | with_items:
108 | - firewalld
109 | - firewalld-filesystem
110 | - python-firewall
111 | when: ansible_distribution == "CentOS"
112 |
113 | - name: Disable selinux
114 | selinux: state=disabled
115 |
116 | - name: Update the cluster hosts file
117 | lineinfile:
118 | dest: /etc/hosts
119 | regexp: '{{item.key}}'
120 | line: '{{item.value}} {{item.key}}'
121 | with_dict: '{{ v_hosts }}'
122 | when: v_update_hosts=="true"
123 |
124 | # - name: Update the cluster hosts file
125 | # lineinfile:
126 | # dest: /etc/hosts
127 | # regexp: "{{ item }}"
128 | # line: "{{ item }}"
129 | # with_items: "{{ groups['cdh-cluster'] }}"
130 | # when: v_update_hosts=="true"
131 |
132 | # Cloudera recommends setting /proc/sys/vm/swappiness to at most 10; these servers default to 30.
133 | # Use the sysctl command to change the setting at runtime, and edit /etc/sysctl.conf so it survives reboots.
134 | # You can proceed with the installation anyway, but Cloudera Manager may report the hosts as unhealthy due to swapping.
135 | - name: Lower vm.swappiness to reduce swapping to disk
136 | lineinfile:
137 | dest: /etc/sysctl.conf
138 | regexp: "vm.swappiness"
139 | line: "vm.swappiness={{ v_vm_swappiness }}"
140 |
141 | - name: Apply the swappiness setting
142 | shell: "sysctl -p /etc/sysctl.conf"
143 |
144 | # Transparent hugepage defrag is enabled, which can cause serious performance problems.
145 | # Run "echo never > /sys/kernel/mm/transparent_hugepage/defrag"
146 | # and "echo never > /sys/kernel/mm/transparent_hugepage/enabled" to disable it,
147 | # then add the same commands to an init script such as /etc/rc.local so they are reapplied after reboot.
148 | # https://blog.csdn.net/csfreebird/article/details/49307935
149 | - name: Disable transparent hugepage defrag
150 | shell: "echo never > /sys/kernel/mm/transparent_hugepage/defrag && \
151 | echo never > /sys/kernel/mm/transparent_hugepage/enabled"
152 |
153 | - name: Permanently disable transparent hugepage defrag
154 | lineinfile:
155 | dest: /etc/rc.local
156 | regexp: "transparent_hugepage"
157 | line: "echo never > /sys/kernel/mm/transparent_hugepage/defrag && echo never > /sys/kernel/mm/transparent_hugepage/enabled"
158 |
--------------------------------------------------------------------------------
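A quick way to confirm the kernel tweaks after the role has run (a sketch):

  cat /proc/sys/vm/swappiness                        # expect 10
  cat /sys/kernel/mm/transparent_hugepage/enabled    # expect: always madvise [never]
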
/roles/cm/templates/config.ini:
--------------------------------------------------------------------------------
1 | [General]
2 | # Hostname of the CM server.
3 | server_host={{ v_server_ip }}
4 |
5 | # Port that the CM server is listening on.
6 | server_port=7182
7 |
8 | ## It should not normally be necessary to modify these.
9 | # Port that the CM agent should listen on.
10 | # listening_port=9000
11 |
12 | # IP Address that the CM agent should listen on.
13 | # listening_ip=
14 |
15 | # Hostname that the CM agent reports as its hostname. If unset, will be
16 | # obtained in code through something like this:
17 | #
18 | # python -c 'import socket; \
19 | # print socket.getfqdn(), \
20 | # socket.gethostbyname(socket.getfqdn())'
21 | #
22 | # listening_hostname=
23 |
24 | # An alternate hostname to report as the hostname for this host in CM.
25 | # Useful when this agent is behind a load balancer or proxy and all
26 | # inbound communication must connect through that proxy.
27 | # reported_hostname=
28 |
29 | # Port that supervisord should listen on.
30 | # NB: This only takes effect if supervisord is restarted.
31 | # supervisord_port=19001
32 |
33 | # Log file. The supervisord log file will be placed into
34 | # the same directory. Note that if the agent is being started via the
35 | # init.d script, /var/log/cloudera-scm-agent/cloudera-scm-agent.out will
36 | # also have a small amount of output (from before logging is initialized).
37 | # log_file=/var/log/cloudera-scm-agent/cloudera-scm-agent.log
38 |
39 | # Persistent state directory. Directory to store CM agent state that
40 | # persists across instances of the agent process and system reboots.
41 | # Particularly, the agent's UUID is stored here.
42 | # lib_dir=/var/lib/cloudera-scm-agent
43 |
44 | # Parcel directory. Unpacked parcels will be stored in this directory.
45 | # Downloaded parcels will be stored in /../parcel-cache
46 | # parcel_dir=/opt/cloudera/parcels
47 |
48 | # Enable supervisord event monitoring. Used in eager heartbeating, amongst
49 | # other things.
50 | # enable_supervisord_events=true
51 |
52 | # Maximum time to wait (in seconds) for all metric collectors to finish
53 | # collecting data.
54 | max_collection_wait_seconds=10.0
55 |
56 | # Maximum time to wait (in seconds) when connecting to a local role's
57 | # webserver to fetch metrics.
58 | metrics_url_timeout_seconds=30.0
59 |
60 | # Maximum time to wait (in seconds) when connecting to a local TaskTracker
61 | # to fetch task attempt data.
62 | task_metrics_timeout_seconds=5.0
63 |
64 | # The list of non-device (nodev) filesystem types which will be monitored.
65 | monitored_nodev_filesystem_types=nfs,nfs4,tmpfs
66 |
67 | # The list of filesystem types which are considered local for monitoring purposes.
68 | # These filesystems are combined with the other local filesystem types found in
69 | # /proc/filesystems
70 | local_filesystem_whitelist=ext2,ext3,ext4,xfs
71 |
72 | # The largest size impala profile log bundle that this agent will serve to the
73 | # CM server. If the CM server requests more than this amount, the bundle will
74 | # be limited to this size. All instances of this limit being hit are logged to
75 | # the agent log.
76 | impala_profile_bundle_max_bytes=1073741824
77 |
78 | # The largest size stacks log bundle that this agent will serve to the CM
79 | # server. If the CM server requests more than this amount, the bundle will be
80 | # limited to this size. All instances of this limit being hit are logged to the
81 | # agent log.
82 | stacks_log_bundle_max_bytes=1073741824
83 |
84 | # The size to which the uncompressed portion of a stacks log can grow before it
85 | # is rotated. The log will then be compressed during rotation.
86 | stacks_log_max_uncompressed_file_size_bytes=5242880
87 |
88 | # The orphan process directory staleness threshold. If a directory is more stale
89 | # than this amount of seconds, CM agent will remove it.
90 | orphan_process_dir_staleness_threshold=5184000
91 |
92 | # The orphan process directory refresh interval. The CM agent will check the
93 | # staleness of the orphan processes config directory every this amount of
94 | # seconds.
95 | orphan_process_dir_refresh_interval=3600
96 |
97 | # A knob to control the agent logging level. The options are listed as follows:
98 | # 1) DEBUG (set the agent logging level to 'logging.DEBUG')
99 | # 2) INFO (set the agent logging level to 'logging.INFO')
100 | scm_debug=INFO
101 |
102 | # The DNS resolution collection interval in seconds. A java-based test program
103 | # will be executed with at most this frequency to collect java DNS resolution
104 | # metrics. The test program is only executed if the associated health test,
105 | # Host DNS Resolution, is enabled.
106 | dns_resolution_collection_interval_seconds=60
107 |
108 | # The maximum time to wait (in seconds) for the java test program to collect
109 | # java DNS resolution metrics.
110 | dns_resolution_collection_timeout_seconds=30
111 |
112 | # The directory location in which the agent-wide kerberos credential cache
113 | # will be created.
114 | # agent_wide_credential_cache_location=/var/run/cloudera-scm-agent
115 |
116 | [Security]
117 | # Use TLS and certificate validation when connecting to the CM server.
118 | use_tls=0
119 |
120 | # The maximum allowed depth of the certificate chain returned by the peer.
121 | # The default value of 9 matches the default specified in openssl's
122 | # SSL_CTX_set_verify.
123 | max_cert_depth=9
124 |
125 | # A file of CA certificates in PEM format. The file can contain several CA
126 | # certificates identified by
127 | #
128 | # -----BEGIN CERTIFICATE-----
129 | # ... (CA certificate in base64 encoding) ...
130 | # -----END CERTIFICATE-----
131 | #
132 | # sequences. Before, between, and after the certificates text is allowed which
133 | # can be used e.g. for descriptions of the certificates.
134 | #
135 | # The file is loaded once, the first time an HTTPS connection is attempted. A
136 | # restart of the agent is required to pick up changes to the file.
137 | #
138 | # Note that if neither verify_cert_file or verify_cert_dir is set, certificate
139 | # verification will not be performed.
140 | # verify_cert_file=
141 |
142 | # Directory containing CA certificates in PEM format. The files each contain one
143 | # CA certificate. The files are looked up by the CA subject name hash value,
144 | # which must hence be available. If more than one CA certificate with the same
145 | # name hash value exist, the extension must be different (e.g. 9d66eef0.0,
146 | # 9d66eef0.1 etc). The search is performed in the ordering of the extension
147 | # number, regardless of other properties of the certificates. Use the c_rehash
148 | # utility to create the necessary links.
149 | #
150 | # The certificates in the directory are only looked up when required, e.g. when
151 | # building the certificate chain or when actually performing the verification
152 | # of a peer certificate. The contents of the directory can thus be changed
153 | # without an agent restart.
154 | #
155 | # When looking up CA certificates, the verify_cert_file is first searched, then
156 | # those in the directory. Certificate matching is done based on the subject name,
157 | # the key identifier (if present), and the serial number as taken from the
158 | # certificate to be verified. If these data do not match, the next certificate
159 | # will be tried. If a first certificate matching the parameters is found, the
160 | # verification process will be performed; no other certificates for the same
161 | # parameters will be searched in case of failure.
162 | #
163 | # Note that if neither verify_cert_file or verify_cert_dir is set, certificate
164 | # verification will not be performed.
165 | # verify_cert_dir=
166 |
167 | # PEM file containing client private key.
168 | # client_key_file=
169 |
170 | # A command to run which returns the client private key password on stdout
171 | # client_keypw_cmd=
172 |
173 | # If client_keypw_cmd isn't specified, instead a text file containing
174 | # the client private key password can be used.
175 | # client_keypw_file=
176 |
177 | # PEM file containing client certificate.
178 | # client_cert_file=
179 |
180 | ## Location of Hadoop files. These are the CDH locations when installed by
181 | ## packages. Unused when CDH is installed by parcels.
182 | [Hadoop]
183 | #cdh_crunch_home=/usr/lib/crunch
184 | #cdh_flume_home=/usr/lib/flume-ng
185 | #cdh_hadoop_bin=/usr/bin/hadoop
186 | #cdh_hadoop_home=/usr/lib/hadoop
187 | #cdh_hbase_home=/usr/lib/hbase
188 | #cdh_hbase_indexer_home=/usr/lib/hbase-solr
189 | #cdh_hcat_home=/usr/lib/hive-hcatalog
190 | #cdh_hdfs_home=/usr/lib/hadoop-hdfs
191 | #cdh_hive_home=/usr/lib/hive
192 | #cdh_httpfs_home=/usr/lib/hadoop-httpfs
193 | #cdh_hue_home=/usr/share/hue
194 | #cdh_hue_plugins_home=/usr/lib/hadoop
195 | #cdh_impala_home=/usr/lib/impala
196 | #cdh_kudu_home=/usr/lib/kudu
197 | #cdh_llama_home=/usr/lib/llama
198 | #cdh_mr1_home=/usr/lib/hadoop-0.20-mapreduce
199 | #cdh_mr2_home=/usr/lib/hadoop-mapreduce
200 | #cdh_oozie_home=/usr/lib/oozie
201 | #cdh_parquet_home=/usr/lib/parquet
202 | #cdh_pig_home=/usr/lib/pig
203 | #cdh_solr_home=/usr/lib/solr
204 | #cdh_spark_home=/usr/lib/spark
205 | #cdh_sqoop_home=/usr/lib/sqoop
206 | #cdh_sqoop2_home=/usr/lib/sqoop2
207 | #cdh_yarn_home=/usr/lib/hadoop-yarn
208 | #cdh_zookeeper_home=/usr/lib/zookeeper
209 | #hive_default_xml=/etc/hive/conf.dist/hive-default.xml
210 | #webhcat_default_xml=/etc/hive-webhcat/conf.dist/webhcat-default.xml
211 | #jsvc_home=/usr/libexec/bigtop-utils
212 | #tomcat_home=/usr/lib/bigtop-tomcat
213 | #oracle_home=/usr/share/oracle/instantclient
214 |
215 | ## Location of Cloudera Management Services files.
216 | [Cloudera]
217 | #mgmt_home=/usr/share/cmf
218 |
219 | ## Location of JDBC Drivers.
220 | [JDBC]
221 | #cloudera_mysql_connector_jar=/usr/share/java/mysql-connector-java.jar
222 | #cloudera_oracle_connector_jar=/usr/share/java/oracle-connector-java.jar
223 | #By default, postgres jar is found dynamically in $MGMT_HOME/lib
224 | #cloudera_postgresql_jdbc_jar=
--------------------------------------------------------------------------------
/roles/kylin/templates/kylin.properties:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed to the Apache Software Foundation (ASF) under one or more
3 | # contributor license agreements. See the NOTICE file distributed with
4 | # this work for additional information regarding copyright ownership.
5 | # The ASF licenses this file to You under the Apache License, Version 2.0
6 | # (the "License"); you may not use this file except in compliance with
7 | # the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 |
18 |
19 |
20 |
21 | # The commented values below take effect as the default settings
22 | # Uncomment and override them if necessary
23 |
24 |
25 |
26 | #
27 | #### METADATA | ENV ###
28 | #
29 | ## The metadata store in hbase
30 | #kylin.metadata.url=kylin_metadata@hbase
31 | #
32 | ## metadata cache sync retry times
33 | #kylin.metadata.sync-retries=3
34 | #
35 | ## Working folder in HDFS, better be qualified absolute path, make sure user has the right permission to this directory
36 | #kylin.env.hdfs-working-dir=/kylin
37 | #
38 | ## DEV|QA|PROD. DEV will turn on some dev features, QA and PROD has no difference in terms of functions.
39 | #kylin.env=QA
40 | #
41 | ## kylin zk base path
42 | #kylin.env.zookeeper-base-path=/kylin
43 | #
44 | #### SERVER | WEB | RESTCLIENT ###
45 | #
46 | ## Kylin server mode, valid value [all, query, job]
47 | #kylin.server.mode=all
48 | #
49 | ## List of web servers in use, this enables one web server instance to sync up with other servers.
50 | #kylin.server.cluster-servers=localhost:7070
51 | #
52 | ## Display timezone on UI, format like [GMT+N or GMT-N]
53 | #kylin.web.timezone=GMT+8
54 | #
55 | ## Timeout value for the queries submitted through the Web UI, in milliseconds
56 | #kylin.web.query-timeout=300000
57 | #
58 | #kylin.web.cross-domain-enabled=true
59 | #
60 | ##allow user to export query result
61 | #kylin.web.export-allow-admin=true
62 | #kylin.web.export-allow-other=true
63 | #
64 | ## Hide measures in measure list of cube designer, separate by comma
65 | #kylin.web.hide-measures=RAW
66 | #
67 | ##max connections of one route
68 | #kylin.restclient.connection.default-max-per-route=20
69 | #
70 | ##max connections of one rest-client
71 | #kylin.restclient.connection.max-total=200
72 | #
73 | #### PUBLIC CONFIG ###
74 | #kylin.engine.default=2
75 | #kylin.storage.default=2
76 | #kylin.web.hive-limit=20
77 | #kylin.web.help.length=4
78 | #kylin.web.help.0=start|Getting Started|http://kylin.apache.org/docs21/tutorial/kylin_sample.html
79 | #kylin.web.help.1=odbc|ODBC Driver|http://kylin.apache.org/docs21/tutorial/odbc.html
80 | #kylin.web.help.2=tableau|Tableau Guide|http://kylin.apache.org/docs21/tutorial/tableau_91.html
81 | #kylin.web.help.3=onboard|Cube Design Tutorial|http://kylin.apache.org/docs21/howto/howto_optimize_cubes.html
82 | #kylin.web.link-streaming-guide=http://kylin.apache.org/
83 | #kylin.htrace.show-gui-trace-toggle=false
84 | #kylin.web.link-hadoop=
85 | #kylin.web.link-diagnostic=
86 | #kylin.web.contact-mail=
87 | #kylin.server.external-acl-provider=
88 | #
89 | #### SOURCE ###
90 | #
91 | ## Hive client, valid value [cli, beeline]
92 | #kylin.source.hive.client=cli
93 | #
94 | ## Absolute path to beeline shell, can be set to spark beeline instead of the default hive beeline on PATH
95 | #kylin.source.hive.beeline-shell=beeline
96 | #
97 | ## Parameters for beeline client, only necessary if hive client is beeline
98 | ##kylin.source.hive.beeline-params=-n root --hiveconf hive.security.authorization.sqlstd.confwhitelist.append='mapreduce.job.*|dfs.*' -u jdbc:hive2://localhost:10000
99 | #
100 | ## While hive client uses above settings to read hive table metadata,
101 | ## table operations can go through a separate SparkSQL command line, given SparkSQL connects to the same Hive metastore.
102 | #kylin.source.hive.enable-sparksql-for-table-ops=false
103 | ##kylin.source.hive.sparksql-beeline-shell=/path/to/spark-client/bin/beeline
104 | ##kylin.source.hive.sparksql-beeline-params=-n root --hiveconf hive.security.authorization.sqlstd.confwhitelist.append='mapreduce.job.*|dfs.*' -u jdbc:hive2://localhost:10000
105 | #
106 | #kylin.source.hive.keep-flat-table=false
107 | #
108 | ## Hive database name for putting the intermediate flat tables
109 | #kylin.source.hive.database-for-flat-table=default
110 | #
111 | ## Whether redistribute the intermediate flat table before building
112 | #kylin.source.hive.redistribute-flat-table=true
113 | #
114 | #
115 | #### STORAGE ###
116 | #
117 | ## The storage for final cube file in hbase
118 | #kylin.storage.url=hbase
119 | #
120 | ## The prefix for HBase table names
121 | #kylin.storage.hbase.table-name-prefix=KYLIN_
122 | #
123 | ## The namespace for hbase storage
124 | #kylin.storage.hbase.namespace=default
125 | #
126 | ## Compression codec for htable, valid value [none, snappy, lzo, gzip, lz4]
127 | #kylin.storage.hbase.compression-codec=none
128 | #
129 | ## The filesystem serving the HBase cluster, in the format hdfs://hbase-cluster:8020
130 | ## Leave empty if HBase runs on the same cluster as Hive and MapReduce
131 | ##kylin.storage.hbase.cluster-fs=
132 | #
133 | ## The cut size for hbase region, in GB.
134 | #kylin.storage.hbase.region-cut-gb=5
135 | #
136 | ## The hfile size in GB; a smaller hfile gives the hfile-converting MR job more reducers, making it faster.
137 | ## Set 0 to disable this optimization.
138 | #kylin.storage.hbase.hfile-size-gb=2
139 | #
140 | #kylin.storage.hbase.min-region-count=1
141 | #kylin.storage.hbase.max-region-count=500
142 | #
143 | ## Optional owner information for the Kylin platform, e.g. your team's email
144 | ## Currently it is attached to each Kylin htable as an attribute
145 | #kylin.storage.hbase.owner-tag=whoami@kylin.apache.org
146 | #
147 | #kylin.storage.hbase.coprocessor-mem-gb=3
148 | #
149 | ## By default Kylin spills a query's intermediate results to disk when it consumes too much memory.
150 | ## Set it to false if you want the query to abort immediately in that condition.
151 | #kylin.storage.partition.aggr-spill-enabled=true
152 | #
153 | ## The maximum number of bytes each coprocessor is allowed to scan.
154 | ## To allow arbitrarily large scans, set it to 0.
155 | #kylin.storage.partition.max-scan-bytes=3221225472
156 | #
157 | ## The default coprocessor timeout is (hbase.rpc.timeout * 0.9) / 1000 seconds,
158 | ## You can set it to a smaller value. 0 means use default.
159 | ## kylin.storage.hbase.coprocessor-timeout-seconds=0
160 | #
161 | #
162 | #### JOB ###
163 | #
164 | ## Max job retry on error, default 0: no retry
165 | #kylin.job.retry=0
166 | #
167 | ## Max count of concurrent jobs running
168 | #kylin.job.max-concurrent-jobs=10
169 | #
170 | ## The sampling percentage, default 100%
171 | #kylin.job.sampling-percentage=100
172 | #
173 | ## If true, will send email notification on job complete
174 | ##kylin.job.notification-enabled=true
175 | ##kylin.job.notification-mail-enable-starttls=true
176 | ##kylin.job.notification-mail-host=smtp.office365.com
177 | ##kylin.job.notification-mail-port=587
178 | ##kylin.job.notification-mail-username=kylin@example.com
179 | ##kylin.job.notification-mail-password=mypassword
180 | ##kylin.job.notification-mail-sender=kylin@example.com
181 | #
182 | #
183 | #### ENGINE ###
184 | #
185 | ## Time interval to check hadoop job status
186 | #kylin.engine.mr.yarn-check-interval-seconds=10
187 | #
188 | #kylin.engine.mr.reduce-input-mb=500
189 | #
190 | #kylin.engine.mr.max-reducer-number=500
191 | #
192 | #kylin.engine.mr.mapper-input-rows=1000000
193 | #
194 | ## Enable dictionary building in MR reducer
195 | #kylin.engine.mr.build-dict-in-reducer=true
196 | #
197 | ## Number of reducers for fetching UHC column distinct values
198 | #kylin.engine.mr.uhc-reducer-count=1
199 | #
200 | ## Whether to use an additional step to build the UHC dictionary
201 | #kylin.engine.mr.build-uhc-dict-in-additional-step=false
202 | #
203 | #
204 | #### CUBE | DICTIONARY ###
205 | #
206 | #kylin.cube.cuboid-scheduler=org.apache.kylin.cube.cuboid.DefaultCuboidScheduler
207 | #kylin.cube.segment-advisor=org.apache.kylin.cube.CubeSegmentAdvisor
208 | #
209 | ## 'auto', 'inmem', 'layer' or 'random' for testing
210 | #kylin.cube.algorithm=layer
211 | #
212 | ## A smaller threshold prefers layer, a larger threshold prefers in-mem
213 | #kylin.cube.algorithm.layer-or-inmem-threshold=7
214 | #
215 | #kylin.cube.aggrgroup.max-combination=4096
216 | #
217 | #kylin.snapshot.max-mb=300
218 | #
219 | #kylin.cube.cubeplanner.enabled=false
220 | #kylin.cube.cubeplanner.enabled-for-existing-cube=false
221 | #kylin.cube.cubeplanner.expansion-threshold=15.0
222 | #kylin.cube.cubeplanner.recommend-cache-max-size=200
223 | #kylin.cube.cubeplanner.mandatory-rollup-threshold=1000
224 | #kylin.cube.cubeplanner.algorithm-threshold-greedy=10
225 | #kylin.cube.cubeplanner.algorithm-threshold-genetic=23
226 | #
227 | #
228 | #### QUERY ###
229 | #
230 | ## Controls the maximum number of bytes a query is allowed to scan from storage.
231 | ## The default value 0 means no limit.
232 | ## The counterpart kylin.storage.partition.max-scan-bytes sets the maximum per coprocessor.
233 | #kylin.query.max-scan-bytes=0
234 | #
235 | #kylin.query.cache-enabled=true
236 | #
237 | ## TABLE ACL
238 | #kylin.query.security.table-acl-enabled=true
239 | #
240 | ## Usually should not modify this
241 | #kylin.query.interceptors=org.apache.kylin.rest.security.TableInterceptor
242 | #
243 | #kylin.query.escape-default-keyword=false
244 | #
245 | ## Usually should not modify this
246 | #kylin.query.transformers=org.apache.kylin.query.util.DefaultQueryTransformer,org.apache.kylin.query.util.KeywordDefaultDirtyHack
247 | #
248 | #### SECURITY ###
249 | #
250 | ## Spring security profile, options: testing, ldap, saml
251 | ## With the "testing" profile, users can log in with a predefined name/password such as ADMIN/KYLIN
252 | #kylin.security.profile=testing
253 | #
254 | ## Admin roles in LDAP, for ldap and saml
255 | #kylin.security.acl.admin-role=admin
256 | #
257 | ## LDAP authentication configuration
258 | #kylin.security.ldap.connection-server=ldap://ldap_server:389
259 | #kylin.security.ldap.connection-username=
260 | #kylin.security.ldap.connection-password=
261 | #
262 | ## LDAP user account directory
263 | #kylin.security.ldap.user-search-base=
264 | #kylin.security.ldap.user-search-pattern=
265 | #kylin.security.ldap.user-group-search-base=
266 | #kylin.security.ldap.user-group-search-filter=(|(member={0})(memberUid={1}))
267 | #
268 | ## LDAP service account directory
269 | #kylin.security.ldap.service-search-base=
270 | #kylin.security.ldap.service-search-pattern=
271 | #kylin.security.ldap.service-group-search-base=
272 | #
273 | ### SAML configurations for SSO
274 | ## SAML IDP metadata file location
275 | #kylin.security.saml.metadata-file=classpath:sso_metadata.xml
276 | #kylin.security.saml.metadata-entity-base-url=https://hostname/kylin
277 | #kylin.security.saml.keystore-file=classpath:samlKeystore.jks
278 | #kylin.security.saml.context-scheme=https
279 | #kylin.security.saml.context-server-name=hostname
280 | #kylin.security.saml.context-server-port=443
281 | #kylin.security.saml.context-path=/kylin
282 | #
283 | #### SPARK ENGINE CONFIGS ###
284 | #
285 | ## Hadoop conf folder; exported as "HADOOP_CONF_DIR" when running spark-submit
286 | ## It must contain the core, yarn, hive, and hbase site xmls in one folder
287 | ##kylin.env.hadoop-conf-dir=/etc/hadoop/conf
288 | #
289 | ## Used to estimate the number of RDD partitions
290 | #kylin.engine.spark.rdd-partition-cut-mb=10
291 | #
292 | ## Minimum number of RDD partitions
293 | #kylin.engine.spark.min-partition=1
294 | #
295 | ## Maximum number of RDD partitions
296 | #kylin.engine.spark.max-partition=5000
297 | #
298 | ## Spark conf (default is in spark/conf/spark-defaults.conf)
299 | #kylin.engine.spark-conf.spark.master=yarn
300 | ##kylin.engine.spark-conf.spark.submit.deployMode=cluster
301 | #kylin.engine.spark-conf.spark.yarn.queue=default
302 | #kylin.engine.spark-conf.spark.executor.memory=1G
303 | #kylin.engine.spark-conf.spark.executor.cores=2
304 | #kylin.engine.spark-conf.spark.executor.instances=1
305 | #kylin.engine.spark-conf.spark.eventLog.enabled=true
306 | #kylin.engine.spark-conf.spark.eventLog.dir=hdfs\:///kylin/spark-history
307 | #kylin.engine.spark-conf.spark.history.fs.logDirectory=hdfs\:///kylin/spark-history
308 | #kylin.engine.spark-conf.spark.hadoop.yarn.timeline-service.enabled=false
309 | #
310 | ## Manually upload the spark-assembly jar to HDFS and set this property to avoid re-uploading the jar at runtime
311 | ##kylin.engine.spark-conf.spark.yarn.archive=hdfs://namenode:8020/kylin/spark/spark-libs.jar
312 | ##kylin.engine.spark-conf.spark.io.compression.codec=org.apache.spark.io.SnappyCompressionCodec
313 | #
314 | ## uncomment for HDP
315 | ##kylin.engine.spark-conf.spark.driver.extraJavaOptions=-Dhdp.version=current
316 | ##kylin.engine.spark-conf.spark.yarn.am.extraJavaOptions=-Dhdp.version=current
317 | ##kylin.engine.spark-conf.spark.executor.extraJavaOptions=-Dhdp.version=current
318 | #
319 | #
320 | #### QUERY PUSH DOWN ###
321 | #
322 | ##kylin.query.pushdown.runner-class-name=org.apache.kylin.query.adhoc.PushDownRunnerJdbcImpl
323 | #
324 | ##kylin.query.pushdown.update-enabled=false
325 | ##kylin.query.pushdown.jdbc.url=jdbc:hive2://sandbox:10000/default
326 | ##kylin.query.pushdown.jdbc.driver=org.apache.hive.jdbc.HiveDriver
327 | ##kylin.query.pushdown.jdbc.username=hive
328 | ##kylin.query.pushdown.jdbc.password=
329 | #
330 | ##kylin.query.pushdown.jdbc.pool-max-total=8
331 | ##kylin.query.pushdown.jdbc.pool-max-idle=8
332 | ##kylin.query.pushdown.jdbc.pool-min-idle=0
333 | #
334 | #### JDBC Data Source
335 | ##kylin.source.jdbc.connection-url=
336 | ##kylin.source.jdbc.driver=
337 | ##kylin.source.jdbc.dialect=
338 | ##kylin.source.jdbc.user=
339 | ##kylin.source.jdbc.pass=
340 | ##kylin.source.jdbc.sqoop-home=
341 | ##kylin.source.jdbc.filed-delimiter=|
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CDH6
2 |
3 | CDH (Cloudera's Distribution Including Apache Hadoop) is a Hadoop distribution maintained by Cloudera. It is built on stable Apache Hadoop releases, incorporates many patches, and can be used directly in production.
4 |
5 | Because the installation involves multiple servers, the whole process is packaged in a project-driven form to make installing CDH6 easier and less error-prone.
6 |
7 | **Note: this project targets CentOS 7.5 + CDH 6.0.1.**
8 |
9 | Running this project assumes basic familiarity with:
10 |
11 | - linux
12 | - ansible
13 | - docker
14 |
15 | ## Installation
16 |
17 | Official installation guide: [Cloudera Enterprise 6.0.x Installation Guide](https://www.cloudera.com/documentation/enterprise/6/6.0/topics/installation.html)
18 |
19 | ### Preparation
20 |
21 | #### Prepare the servers for CDH6
22 |
23 | Hardware and software requirements: [Cloudera Enterprise 6 Requirements and Supported Versions](https://www.cloudera.com/documentation/enterprise/6/release-notes/topics/rg_requirements_supported_versions.html)
24 |
25 | IP | HostName | OS | Cores | Memory | Disk (GB) | Remark
26 | --------------|-----------------------------------------------------|------------|-------|--------|------|---------------
27 | 10.240.114.34 | bjds-kubernetes-node-pre-10-240-114-34-vm.belle.lan | CentOS 7.5 | 8 | 16G | 250 | Server & Agent
28 | 10.240.114.38 | bjds-kubernetes-node-pre-10-240-114-38-vm.belle.lan | CentOS 7.5 | 8 | 16G | 250 | Agent
29 | 10.240.114.65 | bjds-kubernetes-node-pre-10-240-114-65-vm.belle.lan | CentOS 7.5 | 8 | 16G | 250 | Agent
30 | 10.240.114.67 | bjds-kubernetes-node-pre-10-240-114-67-vm.belle.lan | CentOS 7.5 | 8 | 16G | 250 | Agent
31 | 10.240.114.54 | bjds-kubernetes-node-pre-10-240-114-54-vm.belle.lan | CentOS 7.5 | 8 | 16G | 250 | MySQL 5.7.24
32 | 10.240.114.45 | bjds-kubernetes-node-pre-10-240-114-45-vm.belle.lan | CentOS 7.5 | 8 | 16G | 250 | Download server
33 |
34 | #### Prepare the download server
35 |
36 | The Cloudera archive provides official download locations:
37 |
38 | - [cm6](https://archive.cloudera.com/cm6/6.0.1/redhat7/yum/RPMS/x86_64/)
39 | - [cdh6](https://archive.cloudera.com/cdh6/6.0.1/parcels/)
40 |
41 | These archives are slow to reach from mainland China and the packages are large, so the best approach is to stand up a similar download server on the intranet and fetch the packages once, which greatly speeds up the whole installation.
42 |
43 | To keep things simple, the download server runs in docker.
44 |
45 | First, [install docker + docker-compose](https://www.zorin.xin/docker-manual/install/Centos7.html).
46 |
47 | Then initialize the download server on that machine:
48 |
49 | ```sh
50 | # sfds = static file download service
51 |
52 | # Create the sfds config directory
53 | mkdir -p /data/docker_volumn/sfds
54 |
55 | # Create the data directory
56 | mkdir -p /data/sfds
57 |
58 | # Create the compose file directory
59 | mkdir -p /data/docker_compose
60 |
61 | # Write the sfds (nginx) config file
62 | tee /data/docker_volumn/sfds/nginx.conf <<-'EOF'
63 | worker_processes 1;
64 | pid /var/run/nginx.pid;
65 | events {
66 | worker_connections 1024;
67 | }
68 | http {
69 | include /etc/nginx/mime.types;
70 | default_type application/octet-stream;
71 | sendfile on;
72 | keepalive_timeout 65;
73 | server
74 | {
75 | listen 9000; # port
76 | server_name localhost; # server name
77 | root /usr/share/nginx/html; # root directory served by the index
78 | autoindex on; # enable directory listing
79 | autoindex_exact_size off; # show approximate sizes (KB/MB/GB) instead of exact bytes
80 | autoindex_localtime on; # show local time instead of GMT
81 | }
82 | }
83 | EOF
84 |
85 | # Write the compose file
86 | tee /data/docker_compose/docker-compose.yml <<-'EOF'
87 | version: "3"
88 | services:
89 | # file download server
90 | sfds:
91 | image: bjddd192/nginx:1.10.1
92 | container_name: sfds
93 | restart: always
94 | ports:
95 | - "8066:9000"
96 | environment:
97 | - TZ=Asia/Shanghai
98 | volumes:
99 | - /data/docker_volumn/sfds/nginx.conf:/etc/nginx/nginx.conf
100 | - /data/sfds:/usr/share/nginx/html
101 | network_mode: bridge
102 | EOF
103 |
104 | # Start the download server
105 | docker-compose -f /data/docker_compose/docker-compose.yml up -d
106 | ```
107 |
108 | Once the download server is up, open `http://serverIP:8066`; if the page loads, the download server was deployed successfully.
109 |
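You can also check from the command line (a quick sketch; 10.240.114.45 is the download server from the table above):

```sh
# The sfds container should be running, and the root listing should return HTTP 200
docker ps --filter name=sfds
curl -I http://10.240.114.45:8066/
```
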
110 | #### Download the packages
111 |
112 | [Cloudera Manager 6 Version and Download Information](https://www.cloudera.com/documentation/enterprise/6/release-notes/topics/rg_cm_6_version_download.html)
113 |
114 | Create local directories mirroring the official download paths:
115 |
116 | ```sh
117 | mkdir -p /data/sfds/cdh6/6.0.1/parcels
118 | mkdir -p /data/sfds/cm6/6.0.1/redhat7/yum/RPMS/x86_64
119 | ```
120 |
121 | Then download the official packages into the matching directories (a download sketch follows the tree below); the final layout looks like this:
122 |
123 | ```cmd
124 | $ tree /data/sfds/cdh6/6.0.1/parcels
125 | /data/sfds/cdh6/6.0.1/parcels
126 | |-- CDH-6.0.1-1.cdh6.0.1.p0.590678-el7.parcel
127 | |-- CDH-6.0.1-1.cdh6.0.1.p0.590678-el7.parcel.sha256
128 | `-- manifest.json
129 |
130 | $ tree /data/sfds/cm6/6.0.1/redhat7/yum/RPMS/x86_64
131 | /data/sfds/cm6/6.0.1/redhat7/yum/RPMS/x86_64
132 | |-- cloudera-manager-agent-6.0.1-610811.el7.x86_64.rpm
133 | |-- cloudera-manager-daemons-6.0.1-610811.el7.x86_64.rpm
134 | |-- cloudera-manager-server-6.0.1-610811.el7.x86_64.rpm
135 | |-- cloudera-manager-server-db-2-6.0.1-610811.el7.x86_64.rpm
136 | `-- oracle-j2sdk1.8-1.8.0+update141-1.x86_64.rpm
137 | ```
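
As a download sketch (assuming the official archive URLs above are reachable; otherwise fetch the files elsewhere and copy them over):

```sh
base_cdh=https://archive.cloudera.com/cdh6/6.0.1/parcels
base_cm=https://archive.cloudera.com/cm6/6.0.1/redhat7/yum/RPMS/x86_64

# CDH parcel, checksum, and manifest
cd /data/sfds/cdh6/6.0.1/parcels
wget $base_cdh/CDH-6.0.1-1.cdh6.0.1.p0.590678-el7.parcel
wget $base_cdh/CDH-6.0.1-1.cdh6.0.1.p0.590678-el7.parcel.sha256
wget $base_cdh/manifest.json

# Cloudera Manager RPMs
cd /data/sfds/cm6/6.0.1/redhat7/yum/RPMS/x86_64
wget $base_cm/cloudera-manager-agent-6.0.1-610811.el7.x86_64.rpm
wget $base_cm/cloudera-manager-daemons-6.0.1-610811.el7.x86_64.rpm
wget $base_cm/cloudera-manager-server-6.0.1-610811.el7.x86_64.rpm
wget $base_cm/cloudera-manager-server-db-2-6.0.1-610811.el7.x86_64.rpm
wget $base_cm/oracle-j2sdk1.8-1.8.0+update141-1.x86_64.rpm
```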
138 |
139 | #### Build a local YUM repository
140 |
141 | ```sh
142 | yum -y install createrepo
143 | cd /data/sfds/cm6/6.0.1/redhat7/yum
144 | createrepo .
145 |
146 | # Write the repo file
147 | tee /data/sfds/cm6/6.0.1/redhat7/yum/cloudera-manager.repo <<-'EOF'
148 | [cloudera-manager]
149 | name=Cloudera Manager 6.0.1
150 | baseurl=http://10.240.114.45:8066/cm6/6.0.1/redhat7/yum/
151 | gpgcheck=false
152 | enabled=true
153 | EOF
154 | ```
155 |
156 | Verify the repository:
157 |
158 | ```sh
159 | wget http://10.240.114.45:8066/cm6/6.0.1/redhat7/yum/cloudera-manager.repo -P /etc/yum.repos.d/
160 | rpm --import http://10.240.114.45:8066/cm6/6.0.1/redhat7/yum/RPM-GPG-KEY-cloudera
161 | yum makecache
162 | yum search cloudera
163 | yum search cloudera-manager-daemons cloudera-manager-agent cloudera-manager-server
164 | ```
165 |
166 | If the packages can be found, the local YUM repository is working.
167 |
168 | #### Database preparation
169 |
170 | Use MySQL 5.5.45+, 5.6.26+, or 5.7.6+; this environment uses 5.7.24.
171 |
172 | ```sql
173 | -- Drop the databases (only needed when redeploying)
174 | -- drop database db_cdh6_scm;
175 | -- drop database db_cdh6_amon;
176 | -- drop database db_cdh6_rmon;
177 | -- drop database db_cdh6_hue;
178 | -- drop database db_cdh6_metastore;
179 | -- drop database db_cdh6_sentry;
180 | -- drop database db_cdh6_nav;
181 | -- drop database db_cdh6_navms;
182 | -- drop database db_cdh6_oozie;
183 |
184 | -- Create the databases
185 | create database db_cdh6_scm default character set utf8 default collate utf8_general_ci;
186 | create database db_cdh6_amon default character set utf8 default collate utf8_general_ci;
187 | create database db_cdh6_rmon default character set utf8 default collate utf8_general_ci;
188 | create database db_cdh6_hue default character set utf8 default collate utf8_general_ci;
189 | create database db_cdh6_metastore default character set utf8 default collate utf8_general_ci;
190 | create database db_cdh6_sentry default character set utf8 default collate utf8_general_ci;
191 | create database db_cdh6_nav default character set utf8 default collate utf8_general_ci;
192 | create database db_cdh6_navms default character set utf8 default collate utf8_general_ci;
193 | create database db_cdh6_oozie default character set utf8 default collate utf8_general_ci;
194 |
195 | -- For simplicity this exercise uses one shared database user; in production, give each database its own user.
196 | grant all on db_cdh6_scm.* to 'user_cdh6'@'%' identified by '123456';
197 | grant all on db_cdh6_amon.* to 'user_cdh6'@'%' identified by '123456';
198 | grant all on db_cdh6_rmon.* to 'user_cdh6'@'%' identified by '123456';
199 | grant all on db_cdh6_hue.* to 'user_cdh6'@'%' identified by '123456';
200 | grant all on db_cdh6_metastore.* to 'user_cdh6'@'%' identified by '123456';
201 | grant all on db_cdh6_sentry.* to 'user_cdh6'@'%' identified by '123456';
202 | grant all on db_cdh6_nav.* to 'user_cdh6'@'%' identified by '123456';
203 | grant all on db_cdh6_navms.* to 'user_cdh6'@'%' identified by '123456';
204 | grant all on db_cdh6_oozie.* to 'user_cdh6'@'%' identified by '123456';
205 |
206 | -- Reload privileges
207 | flush privileges;
208 | ```
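
A quick connectivity check from a cluster node (a minimal sketch; 10.240.114.54 is the MySQL host from the server table, using the account granted above):

```sh
# Should list the nine db_cdh6_* databases
mysql -h 10.240.114.54 -u user_cdh6 -p123456 -e 'show databases;'
```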
209 |
210 | #### Ansible configuration
211 |
212 | [Ansible installation and configuration](https://www.zorin.xin/2018/08/05/ansible-install-and-config/)
213 |
214 | Here, Ansible is installed on a Mac as the control node.
215 |
216 | ```sh
217 | # Set up key-based SSH trust to the servers
218 | ssh-copy-id -p 60777 root@10.240.114.34
219 | ssh-copy-id -p 60777 root@10.240.114.38
220 | ssh-copy-id -p 60777 root@10.240.114.65
221 | ssh-copy-id -p 60777 root@10.240.114.67
222 |
223 | # Test connectivity
224 | ansible cdh-cluster -i inventory/uat_cdh6.ini -m ping
225 | ```
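
Purely as an illustration, a minimal sketch of the shape `inventory/uat_cdh6.ini` takes (the group name and SSH port come from the commands above; the host-to-group mapping here is an assumption, and the file shipped in the repo is authoritative):

```ini
# hypothetical sketch - see inventory/uat_cdh6.ini in the repo for the real layout
[cdh-cluster]
10.240.114.34 ansible_port=60777
10.240.114.38 ansible_port=60777
10.240.114.65 ansible_port=60777
10.240.114.67 ansible_port=60777
```

Groups such as `kylin`, used later in this README, are defined the same way.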
226 |
227 | ### Deploy CDH
228 |
229 | #### Install CM and CDH
230 |
231 | ```sh
232 | cd /Users/yanglei/01_git/github_me/ansible-playbooks-cdh6
233 |
234 | # Test connectivity
235 | ansible cdh-cluster -i inventory/uat_cdh6.ini -m ping
236 | ansible cdh-cluster -i inventory/uat_cdh6.ini -m command -a "date"
237 |
238 | # Install the common components
239 | ansible-playbook -t common -i inventory/uat_cdh6.ini -e @inventory/uat_cdh6.yml 01.cdh.yml
240 |
241 | # Install the JDK
242 | ansible-playbook -t jdk -i inventory/uat_cdh6.ini -e @inventory/uat_cdh6.yml 01.cdh.yml
243 |
244 | # Set up passwordless SSH from the server to the agents
245 | ansible-playbook -t ssh -i inventory/uat_cdh6.ini -e @inventory/uat_cdh6.yml 01.cdh.yml
246 |
247 | # Install SCM
248 | ansible-playbook -t cm -i inventory/uat_cdh6.ini -e @inventory/uat_cdh6.yml 01.cdh.yml
249 | # If the database setup step fails with: java.sql.SQLException: Statement violates GTID consistency: CREATE TABLE ... SELECT,
250 | # temporarily disable MySQL's GTID mode:
251 | # set global gtid_mode=on_permissive;
252 | # set global gtid_mode=off_permissive;
253 | # set global gtid_mode=off;
254 | # set global enforce_gtid_consistency=off;
255 | # and restore it when done:
256 | # set global enforce_gtid_consistency=on;
257 | # set global gtid_mode=off_permissive;
258 | # set global gtid_mode=on_permissive;
259 | # set global gtid_mode=on;
260 |
261 | # Stage the CDH offline packages
262 | ansible-playbook -t cdh -i inventory/uat_cdh6.ini -e @inventory/uat_cdh6.yml 01.cdh.yml
263 |
264 | # On the cdh-server node, check the service status
265 | # and verify that the scm database schema was created automatically.
266 | # If both look good, the SCM installation is complete.
267 | systemctl status cloudera-scm-agent.service
268 | systemctl status cloudera-scm-server.service
269 | # Tail the server log
270 | tail -f /var/log/cloudera-scm-server/cloudera-scm-server.log
271 | ```
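
Before switching to the web console, a quick check that CM is up and listening (the login URL is the one used in the next section):

```sh
# Expect HTTP 200 from the login page once the server has finished starting
curl -s -o /dev/null -w '%{http_code}\n' http://10.240.114.34:7180/cmf/login
```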
272 |
273 | #### Cluster configuration
274 |
275 | 
276 | Open the web console to configure the cluster, e.g. http://10.240.114.34:7180/cmf/login (default username and password: admin).
277 |
278 | 
279 | Click "Continue".
280 |
281 | 
282 | Accept the license agreement.
283 |
284 | 
285 | The free edition is chosen here; pick a paid edition if you need one.
286 |
287 | 
288 | Click "Continue".
289 |
290 | 
291 | Select "Currently Managed Hosts".
292 |
293 | 
294 | Once CDH-6.0.1 shows up as a selectable version, click "Continue".
295 |
296 | 
297 | Wait for the CDH parcel installation to finish, then click "Continue".
298 |
299 | 
300 |
301 | 
302 | Click "Finish".
303 |
304 | 
305 | Select services according to your needs.
306 |
307 | 
308 | Customize the role assignments.
309 |
310 | 
311 | Configure the databases.
312 |
313 | 
314 | Review the changes; any specific directory or parameter settings can be corrected here.
315 |
316 | 
317 | Wait for the first run to complete.
318 |
319 | 
320 |
321 | 
322 |
323 | 
324 | Once you land in the admin console, the deployment is essentially complete.
325 |
326 | ### Deploy Kylin
327 |
328 | ```sh
329 | cd /Users/yanglei/01_git/oschina/ansible/big_data
330 |
331 | ansible kylin -i inventory/uat_cdh6.ini -m ping
332 |
333 | # Install Kylin
334 | ansible-playbook -i inventory/uat_cdh6.ini kylin.yml
335 |
336 | # Symlink the CDH jars directory into Spark (note: the ansible file module does not expand $SPARK_HOME on the remote host, so replace it with the actual Spark path)
337 | ansible kylin -i inventory/uat_cdh6.ini -m file -a 'src=/opt/cloudera/parcels/CDH/jars dest=$SPARK_HOME/jars state=link'
338 |
339 | # Shorten the HIVE_LIB path with a symlink so Kylin does not fail at startup with "argument list too long" (the same note applies to $HIVE_HOME)
340 | ansible kylin -i inventory/uat_cdh6.ini -m file -a 'src=$HIVE_HOME/lib dest=/hivelib state=link'
341 |
342 | # Check the environment
343 | su - hdfs
344 | # hdfs dfs -chmod -R 777 /
345 | $KYLIN_HOME/bin/check-env.sh
346 | $KYLIN_HOME/bin/find-hive-dependency.sh
347 | $KYLIN_HOME/bin/find-hbase-dependency.sh
348 | $KYLIN_HOME/bin/find-spark-dependency.sh
349 |
350 | # Start Kylin
351 | $KYLIN_HOME/bin/kylin.sh start
352 |
353 | # Stop Kylin
354 | $KYLIN_HOME/bin/kylin.sh stop
355 |
356 | # Verify via the web UI
357 | http://172.20.32.131:7070/kylin
358 | # The initial username/password is ADMIN/KYLIN
359 |
360 | # Build the sample cube to test Kylin
361 | $KYLIN_HOME/bin/sample.sh
362 | ```
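
A scripted version of the web check (same URL as above; Kylin can take a while to start, so retry until this returns 200):

```sh
curl -s -o /dev/null -w '%{http_code}\n' http://172.20.32.131:7070/kylin
```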
363 |
364 | ### Uninstall CDH
365 |
366 | ```sh
367 | ansible-playbook -i inventory/uat_cdh6.ini 99.clean_all.yml
368 |
369 | # Then drop the databases that were created
370 | ```
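
For the database cleanup, the drop statements from the preparation section can be replayed (a sketch; it assumes an account allowed to drop the db_cdh6_* databases on the MySQL host 10.240.114.54):

```sh
mysql -h 10.240.114.54 -u root -p <<'EOF'
drop database db_cdh6_scm;
drop database db_cdh6_amon;
drop database db_cdh6_rmon;
drop database db_cdh6_hue;
drop database db_cdh6_metastore;
drop database db_cdh6_sentry;
drop database db_cdh6_nav;
drop database db_cdh6_navms;
drop database db_cdh6_oozie;
EOF
```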
371 |
372 | ### CDH configuration
373 |
374 | #### Directory locations
375 |
376 | Path | Description
377 | ---------------------------------------|------------------------------
378 | /var/lib/cloudera-scm-server | Server data directory
379 | /var/log/cloudera-scm-* | CM log directories
380 | /opt/cloudera/parcels/ | Install directory for Hadoop-related services
381 | /opt/cloudera/parcel-repo/ | Downloaded service packages (parcels)
382 | /opt/cloudera/parcel-cache | Parcel download cache
383 | /opt/cloudera/parcels/CDH/jars | All CDH jar files
384 | /etc/cloudera-scm-agent/config.ini | CM Agent configuration file
385 | /etc/cloudera-scm-server/ | CM Server configuration directory
386 | /etc/cloudera-scm-server/db.properties | CM Server database configuration
387 | /etc/hadoop/* | Hadoop client configuration
388 | /etc/hive/ | Hive configuration directory
389 | ... |
390 |
391 | #### Environment variables
392 |
393 | CDH ships its own environment variable script:
394 |
395 | ```sh
396 | cat /opt/cloudera/parcels/CDH/meta/cdh_env.sh
397 | #!/bin/bash
398 | CDH_DIRNAME=${PARCEL_DIRNAME:-"CDH-6.0.1-1.cdh6.0.1.p0.590678"}
399 | export CDH_HADOOP_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hadoop
400 | export CDH_MR1_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hadoop-0.20-mapreduce
401 | export CDH_HDFS_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hadoop-hdfs
402 | export CDH_HTTPFS_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hadoop-httpfs
403 | export CDH_MR2_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hadoop-mapreduce
404 | export CDH_YARN_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hadoop-yarn
405 | export CDH_HBASE_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hbase
406 | export CDH_ZOOKEEPER_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/zookeeper
407 | export CDH_HIVE_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hive
408 | export CDH_HUE_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hue
409 | export CDH_OOZIE_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/oozie
410 | export CDH_HUE_PLUGINS_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hadoop
411 | export CDH_FLUME_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/flume-ng
412 | export CDH_PIG_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/pig
413 | export CDH_HCAT_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hive-hcatalog
414 | export CDH_SENTRY_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/sentry
415 | export JSVC_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/bigtop-utils
416 | export CDH_HADOOP_BIN=$CDH_HADOOP_HOME/bin/hadoop
417 | export CDH_IMPALA_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/impala
418 | export CDH_SOLR_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/solr
419 | export CDH_HBASE_INDEXER_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hbase-solr
420 | export SEARCH_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/search
421 | export CDH_SPARK_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/spark
422 | export WEBHCAT_DEFAULT_XML=$PARCELS_ROOT/$CDH_DIRNAME/etc/hive-webhcat/conf.dist/webhcat-default.xml
423 | export CDH_KMS_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/hadoop-kms
424 | export CDH_PARQUET_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/parquet
425 | export CDH_AVRO_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/avro
426 | export CDH_KAFKA_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/kafka
427 | export CDH_KUDU_HOME=$PARCELS_ROOT/$CDH_DIRNAME/lib/kudu
428 | ```
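
The script resolves everything relative to PARCELS_ROOT (and PARCEL_DIRNAME), which the CM agent normally provides; to use it from an interactive shell, export PARCELS_ROOT first (a minimal sketch, assuming the default parcel root from the directory table above):

```sh
# /opt/cloudera/parcels is the default parcel root; CDH is a symlink to the versioned parcel
export PARCELS_ROOT=/opt/cloudera/parcels
source /opt/cloudera/parcels/CDH/meta/cdh_env.sh
echo "$CDH_SPARK_HOME" "$CDH_HIVE_HOME"
```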
429 |
430 | #### Other tips
431 |
432 | From the Cloudera Manager UI you can add or remove cluster hosts, add services to the cluster, and so on.
433 |
434 | The Cloudera Manager UI loads google-analytics, which is very slow from mainland China; it can be disabled under:
435 |
436 | Administration -> Settings -> Other -> uncheck "Allow Usage Data Collection"
437 |
438 | ### References
439 |
440 | #### Deployment
441 |
442 | [Configuring an NTP server on CentOS 7](https://www.cnblogs.com/harrymore/p/9566229.html)
443 |
444 | [Setting up an NTP time server on CentOS 7](https://blog.csdn.net/zzy5066/article/details/79036674)
445 |
446 | [Time synchronization with NTP on CentOS 7](http://www.cnblogs.com/yangxiansen/p/7860008.html)
447 |
448 | [How to assign roles in a Hadoop cluster](https://blog.csdn.net/chenguangchun1993/article/details/79164857)
449 |
450 | [Cloudera Manager and CDH 6.0.1 installation and uninstallation, with step-by-step screenshots](https://blog.csdn.net/tototuzuoquan/article/details/85111018)
451 |
452 | [Installing and deploying CDH6 on CentOS 7.5](https://blog.csdn.net/TXBSW/article/details/84648269)
453 |
454 | [A detailed guide to installing the latest CDH 6.0.1](https://blog.csdn.net/u010003835/article/details/85007946)
455 |
456 | [Detailed walkthrough of installing Cloudera Manager and CDH 6.0.1 on CentOS 7](https://www.cnblogs.com/wzlinux/p/10183357.html)
457 |
458 | [CDH 5.15 uninstallation guide](https://blog.csdn.net/weixin_35852328/article/details/81774627)
459 |
460 | #### Configuration
461 |
462 | [CDH5 quick start guide](https://www.jianshu.com/p/72dc1c591647)
463 |
--------------------------------------------------------------------------------