├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── compose ├── Chart.yaml ├── templates │ ├── _helpers.tpl │ └── docker-compose.yml └── values.yaml ├── config ├── dashboards │ ├── README.md │ ├── overview.json │ ├── pd.json │ ├── tidb.json │ └── tikv_pull.json ├── drainer.toml ├── grafana-datasource.json ├── grafana │ ├── grafana.ini │ └── provisioning │ │ ├── dashboards │ │ └── dashboards.yaml │ │ └── datasources │ │ └── datasources.yaml ├── overview-dashboard.json ├── pd-dashboard.json ├── pd-nightly-tiflash.toml ├── pd.rules.yml ├── pd.toml ├── prometheus.yml ├── pump.toml ├── spark-defaults.conf ├── tidb-dashboard.json ├── tidb.rules.yml ├── tidb.toml ├── tiflash-learner-nightly.toml ├── tiflash-nightly.toml ├── tikv-dashboard.json ├── tikv.rules.yml └── tikv.toml ├── dashboard-installer ├── Dockerfile ├── README.md └── dashboards │ ├── LICENSE │ ├── datasource.json │ ├── dests.json │ ├── grafana-config-copy.py │ ├── overview.json │ ├── pd.json │ ├── tidb-dashboard-installer.sh │ ├── tidb.json │ └── tikv.json ├── docker-compose-binlog.yml ├── docker-compose-test.yml ├── docker-compose-tiflash-nightly.yml ├── docker-compose.yml ├── docker-swarm.yml ├── docker └── debug │ ├── Dockerfile │ └── run_flamegraph.sh ├── pd └── Dockerfile ├── tidb-binlog └── Dockerfile ├── tidb-vision └── Dockerfile ├── tidb └── Dockerfile ├── tikv └── Dockerfile ├── tispark ├── Dockerfile ├── conf │ └── log4j.properties ├── spark-2.3.3 │ └── session.py ├── spark-2.4.3 │ └── session.py └── tispark-tests │ └── tests │ ├── loaddata.sh │ └── tests.py └── tools └── container_debug /.gitignore: -------------------------------------------------------------------------------- 1 | /values.yaml 2 | /generated-docker-compose.yml 3 | /data 4 | /logs 5 | /pd/bin 6 | /tikv/bin 7 | /tidb/bin 8 | /tidb-vision/tidb-vision 9 | /tmp 10 | /docker/dashboard_installer/dashboard/overview.json 11 | /docker/dashboard_installer/dashboard/pd.json 12 | /docker/dashboard_installer/dashboard/tidb.json 13 | /docker/dashboard_installer/dashboard/tikv.json 14 | /.idea 15 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | 3 | services: 4 | - docker 5 | 6 | before_install: 7 | - curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - 8 | - sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" 9 | - sudo apt-get update 10 | - sudo apt-get -y install docker-ce # update docker version 11 | - sudo curl -L https://github.com/docker/compose/releases/download/1.21.2/docker-compose-$(uname -s)-$(uname -m) -o /usr/local/bin/docker-compose 12 | - docker -v 13 | - docker-compose -v 14 | - docker-compose up -d 15 | - sleep 10 # wait all components get ready 16 | - docker-compose ps 17 | - docker images 18 | - docker network ls 19 | - docker-compose logs 20 | 21 | script: 22 | - docker ps -a --format="{{.Names}} {{.Image}} {{.Status}}" | grep -v 'Up' | grep -v 'Exited (0)' | awk '{print} END {if (NR>0) {exit 1;}}' 23 | - docker-compose -f docker-compose-test.yml run --rm tispark-tests bash /opt/tispark-tests/tests/loaddata.sh # add some data for tests 24 | # - docker-compose -f docker-compose-test.yml run --rm tispark-tests /opt/spark/bin/spark-submit /opt/spark/tests/tests.py # run tispark tests 25 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TiDB docker-compose 2 | 3 | [![Build Status](https://travis-ci.org/pingcap/tidb-docker-compose.svg?branch=master)](https://travis-ci.org/pingcap/tidb-docker-compose) 4 | 5 | **WARNING: This is for testing only, DO NOT USE IN PRODUCTION!** 6 | 7 | ## Requirements 8 | 9 | * Docker >= 17.03 10 | * Docker Compose >= 1.6.0 11 | 12 | > **Note:** [Legacy Docker Toolbox](https://docs.docker.com/toolbox/toolbox_install_mac/) users must migrate to [Docker for Mac](https://store.docker.com/editions/community/docker-ce-desktop-mac), since it is tested that tidb-docker-compose cannot be started on Docker Toolbox and Docker Machine. 13 | > **Note:** It is recommended to disable SELinux. 14 | 15 | ## Quick start 16 | 17 | ```bash 18 | $ git clone https://github.com/pingcap/tidb-docker-compose.git 19 | $ cd tidb-docker-compose && docker-compose pull # Get the latest Docker images 20 | $ sudo setenforce 0 # Only on Linux 21 | $ docker-compose up -d 22 | $ mysql -h 127.0.0.1 -P 4000 -u root 23 | ``` 24 | 25 | * Access monitor at http://localhost:3000 (login with admin/admin if you want to modify grafana) 26 | 27 | * Access [tidb-vision](https://github.com/pingcap/tidb-vision) at http://localhost:8010 28 | 29 | * Access Spark Web UI at http://localhost:8080 30 | and access [TiSpark](https://github.com/pingcap/tispark) through spark://127.0.0.1:7077 31 | 32 | ## Docker Swarm 33 | 34 | You can also use Docker Swarm to deploy a TiDB Platform cluster, and then you can scale the service using `docker stack` commands. 
35 | 36 | ```bash 37 | $ docker swarm init # if your docker daemon is not already part of a swarm 38 | $ mkdir -p data logs 39 | $ docker stack deploy tidb -c docker-swarm.yml 40 | $ mysql -h 127.0.0.1 -P 4000 -u root 41 | ``` 42 | 43 | After deploying the stack, you can scale the number of TiDB Server instances in the cluster like this: 44 | 45 | ```bash 46 | $ docker service scale tidb_tidb=2 47 | ``` 48 | 49 | Docker Swarm automatically load-balances across the containers that implement a scaled service, which you can see if you execute `select @@hostname` several times: 50 | 51 | ```bash 52 | $ mysql -h 127.0.0.1 -P 4000 -u root -te 'select @@hostname' 53 | +--------------+ 54 | | @@hostname | 55 | +--------------+ 56 | | 340092e0ec9e | 57 | +--------------+ 58 | $ mysql -h 127.0.0.1 -P 4000 -u root -te 'select @@hostname' 59 | +--------------+ 60 | | @@hostname | 61 | +--------------+ 62 | | e6f05ffe6274 | 63 | +--------------+ 64 | $ mysql -h 127.0.0.1 -P 4000 -u root -te 'select @@hostname' 65 | +--------------+ 66 | | @@hostname | 67 | +--------------+ 68 | | 340092e0ec9e | 69 | +--------------+ 70 | ``` 71 | 72 | If you want to connect to specific backend instances, for example to test concurrency by ensuring that you are connecting to distinct instances of tidb-server, you can use the `docker service ps` command to assemble a hostname for each container: 73 | 74 | ```bash 75 | $ docker service ps --no-trunc --format '{{.Name}}.{{.ID}}' tidb_tidb 76 | tidb_tidb.1.x3sc2sd66a88phsj103ohr6qq 77 | tidb_tidb.2.lk53apndq394cega46at853zw 78 | ``` 79 | 80 | To be able to resolve those hostnames, it's easiest to run the MySQL client in a container that has access to the swarm network: 81 | 82 | ```bash 83 | $ docker run --rm --network=tidb_default arey/mysql-client -h tidb_tidb.1.x3sc2sd66a88phsj103ohr6qq -P 4000 -u root -t -e 'select @@version' 84 | +-----------------------------------------+ 85 | | @@version | 86 | +-----------------------------------------+ 87 | | 5.7.25-TiDB-v3.0.0-beta.1-40-g873d9514b | 88 | +-----------------------------------------+ 89 | ``` 90 | 91 | To loop through all instances of TiDB Server, you can use a bash loop like this: 92 | 93 | ```bash 94 | for host in $(docker service ps --no-trunc --format '{{.Name}}.{{.ID}}' tidb_tidb) 95 | do docker run --rm --network tidb_default arey/mysql-client \ 96 | -h "$host" -P 4000 -u root -te "select @@hostname" 97 | done 98 | ``` 99 | 100 | To stop all services and remove all containers in the TiDB stack, execute `docker stack rm tidb`. 101 | 102 | ## Customize TiDB Cluster 103 | 104 | ### Configuration 105 | 106 | * config/pd.toml is copied from [PD repo](https://github.com/pingcap/pd/tree/master/conf) 107 | * config/tikv.toml is copied from [TiKV repo](https://github.com/pingcap/tikv/tree/master/etc) 108 | * config/tidb.toml is copied from [TiDB repo](https://github.com/pingcap/tidb/tree/master/config) 109 | * config/pump.toml is copied from [TiDB-Binlog repo](https://github.com/pingcap/tidb-binlog/tree/master/cmd/pump) 110 | * config/drainer.toml is copied from [TiDB-Binlog repo](https://github.com/pingcap/tidb-binlog/tree/master/cmd/drainer) 111 | 112 | If you find these configuration files outdated or mismatched with your TiDB version, you can copy them from their upstream repos and change their metrics address to `pushgateway:9091`, as shown in the sketch below. Also, `max-open-files` is set to `1024` in tikv.toml to simplify quick start on Linux, because setting up ulimit with Docker on Linux is quite tedious.
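As a rough illustration (not an exact recipe, since the metrics key names can differ between component versions), after copying a fresh config file from an upstream repo you would point its metrics address back at the local pushgateway, for example:

```bash
# Sketch only: assumes the freshly copied tikv.toml still has an empty metrics
# address; check the actual key name for your component version before running.
sed -i 's/address = ""/address = "pushgateway:9091"/' config/tikv.toml
grep -rn 'pushgateway:9091' config/   # verify which files now push metrics
```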
113 | 114 | The config/\*-dashboard.json files are copied from the [TiDB-Ansible repo](https://github.com/pingcap/tidb-ansible/tree/master/scripts) 115 | 116 | You can customize the TiDB cluster configuration by editing docker-compose.yml and the above config files if you know what you're doing. 117 | 118 | But editing these files manually is tedious and error-prone, so a template engine is strongly recommended. See the following steps. 119 | 120 | ### Install Helm 121 | 122 | [Helm](https://helm.sh) is used as the template rendering engine 123 | 124 | ``` 125 | curl https://raw.githubusercontent.com/kubernetes/helm/master/scripts/get | bash 126 | ``` 127 | 128 | Or, if you use a Mac, you can install Helm with Homebrew: `brew install kubernetes-helm` 129 | 130 | ### Bring up TiDB cluster 131 | 132 | ```bash 133 | $ git clone https://github.com/pingcap/tidb-docker-compose.git 134 | $ cd tidb-docker-compose 135 | $ vi compose/values.yaml # customize cluster size, docker images, port mappings, etc. 136 | $ helm template compose > generated-docker-compose.yaml 137 | $ docker-compose -f generated-docker-compose.yaml pull # Get the latest Docker images 138 | $ docker-compose -f generated-docker-compose.yaml up -d 139 | 140 | # If you want to bring up the TiDB cluster with Binlog support 141 | $ vi compose/values.yaml # set tidb.enableBinlog to true 142 | $ helm template compose > generated-docker-compose-binlog.yaml 143 | $ docker-compose -f generated-docker-compose-binlog.yaml up -d # or you can use the 'docker-compose-binlog.yml' file directly 144 | 145 | # Note: If the value of drainer.destDBType is "kafka" and 146 | # you want to consume the kafka messages outside the docker containers, 147 | # please update kafka.advertisedHostName with your docker host IP in compose/values.yaml and 148 | # regenerate the 'generated-docker-compose-binlog.yaml' file 149 | ``` 150 | 151 | You can also build the Docker images yourself for development and testing. 152 | 153 | * Build from binary 154 | 155 | For pd, tikv, tidb, pump and drainer, comment their `image` and `buildPath` fields out, then copy their binaries to pd/bin/pd-server, tikv/bin/tikv-server, tidb/bin/tidb-server, tidb-binlog/bin/pump and tidb-binlog/bin/drainer. 156 | 157 | These binaries can be built locally or downloaded from https://download.pingcap.org/tidb-latest-linux-amd64.tar.gz 158 | 159 | For tidbVision, comment its `image` and `buildPath` fields out, then copy the tidb-vision repo to tidb-vision/tidb-vision. 160 | 161 | * Build from source 162 | 163 | Leave the `image` field of pd, tikv, tidb and tidbVision empty and set their `buildPath` field to their source directories. 164 | 165 | For example, if your local tikv source directory is $GOPATH/src/github.com/pingcap/tikv, just set tikv's `buildPath` to `$GOPATH/src/github.com/pingcap/tikv`. 166 | 167 | *Note:* Compiling tikv from source consumes lots of memory; the memory of Docker for Mac needs to be increased to more than 6GB. 168 | 169 | [tidb-vision](https://github.com/pingcap/tidb-vision) is a visualization page for the TiDB cluster; it's a WIP project and can be disabled by commenting `tidbVision` out. 170 | 171 | [TiSpark](https://github.com/pingcap/tispark) is a thin layer built for running Apache Spark on top of TiDB/TiKV to answer complex OLAP queries. 172 | 173 | #### Host network mode (Linux) 174 | 175 | *Note:* Docker for Mac uses a Linux virtual machine, so host network mode will not expose any services to the host machine; using this mode on Mac is therefore pointless.
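For reference, enabling host network mode (explained in the next paragraphs) comes down to one switch in compose/values.yaml plus regenerating the compose file. A minimal sketch, assuming the default keys of the bundled values.yaml:

```bash
# Sketch: flip the network mode and regenerate the compose file
sed -i 's/^networkMode: bridge/networkMode: host/' compose/values.yaml
helm template compose > generated-docker-compose.yml
```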
176 | 177 | When using TiKV directly without TiDB, host network mode must be enabled. In this mode, all services use the host network without isolation, so you can access all of them on the host machine. 178 | 179 | You can enable this mode by setting `networkMode: host` in compose/values.yaml and regenerating docker-compose.yml. In this mode, the prometheus address in the configuration files should be changed from `prometheus:9090` to `127.0.0.1:9090`, and the pushgateway address should be changed from `pushgateway:9091` to `127.0.0.1:9091`. 180 | 181 | These modifications can be made with: 182 | ```bash 183 | # Note: this is only needed when networkMode is `host` 184 | sed -i 's/pushgateway:9091/127.0.0.1:9091/g' config/* 185 | sed -i 's/prometheus:9090/127.0.0.1:9090/g' config/* 186 | ``` 187 | 188 | After all the above is done, you can start the TiDB cluster as usual with `docker-compose -f generated-docker-compose.yml up -d` 189 | 190 | ### Debug TiDB/TiKV/PD instances 191 | Prerequisites: 192 | 193 | Pprof: This is a tool for visualization and analysis of profiling data. Follow [these instructions](https://github.com/google/pprof#building-pprof) to install pprof. 194 | 195 | Graphviz: [http://www.graphviz.org/](http://www.graphviz.org/), used to generate graphic visualizations of profiles. 196 | 197 | * Debug TiDB or PD instances 198 | 199 | ```bash 200 | ### Use the following command to start a web server for graphic visualizations of golang program profiles 201 | $ ./tools/container_debug -s pd0 -p /pd-server -w 202 | ``` 203 | The above command produces graphic visualizations of `pd0`'s profiles that can be accessed through the browser. 204 | 205 | * Debug TiKV instances 206 | 207 | ```bash 208 | ### step 1: select a tikv instance (here tikv0) and specify the binary path in the container to enter the debug container 209 | $ ./tools/container_debug -s tikv0 -p /tikv-server 210 | 211 | ### after step 1, we can generate a flame graph for tikv0 in the debug container 212 | $ ./run_flamegraph.sh 1 # 1 is tikv0's process id 213 | 214 | ### we can also fetch tikv0's stack information with GDB in the debug container 215 | $ gdb /tikv-server 1 -batch -ex "thread apply all bt" -ex "info threads" 216 | ``` 217 | 218 | ### Access TiDB cluster 219 | 220 | TiDB uses ports 4000 (MySQL) and 10080 (status) by default 221 | 222 | ```bash 223 | $ mysql -h 127.0.0.1 -P 4000 -u root --comments 224 | ``` 225 | 226 | Grafana uses port 3000 by default, so open your browser at http://localhost:3000 to view the monitoring dashboards 227 | 228 | If you enabled tidb-vision, you can view it at http://localhost:8010 229 | 230 | ### Access Spark shell and load TiSpark 231 | 232 | Insert some sample data into the TiDB cluster: 233 | 234 | ```bash 235 | $ docker-compose exec tispark-master bash 236 | $ cd /opt/spark/data/tispark-sample-data 237 | $ mysql --local-infile=1 -h tidb -P 4000 -u root --comments < dss.ddl 238 | ``` 239 | 240 | After the sample data is loaded into the TiDB cluster, you can access Spark Shell by `docker-compose exec tispark-master /opt/spark/bin/spark-shell`. 241 | 242 | ```bash 243 | $ docker-compose exec tispark-master /opt/spark/bin/spark-shell 244 | ... 245 | Spark context available as 'sc' (master = local[*], app id = local-1527045927617). 246 | Spark session available as 'spark'.
247 | Welcome to 248 | ____ __ 249 | / __/__ ___ _____/ /__ 250 | _\ \/ _ \/ _ `/ __/ '_/ 251 | /___/ .__/\_,_/_/ /_/\_\ version 2.1.1 252 | /_/ 253 | 254 | Using Scala version 2.11.8 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_172) 255 | Type in expressions to have them evaluated. 256 | Type :help for more information. 257 | 258 | scala> import org.apache.spark.sql.TiContext 259 | ... 260 | scala> val ti = new TiContext(spark) 261 | ... 262 | scala> ti.tidbMapDatabase("TPCH_001") 263 | ... 264 | scala> spark.sql("select count(*) from lineitem").show 265 | +--------+ 266 | |count(1)| 267 | +--------+ 268 | | 60175| 269 | +--------+ 270 | ``` 271 | 272 | You can also access Spark with Python or R using the following commands: 273 | 274 | ``` 275 | docker-compose exec tispark-master /opt/spark/bin/pyspark 276 | docker-compose exec tispark-master /opt/spark/bin/sparkR 277 | ``` 278 | 279 | More documents about TiSpark can be found [here](https://github.com/pingcap/tispark). 280 | -------------------------------------------------------------------------------- /compose/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | description: tidb-docker-compose 3 | name: tidb-docker-compose 4 | version: 0.1.0 5 | home: https://github.com/pingcap/tidb-docker-compose 6 | sources: 7 | - https://github.com/pingcap/tidb-docker-compose 8 | keywords: 9 | - newsql 10 | - htap 11 | - database 12 | - mysql 13 | - raft 14 | -------------------------------------------------------------------------------- /compose/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{- define "initial_cluster" }} 2 | {{- range until (.Values.pd.size | int) }} 3 | {{- if . -}} 4 | , 5 | {{- end -}} 6 | pd{{ . }}=http:// 7 | {{- if eq $.Values.networkMode "host" -}} 8 | 127.0.0.1:{{add (add ($.Values.pd.port | int) 10000) . }} 9 | {{- else -}} 10 | pd{{ . }}:2380 11 | {{- end -}} 12 | {{- end -}} 13 | {{- end -}} 14 | 15 | {{- define "pd_list" }} 16 | {{- range until (.Values.pd.size | int) }} 17 | {{- if . -}} 18 | , 19 | {{- end -}} 20 | {{- if eq $.Values.networkMode "host" -}} 21 | 127.0.0.1:{{ add ($.Values.pd.port | int) . }} 22 | {{- else -}} 23 | pd{{ . }}:2379 24 | {{- end -}} 25 | {{- end -}} 26 | {{- end -}} 27 | 28 | {{- define "pd_urls" }} 29 | {{- range until (.Values.pd.size | int) }} 30 | {{- if . -}} 31 | , 32 | {{- end -}} 33 | {{- if eq $.Values.networkMode "host" -}} 34 | http://127.0.0.1:{{ add ($.Values.pd.port | int) . }} 35 | {{- else -}} 36 | http://pd{{ . }}:2379 37 | {{- end -}} 38 | {{- end -}} 39 | {{- end -}} 40 | 41 | {{- define "zoo_servers" }} 42 | {{- range until (.Values.zookeeper.size | int) }} 43 | {{- if eq $.Values.networkMode "host" -}} 44 | {{- if . }} {{ end }}server.{{ add . 1 }}=127.0.0.1:{{ add . 2888 }}:{{ add . 3888 }} 45 | {{- else -}} 46 | {{- if . }} {{ end }}server.{{ add . 1 }}=zoo{{ . }}:2888:3888 47 | {{- end -}} 48 | {{- end -}} 49 | {{- end -}} 50 | 51 | {{- define "zoo_connect" }} 52 | {{- range until (.Values.zookeeper.size | int) }} 53 | {{- if . -}} 54 | , 55 | {{- end -}} 56 | {{- if eq $.Values.networkMode "host" -}} 57 | 127.0.0.1:{{ add $.Values.zookeeper.port . }} 58 | {{- else -}} 59 | zoo{{ add . }}:{{ add $.Values.zookeeper.port . 
}} 60 | {{- end -}} 61 | {{- end -}} 62 | {{- end -}} 63 | -------------------------------------------------------------------------------- /compose/templates/docker-compose.yml: -------------------------------------------------------------------------------- 1 | {{- $pdSize := .Values.pd.size | int }} 2 | {{- $tikvSize := .Values.tikv.size | int }} 3 | {{- $pdPort := .Values.pd.port | int }} 4 | {{- $pdPeerPort := add $pdPort 10000 }} 5 | {{- $tikvPort := .Values.tikv.port | int -}} 6 | {{- $pumpSize := .Values.pump.size | int }} 7 | {{- $pumpPort := .Values.pump.port | int }} 8 | {{- $zooSize := .Values.zookeeper.size | int }} 9 | {{- $zooPort := .Values.zookeeper.port | int }} 10 | {{- $kafkaSize := .Values.kafka.size | int }} 11 | {{- $kafkaPort := .Values.kafka.port | int }} 12 | version: '2.1' 13 | 14 | services: 15 | {{- range until $pdSize }} 16 | pd{{ . }}: 17 | {{- if $.Values.pd.image }} 18 | image: {{ $.Values.pd.image }} 19 | {{- else }} 20 | image: pd:latest 21 | build: 22 | context: {{ $.Values.pd.buildPath | default "./pd" }} 23 | dockerfile: {{ $.Values.pd.dockerfile | default "Dockerfile" }} 24 | {{- end }} 25 | {{- if eq $.Values.networkMode "host" }} 26 | network_mode: host 27 | {{- else }} 28 | ports: 29 | - "2379" 30 | {{- end }} 31 | volumes: 32 | - ./config/pd.toml:/pd.toml:ro 33 | - {{ $.Values.dataDir }}:/data 34 | - {{ $.Values.logsDir }}:/logs 35 | command: 36 | - --name=pd{{ . }} 37 | {{- if eq $.Values.networkMode "host" }} 38 | - --client-urls=http://0.0.0.0:{{ add $pdPort . }} 39 | - --peer-urls=http://0.0.0.0:{{ add $pdPeerPort . }} 40 | - --advertise-client-urls=http://127.0.0.1:{{ add $pdPort . }} 41 | - --advertise-peer-urls=http://127.0.0.1:{{ add $pdPeerPort . }} 42 | {{- else }} 43 | - --client-urls=http://0.0.0.0:2379 44 | - --peer-urls=http://0.0.0.0:2380 45 | - --advertise-client-urls=http://pd{{ . }}:2379 46 | - --advertise-peer-urls=http://pd{{ . }}:2380 47 | {{- end }} 48 | - --initial-cluster={{- template "initial_cluster" $ }} 49 | - --data-dir=/data/pd{{ . }} 50 | - --config=/pd.toml 51 | - --log-file=/logs/pd{{ . }}.log 52 | # sysctls: 53 | # net.core.somaxconn: 32768 54 | # ulimits: 55 | # nofile: 56 | # soft: 1000000 57 | # hard: 1000000 58 | restart: on-failure 59 | {{ end }} 60 | 61 | {{- range until $tikvSize }} 62 | tikv{{ . }}: 63 | {{- if $.Values.tikv.image }} 64 | image: {{ $.Values.tikv.image }} 65 | {{- else }} 66 | image: tikv:latest 67 | build: 68 | context: {{ $.Values.tikv.buildPath | default "./tikv" }} 69 | dockerfile: {{ $.Values.tikv.dockerfile | default "Dockerfile" }} 70 | {{- end }} 71 | {{- if eq $.Values.networkMode "host" }} 72 | network_mode: host 73 | {{- end }} 74 | volumes: 75 | - ./config/tikv.toml:/tikv.toml:ro 76 | - {{ $.Values.dataDir }}:/data 77 | - {{ $.Values.logsDir }}:/logs 78 | command: 79 | {{- if eq $.Values.networkMode "host" }} 80 | - --addr=0.0.0.0:{{ add $tikvPort . }} 81 | - --advertise-addr=127.0.0.1:{{ add $tikvPort . }} 82 | {{- else }} 83 | - --addr=0.0.0.0:20160 84 | - --advertise-addr=tikv{{ . }}:20160 85 | {{- end }} 86 | - --data-dir=/data/tikv{{ . }} 87 | - --pd={{- template "pd_list" $ }} 88 | - --config=/tikv.toml 89 | - --log-file=/logs/tikv{{ . 
}}.log 90 | depends_on: 91 | {{- range until $pdSize }} 92 | - "pd{{.}}" 93 | {{- end }} 94 | # sysctls: 95 | # net.core.somaxconn: 32768 96 | # ulimits: 97 | # nofile: 98 | # soft: 1000000 99 | # hard: 1000000 100 | restart: on-failure 101 | {{ end }} 102 | 103 | {{- if .Values.tidb }} 104 | {{- if .Values.tidb.enableBinlog }} 105 | {{- range until $pumpSize }} 106 | pump{{ . }}: 107 | {{- if $.Values.pump.image }} 108 | image: {{ $.Values.pump.image }} 109 | {{- else }} 110 | image: tidb-binlog:latest 111 | build: 112 | context: {{ $.Values.pump.buildPath | default "./tidb-binlog" }} 113 | dockerfile: {{ $.Values.pump.dockerfile | default "Dockerfile" }} 114 | {{- end }} 115 | {{- if eq $.Values.networkMode "host" }} 116 | network_mode: host 117 | {{- end }} 118 | volumes: 119 | - ./config/pump.toml:/pump.toml:ro 120 | - {{ $.Values.dataDir }}:/data 121 | - {{ $.Values.logsDir }}:/logs 122 | command: 123 | - /pump 124 | {{- if eq $.Values.networkMode "host" }} 125 | - --addr=0.0.0.0:{{ add $pumpPort . }} 126 | - --advertise-addr=127.0.0.1:{{ add $pumpPort . }} 127 | {{- else }} 128 | - --addr=0.0.0.0:8250 129 | - --advertise-addr=pump{{ . }}:8250 130 | {{- end }} 131 | - --data-dir=/data/pump{{ . }} 132 | - --log-file=/logs/pump{{ . }}.log 133 | - --node-id=pump{{ . }} 134 | - --pd-urls={{- template "pd_urls" $ }} 135 | - --config=/pump.toml 136 | depends_on: 137 | {{- range until $pdSize }} 138 | - "pd{{.}}" 139 | {{- end }} 140 | restart: on-failure 141 | {{ end }} 142 | drainer: 143 | {{- if $.Values.drainer.image }} 144 | image: {{ $.Values.drainer.image }} 145 | {{- else }} 146 | image: tidb-binlog:latest 147 | build: 148 | context: {{ $.Values.drainer.buildPath | default "./tidb-binlog" }} 149 | dockerfile: {{ $.Values.drainer.dockerfile | default "Dockerfile" }} 150 | {{- end }} 151 | {{- if eq $.Values.networkMode "host" }} 152 | network_mode: host 153 | {{- end }} 154 | volumes: 155 | - ./config/drainer.toml:/drainer.toml:ro 156 | - {{ $.Values.dataDir }}:/data 157 | - {{ $.Values.logsDir }}:/logs 158 | command: 159 | - /drainer 160 | - --addr=0.0.0.0:8249 161 | - --data-dir=/data/data.drainer 162 | - --log-file=/logs/drainer.log 163 | - --pd-urls={{- template "pd_urls" $ }} 164 | - --config=/drainer.toml 165 | - --initial-commit-ts=0 166 | {{- if eq $.Values.drainer.destDBType "kafka" }} 167 | - --dest-db-type=kafka 168 | {{- end }} 169 | depends_on: 170 | {{- range until $pdSize }} 171 | - "pd{{ . }}" 172 | {{- end }} 173 | {{- if eq $.Values.drainer.destDBType "kafka" }} 174 | {{- range until $kafkaSize }} 175 | - "kafka{{ . }}" 176 | {{- end }} 177 | {{- end }} 178 | restart: on-failure 179 | 180 | {{- if eq $.Values.drainer.destDBType "kafka" }} 181 | {{ range until $zooSize }} 182 | zoo{{ . }}: 183 | image: zookeeper:latest 184 | {{- if eq $.Values.networkMode "host" }} 185 | network_mode: host 186 | {{- else }} 187 | ports: 188 | - "{{ add $zooPort . }}:{{ add $zooPort . }}" 189 | {{- end }} 190 | environment: 191 | ZOO_MY_ID: {{ add . 1 }} 192 | ZOO_PORT: {{ add $zooPort . }} 193 | ZOO_SERVERS: {{ template "zoo_servers" $ }} 194 | volumes: 195 | - {{ $.Values.dataDir }}/zoo{{ . }}/data:/data 196 | - {{ $.Values.dataDir }}/zoo{{ . }}/datalog:/datalog 197 | restart: on-failure 198 | {{ end }} 199 | 200 | {{- range until $kafkaSize }} 201 | kafka{{ . }}: 202 | image: {{ $.Values.kafka.image }} 203 | {{- if eq $.Values.networkMode "host" }} 204 | network_mode: host 205 | {{- else }} 206 | ports: 207 | - "{{ add . $kafkaPort }}:{{ add . 
$kafkaPort }}" 208 | {{- end }} 209 | environment: 210 | KAFKA_BROKER_ID: {{ add . 1 }} 211 | KAFKA_LOG_DIRS: /data/kafka-logs 212 | {{- if $.Values.kafka.advertisedHostName }} 213 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://{{ $.Values.kafka.advertisedHostName }}:{{ add . $kafkaPort }} 214 | {{- else }} 215 | {{- if eq $.Values.networkMode "host" }} 216 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://127.0.0.1:{{ add . $kafkaPort }} 217 | {{- else }} 218 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka{{ . }}:{{ add . $kafkaPort }} 219 | {{- end }} 220 | {{- end }} 221 | KAFKA_LISTENERS: PLAINTEXT://0.0.0.0:{{ add . $kafkaPort }} 222 | KAFKA_ZOOKEEPER_CONNECT: {{ template "zoo_connect" $ }} 223 | volumes: 224 | - {{ $.Values.dataDir }}/kafka-logs/kafka{{ . }}:/data/kafka-logs 225 | - {{ $.Values.logsDir }}/kafka{{ . }}:/opt/kafka/logs 226 | - /var/run/docker.sock:/var/run/docker.sock 227 | depends_on: 228 | {{- range until $zooSize }} 229 | - "zoo{{ . }}" 230 | {{- end }} 231 | restart: on-failure 232 | {{- end }} 233 | {{- end }} 234 | {{- end }} 235 | {{ end }} 236 | 237 | {{- if .Values.tidb }} 238 | tidb: 239 | {{- if .Values.tidb.image }} 240 | image: {{ .Values.tidb.image }} 241 | {{- else }} 242 | image: tidb:latest 243 | build: 244 | context: {{ .Values.tidb.buildPath | default "./tidb" }} 245 | dockerfile: {{ .Values.tidb.dockerfile | default "Dockerfile" }} 246 | {{- end }} 247 | {{- if eq .Values.networkMode "host" }} 248 | network_mode: host 249 | {{- else }} 250 | ports: 251 | - "{{ .Values.tidb.mysqlPort }}:4000" 252 | - "{{ .Values.tidb.statusPort }}:10080" 253 | {{- end }} 254 | volumes: 255 | - ./config/tidb.toml:/tidb.toml:ro 256 | - {{ .Values.logsDir }}:/logs 257 | command: 258 | - --store=tikv 259 | - --path={{- template "pd_list" $ }} 260 | - --config=/tidb.toml 261 | - --log-file=/logs/tidb.log 262 | - --advertise-address=tidb 263 | {{- if .Values.tidb.enableBinlog }} 264 | - --enable-binlog=true 265 | {{- end }} 266 | depends_on: 267 | {{- range until $tikvSize }} 268 | - "tikv{{.}}" 269 | {{- end }} 270 | {{- if .Values.tidb.enableBinlog }} 271 | {{- range until $pumpSize }} 272 | - "pump{{.}}" 273 | {{- end }} 274 | {{- end }} 275 | # sysctls: 276 | # net.core.somaxconn: 32768 277 | # ulimits: 278 | # nofile: 279 | # soft: 1000000 280 | # hard: 1000000 281 | restart: on-failure 282 | {{ end }} 283 | 284 | {{- if .Values.tispark }} 285 | tispark-master: 286 | {{- if .Values.tispark.image }} 287 | image: {{ .Values.tispark.image }} 288 | {{- else }} 289 | image: tispark:latest 290 | build: 291 | context: {{ .Values.tispark.buildPath | default "./tispark" }} 292 | dockerfile: {{ .Values.tispark.dockerfile | default "Dockerfile" }} 293 | {{- end }} 294 | command: 295 | - /opt/spark/sbin/start-master.sh 296 | volumes: 297 | - ./config/spark-defaults.conf:/opt/spark/conf/spark-defaults.conf:ro 298 | environment: 299 | SPARK_MASTER_PORT: {{ .Values.tispark.masterPort }} 300 | SPARK_MASTER_WEBUI_PORT: {{ .Values.tispark.webuiPort }} 301 | ports: 302 | - "{{ .Values.tispark.masterPort }}:7077" 303 | - "{{ .Values.tispark.webuiPort }}:8080" 304 | depends_on: 305 | {{- range until $tikvSize }} 306 | - "tikv{{.}}" 307 | {{- end }} 308 | restart: on-failure 309 | {{- range until ( .Values.tispark.workerCount | int ) }} 310 | tispark-slave{{ . 
}}: 311 | {{- if $.Values.tispark.image }} 312 | image: {{ $.Values.tispark.image }} 313 | {{- else }} 314 | image: tispark:latest 315 | build: 316 | context: {{ $.Values.tispark.buildPath | default "./tispark" }} 317 | dockerfile: {{ $.Values.tispark.dockerfile | default "Dockerfile" }} 318 | {{- end }} 319 | command: 320 | - /opt/spark/sbin/start-slave.sh 321 | - spark://tispark-master:7077 322 | volumes: 323 | - ./config/spark-defaults.conf:/opt/spark/conf/spark-defaults.conf:ro 324 | environment: 325 | SPARK_WORKER_WEBUI_PORT: {{ add $.Values.tispark.workerWebUIPort . }} 326 | ports: 327 | - "{{ add $.Values.tispark.workerWebUIPort . }}:{{ add $.Values.tispark.workerWebUIPort . }}" 328 | depends_on: 329 | - tispark-master 330 | restart: on-failure 331 | {{- end }} 332 | {{ end }} 333 | 334 | {{- if .Values.tidbVision }} 335 | tidb-vision: 336 | {{- if .Values.tidbVision.image }} 337 | image: {{ .Values.tidbVision.image }} 338 | {{- else }} 339 | image: tidb-vision:latest 340 | build: 341 | context: {{ .Values.tidbVision.buildPath | default "./tidb-vision" }} 342 | dockerfile: {{ .Values.tidbVision.dockerfile | default "Dockerfile" }} 343 | {{- end }} 344 | environment: 345 | PD_ENDPOINT: {{if eq .Values.networkMode "host"}}127.0.0.1:{{.Values.pd.port}}{{else}}pd0:2379{{end}} 346 | {{- if eq .Values.networkMode "host" }} 347 | PORT: {{ .Values.tidbVision.port }} 348 | network_mode: host 349 | {{- else }} 350 | ports: 351 | - "{{ .Values.tidbVision.port }}:8010" 352 | {{- end }} 353 | restart: on-failure 354 | {{- end }} 355 | 356 | {{- if .Values.prometheus }} 357 | pushgateway: 358 | image: {{ .Values.pushgateway.image }} 359 | {{- if eq .Values.networkMode "host" }} 360 | command: 361 | - --web.listen-address=0.0.0.0:{{.Values.pushgateway.port}} 362 | - --log.level={{ .Values.pushgateway.logLevel }} 363 | network_mode: host 364 | {{- else }} 365 | command: 366 | - --log.level={{ .Values.pushgateway.logLevel }} 367 | {{- end }} 368 | restart: on-failure 369 | 370 | prometheus: 371 | user: root 372 | image: {{ .Values.prometheus.image }} 373 | command: 374 | - --log.level={{ .Values.prometheus.logLevel }} 375 | - --storage.tsdb.path=/data/prometheus 376 | - --config.file=/etc/prometheus/prometheus.yml 377 | {{- if eq .Values.networkMode "host" }} 378 | - --web.listen-address=0.0.0.0:{{.Values.prometheus.port}} 379 | network_mode: host 380 | {{- else }} 381 | ports: 382 | - "{{ .Values.prometheus.port }}:9090" 383 | {{- end }} 384 | volumes: 385 | - ./config/prometheus.yml:/etc/prometheus/prometheus.yml:ro 386 | - ./config/pd.rules.yml:/etc/prometheus/pd.rules.yml:ro 387 | - ./config/tikv.rules.yml:/etc/prometheus/tikv.rules.yml:ro 388 | - ./config/tidb.rules.yml:/etc/prometheus/tidb.rules.yml:ro 389 | - {{ .Values.dataDir }}:/data 390 | restart: on-failure 391 | {{- end }} 392 | 393 | {{- if .Values.grafana }} 394 | grafana: 395 | image: {{ .Values.grafana.image }} 396 | user: "0" 397 | {{- if eq .Values.networkMode "host" }} 398 | network_mode: host 399 | environment: 400 | GF_SERVER_HTTP_PORT: {{ .Values.grafana.port }} 401 | GF_LOG_LEVEL: {{ .Values.grafana.logLevel }} 402 | {{- else }} 403 | environment: 404 | GF_LOG_LEVEL: {{ .Values.grafana.logLevel }} 405 | GF_PATHS_PROVISIONING: /etc/grafana/provisioning 406 | GF_PATHS_CONFIG: /etc/grafana/grafana.ini 407 | ports: 408 | - "{{ .Values.grafana.port }}:3000" 409 | {{- end }} 410 | volumes: 411 | - ./config/grafana:/etc/grafana 412 | - ./config/dashboards:/tmp/dashboards 413 | - ./data/grafana:/var/lib/grafana 414 | restart: 
on-failure 415 | dashboard-installer: 416 | {{- if .Values.dashboardInstaller.image }} 417 | image: {{ .Values.dashboardInstaller.image }} 418 | {{- else }} 419 | image: tidb-dashboard-installer:latest 420 | build: 421 | context: {{ .Values.dashboardInstaller.buildPath | default "./dashboard-installer" }} 422 | dockerfile: {{ .Values.dashboardInstaller.dockerfile | default "Dockerfile" }} 423 | {{- end }} 424 | {{- if eq .Values.networkMode "host" }} 425 | network_mode: host 426 | command: ["127.0.0.1:{{.Values.grafana.port}}"] 427 | {{- else }} 428 | command: ["grafana:3000"] 429 | {{- end }} 430 | restart: on-failure 431 | {{- end -}} 432 | -------------------------------------------------------------------------------- /compose/values.yaml: -------------------------------------------------------------------------------- 1 | dataDir: ./data 2 | logsDir: ./logs 3 | # supported networkMode: bridge | host 4 | # host network mode is useless on Mac 5 | networkMode: bridge 6 | 7 | pd: 8 | size: 3 9 | image: pingcap/pd:latest 10 | 11 | # If you want to build pd image from source, leave image empty and specify pd source directory 12 | # and its dockerfile name 13 | # buildPath: ./pd 14 | # dockerfile: Dockerfile 15 | # when network_mode is host, pd port ranges [port, port+size) 16 | port: 2379 17 | 18 | tikv: 19 | size: 3 20 | image: pingcap/tikv:latest 21 | 22 | # If you want to build tikv image from source, leave image empty and specify tikv source directory 23 | # and its dockerfile name 24 | # buildPath: ./tikv 25 | # dockerfile: Dockerfile 26 | # when network mode is host, tikv port ranges [port, port+size) 27 | port: 20160 28 | 29 | # comment this section out if you don't need SQL layer and want to use TiKV directly 30 | # when using TiKV directly, networkMode must be set to `host` 31 | tidb: 32 | image: pingcap/tidb:latest 33 | 34 | # If you want to build tidb image from source, leave image empty and specify tidb source directory 35 | # and its dockerfile name 36 | # buildPath: ./tidb 37 | # dockerfile: Dockerfile 38 | mysqlPort: "4000" 39 | statusPort: "10080" 40 | enableBinlog: false 41 | 42 | pump: 43 | size: 3 44 | image: pingcap/tidb-binlog:latest 45 | 46 | # If you want to build pump image from source, leave image empty and specify pump source directory 47 | # and its dockerfile name 48 | # buildPath: ./pump 49 | # dockerfile: Dockerfile 50 | # when network_mode is host, pump port ranges [port, port+size) 51 | port: 8250 52 | 53 | drainer: 54 | image: pingcap/tidb-binlog:latest 55 | 56 | # If you want to build drainer image from source, leave image empty and specify drainer source directory 57 | # and its dockerfile name 58 | # buildPath: ./drainer 59 | # dockerfile: Dockerfile 60 | destDBType: "kafka" 61 | 62 | zookeeper: 63 | size: 3 64 | image: zookeeper:latest 65 | port: 2181 66 | 67 | kafka: 68 | size: 3 69 | image: wurstmeister/kafka:2.12-2.1.1 70 | # If you want to consume the kafka messages outside the docker containers, 71 | # Please update the advertisedHostName with your docker host IP 72 | advertisedHostName: 73 | port: 9092 74 | 75 | tispark: 76 | image: pingcap/tispark:latest 77 | 78 | # If you want to build tidb image from source, leave image empty and specify tidb source directory 79 | # and its dockerfile name 80 | # buildPath: ./tidb 81 | # dockerfile: Dockerfile 82 | buildPath: ./tispark 83 | dockerfile: Dockerfile 84 | 85 | masterPort: 7077 86 | webuiPort: 8080 87 | workerCount: 1 88 | # slave web ui port will be workerWebUIPort ~ workerWebUIPort+workerCount-1 89 
| workerWebUIPort: 38081 90 | 91 | # comment this out to disable tidb-vision 92 | tidbVision: 93 | image: pingcap/tidb-vision:latest 94 | 95 | # If you want to build tidb-vision image from source, leave image empty and specify tidb-vision source directory 96 | # and its dockerfile name 97 | # buildPath: ./tidb-vision 98 | # dockerfile: Dockerfile 99 | port: "8010" 100 | 101 | # comment the following monitor component sections out to disable monitoring 102 | grafana: 103 | image: grafana/grafana:5.3.0 104 | port: "3000" 105 | logLevel: error 106 | 107 | pushgateway: 108 | image: prom/pushgateway:v0.3.1 109 | port: "9091" 110 | logLevel: error 111 | 112 | prometheus: 113 | image: prom/prometheus:v2.2.1 114 | port: "9090" 115 | logLevel: error 116 | 117 | # This is used to import tidb monitor dashboard templates into grafana 118 | # this container runs only once and keeps running until the templates are imported successfully 119 | dashboardInstaller: 120 | image: pingcap/tidb-dashboard-installer:v2.0.0 121 | 122 | # If you want to build tidb-dashboard-installer image from source, leave image empty and specify tidb-dashboard-installer source directory 123 | # and its dockerfile name 124 | # buildPath: ./dashboard-installer 125 | # dockerfile: Dockerfile 126 | -------------------------------------------------------------------------------- /config/dashboards/README.md: -------------------------------------------------------------------------------- 1 | # TiDB dashboard 2 | With Grafana v5.x or later, we can use the provisioning feature to statically provision datasources and dashboards, so there is no need to use scripts to configure Grafana. 3 | 4 | The JSON files in dashboards are copied from [tidb-ansible](https://github.com/pingcap/tidb-ansible/tree/master/scripts), and the variables in the JSON files need to be replaced (this was previously done by a Python script). 5 | 6 | It is used in [tidb-docker-compose](https://github.com/pingcap/tidb-docker-compose) and [tidb-operator](https://github.com/pingcap/tidb-operator). 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /config/drainer.toml: -------------------------------------------------------------------------------- 1 | # drainer Configuration. 2 | 3 | # addr (i.e. 'host:port') to listen on for drainer connections 4 | # will register this addr into etcd 5 | # addr = "127.0.0.1:8249" 6 | 7 | # the interval (in seconds) at which to detect the pumps' status 8 | detect-interval = 10 9 | 10 | # drainer meta data directory path 11 | data-dir = "data.drainer" 12 | 13 | # a comma separated list of PD endpoints 14 | pd-urls = "http://127.0.0.1:2379" 15 | 16 | # Use the specified compressor to compress payload between pump and drainer 17 | compressor = "" 18 | 19 | #[security] 20 | # Path of file that contains list of trusted SSL CAs for connection with cluster components. 21 | # ssl-ca = "/path/to/ca.pem" 22 | # Path of file that contains X509 certificate in PEM format for connection with cluster components. 23 | # ssl-cert = "/path/to/pump.pem" 24 | # Path of file that contains X509 key in PEM format for connection with cluster components. 25 | # ssl-key = "/path/to/pump-key.pem" 26 | 27 | # syncer Configuration. 28 | [syncer] 29 | 30 | # Assume the upstream sql-mode. 31 | # If this is set, the same sql-mode will be used to parse DDL statements, and the same sql-mode will be set downstream when db-type is mysql. 32 | # If this is not set, no sql-mode will be set.
33 | # sql-mode = "STRICT_TRANS_TABLES,NO_ENGINE_SUBSTITUTION" 34 | 35 | # number of binlog events in a transaction batch 36 | txn-batch = 20 37 | 38 | # work count to execute binlogs 39 | # if the latency between drainer and downstream(mysql or tidb) are too high, you might want to increase this 40 | # to get higher throughput by higher concurrent write to the downstream 41 | worker-count = 16 42 | 43 | disable-dispatch = false 44 | 45 | # safe mode will split update to delete and insert 46 | safe-mode = false 47 | 48 | # downstream storage, equal to --dest-db-type 49 | # valid values are "mysql", "file", "tidb", "flash", "kafka" 50 | db-type = "kafka" 51 | 52 | # disable sync these schema 53 | ignore-schemas = "INFORMATION_SCHEMA,PERFORMANCE_SCHEMA,mysql" 54 | 55 | ##replicate-do-db priority over replicate-do-table if have same db name 56 | ##and we support regex expression , start with '~' declare use regex expression. 57 | # 58 | #replicate-do-db = ["~^b.*","s1"] 59 | 60 | #[[syncer.replicate-do-table]] 61 | #db-name ="test" 62 | #tbl-name = "log" 63 | 64 | #[[syncer.replicate-do-table]] 65 | #db-name ="test" 66 | #tbl-name = "~^a.*" 67 | 68 | # disable sync these table 69 | #[[syncer.ignore-table]] 70 | #db-name = "test" 71 | #tbl-name = "log" 72 | 73 | # the downstream mysql protocol database 74 | #[syncer.to] 75 | #host = "127.0.0.1" 76 | #user = "root" 77 | #password = "" 78 | #port = 3306 79 | 80 | [syncer.to.checkpoint] 81 | # you can uncomment this to change the database to save checkpoint when the downstream is mysql or tidb 82 | #schema = "tidb_binlog" 83 | 84 | # Uncomment this if you want to use file as db-type. 85 | #[syncer.to] 86 | # directory to save binlog file, default same as data-dir(save checkpoint file) if this is not configured. 87 | # dir = "data.drainer" 88 | 89 | 90 | # when db-type is kafka, you can uncomment this to config the down stream kafka, it will be the globle config kafka default 91 | [syncer.to] 92 | # only need config one of zookeeper-addrs and kafka-addrs, will get kafka address if zookeeper-addrs is configed. 
93 | # zookeeper-addrs = "127.0.0.1:2181" 94 | kafka-addrs = "kafka0:9092,kafka1:9093,kafka2:9094" 95 | kafka-version = "2.1.1" 96 | kafka-max-messages = 1024 97 | # 98 | # 99 | # the topic name drainer will push msg, the default name is _obinlog 100 | # be careful don't use the same name if run multi drainer instances 101 | # topic-name = "" 102 | -------------------------------------------------------------------------------- /config/grafana-datasource.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "tidb-cluster", 3 | "type": "prometheus", 4 | "url": "http://prometheus:9090", 5 | "access": "proxy", 6 | "basicAuth": false 7 | } 8 | -------------------------------------------------------------------------------- /config/grafana/grafana.ini: -------------------------------------------------------------------------------- 1 | ##################### Grafana Configuration Defaults ##################### 2 | # 3 | # Do not modify this file in grafana installs 4 | # 5 | 6 | # possible values : production, development 7 | app_mode = production 8 | 9 | # instance name, defaults to HOSTNAME environment variable value or hostname if HOSTNAME var is empty 10 | instance_name = ${HOSTNAME} 11 | 12 | #################################### Paths ############################### 13 | [paths] 14 | # Path to where grafana can store temp files, sessions, and the sqlite3 db (if that is used) 15 | data = data 16 | 17 | # Temporary files in `data` directory older than given duration will be removed 18 | temp_data_lifetime = 24h 19 | 20 | # Directory where grafana can store logs 21 | logs = data/log 22 | 23 | # Directory where grafana will automatically scan and look for plugins 24 | plugins = data/plugins 25 | 26 | # folder that contains provisioning config files that grafana will apply on startup and while running. 27 | provisioning = conf/provisioning 28 | 29 | #################################### Server ############################## 30 | [server] 31 | # Protocol (http, https, socket) 32 | protocol = http 33 | 34 | # The ip address to bind to, empty will bind to all interfaces 35 | http_addr = 36 | 37 | # The http port to use 38 | http_port = 3000 39 | 40 | # The public facing domain name used to access grafana from a browser 41 | domain = localhost 42 | 43 | # Redirect to correct domain if host header does not match domain 44 | # Prevents DNS rebinding attacks 45 | enforce_domain = false 46 | 47 | # The full public facing url 48 | root_url = %(protocol)s://%(domain)s:%(http_port)s/ 49 | 50 | # Log web requests 51 | router_logging = false 52 | 53 | # the path relative working path 54 | static_root_path = public 55 | 56 | # enable gzip 57 | enable_gzip = false 58 | 59 | # https certs & key file 60 | cert_file = 61 | cert_key = 62 | 63 | # Unix socket path 64 | socket = /tmp/grafana.sock 65 | 66 | #################################### Database ############################ 67 | [database] 68 | # You can configure the database connection by specifying type, host, name, user and password 69 | # as separate properties or as on string using the url property. 70 | 71 | # Either "mysql", "postgres" or "sqlite3", it's your choice 72 | type = sqlite3 73 | host = 127.0.0.1:3306 74 | name = grafana 75 | user = root 76 | # If the password contains # or ; you have to wrap it with triple quotes. 
Ex """#password;""" 77 | password = 78 | # Use either URL or the previous fields to configure the database 79 | # Example: mysql://user:secret@host:port/database 80 | url = 81 | 82 | # Max idle conn setting default is 2 83 | max_idle_conn = 2 84 | 85 | # Max conn setting default is 0 (mean not set) 86 | max_open_conn = 87 | 88 | # Connection Max Lifetime default is 14400 (means 14400 seconds or 4 hours) 89 | conn_max_lifetime = 14400 90 | 91 | # Set to true to log the sql calls and execution times. 92 | log_queries = 93 | 94 | # For "postgres", use either "disable", "require" or "verify-full" 95 | # For "mysql", use either "true", "false", or "skip-verify". 96 | ssl_mode = disable 97 | 98 | ca_cert_path = 99 | client_key_path = 100 | client_cert_path = 101 | server_cert_name = 102 | 103 | # For "sqlite3" only, path relative to data_path setting 104 | path = grafana.db 105 | 106 | #################################### Session ############################# 107 | [session] 108 | # Either "memory", "file", "redis", "mysql", "postgres", "memcache", default is "file" 109 | provider = file 110 | 111 | # Provider config options 112 | # memory: not have any config yet 113 | # file: session dir path, is relative to grafana data_path 114 | # redis: config like redis server e.g. `addr=127.0.0.1:6379,pool_size=100,db=grafana` 115 | # postgres: user=a password=b host=localhost port=5432 dbname=c sslmode=disable 116 | # mysql: go-sql-driver/mysql dsn config string, examples: 117 | # `user:password@tcp(127.0.0.1:3306)/database_name` 118 | # `user:password@unix(/var/run/mysqld/mysqld.sock)/database_name` 119 | # memcache: 127.0.0.1:11211 120 | 121 | 122 | provider_config = sessions 123 | 124 | # Session cookie name 125 | cookie_name = grafana_sess 126 | 127 | # If you use session in https only, default is false 128 | cookie_secure = false 129 | 130 | # Session life time, default is 86400 131 | session_life_time = 86400 132 | gc_interval_time = 86400 133 | 134 | # Connection Max Lifetime default is 14400 (means 14400 seconds or 4 hours) 135 | conn_max_lifetime = 14400 136 | 137 | #################################### Data proxy ########################### 138 | [dataproxy] 139 | 140 | # This enables data proxy logging, default is false 141 | logging = false 142 | 143 | #################################### Analytics ########################### 144 | [analytics] 145 | # Server reporting, sends usage counters to stats.grafana.org every 24 hours. 146 | # No ip addresses are being tracked, only simple counters to track 147 | # running instances, dashboard and error counts. It is very helpful to us. 148 | # Change this option to false to disable reporting. 
149 | reporting_enabled = true 150 | 151 | # Set to false to disable all checks to https://grafana.com 152 | # for new versions (grafana itself and plugins), check is used 153 | # in some UI views to notify that grafana or plugin update exists 154 | # This option does not cause any auto updates, nor send any information 155 | # only a GET request to https://grafana.com to get latest versions 156 | check_for_updates = true 157 | 158 | # Google Analytics universal tracking code, only enabled if you specify an id here 159 | google_analytics_ua_id = 160 | 161 | # Google Tag Manager ID, only enabled if you specify an id here 162 | google_tag_manager_id = 163 | 164 | #################################### Security ############################ 165 | [security] 166 | # default admin user, created on startup 167 | admin_user = admin 168 | 169 | # default admin password, can be changed before first start of grafana, or in profile settings 170 | admin_password = admin 171 | 172 | # used for signing 173 | secret_key = SW2YcwTIb9zpOOhoPsMm 174 | 175 | # Auto-login remember days 176 | login_remember_days = 7 177 | cookie_username = grafana_user 178 | cookie_remember_name = grafana_remember 179 | 180 | # disable gravatar profile images 181 | disable_gravatar = false 182 | 183 | # data source proxy whitelist (ip_or_domain:port separated by spaces) 184 | data_source_proxy_whitelist = 185 | 186 | # disable protection against brute force login attempts 187 | disable_brute_force_login_protection = false 188 | 189 | #################################### Snapshots ########################### 190 | [snapshots] 191 | # snapshot sharing options 192 | external_enabled = true 193 | external_snapshot_url = https://snapshots-origin.raintank.io 194 | external_snapshot_name = Publish to snapshot.raintank.io 195 | 196 | # remove expired snapshot 197 | snapshot_remove_expired = true 198 | 199 | #################################### Dashboards ################## 200 | 201 | [dashboards] 202 | # Number dashboard versions to keep (per dashboard). Default: 20, Minimum: 1 203 | versions_to_keep = 20 204 | 205 | #################################### Users ############################### 206 | [users] 207 | # disable user signup / registration 208 | allow_sign_up = false 209 | 210 | # Allow non admin users to create organizations 211 | allow_org_create = false 212 | 213 | # Set to true to automatically assign new users to the default organization (id 1) 214 | auto_assign_org = true 215 | 216 | # Set this value to automatically add new users to the provided organization (if auto_assign_org above is set to true) 217 | auto_assign_org_id = 1 218 | 219 | # Default role new users will be automatically assigned (if auto_assign_org above is set to true) 220 | auto_assign_org_role = Viewer 221 | 222 | # Require email validation before sign up completes 223 | verify_email_enabled = false 224 | 225 | # Background text for the user field on the login page 226 | login_hint = email or username 227 | 228 | # Default UI theme ("dark" or "light") 229 | default_theme = dark 230 | 231 | # External user management 232 | external_manage_link_url = 233 | external_manage_link_name = 234 | external_manage_info = 235 | 236 | # Viewers can edit/inspect dashboard settings in the browser. But not save the dashboard. 237 | viewers_can_edit = false 238 | 239 | [auth] 240 | # Set to true to disable (hide) the login form, useful if you use OAuth 241 | disable_login_form = false 242 | 243 | # Set to true to disable the signout link in the side menu. 
useful if you use auth.proxy 244 | disable_signout_menu = false 245 | 246 | # URL to redirect the user to after sign out 247 | signout_redirect_url = 248 | 249 | #################################### Anonymous Auth ###################### 250 | [auth.anonymous] 251 | # enable anonymous access 252 | enabled = true 253 | 254 | # specify organization name that should be used for unauthenticated users 255 | org_name = Main Org. 256 | 257 | # specify role for unauthenticated users 258 | org_role = Viewer 259 | 260 | #################################### Github Auth ######################### 261 | [auth.github] 262 | enabled = false 263 | allow_sign_up = true 264 | client_id = some_id 265 | client_secret = some_secret 266 | scopes = user:email,read:org 267 | auth_url = https://github.com/login/oauth/authorize 268 | token_url = https://github.com/login/oauth/access_token 269 | api_url = https://api.github.com/user 270 | team_ids = 271 | allowed_organizations = 272 | 273 | #################################### GitLab Auth ######################### 274 | [auth.gitlab] 275 | enabled = false 276 | allow_sign_up = true 277 | client_id = some_id 278 | client_secret = some_secret 279 | scopes = api 280 | auth_url = https://gitlab.com/oauth/authorize 281 | token_url = https://gitlab.com/oauth/token 282 | api_url = https://gitlab.com/api/v4 283 | allowed_groups = 284 | 285 | #################################### Google Auth ######################### 286 | [auth.google] 287 | enabled = false 288 | allow_sign_up = true 289 | client_id = some_client_id 290 | client_secret = some_client_secret 291 | scopes = https://www.googleapis.com/auth/userinfo.profile https://www.googleapis.com/auth/userinfo.email 292 | auth_url = https://accounts.google.com/o/oauth2/auth 293 | token_url = https://accounts.google.com/o/oauth2/token 294 | api_url = https://www.googleapis.com/oauth2/v1/userinfo 295 | allowed_domains = 296 | hosted_domain = 297 | 298 | #################################### Grafana.com Auth #################### 299 | # legacy key names (so they work in env variables) 300 | [auth.grafananet] 301 | enabled = false 302 | allow_sign_up = true 303 | client_id = some_id 304 | client_secret = some_secret 305 | scopes = user:email 306 | allowed_organizations = 307 | 308 | [auth.grafana_com] 309 | enabled = false 310 | allow_sign_up = true 311 | client_id = some_id 312 | client_secret = some_secret 313 | scopes = user:email 314 | allowed_organizations = 315 | 316 | #################################### Generic OAuth ####################### 317 | [auth.generic_oauth] 318 | name = OAuth 319 | enabled = false 320 | allow_sign_up = true 321 | client_id = some_id 322 | client_secret = some_secret 323 | scopes = user:email 324 | email_attribute_name = email:primary 325 | auth_url = 326 | token_url = 327 | api_url = 328 | team_ids = 329 | allowed_organizations = 330 | tls_skip_verify_insecure = false 331 | tls_client_cert = 332 | tls_client_key = 333 | tls_client_ca = 334 | 335 | #################################### Basic Auth ########################## 336 | [auth.basic] 337 | enabled = true 338 | 339 | #################################### Auth Proxy ########################## 340 | [auth.proxy] 341 | enabled = false 342 | header_name = X-WEBAUTH-USER 343 | header_property = username 344 | auto_sign_up = true 345 | ldap_sync_ttl = 60 346 | whitelist = 347 | 348 | #################################### Auth LDAP ########################### 349 | [auth.ldap] 350 | enabled = false 351 | config_file = /etc/grafana/ldap.toml 352 | 
allow_sign_up = true 353 | 354 | #################################### SMTP / Emailing ##################### 355 | [smtp] 356 | enabled = false 357 | host = localhost:25 358 | user = 359 | # If the password contains # or ; you have to wrap it with triple quotes. Ex """#password;""" 360 | password = 361 | cert_file = 362 | key_file = 363 | skip_verify = false 364 | from_address = admin@grafana.localhost 365 | from_name = Grafana 366 | ehlo_identity = 367 | 368 | [emails] 369 | welcome_email_on_sign_up = false 370 | templates_pattern = emails/*.html 371 | 372 | #################################### Logging ########################## 373 | [log] 374 | # Either "console", "file", "syslog". Default is console and file 375 | # Use space to separate multiple modes, e.g. "console file" 376 | mode = console file 377 | 378 | # Either "debug", "info", "warn", "error", "critical", default is "info" 379 | level = info 380 | 381 | # optional settings to set different levels for specific loggers. Ex filters = sqlstore:debug 382 | filters = 383 | 384 | # For "console" mode only 385 | [log.console] 386 | level = 387 | 388 | # log line format, valid options are text, console and json 389 | format = console 390 | 391 | # For "file" mode only 392 | [log.file] 393 | level = 394 | 395 | # log line format, valid options are text, console and json 396 | format = text 397 | 398 | # This enables automated log rotate(switch of following options), default is true 399 | log_rotate = true 400 | 401 | # Max line number of single file, default is 1000000 402 | max_lines = 1000000 403 | 404 | # Max size shift of single file, default is 28 means 1 << 28, 256MB 405 | max_size_shift = 28 406 | 407 | # Segment log daily, default is true 408 | daily_rotate = true 409 | 410 | # Expired days of log file(delete after max days), default is 7 411 | max_days = 7 412 | 413 | [log.syslog] 414 | level = 415 | 416 | # log line format, valid options are text, console and json 417 | format = text 418 | 419 | # Syslog network type and address. This can be udp, tcp, or unix. If left blank, the default unix endpoints will be used. 420 | network = 421 | address = 422 | 423 | # Syslog facility. user, daemon and local0 through local7 are valid. 424 | facility = 425 | 426 | # Syslog tag. By default, the process' argv[0] is used. 427 | tag = 428 | 429 | #################################### Usage Quotas ######################## 430 | [quota] 431 | enabled = false 432 | 433 | #### set quotas to -1 to make unlimited. #### 434 | # limit number of users per Org. 435 | org_user = 10 436 | 437 | # limit number of dashboards per Org. 438 | org_dashboard = 100 439 | 440 | # limit number of data_sources per Org. 441 | org_data_source = 10 442 | 443 | # limit number of api_keys per Org. 444 | org_api_key = 10 445 | 446 | # limit number of orgs a user can create. 447 | user_org = 10 448 | 449 | # Global limit of users. 450 | global_user = -1 451 | 452 | # global limit of orgs. 453 | global_org = -1 454 | 455 | # global limit of dashboards 456 | global_dashboard = -1 457 | 458 | # global limit of api_keys 459 | global_api_key = -1 460 | 461 | # global limit on number of logged in users. 462 | global_session = -1 463 | 464 | #################################### Alerting ############################ 465 | [alerting] 466 | # Disable alerting engine & UI features 467 | enabled = true 468 | # Makes it possible to turn off alert rule execution but alerting UI is visible 469 | execute_alerts = true 470 | 471 | # Default setting for new alert rules. 
Defaults to categorize error and timeouts as alerting. (alerting, keep_state) 472 | error_or_timeout = alerting 473 | 474 | # Default setting for how Grafana handles nodata or null values in alerting. (alerting, no_data, keep_state, ok) 475 | nodata_or_nullvalues = no_data 476 | 477 | # Alert notifications can include images, but rendering many images at the same time can overload the server 478 | # This limit will protect the server from render overloading and make sure notifications are sent out quickly 479 | concurrent_render_limit = 5 480 | 481 | #################################### Explore ############################# 482 | [explore] 483 | # Enable the Explore section 484 | enabled = false 485 | 486 | #################################### Internal Grafana Metrics ############ 487 | # Metrics available at HTTP API Url /metrics 488 | [metrics] 489 | enabled = true 490 | interval_seconds = 10 491 | 492 | # Send internal Grafana metrics to graphite 493 | [metrics.graphite] 494 | # Enable by setting the address setting (ex localhost:2003) 495 | address = 496 | prefix = prod.grafana.%(instance_name)s. 497 | 498 | [grafana_net] 499 | url = https://grafana.com 500 | 501 | [grafana_com] 502 | url = https://grafana.com 503 | 504 | #################################### Distributed tracing ############ 505 | [tracing.jaeger] 506 | # jaeger destination (ex localhost:6831) 507 | address = 508 | # tag that will always be included in when creating new spans. ex (tag1:value1,tag2:value2) 509 | always_included_tag = 510 | # Type specifies the type of the sampler: const, probabilistic, rateLimiting, or remote 511 | sampler_type = const 512 | # jaeger samplerconfig param 513 | # for "const" sampler, 0 or 1 for always false/true respectively 514 | # for "probabilistic" sampler, a probability between 0 and 1 515 | # for "rateLimiting" sampler, the number of spans per second 516 | # for "remote" sampler, param is the same as for "probabilistic" 517 | # and indicates the initial sampling rate before the actual one 518 | # is received from the mothership 519 | sampler_param = 1 520 | 521 | #################################### External Image Storage ############## 522 | [external_image_storage] 523 | # You can choose between (s3, webdav, gcs, azure_blob, local) 524 | provider = 525 | 526 | [external_image_storage.s3] 527 | bucket_url = 528 | bucket = 529 | region = 530 | path = 531 | access_key = 532 | secret_key = 533 | 534 | [external_image_storage.webdav] 535 | url = 536 | username = 537 | password = 538 | public_url = 539 | 540 | [external_image_storage.gcs] 541 | key_file = 542 | bucket = 543 | path = 544 | 545 | [external_image_storage.azure_blob] 546 | account_name = 547 | account_key = 548 | container_name = 549 | 550 | [external_image_storage.local] 551 | # does not require any configuration 552 | 553 | [rendering] 554 | # Options to configure external image rendering server like https://github.com/grafana/grafana-image-renderer 555 | server_url = 556 | callback_url = 557 | -------------------------------------------------------------------------------- /config/grafana/provisioning/dashboards/dashboards.yaml: -------------------------------------------------------------------------------- 1 | # # config file version 2 | apiVersion: 1 3 | 4 | providers: 5 | - name: 'default' 6 | orgId: 1 7 | folder: '' 8 | type: file 9 | options: 10 | path: /tmp/dashboards 11 | -------------------------------------------------------------------------------- /config/grafana/provisioning/datasources/datasources.yaml: 
-------------------------------------------------------------------------------- 1 | # # config file version 2 | apiVersion: 1 3 | 4 | # # list of datasources that should be deleted from the database 5 | #deleteDatasources: 6 | # - name: Graphite 7 | # orgId: 1 8 | 9 | # # list of datasources to insert/update depending 10 | # # on what's available in the datbase 11 | datasources: 12 | # name of the datasource. Required 13 | - name: tidb-cluster 14 | # datasource type. Required 15 | type: prometheus 16 | # access mode. direct or proxy. Required 17 | access: proxy 18 | # # org id. will default to orgId 1 if not specified 19 | # orgId: 1 20 | # url 21 | url: http://prometheus:9090 22 | # # database password, if used 23 | # password: 24 | # # database user, if used 25 | # user: 26 | # # database name, if used 27 | # database: 28 | # enable/disable basic auth 29 | basicAuth: false 30 | # # basic auth username 31 | # basicAuthUser: 32 | # # basic auth password 33 | # basicAuthPassword: 34 | # # enable/disable with credentials headers 35 | # withCredentials: 36 | # # mark as default datasource. Max one per org 37 | # isDefault: 38 | # # fields that will be converted to json and stored in json_data 39 | # jsonData: 40 | # graphiteVersion: "1.1" 41 | # tlsAuth: true 42 | # tlsAuthWithCACert: true 43 | # httpHeaderName1: "Authorization" 44 | # # json object of data that will be encrypted. 45 | # secureJsonData: 46 | # tlsCACert: "..." 47 | # tlsClientCert: "..." 48 | # tlsClientKey: "..." 49 | # # 50 | # httpHeaderValue1: "Bearer xf5yhfkpsnmgo" 51 | # version: 1 52 | # # allow users to edit datasources from the UI. 53 | # editable: false 54 | -------------------------------------------------------------------------------- /config/pd-nightly-tiflash.toml: -------------------------------------------------------------------------------- 1 | [replication] 2 | enable-placement-rules = true 3 | max-replicas = 1 4 | -------------------------------------------------------------------------------- /config/pd.rules.yml: -------------------------------------------------------------------------------- 1 | groups: 2 | - name: alert.rules 3 | rules: 4 | - alert: PD_cluster_offline_tikv_nums 5 | expr: sum ( pd_cluster_status{type="store_down_count"} ) > 0 6 | for: 1m 7 | labels: 8 | env: test-cluster 9 | level: emergency 10 | expr: sum ( pd_cluster_status{type="store_down_count"} ) > 0 11 | annotations: 12 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 13 | value: '{{ $value }}' 14 | summary: PD_cluster_offline_tikv_nums 15 | 16 | - alert: PD_etcd_write_disk_latency 17 | expr: histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket[1m])) by (instance,job,le) ) > 1 18 | for: 1m 19 | labels: 20 | env: test-cluster 21 | level: critical 22 | expr: histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket[1m])) by (instance,job,le) ) > 1 23 | annotations: 24 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 25 | value: '{{ $value }}' 26 | summary: PD_etcd_write_disk_latency 27 | 28 | - alert: PD_miss_peer_region_count 29 | expr: sum( pd_regions_status{type="miss_peer_region_count"} ) > 100 30 | for: 1m 31 | labels: 32 | env: test-cluster 33 | level: critical 34 | expr: sum( pd_regions_status{type="miss_peer_region_count"} ) > 100 35 | annotations: 36 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 37 | value: '{{ $value }}' 38 | summary: 
PD_miss_peer_region_count 39 | 40 | - alert: PD_cluster_lost_connect_tikv_nums 41 | expr: sum ( pd_cluster_status{type="store_disconnected_count"} ) > 0 42 | for: 1m 43 | labels: 44 | env: test-cluster 45 | level: warning 46 | expr: sum ( pd_cluster_status{type="store_disconnected_count"} ) > 0 47 | annotations: 48 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 49 | value: '{{ $value }}' 50 | summary: PD_cluster_lost_connect_tikv_nums 51 | 52 | - alert: PD_cluster_low_space 53 | expr: sum ( pd_cluster_status{type="store_low_space_count"} ) > 0 54 | for: 1m 55 | labels: 56 | env: test-cluster 57 | level: warning 58 | expr: sum ( pd_cluster_status{type="store_low_space_count"} ) > 0 59 | annotations: 60 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 61 | value: '{{ $value }}' 62 | summary: PD_cluster_low_space 63 | 64 | - alert: PD_etcd_network_peer_latency 65 | expr: histogram_quantile(0.99, sum(rate(etcd_network_peer_round_trip_time_seconds_bucket[1m])) by (To,instance,job,le) ) > 1 66 | for: 1m 67 | labels: 68 | env: test-cluster 69 | level: warning 70 | expr: histogram_quantile(0.99, sum(rate(etcd_network_peer_round_trip_time_seconds_bucket[1m])) by (To,instance,job,le) ) > 1 71 | annotations: 72 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 73 | value: '{{ $value }}' 74 | summary: PD_etcd_network_peer_latency 75 | 76 | - alert: PD_tidb_handle_requests_duration 77 | expr: histogram_quantile(0.99, sum(rate(pd_client_request_handle_requests_duration_seconds_bucket{type="tso"}[1m])) by (instance,job,le) ) > 0.1 78 | for: 1m 79 | labels: 80 | env: test-cluster 81 | level: warning 82 | expr: histogram_quantile(0.99, sum(rate(pd_client_request_handle_requests_duration_seconds_bucket{type="tso"}[1m])) by (instance,job,le) ) > 0.1 83 | annotations: 84 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 85 | value: '{{ $value }}' 86 | summary: PD_tidb_handle_requests_duration 87 | 88 | - alert: PD_down_peer_region_nums 89 | expr: sum ( pd_regions_status{type="down_peer_region_count"} ) > 0 90 | for: 1m 91 | labels: 92 | env: test-cluster 93 | level: warning 94 | expr: sum ( pd_regions_status{type="down_peer_region_count"} ) > 0 95 | annotations: 96 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 97 | value: '{{ $value }}' 98 | summary: PD_down_peer_region_nums 99 | 100 | - alert: PD_incorrect_namespace_region_count 101 | expr: sum ( pd_regions_status{type="incorrect_namespace_region_count"} ) > 100 102 | for: 1m 103 | labels: 104 | env: test-cluster 105 | level: warning 106 | expr: sum ( pd_regions_status{type="incorrect_namespace_region_count"} ) > 0 107 | annotations: 108 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 109 | value: '{{ $value }}' 110 | summary: PD_incorrect_namespace_region_count 111 | 112 | - alert: PD_pending_peer_region_count 113 | expr: sum( pd_regions_status{type="pending_peer_region_count"} ) > 100 114 | for: 1m 115 | labels: 116 | env: test-cluster 117 | level: warning 118 | expr: sum( pd_regions_status{type="pending_peer_region_count"} ) > 100 119 | annotations: 120 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 121 | value: '{{ $value }}' 122 | summary: PD_pending_peer_region_count 123 | 124 | - alert: PD_leader_change 125 | expr: count( 
changes(pd_server_tso{type="save"}[10m]) > 0 ) >= 2 126 | for: 1m 127 | labels: 128 | env: test-cluster 129 | level: warning 130 | expr: count( changes(pd_server_tso{type="save"}[10m]) > 0 ) >= 2 131 | annotations: 132 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 133 | value: '{{ $value }}' 134 | summary: PD_leader_change 135 | 136 | - alert: TiKV_space_used_more_than_80% 137 | expr: sum(pd_cluster_status{type="storage_size"}) / sum(pd_cluster_status{type="storage_capacity"}) * 100 > 80 138 | for: 1m 139 | labels: 140 | env: test-cluster 141 | level: warning 142 | expr: sum(pd_cluster_status{type="storage_size"}) / sum(pd_cluster_status{type="storage_capacity"}) * 100 > 80 143 | annotations: 144 | description: 'cluster: test-cluster, type: {{ $labels.type }}, instance: {{ $labels.instance }}, values: {{ $value }}' 145 | value: '{{ $value }}' 146 | summary: TiKV_space_used_more_than_80% 147 | -------------------------------------------------------------------------------- /config/pd.toml: -------------------------------------------------------------------------------- 1 | # PD Configuration. 2 | 3 | name = "pd" 4 | data-dir = "default.pd" 5 | 6 | client-urls = "http://127.0.0.1:2379" 7 | # if not set, use ${client-urls} 8 | advertise-client-urls = "" 9 | 10 | peer-urls = "http://127.0.0.1:2380" 11 | # if not set, use ${peer-urls} 12 | advertise-peer-urls = "" 13 | 14 | initial-cluster = "pd=http://127.0.0.1:2380" 15 | initial-cluster-state = "new" 16 | 17 | lease = 3 18 | tso-save-interval = "3s" 19 | 20 | [security] 21 | # Path of file that contains list of trusted SSL CAs. if set, following four settings shouldn't be empty 22 | cacert-path = "" 23 | # Path of file that contains X509 certificate in PEM format. 24 | cert-path = "" 25 | # Path of file that contains X509 key in PEM format. 26 | key-path = "" 27 | 28 | [log] 29 | level = "error" 30 | 31 | # log format, one of json, text, console 32 | #format = "text" 33 | 34 | # disable automatic timestamps in output 35 | #disable-timestamp = false 36 | 37 | # file logging 38 | [log.file] 39 | #filename = "" 40 | # max log file size in MB 41 | #max-size = 300 42 | # max log file keep days 43 | #max-days = 28 44 | # maximum number of old log files to retain 45 | #max-backups = 7 46 | # rotate log by day 47 | #log-rotate = true 48 | 49 | [metric] 50 | # prometheus client push interval, set "0s" to disable prometheus. 51 | interval = "15s" 52 | # prometheus pushgateway address, leaves it empty will disable prometheus. 53 | address = "pushgateway:9091" 54 | 55 | [schedule] 56 | max-merge-region-size = 0 57 | split-merge-interval = "1h" 58 | max-snapshot-count = 3 59 | max-pending-peer-count = 16 60 | max-store-down-time = "30m" 61 | leader-schedule-limit = 4 62 | region-schedule-limit = 4 63 | replica-schedule-limit = 8 64 | merge-schedule-limit = 8 65 | tolerant-size-ratio = 5.0 66 | 67 | # customized schedulers, the format is as below 68 | # if empty, it will use balance-leader, balance-region, hot-region as default 69 | # [[schedule.schedulers]] 70 | # type = "evict-leader" 71 | # args = ["1"] 72 | 73 | [replication] 74 | # The number of replicas for each region. 75 | max-replicas = 3 76 | # The label keys specified the location of a store. 77 | # The placement priorities is implied by the order of label keys. 78 | # For example, ["zone", "rack"] means that we should place replicas to 79 | # different zones first, then to different racks if we don't have enough zones. 
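# (Illustration, not part of the upstream file: if every TiKV store is started with labels such as { zone = "z1", rack = "r1" }, cf. the commented `labels` setting in config/tikv.toml, then setting location-labels = ["zone", "rack"] here makes PD spread the max-replicas = 3 copies across different zones first and different racks second; the label values z1/r1 are made up for the example.)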
80 | location-labels = [] 81 | 82 | [label-property] 83 | # Do not assign region leaders to stores that have these tags. 84 | # [[label-property.reject-leader]] 85 | # key = "zone" 86 | # value = "cn1" 87 | -------------------------------------------------------------------------------- /config/prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 15s 3 | evaluation_interval: 15s 4 | scrape_configs: 5 | - job_name: 'tidb-cluster' 6 | scrape_interval: 5s 7 | honor_labels: true 8 | static_configs: 9 | - targets: ['pushgateway:9091'] 10 | labels: 11 | cluster: 'tidb-cluster' 12 | rule_files: 13 | - 'pd.rules.yml' 14 | - 'tikv.rules.yml' 15 | - 'tidb.rules.yml' 16 | -------------------------------------------------------------------------------- /config/pump.toml: -------------------------------------------------------------------------------- 1 | # pump Configuration. 2 | 3 | # addr (i.e. 'host:port') to listen on for client traffic 4 | addr = "127.0.0.1:8250" 5 | 6 | # addr (i.e. 'host:port') to advertise to the public 7 | advertise-addr = "" 8 | 9 | # an integer value to control the expiry date of the binlog data; it indicates for how long (in days) the binlog data would be stored. 10 | # must be bigger than 0 11 | gc = 7 12 | 13 | # path to the data directory of pump's data 14 | data-dir = "data.pump" 15 | 16 | # number of seconds between heartbeat ticks (default: 2 seconds) 17 | heartbeat-interval = 2 18 | 19 | # a comma-separated list of PD endpoints 20 | pd-urls = "http://127.0.0.1:2379" 21 | 22 | #[security] 23 | # Path of file that contains list of trusted SSL CAs for connection with cluster components. 24 | # ssl-ca = "/path/to/ca.pem" 25 | # Path of file that contains X509 certificate in PEM format for connection with cluster components. 26 | # ssl-cert = "/path/to/pump.pem" 27 | # Path of file that contains X509 key in PEM format for connection with cluster components. 28 | # ssl-key = "/path/to/pump-key.pem" 29 | # 30 | # [storage] 31 | # Set to `true` (default) for best reliability, which prevents data loss when there is a power failure.
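# (Illustration, not part of the upstream file: the opposite setting, sync-log = false, would trade this durability guarantee for higher write throughput, at the risk of losing binlog entries that were not yet flushed to disk when the power fails.)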
32 | # sync-log = true 33 | # 34 | # we suggest using the default config of the embedded LSM DB now, do not change it useless you know what you are doing 35 | # [storage.kv] 36 | # block-cache-capacity = 8388608 37 | # block-restart-interval = 16 38 | # block-size = 4096 39 | # compaction-L0-trigger = 8 40 | # compaction-table-size = 67108864 41 | # compaction-total-size = 536870912 42 | # compaction-total-size-multiplier = 8.0 43 | # write-buffer = 67108864 44 | # write-L0-pause-trigger = 24 45 | # write-L0-slowdown-trigger = 17 46 | -------------------------------------------------------------------------------- /config/spark-defaults.conf: -------------------------------------------------------------------------------- 1 | spark.tispark.pd.addresses pd0:2379 2 | spark.sql.extensions org.apache.spark.sql.TiExtensions 3 | -------------------------------------------------------------------------------- /config/tidb.rules.yml: -------------------------------------------------------------------------------- 1 | groups: 2 | - name: alert.rules 3 | rules: 4 | - alert: TiDB_schema_error 5 | expr: increase(tidb_session_schema_lease_error_total{type="outdated"}[15m]) > 0 6 | for: 1m 7 | labels: 8 | env: test-cluster 9 | level: emergency 10 | expr: increase(tidb_session_schema_lease_error_total{type="outdated"}[15m]) > 0 11 | annotations: 12 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 13 | value: '{{ $value }}' 14 | summary: TiDB schema error 15 | 16 | - alert: TiDB_tikvclient_region_err_total 17 | expr: increase( tidb_tikvclient_region_err_total[10m] ) > 6000 18 | for: 1m 19 | labels: 20 | env: test-cluster 21 | level: emergency 22 | expr: increase( tidb_tikvclient_region_err_total[10m] ) > 6000 23 | annotations: 24 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 25 | value: '{{ $value }}' 26 | summary: TiDB tikvclient_backoff_count error 27 | 28 | - alert: TiDB_domain_load_schema_total 29 | expr: increase( tidb_domain_load_schema_total{type="failed"}[10m] ) > 10 30 | for: 1m 31 | labels: 32 | env: test-cluster 33 | level: emergency 34 | expr: increase( tidb_domain_load_schema_total{type="failed"}[10m] ) > 10 35 | annotations: 36 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 37 | value: '{{ $value }}' 38 | summary: TiDB domain_load_schema_total error 39 | 40 | - alert: TiDB_monitor_keep_alive 41 | expr: increase(tidb_monitor_keep_alive_total{job="tidb"}[10m]) < 100 42 | for: 1m 43 | labels: 44 | env: test-cluster 45 | level: emergency 46 | expr: increase(tidb_monitor_keep_alive_total{job="tidb"}[10m]) < 100 47 | annotations: 48 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 49 | value: '{{ $value }}' 50 | summary: TiDB monitor_keep_alive error 51 | 52 | - alert: TiDB_server_panic_total 53 | expr: increase(tidb_server_panic_total[10m]) > 0 54 | for: 1m 55 | labels: 56 | env: test-cluster 57 | level: critical 58 | expr: increase(tidb_server_panic_total[10m]) > 0 59 | annotations: 60 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 61 | value: '{{ $value }}' 62 | summary: TiDB server panic total 63 | 64 | - alert: TiDB_memory_abnormal 65 | expr: go_memstats_heap_inuse_bytes{job="tidb"} > 1e+10 66 | for: 1m 67 | labels: 68 | env: test-cluster 69 | level: warning 70 | expr: go_memstats_heap_inuse_bytes{job="tidb"} > 1e+10 71 | annotations: 72 | description: 'cluster: 
test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 73 | value: '{{ $value }}' 74 | summary: TiDB heap memory usage is over 10 GB 75 | 76 | - alert: TiDB_query_duration 77 | expr: histogram_quantile(0.99, sum(rate(tidb_server_handle_query_duration_seconds_bucket[1m])) BY (le, instance)) > 1 78 | for: 1m 79 | labels: 80 | env: test-cluster 81 | level: warning 82 | expr: histogram_quantile(0.99, sum(rate(tidb_server_handle_query_duration_seconds_bucket[1m])) BY (le, instance)) > 1 83 | annotations: 84 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 85 | value: '{{ $value }}' 86 | summary: TiDB query duration 99th percentile is above 1s 87 | 88 | - alert: TiDB_server_event_error 89 | expr: increase(tidb_server_server_event{type=~"server_start|server_hang"}[15m]) > 0 90 | for: 1m 91 | labels: 92 | env: test-cluster 93 | level: warning 94 | expr: increase(tidb_server_server_event{type=~"server_start|server_hang"}[15m]) > 0 95 | annotations: 96 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 97 | value: '{{ $value }}' 98 | summary: TiDB server event error 99 | 100 | - alert: TiDB_tikvclient_backoff_count 101 | expr: increase( tidb_tikvclient_backoff_count[10m] ) > 10 102 | for: 1m 103 | labels: 104 | env: test-cluster 105 | level: warning 106 | expr: increase( tidb_tikvclient_backoff_count[10m] ) > 10 107 | annotations: 108 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 109 | value: '{{ $value }}' 110 | summary: TiDB tikvclient_backoff_count error 111 | 112 | - alert: TiDB_monitor_time_jump_back_error 113 | expr: increase(tidb_monitor_time_jump_back_total[10m]) > 0 114 | for: 1m 115 | labels: 116 | env: test-cluster 117 | level: warning 118 | expr: increase(tidb_monitor_time_jump_back_total[10m]) > 0 119 | annotations: 120 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 121 | value: '{{ $value }}' 122 | summary: TiDB monitor time_jump_back error 123 | 124 | - alert: TiDB_ddl_waiting_jobs 125 | expr: sum(tidb_ddl_waiting_jobs) > 5 126 | for: 1m 127 | labels: 128 | env: test-cluster 129 | level: warning 130 | expr: sum(tidb_ddl_waiting_jobs) > 5 131 | annotations: 132 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 133 | value: '{{ $value }}' 134 | summary: TiDB ddl waiting_jobs too much 135 | -------------------------------------------------------------------------------- /config/tidb.toml: -------------------------------------------------------------------------------- 1 | # TiDB Configuration. 2 | 3 | # TiDB server host. 4 | host = "0.0.0.0" 5 | 6 | # TiDB server port. 7 | port = 4000 8 | 9 | # Registered store name, [tikv, mocktikv] 10 | store = "mocktikv" 11 | 12 | # TiDB storage path. 13 | path = "/tmp/tidb" 14 | 15 | # The socket file to use for connection. 16 | socket = "" 17 | 18 | # Run ddl worker on this tidb-server. 19 | run-ddl = true 20 | 21 | # Schema lease duration, very dangerous to change only if you know what you do. 22 | lease = "0" 23 | 24 | # When create table, split a separated region for it. It is recommended to 25 | # turn off this option if there will be a large number of tables created. 26 | split-table = true 27 | 28 | # The limit of concurrent executed sessions. 29 | token-limit = 1000 30 | 31 | # Only print a log when out of memory quota. 
32 | # Valid options: ["log", "cancel"] 33 | oom-action = "log" 34 | 35 | # Set the memory quota for a query in bytes. Default: 32GB 36 | mem-quota-query = 34359738368 37 | 38 | # Enable coprocessor streaming. 39 | enable-streaming = false 40 | 41 | # Set system variable 'lower_case_table_names' 42 | lower-case-table-names = 2 43 | 44 | [log] 45 | # Log level: debug, info, warn, error, fatal. 46 | level = "error" 47 | 48 | # Log format, one of json, text, console. 49 | format = "text" 50 | 51 | # Disable automatic timestamp in output 52 | disable-timestamp = false 53 | 54 | # Stores slow query log into separated files. 55 | slow-query-file = "" 56 | 57 | # Queries with execution time greater than this value will be logged. (Milliseconds) 58 | slow-threshold = 300 59 | 60 | # Queries with internal result greater than this value will be logged. 61 | expensive-threshold = 10000 62 | 63 | # Maximum query length recorded in log. 64 | query-log-max-len = 2048 65 | 66 | # File logging. 67 | [log.file] 68 | # Log file name. 69 | filename = "" 70 | 71 | # Max log file size in MB (upper limit to 4096MB). 72 | max-size = 300 73 | 74 | # Max log file keep days. No clean up by default. 75 | max-days = 0 76 | 77 | # Maximum number of old log files to retain. No clean up by default. 78 | max-backups = 0 79 | 80 | # Rotate log by day 81 | log-rotate = true 82 | 83 | [security] 84 | # Path of file that contains list of trusted SSL CAs for connection with mysql client. 85 | ssl-ca = "" 86 | 87 | # Path of file that contains X509 certificate in PEM format for connection with mysql client. 88 | ssl-cert = "" 89 | 90 | # Path of file that contains X509 key in PEM format for connection with mysql client. 91 | ssl-key = "" 92 | 93 | # Path of file that contains list of trusted SSL CAs for connection with cluster components. 94 | cluster-ssl-ca = "" 95 | 96 | # Path of file that contains X509 certificate in PEM format for connection with cluster components. 97 | cluster-ssl-cert = "" 98 | 99 | # Path of file that contains X509 key in PEM format for connection with cluster components. 100 | cluster-ssl-key = "" 101 | 102 | [status] 103 | # If enable status report HTTP service. 104 | report-status = true 105 | 106 | # TiDB status port. 107 | status-port = 10080 108 | 109 | # Prometheus pushgateway address, leaves it empty will disable prometheus push. 110 | metrics-addr = "pushgateway:9091" 111 | 112 | # Prometheus client push interval in second, set \"0\" to disable prometheus push. 113 | metrics-interval = 15 114 | 115 | [performance] 116 | # Max CPUs to use, 0 use number of CPUs in the machine. 117 | max-procs = 0 118 | # StmtCountLimit limits the max count of statement inside a transaction. 119 | stmt-count-limit = 5000 120 | 121 | # Set keep alive option for tcp connection. 122 | tcp-keep-alive = true 123 | 124 | # The maximum number of retries when commit a transaction. 125 | retry-limit = 10 126 | 127 | # Whether support cartesian product. 128 | cross-join = true 129 | 130 | # Stats lease duration, which influences the time of analyze and stats load. 131 | stats-lease = "3s" 132 | 133 | # Run auto analyze worker on this tidb-server. 134 | run-auto-analyze = true 135 | 136 | # Probability to use the query feedback to update stats, 0 or 1 for always false/true. 137 | feedback-probability = 0.0 138 | 139 | # The max number of query feedback that cache in memory. 
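# (Illustration, not part of the upstream file: with feedback-probability left at 0.0 above, no query feedback is collected and this limit is effectively unused; raising the probability toward 1.0 makes more queries contribute feedback, bounded by the cache of 1024 entries configured below.)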
140 | query-feedback-limit = 1024 141 | 142 | # Pseudo stats will be used if the ratio between the modify count and 143 | # row count in statistics of a table is greater than it. 144 | pseudo-estimate-ratio = 0.7 145 | 146 | [proxy-protocol] 147 | # PROXY protocol acceptable client networks. 148 | # Empty string means disable PROXY protocol, * means all networks. 149 | networks = "" 150 | 151 | # PROXY protocol header read timeout, unit is second 152 | header-timeout = 5 153 | 154 | [plan-cache] 155 | enabled = false 156 | capacity = 2560 157 | shards = 256 158 | 159 | [prepared-plan-cache] 160 | enabled = false 161 | capacity = 100 162 | 163 | [opentracing] 164 | # Enable opentracing. 165 | enable = false 166 | 167 | # Whether to enable the rpc metrics. 168 | rpc-metrics = false 169 | 170 | [opentracing.sampler] 171 | # Type specifies the type of the sampler: const, probabilistic, rateLimiting, or remote 172 | type = "const" 173 | 174 | # Param is a value passed to the sampler. 175 | # Valid values for Param field are: 176 | # - for "const" sampler, 0 or 1 for always false/true respectively 177 | # - for "probabilistic" sampler, a probability between 0 and 1 178 | # - for "rateLimiting" sampler, the number of spans per second 179 | # - for "remote" sampler, param is the same as for "probabilistic" 180 | # and indicates the initial sampling rate before the actual one 181 | # is received from the mothership 182 | param = 1.0 183 | 184 | # SamplingServerURL is the address of jaeger-agent's HTTP sampling server 185 | sampling-server-url = "" 186 | 187 | # MaxOperations is the maximum number of operations that the sampler 188 | # will keep track of. If an operation is not tracked, a default probabilistic 189 | # sampler will be used rather than the per operation specific sampler. 190 | max-operations = 0 191 | 192 | # SamplingRefreshInterval controls how often the remotely controlled sampler will poll 193 | # jaeger-agent for the appropriate sampling strategy. 194 | sampling-refresh-interval = 0 195 | 196 | [opentracing.reporter] 197 | # QueueSize controls how many spans the reporter can keep in memory before it starts dropping 198 | # new spans. The queue is continuously drained by a background go-routine, as fast as spans 199 | # can be sent out of process. 200 | queue-size = 0 201 | 202 | # BufferFlushInterval controls how often the buffer is force-flushed, even if it's not full. 203 | # It is generally not useful, as it only matters for very low traffic services. 204 | buffer-flush-interval = 0 205 | 206 | # LogSpans, when true, enables LoggingReporter that runs in parallel with the main reporter 207 | # and logs all submitted spans. Main Configuration.Logger must be initialized in the code 208 | # for this option to have any effect. 209 | log-spans = false 210 | 211 | # LocalAgentHostPort instructs reporter to send spans to jaeger-agent at this address 212 | local-agent-host-port = "" 213 | 214 | [tikv-client] 215 | # Max gRPC connections that will be established with each tikv-server. 216 | grpc-connection-count = 16 217 | 218 | # After a duration of this time in seconds if the client doesn't see any activity it pings 219 | # the server to see if the transport is still alive. 220 | grpc-keepalive-time = 10 221 | 222 | # After having pinged for keepalive check, the client waits for a duration of Timeout in seconds 223 | # and if no activity is seen even after that the connection is closed. 
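# (Illustration, not part of the upstream file: with grpc-keepalive-time = 10 above and grpc-keepalive-timeout = 3 below, an idle connection to tikv-server is pinged after 10 seconds of inactivity and closed if no reply arrives within a further 3 seconds.)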
224 | grpc-keepalive-timeout = 3 225 | 226 | # max time for commit command, must be twice bigger than raft election timeout. 227 | commit-timeout = "41s" 228 | 229 | [binlog] 230 | 231 | # Socket file to write binlog. 232 | binlog-socket = "" 233 | 234 | # WriteTimeout specifies how long it will wait for writing binlog to pump. 235 | write-timeout = "15s" 236 | 237 | # If IgnoreError is true, when writting binlog meets error, TiDB would stop writting binlog, 238 | # but still provide service. 239 | ignore-error = false 240 | -------------------------------------------------------------------------------- /config/tiflash-learner-nightly.toml: -------------------------------------------------------------------------------- 1 | log-file = "/logs/tiflash_tikv.log" 2 | 3 | [readpool] 4 | 5 | [readpool.coprocessor] 6 | 7 | [readpool.storage] 8 | 9 | [server] 10 | engine-addr = "tiflash:4030" 11 | addr = "0.0.0.0:20280" 12 | advertise-addr = "tiflash:20280" 13 | #status-addr = "tiflash:20292" 14 | 15 | [storage] 16 | data-dir = "/data/flash" 17 | 18 | [pd] 19 | 20 | [metric] 21 | 22 | [raftstore] 23 | capacity = "10GB" 24 | 25 | [coprocessor] 26 | 27 | [rocksdb] 28 | wal-dir = "" 29 | 30 | [rocksdb.defaultcf] 31 | 32 | [rocksdb.lockcf] 33 | 34 | [rocksdb.writecf] 35 | 36 | [raftdb] 37 | 38 | [raftdb.defaultcf] 39 | 40 | [security] 41 | ca-path = "" 42 | cert-path = "" 43 | key-path = "" 44 | 45 | [import] 46 | -------------------------------------------------------------------------------- /config/tiflash-nightly.toml: -------------------------------------------------------------------------------- 1 | default_profile = "default" 2 | display_name = "TiFlash" 3 | listen_host = "0.0.0.0" 4 | mark_cache_size = 5368709120 5 | tmp_path = "/data/tmp" 6 | path = "/data" 7 | tcp_port = 9110 8 | http_port = 8223 9 | 10 | [flash] 11 | tidb_status_addr = "tidb:10080" 12 | service_addr = "tiflash:4030" 13 | 14 | [flash.flash_cluster] 15 | cluster_manager_path = "/tiflash/flash_cluster_manager" 16 | log = "/logs/tiflash_cluster_manager.log" 17 | master_ttl = 60 18 | refresh_interval = 20 19 | update_rule_interval = 5 20 | 21 | [flash.proxy] 22 | config = "/tiflash-learner.toml" 23 | 24 | [status] 25 | metrics_port = 8234 26 | 27 | [logger] 28 | errorlog = "/logs/tiflash_error.log" 29 | log = "/logs/tiflash.log" 30 | count = 20 31 | level = "debug" 32 | size = "1000M" 33 | 34 | [application] 35 | runAsDaemon = true 36 | 37 | [raft] 38 | pd_addr = "pd0:2379" 39 | storage_engine = "tmt" 40 | 41 | [quotas] 42 | 43 | [quotas.default] 44 | 45 | [quotas.default.interval] 46 | duration = 3600 47 | errors = 0 48 | execution_time = 0 49 | queries = 0 50 | read_rows = 0 51 | result_rows = 0 52 | 53 | [users] 54 | 55 | [users.default] 56 | password = "" 57 | profile = "default" 58 | quota = "default" 59 | 60 | [users.default.networks] 61 | ip = "::/0" 62 | 63 | [users.readonly] 64 | password = "" 65 | profile = "readonly" 66 | quota = "default" 67 | 68 | [users.readonly.networks] 69 | ip = "::/0" 70 | 71 | [profiles] 72 | 73 | [profiles.default] 74 | load_balancing = "random" 75 | max_memory_usage = 10000000000 76 | use_uncompressed_cache = 0 77 | 78 | [profiles.readonly] 79 | readonly = 1 80 | -------------------------------------------------------------------------------- /config/tikv.rules.yml: -------------------------------------------------------------------------------- 1 | groups: 2 | - name: alert.rules 3 | rules: 4 | - alert: TiKV_memory_used_too_fast 5 | expr: process_resident_memory_bytes{job=~"tikv.*"} - 
(process_resident_memory_bytes{job=~"tikv.*"} offset 5m) > 5*1024*1024*1024 6 | for: 5m 7 | labels: 8 | env: test-cluster 9 | level: emergency 10 | expr: process_resident_memory_bytes{job=~"tikv.*"} - (process_resident_memory_bytes{job=~"tikv.*"} offset 5m) > 5*1024*1024*1024 11 | annotations: 12 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, job: {{ $labels.job }}, values: {{ $value }}' 13 | value: '{{ $value }}' 14 | summary: TiKV memory used too fast 15 | 16 | - alert: TiKV_GC_can_not_work 17 | expr: sum(increase(tidb_tikvclient_gc_action_result{type="success"}[6h])) < 1 18 | for: 1m 19 | labels: 20 | env: test-cluster 21 | level: emergency 22 | expr: sum(increase(tidb_tikvclient_gc_action_result{type="success"}[6h])) < 1 23 | annotations: 24 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 25 | value: '{{ $value }}' 26 | summary: TiKV GC can not work 27 | 28 | - alert: TiKV_server_report_failure_msg_total 29 | expr: sum(rate(tikv_server_report_failure_msg_total{type="unreachable"}[10m])) BY (store_id) > 10 30 | for: 1m 31 | labels: 32 | env: test-cluster 33 | level: critical 34 | expr: sum(rate(tikv_server_report_failure_msg_total{type="unreachable"}[10m])) BY (store_id) > 10 35 | annotations: 36 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 37 | value: '{{ $value }}' 38 | summary: TiKV server_report_failure_msg_total error 39 | 40 | - alert: TiKV_channel_full_total 41 | expr: sum(rate(tikv_channel_full_total[10m])) BY (type, instance) > 0 42 | for: 1m 43 | labels: 44 | env: test-cluster 45 | level: critical 46 | expr: sum(rate(tikv_channel_full_total[10m])) BY (type, instance) > 0 47 | annotations: 48 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 49 | value: '{{ $value }}' 50 | summary: TiKV channel full 51 | 52 | - alert: TiKV_write_stall 53 | expr: delta( tikv_engine_write_stall[10m]) > 0 54 | for: 1m 55 | labels: 56 | env: test-cluster 57 | level: critical 58 | expr: delta( tikv_engine_write_stall[10m]) > 0 59 | annotations: 60 | description: 'cluster: test-cluster, type: {{ $labels.type }}, instance: {{ $labels.instance }}, values: {{ $value }}' 61 | value: '{{ $value }}' 62 | summary: TiKV write stall 63 | 64 | - alert: TiKV_raft_log_lag 65 | expr: histogram_quantile(0.99, sum(rate(tikv_raftstore_log_lag_bucket[1m])) by (le, instance, job)) > 5000 66 | for: 1m 67 | labels: 68 | env: test-cluster 69 | level: critical 70 | expr: histogram_quantile(0.99, sum(rate(tikv_raftstore_log_lag_bucket[1m])) by (le, instance, job)) > 5000 71 | annotations: 72 | description: 'cluster: test-cluster, instance {{ $labels.instance }}, values: {{ $value }}' 73 | value: '{{ $value }}' 74 | summary: TiKV raftstore log lag more than 5000 75 | 76 | - alert: TiKV_async_request_snapshot_duration_seconds 77 | expr: histogram_quantile(0.99, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{type="snapshot"}[1m])) by (le, instance, job,type)) > 1 78 | for: 1m 79 | labels: 80 | env: test-cluster 81 | level: critical 82 | expr: histogram_quantile(0.99, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{type="snapshot"}[1m])) by (le, instance, job,type)) > 1 83 | annotations: 84 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 85 | value: '{{ $value }}' 86 | summary: TiKV async request snapshot duration seconds more than 1s 87 | 88 | - alert: 
TiKV_async_request_write_duration_seconds 89 | expr: histogram_quantile(0.99, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{type="write"}[1m])) by (le, instance, job,type)) > 1 90 | for: 1m 91 | labels: 92 | env: test-cluster 93 | level: critical 94 | expr: histogram_quantile(0.99, sum(rate(tikv_storage_engine_async_request_duration_seconds_bucket{type="write"}[1m])) by (le, instance, job,type)) > 1 95 | annotations: 96 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 97 | value: '{{ $value }}' 98 | summary: TiKV async request write duration seconds more than 1s 99 | 100 | - alert: TiKV_coprocessor_request_wait_seconds 101 | expr: histogram_quantile(0.9999, sum(rate(tikv_coprocessor_request_wait_seconds_bucket[1m])) by (le, instance, job,req)) > 10 102 | for: 1m 103 | labels: 104 | env: test-cluster 105 | level: critical 106 | expr: histogram_quantile(0.9999, sum(rate(tikv_coprocessor_request_wait_seconds_bucket[1m])) by (le, instance, job,req)) > 10 107 | annotations: 108 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 109 | value: '{{ $value }}' 110 | summary: TiKV coprocessor request wait seconds more than 10s 111 | 112 | - alert: TiKV_raftstore_thread_cpu_seconds_total 113 | expr: sum(rate(tikv_thread_cpu_seconds_total{name=~"raftstore_.*"}[1m])) by (job, name) > 0.8 114 | for: 1m 115 | labels: 116 | env: test-cluster 117 | level: critical 118 | expr: sum(rate(tikv_thread_cpu_seconds_total{name=~"raftstore_.*"}[1m])) by (job, name) > 0.8 119 | annotations: 120 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 121 | value: '{{ $value }}' 122 | summary: TiKV raftstore thread CPU seconds is high 123 | 124 | - alert: TiKV_raft_append_log_duration_secs 125 | expr: histogram_quantile(0.99, sum(rate(tikv_raftstore_append_log_duration_seconds_bucket[1m])) by (le, instance, job)) > 1 126 | for: 1m 127 | labels: 128 | env: test-cluster 129 | level: critical 130 | expr: histogram_quantile(0.99, sum(rate(tikv_raftstore_append_log_duration_seconds_bucket[1m])) by (le, instance, job)) > 1 131 | annotations: 132 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 133 | value: '{{ $value }}' 134 | summary: TiKV_raft_append_log_duration_secs 135 | 136 | - alert: TiKV_raft_apply_log_duration_secs 137 | expr: histogram_quantile(0.99, sum(rate(tikv_raftstore_apply_log_duration_seconds_bucket[1m])) by (le, instance, job)) > 1 138 | for: 1m 139 | labels: 140 | env: test-cluster 141 | level: critical 142 | expr: histogram_quantile(0.99, sum(rate(tikv_raftstore_apply_log_duration_seconds_bucket[1m])) by (le, instance, job)) > 1 143 | annotations: 144 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 145 | value: '{{ $value }}' 146 | summary: TiKV_raft_apply_log_duration_secs 147 | 148 | - alert: TiKV_scheduler_latch_wait_duration_seconds 149 | expr: histogram_quantile(0.99, sum(rate(tikv_scheduler_latch_wait_duration_seconds_bucket[1m])) by (le, instance, job,type)) > 1 150 | for: 1m 151 | labels: 152 | env: test-cluster 153 | level: critical 154 | expr: histogram_quantile(0.99, sum(rate(tikv_scheduler_latch_wait_duration_seconds_bucket[1m])) by (le, instance, job,type)) > 1 155 | annotations: 156 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 157 | value: '{{ $value }}' 158 | summary: TiKV scheduler latch wait duration 
seconds more than 1s 159 | 160 | - alert: TiKV_thread_apply_worker_cpu_seconds 161 | expr: sum(rate(tikv_thread_cpu_seconds_total{name="apply_worker"}[1m])) by (job) > 0.9 162 | for: 1m 163 | labels: 164 | env: test-cluster 165 | level: critical 166 | expr: sum(rate(tikv_thread_cpu_seconds_total{name="apply_worker"}[1m])) by (job) > 0.9 167 | annotations: 168 | description: 'cluster: test-cluster, type: {{ $labels.type }}, instance: {{ $labels.instance }}, values: {{ $value }}' 169 | value: '{{ $value }}' 170 | summary: TiKV thread apply worker cpu seconds is high 171 | 172 | - alert: TiDB_tikvclient_gc_action_fail 173 | expr: sum(increase(tidb_tikvclient_gc_action_result{type="fail"}[1m])) > 10 174 | for: 1m 175 | labels: 176 | env: test-cluster 177 | level: critical 178 | expr: sum(increase(tidb_tikvclient_gc_action_result{type="fail"}[1m])) > 10 179 | annotations: 180 | description: 'cluster: test-cluster, type: {{ $labels.type }}, instance: {{ $labels.instance }}, values: {{ $value }}' 181 | value: '{{ $value }}' 182 | summary: TiDB_tikvclient_gc_action_fail 183 | 184 | - alert: TiKV_leader_drops 185 | expr: delta(tikv_pd_heartbeat_tick_total{type="leader"}[30s]) < -10 186 | for: 1m 187 | labels: 188 | env: test-cluster 189 | level: warning 190 | expr: delta(tikv_pd_heartbeat_tick_total{type="leader"}[30s]) < -10 191 | annotations: 192 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 193 | value: '{{ $value }}' 194 | summary: TiKV leader drops 195 | 196 | - alert: TiKV_raft_process_ready_duration_secs 197 | expr: histogram_quantile(0.999, sum(rate(tikv_raftstore_raft_process_duration_secs_bucket{type='ready'}[1m])) by (le, instance, job,type)) > 2 198 | for: 1m 199 | labels: 200 | env: test-cluster 201 | level: warning 202 | expr: histogram_quantile(0.999, sum(rate(tikv_raftstore_raft_process_duration_secs_bucket{type='ready'}[1m])) by (le, instance, job,type)) > 2 203 | annotations: 204 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values: {{ $value }}' 205 | value: '{{ $value }}' 206 | summary: TiKV_raft_process_ready_duration_secs 207 | 208 | - alert: TiKV_raft_process_tick_duration_secs 209 | expr: histogram_quantile(0.999, sum(rate(tikv_raftstore_raft_process_duration_secs_bucket{type='tick'}[1m])) by (le, instance, job,type)) > 2 210 | for: 1m 211 | labels: 212 | env: test-cluster 213 | level: warning 214 | expr: histogram_quantile(0.999, sum(rate(tikv_raftstore_raft_process_duration_secs_bucket{type='tick'}[1m])) by (le, instance, job,type)) > 2 215 | annotations: 216 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values: {{ $value }}' 217 | value: '{{ $value }}' 218 | summary: TiKV_raft_process_tick_duration_secs 219 | 220 | - alert: TiKV_scheduler_context_total 221 | expr: abs(delta( tikv_scheduler_contex_total[5m])) > 1000 222 | for: 1m 223 | labels: 224 | env: test-cluster 225 | level: warning 226 | expr: abs(delta( tikv_scheduler_contex_total[5m])) > 1000 227 | annotations: 228 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 229 | value: '{{ $value }}' 230 | summary: TiKV scheduler context total 231 | 232 | - alert: TiKV_scheduler_command_duration_seconds 233 | expr: histogram_quantile(0.99, sum(rate(tikv_scheduler_command_duration_seconds_bucket[1m])) by (le, instance, job,type) / 1000) > 1 234 | for: 1m 235 | labels: 236 | env: test-cluster 237 | level: warning 238 | expr: histogram_quantile(0.99, 
sum(rate(tikv_scheduler_command_duration_seconds_bucket[1m])) by (le, instance, job,type) / 1000) > 1 239 | annotations: 240 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 241 | value: '{{ $value }}' 242 | summary: TiKV scheduler command duration seconds more than 1s 243 | 244 | - alert: TiKV_thread_storage_scheduler_cpu_seconds 245 | expr: sum(rate(tikv_thread_cpu_seconds_total{name=~"storage_schedul.*"}[1m])) by (job) > 0.8 246 | for: 1m 247 | labels: 248 | env: test-cluster 249 | level: warning 250 | expr: sum(rate(tikv_thread_cpu_seconds_total{name=~"storage_schedul.*"}[1m])) by (job) > 0.8 251 | annotations: 252 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values:{{ $value }}' 253 | value: '{{ $value }}' 254 | summary: TiKV storage scheduler cpu seconds more than 80% 255 | 256 | - alert: TiKV_coprocessor_outdated_request_wait_seconds 257 | expr: delta( tikv_coprocessor_outdated_request_wait_seconds_count[10m] ) > 0 258 | for: 1m 259 | labels: 260 | env: test-cluster 261 | level: warning 262 | expr: delta( tikv_coprocessor_outdated_request_wait_seconds_count[10m] ) > 0 263 | annotations: 264 | description: 'cluster: test-cluster, instance: {{ $labels.instance }}, values: {{ $value }}' 265 | value: '{{ $value }}' 266 | summary: TiKV coprocessor outdated request wait seconds 267 | 268 | - alert: TiKV_coprocessor_request_error 269 | expr: increase(tikv_coprocessor_request_error{reason!="lock"}[10m]) > 100 270 | for: 1m 271 | labels: 272 | env: test-cluster 273 | level: warning 274 | expr: increase(tikv_coprocessor_request_error{reason!="lock"}[10m]) > 100 275 | annotations: 276 | description: 'cluster: test-cluster, reason: {{ $labels.reason }}, instance: {{ $labels.instance }}, values: {{ $value }}' 277 | value: '{{ $value }}' 278 | summary: TiKV coprocessor request error 279 | 280 | - alert: TiKV_coprocessor_request_lock_error 281 | expr: increase(tikv_coprocessor_request_error{reason="lock"}[10m]) > 10000 282 | for: 1m 283 | labels: 284 | env: test-cluster 285 | level: warning 286 | expr: increase(tikv_coprocessor_request_error{reason="lock"}[10m]) > 10000 287 | annotations: 288 | description: 'cluster: test-cluster, reason: {{ $labels.reason }}, instance: {{ $labels.instance }}, values: {{ $value }}' 289 | value: '{{ $value }}' 290 | summary: TiKV coprocessor request lock error 291 | 292 | - alert: TiKV_coprocessor_pending_request 293 | expr: delta( tikv_coprocessor_pending_request[10m]) > 5000 294 | for: 1m 295 | labels: 296 | env: test-cluster 297 | level: warning 298 | expr: delta( tikv_coprocessor_pending_request[10m]) > 5000 299 | annotations: 300 | description: 'cluster: test-cluster, type: {{ $labels.type }}, instance: {{ $labels.instance }}, values: {{ $value }}' 301 | value: '{{ $value }}' 302 | summary: TiKV pending {{ $labels.type }} request is high 303 | 304 | - alert: TiKV_batch_request_snapshot_nums 305 | expr: sum(rate(tikv_thread_cpu_seconds_total{name=~"endpoint.*"}[1m])) by (job) / ( count(tikv_thread_cpu_seconds_total{name=~"endpoint.*"}) * 0.9 ) / count(count(tikv_thread_cpu_seconds_total) by (instance)) > 0 306 | for: 1m 307 | labels: 308 | env: test-cluster 309 | level: warning 310 | expr: sum(rate(tikv_thread_cpu_seconds_total{name=~"endpoint.*"}[1m])) by (job) / ( count(tikv_thread_cpu_seconds_total{name=~"endpoint.*"}) * 0.9 ) / count(count(tikv_thread_cpu_seconds_total) by (instance)) > 0 311 | annotations: 312 | description: 'cluster: test-cluster, type: {{ $labels.type }}, instance: 
{{ $labels.instance }}, values: {{ $value }}' 313 | value: '{{ $value }}' 314 | summary: TiKV batch request snapshot nums is high 315 | 316 | - alert: TiKV_pending_task 317 | expr: sum(tikv_worker_pending_task_total) BY (job,instance,name) > 1000 318 | for: 1m 319 | labels: 320 | env: test-cluster 321 | level: warning 322 | expr: sum(tikv_worker_pending_task_total) BY (job,instance,name) > 1000 323 | annotations: 324 | description: 'cluster: test-cluster, type: {{ $labels.type }}, instance: {{ $labels.instance }}, values: {{ $value }}' 325 | value: '{{ $value }}' 326 | summary: TiKV pending task too much 327 | 328 | - alert: TiKV_low_space_and_add_region 329 | expr: count( (sum(tikv_store_size_bytes{type="available"}) by (job) / sum(tikv_store_size_bytes{type="capacity"}) by (job) < 0.2) and (sum(tikv_raftstore_snapshot_traffic_total{type="applying"}) by (job) > 0 ) ) > 0 330 | for: 1m 331 | labels: 332 | env: test-cluster 333 | level: warning 334 | expr: count( (sum(tikv_store_size_bytes{type="available"}) by (job) / sum(tikv_store_size_bytes{type="capacity"}) by (job) < 0.2) and (sum(tikv_raftstore_snapshot_traffic_total{type="applying"}) by (job) > 0 ) ) > 0 335 | annotations: 336 | description: 'cluster: test-cluster, type: {{ $labels.type }}, instance: {{ $labels.instance }}, values: {{ $value }}' 337 | value: '{{ $value }}' 338 | summary: TiKV low_space and add_region 339 | 340 | - alert: TiKV_approximate_region_size 341 | expr: histogram_quantile(0.99, sum(rate(tikv_raftstore_region_size_bucket[1m])) by (le)) > 1073741824 342 | for: 1m 343 | labels: 344 | env: test-cluster 345 | level: warning 346 | expr: histogram_quantile(0.99, sum(rate(tikv_raftstore_region_size_bucket[1m])) by (le)) > 1073741824 347 | annotations: 348 | description: 'cluster: test-cluster, type: {{ $labels.type }}, instance: {{ $labels.instance }}, values: {{ $value }}' 349 | value: '{{ $value }}' 350 | summary: TiKV approximate region size is more than 1GB 351 | -------------------------------------------------------------------------------- /config/tikv.toml: -------------------------------------------------------------------------------- 1 | # TiKV config template 2 | # Human-readable big numbers: 3 | # File size(based on byte): KB, MB, GB, TB, PB 4 | # e.g.: 1_048_576 = "1MB" 5 | # Time(based on ms): ms, s, m, h 6 | # e.g.: 78_000 = "1.3m" 7 | 8 | # log level: trace, debug, info, warn, error, off. 9 | [log] 10 | level = "error" 11 | # file to store log, write to stderr if it's empty. 12 | [log.file] 13 | # filename = "" 14 | 15 | [readpool.storage] 16 | # size of thread pool for high-priority operations 17 | # high-concurrency = 4 18 | # size of thread pool for normal-priority operations 19 | # normal-concurrency = 4 20 | # size of thread pool for low-priority operations 21 | # low-concurrency = 4 22 | # max running high-priority operations, reject if exceed 23 | # max-tasks-high = 8000 24 | # max running normal-priority operations, reject if exceed 25 | # max-tasks-normal = 8000 26 | # max running low-priority operations, reject if exceed 27 | # max-tasks-low = 8000 28 | # size of stack size for each thread pool 29 | # stack-size = "10MB" 30 | 31 | [readpool.coprocessor] 32 | # Notice: if CPU_NUM > 8, default thread pool size for coprocessors 33 | # will be set to CPU_NUM * 0.8. 
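# For illustration only (example values, not a recommendation): on a dedicated
# 16-core host the implicit default above would work out to about 16 * 0.8 = 12
# threads, so pinning the pools explicitly might look like:
# high-concurrency = 12
# normal-concurrency = 12
# low-concurrency = 12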
34 | 35 | # high-concurrency = 8 36 | # normal-concurrency = 8 37 | # low-concurrency = 8 38 | # max-tasks-high = 16000 39 | # max-tasks-normal = 16000 40 | # max-tasks-low = 16000 41 | # stack-size = "10MB" 42 | 43 | [server] 44 | # set listening address. 45 | # addr = "127.0.0.1:20160" 46 | # set advertise listening address for client communication, if not set, use addr instead. 47 | # advertise-addr = "" 48 | # notify capacity, 40960 is suitable for about 7000 regions. 49 | # notify-capacity = 40960 50 | # maximum number of messages can be processed in one tick. 51 | # messages-per-tick = 4096 52 | 53 | # compression type for grpc channel, available values are no, deflate and gzip. 54 | # grpc-compression-type = "no" 55 | # size of thread pool for grpc server. 56 | # grpc-concurrency = 4 57 | # The number of max concurrent streams/requests on a client connection. 58 | # grpc-concurrent-stream = 1024 59 | # The number of connections with each tikv server to send raft messages. 60 | # grpc-raft-conn-num = 10 61 | # Amount to read ahead on individual grpc streams. 62 | # grpc-stream-initial-window-size = "2MB" 63 | 64 | # How many snapshots can be sent concurrently. 65 | # concurrent-send-snap-limit = 32 66 | # How many snapshots can be recv concurrently. 67 | # concurrent-recv-snap-limit = 32 68 | 69 | # max count of tasks being handled, new tasks will be rejected. 70 | # end-point-max-tasks = 2000 71 | 72 | # max recursion level allowed when decoding dag expression 73 | # end-point-recursion-limit = 1000 74 | 75 | # max time to handle coprocessor request before timeout 76 | # end-point-request-max-handle-duration = "60s" 77 | 78 | # the max bytes that snapshot can be written to disk in one second, 79 | # should be set based on your disk performance 80 | # snap-max-write-bytes-per-sec = "100MB" 81 | 82 | # set attributes about this server, e.g. { zone = "us-west-1", disk = "ssd" }. 83 | # labels = {} 84 | 85 | [storage] 86 | # set the path to rocksdb directory. 87 | # data-dir = "/tmp/tikv/store" 88 | 89 | # notify capacity of scheduler's channel 90 | # scheduler-notify-capacity = 10240 91 | 92 | # maximum number of messages can be processed in one tick 93 | # scheduler-messages-per-tick = 1024 94 | 95 | # the number of slots in scheduler latches, concurrency control for write. 96 | # scheduler-concurrency = 2048000 97 | 98 | # scheduler's worker pool size, should increase it in heavy write cases, 99 | # also should less than total cpu cores. 100 | # scheduler-worker-pool-size = 4 101 | 102 | # When the pending write bytes exceeds this threshold, 103 | # the "scheduler too busy" error is displayed. 104 | # scheduler-pending-write-threshold = "100MB" 105 | 106 | [pd] 107 | # pd endpoints 108 | # endpoints = [] 109 | 110 | [metric] 111 | # the Prometheus client push interval. Setting the value to 0s stops Prometheus client from pushing. 112 | # interval = "15s" 113 | # the Prometheus pushgateway address. Leaving it empty stops Prometheus client from pushing. 114 | address = "pushgateway:9091" 115 | # the Prometheus client push job name. Note: A node id will automatically append, e.g., "tikv_1". 116 | # job = "tikv" 117 | 118 | [raftstore] 119 | # true (default value) for high reliability, this can prevent data loss when power failure. 120 | # sync-log = true 121 | 122 | # set the path to raftdb directory, default value is data-dir/raft 123 | # raftdb-path = "" 124 | 125 | # set store capacity, if no set, use disk capacity. 
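# For illustration only (example value, not a recommendation): assuming this
# build accepts the human-readable sizes described at the top of this file,
# capping the store at roughly 2 TB might look like:
# capacity = "2TB"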
126 | # capacity = 0 127 | 128 | # notify capacity, 40960 is suitable for about 7000 regions. 129 | # notify-capacity = 40960 130 | 131 | # maximum number of messages can be processed in one tick. 132 | # messages-per-tick = 4096 133 | 134 | # Region heartbeat tick interval for reporting to pd. 135 | # pd-heartbeat-tick-interval = "60s" 136 | # Store heartbeat tick interval for reporting to pd. 137 | # pd-store-heartbeat-tick-interval = "10s" 138 | 139 | # When region size changes exceeds region-split-check-diff, we should check 140 | # whether the region should be split or not. 141 | # region-split-check-diff = "6MB" 142 | 143 | # Interval to check region whether need to be split or not. 144 | # split-region-check-tick-interval = "10s" 145 | 146 | # When raft entry exceed the max size, reject to propose the entry. 147 | # raft-entry-max-size = "8MB" 148 | 149 | # Interval to gc unnecessary raft log. 150 | # raft-log-gc-tick-interval = "10s" 151 | # A threshold to gc stale raft log, must >= 1. 152 | # raft-log-gc-threshold = 50 153 | # When entry count exceed this value, gc will be forced trigger. 154 | # raft-log-gc-count-limit = 72000 155 | # When the approximate size of raft log entries exceed this value, gc will be forced trigger. 156 | # It's recommanded to set it to 3/4 of region-split-size. 157 | # raft-log-gc-size-limit = "72MB" 158 | 159 | # When a peer hasn't been active for max-peer-down-duration, 160 | # we will consider this peer to be down and report it to pd. 161 | # max-peer-down-duration = "5m" 162 | 163 | # Interval to check whether start manual compaction for a region, 164 | # region-compact-check-interval = "5m" 165 | # Number of regions for each time to check. 166 | # region-compact-check-step = 100 167 | # The minimum number of delete tombstones to trigger manual compaction. 168 | # region-compact-min-tombstones = 10000 169 | # Interval to check whether should start a manual compaction for lock column family, 170 | # if written bytes reach lock-cf-compact-threshold for lock column family, will fire 171 | # a manual compaction for lock column family. 172 | # lock-cf-compact-interval = "10m" 173 | # lock-cf-compact-bytes-threshold = "256MB" 174 | 175 | # Interval (s) to check region whether the data are consistent. 176 | # consistency-check-interval = 0 177 | 178 | # Use delete range to drop a large number of continuous keys. 179 | # use-delete-range = false 180 | 181 | # delay time before deleting a stale peer 182 | # clean-stale-peer-delay = "10m" 183 | 184 | # Interval to cleanup import sst files. 185 | # cleanup-import-sst-interval = "10m" 186 | 187 | [coprocessor] 188 | # When it is true, it will try to split a region with table prefix if 189 | # that region crosses tables. It is recommended to turn off this option 190 | # if there will be a large number of tables created. 191 | # split-region-on-table = true 192 | # When the region's size exceeds region-max-size, we will split the region 193 | # into two which the left region's size will be region-split-size or a little 194 | # bit smaller. 195 | # region-max-size = "144MB" 196 | # region-split-size = "96MB" 197 | 198 | [rocksdb] 199 | # Maximum number of concurrent background jobs (compactions and flushes) 200 | # max-background-jobs = 8 201 | 202 | # This value represents the maximum number of threads that will concurrently perform a 203 | # compaction job by breaking it into multiple, smaller ones that are run simultaneously. 204 | # Default: 1 (i.e. 
no subcompactions) 205 | # max-sub-compactions = 1 206 | 207 | # Number of open files that can be used by the DB. You may need to 208 | # increase this if your database has a large working set. Value -1 means 209 | # files opened are always kept open. You can estimate number of files based 210 | # on target_file_size_base and target_file_size_multiplier for level-based 211 | # compaction. 212 | # If max-open-files = -1, RocksDB will prefetch index and filter blocks into 213 | # block cache at startup, so if your database has a large working set, it will 214 | # take several minutes to open the db. 215 | max-open-files = 1024 216 | 217 | # Max size of rocksdb's MANIFEST file. 218 | # For detailed explanation please refer to https://github.com/facebook/rocksdb/wiki/MANIFEST 219 | # max-manifest-file-size = "20MB" 220 | 221 | # If true, the database will be created if it is missing. 222 | # create-if-missing = true 223 | 224 | # rocksdb wal recovery mode 225 | # 0 : TolerateCorruptedTailRecords, tolerate incomplete record in trailing data on all logs; 226 | # 1 : AbsoluteConsistency, We don't expect to find any corruption in the WAL; 227 | # 2 : PointInTimeRecovery, Recover to point-in-time consistency; 228 | # 3 : SkipAnyCorruptedRecords, Recovery after a disaster; 229 | # wal-recovery-mode = 2 230 | 231 | # rocksdb write-ahead logs dir path 232 | # This specifies the absolute dir path for write-ahead logs (WAL). 233 | # If it is empty, the log files will be in the same dir as data. 234 | # When you set the path to rocksdb directory in memory like in /dev/shm, you may want to set 235 | # wal-dir to a directory on a persistent storage. 236 | # See https://github.com/facebook/rocksdb/wiki/How-to-persist-in-memory-RocksDB-database 237 | # wal-dir = "/tmp/tikv/store" 238 | 239 | # The following two fields affect how archived write-ahead logs will be deleted. 240 | # 1. If both set to 0, logs will be deleted asap and will not get into the archive. 241 | # 2. If wal-ttl-seconds is 0 and wal-size-limit is not 0, 242 | # WAL files will be checked every 10 min and if total size is greater 243 | # then wal-size-limit, they will be deleted starting with the 244 | # earliest until size_limit is met. All empty files will be deleted. 245 | # 3. If wal-ttl-seconds is not 0 and wal-size-limit is 0, then 246 | # WAL files will be checked every wal-ttl-seconds / 2 and those that 247 | # are older than wal-ttl-seconds will be deleted. 248 | # 4. If both are not 0, WAL files will be checked every 10 min and both 249 | # checks will be performed with ttl being first. 250 | # When you set the path to rocksdb directory in memory like in /dev/shm, you may want to set 251 | # wal-ttl-seconds to a value greater than 0 (like 86400) and backup your db on a regular basis. 252 | # See https://github.com/facebook/rocksdb/wiki/How-to-persist-in-memory-RocksDB-database 253 | # wal-ttl-seconds = 0 254 | # wal-size-limit = 0 255 | 256 | # rocksdb max total wal size 257 | # max-total-wal-size = "4GB" 258 | 259 | # Rocksdb Statistics provides cumulative stats over time. 260 | # Turn statistics on will introduce about 5%-10% overhead for RocksDB, 261 | # but it is worthy to know the internal status of RocksDB. 262 | # enable-statistics = true 263 | 264 | # Dump statistics periodically in information logs. 265 | # Same as rocksdb's default value (10 min). 
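# For illustration only (example value): dumping statistics every half hour
# instead would look like:
# stats-dump-period = "30m"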
266 | # stats-dump-period = "10m" 267 | 268 | # Due to Rocksdb FAQ: https://github.com/facebook/rocksdb/wiki/RocksDB-FAQ, 269 | # If you want to use rocksdb on multi disks or spinning disks, you should set value at 270 | # least 2MB; 271 | # compaction-readahead-size = 0 272 | 273 | # This is the maximum buffer size that is used by WritableFileWrite 274 | # writable-file-max-buffer-size = "1MB" 275 | 276 | # Use O_DIRECT for both reads and writes in background flush and compactions 277 | # use-direct-io-for-flush-and-compaction = false 278 | 279 | # Limit the disk IO of compaction and flush. Compaction and flush can cause 280 | # terrible spikes if they exceed a certain threshold. Consider setting this to 281 | # 50% ~ 80% of the disk throughput for a more stable result. However, in heavy 282 | # write workload, limiting compaction and flush speed can cause write stalls too. 283 | # rate-bytes-per-sec = 0 284 | 285 | # Enable or disable the pipelined write 286 | # enable-pipelined-write = true 287 | 288 | # Allows OS to incrementally sync files to disk while they are being 289 | # written, asynchronously, in the background. 290 | # bytes-per-sync = "0MB" 291 | 292 | # Allows OS to incrementally sync WAL to disk while it is being written. 293 | # wal-bytes-per-sync = "0KB" 294 | 295 | # Specify the maximal size of the Rocksdb info log file. If the log file 296 | # is larger than `max_log_file_size`, a new info log file will be created. 297 | # If max_log_file_size == 0, all logs will be written to one log file. 298 | # Default: 1GB 299 | # info-log-max-size = "1GB" 300 | 301 | # Time for the Rocksdb info log file to roll (in seconds). 302 | # If specified with non-zero value, log file will be rolled 303 | # if it has been active longer than `log_file_time_to_roll`. 304 | # Default: 0 (disabled) 305 | # info-log-roll-time = "0" 306 | 307 | # Maximal Rocksdb info log files to be kept. 308 | # Default: 10 309 | # info-log-keep-log-file-num = 10 310 | 311 | # This specifies the Rocksdb info LOG dir. 312 | # If it is empty, the log files will be in the same dir as data. 313 | # If it is non empty, the log files will be in the specified dir, 314 | # and the db data dir's absolute path will be used as the log file 315 | # name's prefix. 316 | # Default: empty 317 | # info-log-dir = "" 318 | 319 | # Column Family default used to store actual data of the database. 320 | [rocksdb.defaultcf] 321 | # compression method (if any) is used to compress a block. 322 | # no: kNoCompression 323 | # snappy: kSnappyCompression 324 | # zlib: kZlibCompression 325 | # bzip2: kBZip2Compression 326 | # lz4: kLZ4Compression 327 | # lz4hc: kLZ4HCCompression 328 | # zstd: kZSTD 329 | 330 | # per level compression 331 | # compression-per-level = ["no", "no", "lz4", "lz4", "lz4", "zstd", "zstd"] 332 | 333 | # Approximate size of user data packed per block. Note that the 334 | # block size specified here corresponds to uncompressed data. 335 | # block-size = "64KB" 336 | 337 | # If you're doing point lookups you definitely want to turn bloom filters on, We use 338 | # bloom filters to avoid unnecessary disk reads. Default bits_per_key is 10, which 339 | # yields ~1% false positive rate. Larger bits_per_key values will reduce false positive 340 | # rate, but increase memory usage and space amplification. 
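# For illustration only (example value): raising bits_per_key, e.g. to 15,
# pushes the false positive rate well below 1% at the cost of extra memory:
# bloom-filter-bits-per-key = 15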
341 | # bloom-filter-bits-per-key = 10 342 | 343 | # false means one sst file one bloom filter, true means evry block has a corresponding bloom filter 344 | # block-based-bloom-filter = false 345 | 346 | # level0-file-num-compaction-trigger = 4 347 | 348 | # Soft limit on number of level-0 files. We start slowing down writes at this point. 349 | # level0-slowdown-writes-trigger = 20 350 | 351 | # Maximum number of level-0 files. We stop writes at this point. 352 | # level0-stop-writes-trigger = 36 353 | 354 | # Amount of data to build up in memory (backed by an unsorted log 355 | # on disk) before converting to a sorted on-disk file. 356 | # write-buffer-size = "128MB" 357 | 358 | # The maximum number of write buffers that are built up in memory. 359 | # max-write-buffer-number = 5 360 | 361 | # The minimum number of write buffers that will be merged together 362 | # before writing to storage. 363 | # min-write-buffer-number-to-merge = 1 364 | 365 | # Control maximum total data size for base level (level 1). 366 | # max-bytes-for-level-base = "512MB" 367 | 368 | # Target file size for compaction. 369 | # target-file-size-base = "8MB" 370 | 371 | # Max bytes for compaction.max_compaction_bytes 372 | # max-compaction-bytes = "2GB" 373 | 374 | # There are four different algorithms to pick files to compact. 375 | # 0 : ByCompensatedSize 376 | # 1 : OldestLargestSeqFirst 377 | # 2 : OldestSmallestSeqFirst 378 | # 3 : MinOverlappingRatio 379 | # compaction-pri = 3 380 | 381 | # block-cache used to cache uncompressed blocks, big block-cache can speed up read. 382 | # in normal cases should tune to 30%-50% system's total memory. 383 | # block-cache-size = "1GB" 384 | 385 | # Indicating if we'd put index/filter blocks to the block cache. 386 | # If not specified, each "table reader" object will pre-load index/filter block 387 | # during table initialization. 388 | # cache-index-and-filter-blocks = true 389 | 390 | # Pin level0 filter and index blocks in cache. 391 | # pin-l0-filter-and-index-blocks = true 392 | 393 | # Enable read amplication statistics. 394 | # value => memory usage (percentage of loaded blocks memory) 395 | # 1 => 12.50 % 396 | # 2 => 06.25 % 397 | # 4 => 03.12 % 398 | # 8 => 01.56 % 399 | # 16 => 00.78 % 400 | # read-amp-bytes-per-bit = 0 401 | 402 | # Pick target size of each level dynamically. 403 | # dynamic-level-bytes = true 404 | 405 | # Options for Column Family write 406 | # Column Family write used to store commit informations in MVCC model 407 | [rocksdb.writecf] 408 | # compression-per-level = ["no", "no", "lz4", "lz4", "lz4", "zstd", "zstd"] 409 | # block-size = "64KB" 410 | # write-buffer-size = "128MB" 411 | # max-write-buffer-number = 5 412 | # min-write-buffer-number-to-merge = 1 413 | # max-bytes-for-level-base = "512MB" 414 | # target-file-size-base = "8MB" 415 | 416 | # in normal cases should tune to 10%-30% system's total memory. 
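# For illustration only (example value): on a host with 32 GB of RAM, 10%-30%
# works out to roughly 3 GB - 10 GB, so a value in that range might be:
# block-cache-size = "4GB"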
417 | # block-cache-size = "256MB" 418 | # level0-file-num-compaction-trigger = 4 419 | # level0-slowdown-writes-trigger = 20 420 | # level0-stop-writes-trigger = 36 421 | # cache-index-and-filter-blocks = true 422 | # pin-l0-filter-and-index-blocks = true 423 | # compaction-pri = 3 424 | # read-amp-bytes-per-bit = 0 425 | # dynamic-level-bytes = true 426 | 427 | [rocksdb.lockcf] 428 | # compression-per-level = ["no", "no", "no", "no", "no", "no", "no"] 429 | # block-size = "16KB" 430 | # write-buffer-size = "128MB" 431 | # max-write-buffer-number = 5 432 | # min-write-buffer-number-to-merge = 1 433 | # max-bytes-for-level-base = "128MB" 434 | # target-file-size-base = "8MB" 435 | # block-cache-size = "256MB" 436 | # level0-file-num-compaction-trigger = 1 437 | # level0-slowdown-writes-trigger = 20 438 | # level0-stop-writes-trigger = 36 439 | # cache-index-and-filter-blocks = true 440 | # pin-l0-filter-and-index-blocks = true 441 | # compaction-pri = 0 442 | # read-amp-bytes-per-bit = 0 443 | # dynamic-level-bytes = true 444 | 445 | [raftdb] 446 | # max-sub-compactions = 1 447 | max-open-files = 1024 448 | # max-manifest-file-size = "20MB" 449 | # create-if-missing = true 450 | 451 | # enable-statistics = true 452 | # stats-dump-period = "10m" 453 | 454 | # compaction-readahead-size = 0 455 | # writable-file-max-buffer-size = "1MB" 456 | # use-direct-io-for-flush-and-compaction = false 457 | # enable-pipelined-write = true 458 | # allow-concurrent-memtable-write = false 459 | # bytes-per-sync = "0MB" 460 | # wal-bytes-per-sync = "0KB" 461 | 462 | # info-log-max-size = "1GB" 463 | # info-log-roll-time = "0" 464 | # info-log-keep-log-file-num = 10 465 | # info-log-dir = "" 466 | 467 | [raftdb.defaultcf] 468 | # compression-per-level = ["no", "no", "lz4", "lz4", "lz4", "zstd", "zstd"] 469 | # block-size = "64KB" 470 | # write-buffer-size = "128MB" 471 | # max-write-buffer-number = 5 472 | # min-write-buffer-number-to-merge = 1 473 | # max-bytes-for-level-base = "512MB" 474 | # target-file-size-base = "8MB" 475 | 476 | # should tune to 256MB~2GB. 477 | # block-cache-size = "256MB" 478 | # level0-file-num-compaction-trigger = 4 479 | # level0-slowdown-writes-trigger = 20 480 | # level0-stop-writes-trigger = 36 481 | # cache-index-and-filter-blocks = true 482 | # pin-l0-filter-and-index-blocks = true 483 | # compaction-pri = 0 484 | # read-amp-bytes-per-bit = 0 485 | # dynamic-level-bytes = true 486 | 487 | [security] 488 | # set the path for certificates. Empty string means disabling secure connectoins. 489 | # ca-path = "" 490 | # cert-path = "" 491 | # key-path = "" 492 | 493 | [import] 494 | # the directory to store importing kv data. 495 | # import-dir = "/tmp/tikv/import" 496 | # number of threads to handle RPC requests. 497 | # num-threads = 8 498 | # stream channel window size, stream will be blocked on channel full. 
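# For illustration only (example value): a larger window such as
# stream-channel-window = 256 allows more in-flight messages per import stream
# at the cost of additional memory.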
499 | # stream-channel-window = 128 500 | -------------------------------------------------------------------------------- /dashboard-installer/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:2.7-alpine 2 | 3 | RUN apk add --no-cache ca-certificates curl 4 | 5 | ADD dashboards / 6 | 7 | ENTRYPOINT ["/tidb-dashboard-installer.sh"] 8 | -------------------------------------------------------------------------------- /dashboard-installer/README.md: -------------------------------------------------------------------------------- 1 | # TiDB dashboard installer 2 | 3 | This image is used to configure Grafana datasource and dashboards for TiDB cluster. It is used in [tidb-docker-compose](https://github.com/pingcap/tidb-docker-compose) and [tidb-operator](https://github.com/pingcap/tidb-operator). 4 | 5 | The JSON files in dashboards are copied from [tidb-ansible](https://github.com/pingcap/tidb-ansible/tree/master/scripts). 6 | 7 | Grafana version prior to v5.0.0 can only use import API to automate datasource and dashboard configuration. So this image is needed to run in docker environment. It runs only once in this environment. 8 | 9 | With Grafana v5.x, we can use [provisioning](http://docs.grafana.org/administration/provisioning) feature to statically provision datasources and dashboards. No need to use scripts to configure Grafana. 10 | 11 | But currently, the dashboards in [tidb-ansible](https://github.com/pingcap/tidb-ansible/tree/master/scripts) repository are incompatible with Grafana v5.x and cannot be statically provisioned. So this image is still required. 12 | 13 | In the future, we can use [grafonnet](https://github.com/grafana/grafonnet-lib) to migrate old dashboards and make dashboard updating reviewable. 14 | -------------------------------------------------------------------------------- /dashboard-installer/dashboards/LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /dashboard-installer/dashboards/datasource.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "tidb-cluster", 3 | "type": "prometheus", 4 | "url": "http://127.0.0.1:9090", 5 | "access": "proxy", 6 | "basicAuth": false 7 | } 8 | -------------------------------------------------------------------------------- /dashboard-installer/dashboards/dests.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "datasource": "tidb-cluster", 4 | "name": "TiDB-Cluster", 5 | "titles": { 6 | "overview": "TiDB-Cluster-Overview", 7 | "pd": "TiDB-Cluster-PD", 8 | "tidb": "TiDB-Cluster-TiDB", 9 | "tikv": "TiDB-Cluster-TiKV" 10 | } 11 | } 12 | ] 13 | -------------------------------------------------------------------------------- /dashboard-installer/dashboards/grafana-config-copy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import print_function, \ 4 | unicode_literals 5 | 6 | import urllib 7 | import urllib2 8 | import base64 9 | import json 10 | import sys 11 | from pprint import pprint 12 | 13 | try: 14 | input = raw_input 15 | except: 16 | pass 17 | 18 | ############################################################ 19 | ################## CONFIGURATION ########################### 20 | ############################################################ 21 | 22 | # use a viewer key 23 | src = dict( 24 | dashboards={ 25 | "pd" : 'pd.json', 26 | "tidb": 'tidb.json', 27 | "tikv": 'tikv.json', 28 | "overview": 'overview.json' 29 | }) 30 | 31 | dests = [ 32 | ] 33 | 34 | if not dests: 35 | with open("./dests.json") as fp: 36 | dests = json.load(fp) 37 | 38 | 39 | ############################################################ 40 | ################## CONFIGURATION ENDS ###################### 41 | ############################################################ 42 | 43 | def export_dashboard(api_url, api_key, dashboard_name): 44 | req = urllib2.Request(api_url + 'api/dashboards/db/' + dashboard_name, 45 | headers={'Authorization': "Bearer {}".format(api_key)}) 46 | 47 | resp = urllib2.urlopen(req) 48 | data = json.load(resp) 49 | return data['dashboard'] 50 | 51 | 52 | def fill_dashboard_with_dest_config(dashboard, dest, type_='node'): 53 | dashboard['title'] = dest['titles'][type_] 54 | dashboard['id'] = None 55 | # pprint(dashboard) 56 | for row in dashboard['rows']: 57 | for panel in row['panels']: 58 | panel['datasource'] = dest['datasource'] 59 | 60 | if 'templating' in dashboard: 61 | for templating in dashboard['templating']['list']: 62 | if templating['type'] == 'query': 63 | templating['current'] = {} 64 | templating['options'] = [] 65 | templating['datasource'] = dest['datasource'] 66 | 67 | if 'annotations' in dashboard: 68 | for annotation in dashboard['annotations']['list']: 69 | annotation['datasource'] = dest['datasource'] 70 | return dashboard 71 | 72 | def import_dashboard_via_user_pass(api_url, user, password, dashboard): 73 | payload = {'dashboard': dashboard, 74 | 'overwrite': True} 75 | auth_string = base64.b64encode('%s:%s' % (user, password)) 76 | headers = {'Authorization': "Basic {}".format(auth_string), 77 | 'Content-Type': 'application/json'} 78 | req = urllib2.Request(api_url + 'api/dashboards/db', 79 | headers=headers, 80 | data=json.dumps(payload)) 81 | try: 82 | resp = urllib2.urlopen(req) 83 | data = json.load(resp) 84 | 
return data 85 | except urllib2.HTTPError, error: 86 | data = json.load(error) 87 | return data 88 | 89 | 90 | if __name__ == '__main__': 91 | url = sys.argv[1] 92 | user = sys.argv[2] 93 | password = sys.argv[3] 94 | print(url) 95 | for type_ in src['dashboards']: 96 | print("[load] from <{}>:{}".format( 97 | src['dashboards'][type_], type_)) 98 | 99 | dashboard = json.load(open(src['dashboards'][type_])) 100 | 101 | for dest in dests: 102 | dashboard = fill_dashboard_with_dest_config(dashboard, dest, type_) 103 | print("[import] as <{}> to [{}]".format( 104 | dashboard['title'], dest['name']), end='\t............. ') 105 | ret = import_dashboard_via_user_pass(url, user, password, dashboard) 106 | print(ret) 107 | 108 | if ret['status'] != 'success': 109 | print(' > ERROR: ', ret) 110 | raise RuntimeError 111 | -------------------------------------------------------------------------------- /dashboard-installer/dashboards/tidb-dashboard-installer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | url=$1 4 | userName=${GRAFANA_USERNAME:-admin} 5 | password=${GRAFANA_PASSWORD:-admin} 6 | datasource_url="http://${url}/api/datasources" 7 | echo "Adding datasource..." 8 | until curl -s -XPOST -H "Content-Type: application/json" --connect-timeout 1 -u ${userName}:${password} ${datasource_url} -d @/datasource.json >/dev/null; do 9 | sleep 1 10 | done 11 | 12 | python grafana-config-copy.py "http://${url}/" ${userName} ${password} 13 | -------------------------------------------------------------------------------- /docker-compose-binlog.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # Source: tidb-docker-compose/templates/docker-compose.yml 3 | # dashboard-installer has been deleted, because we don't need it any more in docker-compose model 4 | version: '2.1' 5 | 6 | services: 7 | pd0: 8 | image: pingcap/pd:latest 9 | ports: 10 | - "2379" 11 | volumes: 12 | - ./config/pd.toml:/pd.toml:ro 13 | - ./data:/data 14 | - ./logs:/logs 15 | command: 16 | - --name=pd0 17 | - --client-urls=http://0.0.0.0:2379 18 | - --peer-urls=http://0.0.0.0:2380 19 | - --advertise-client-urls=http://pd0:2379 20 | - --advertise-peer-urls=http://pd0:2380 21 | - --initial-cluster=pd0=http://pd0:2380,pd1=http://pd1:2380,pd2=http://pd2:2380 22 | - --data-dir=/data/pd0 23 | - --config=/pd.toml 24 | - --log-file=/logs/pd0.log 25 | # sysctls: 26 | # net.core.somaxconn: 32768 27 | # ulimits: 28 | # nofile: 29 | # soft: 1000000 30 | # hard: 1000000 31 | restart: on-failure 32 | 33 | pd1: 34 | image: pingcap/pd:latest 35 | ports: 36 | - "2379" 37 | volumes: 38 | - ./config/pd.toml:/pd.toml:ro 39 | - ./data:/data 40 | - ./logs:/logs 41 | command: 42 | - --name=pd1 43 | - --client-urls=http://0.0.0.0:2379 44 | - --peer-urls=http://0.0.0.0:2380 45 | - --advertise-client-urls=http://pd1:2379 46 | - --advertise-peer-urls=http://pd1:2380 47 | - --initial-cluster=pd0=http://pd0:2380,pd1=http://pd1:2380,pd2=http://pd2:2380 48 | - --data-dir=/data/pd1 49 | - --config=/pd.toml 50 | - --log-file=/logs/pd1.log 51 | # sysctls: 52 | # net.core.somaxconn: 32768 53 | # ulimits: 54 | # nofile: 55 | # soft: 1000000 56 | # hard: 1000000 57 | restart: on-failure 58 | 59 | pd2: 60 | image: pingcap/pd:latest 61 | ports: 62 | - "2379" 63 | volumes: 64 | - ./config/pd.toml:/pd.toml:ro 65 | - ./data:/data 66 | - ./logs:/logs 67 | command: 68 | - --name=pd2 69 | - --client-urls=http://0.0.0.0:2379 70 | - --peer-urls=http://0.0.0.0:2380 71 | - 
--advertise-client-urls=http://pd2:2379 72 | - --advertise-peer-urls=http://pd2:2380 73 | - --initial-cluster=pd0=http://pd0:2380,pd1=http://pd1:2380,pd2=http://pd2:2380 74 | - --data-dir=/data/pd2 75 | - --config=/pd.toml 76 | - --log-file=/logs/pd2.log 77 | # sysctls: 78 | # net.core.somaxconn: 32768 79 | # ulimits: 80 | # nofile: 81 | # soft: 1000000 82 | # hard: 1000000 83 | restart: on-failure 84 | 85 | tikv0: 86 | image: pingcap/tikv:latest 87 | volumes: 88 | - ./config/tikv.toml:/tikv.toml:ro 89 | - ./data:/data 90 | - ./logs:/logs 91 | command: 92 | - --addr=0.0.0.0:20160 93 | - --advertise-addr=tikv0:20160 94 | - --data-dir=/data/tikv0 95 | - --pd=pd0:2379,pd1:2379,pd2:2379 96 | - --config=/tikv.toml 97 | - --log-file=/logs/tikv0.log 98 | depends_on: 99 | - "pd0" 100 | - "pd1" 101 | - "pd2" 102 | # sysctls: 103 | # net.core.somaxconn: 32768 104 | # ulimits: 105 | # nofile: 106 | # soft: 1000000 107 | # hard: 1000000 108 | restart: on-failure 109 | 110 | tikv1: 111 | image: pingcap/tikv:latest 112 | volumes: 113 | - ./config/tikv.toml:/tikv.toml:ro 114 | - ./data:/data 115 | - ./logs:/logs 116 | command: 117 | - --addr=0.0.0.0:20160 118 | - --advertise-addr=tikv1:20160 119 | - --data-dir=/data/tikv1 120 | - --pd=pd0:2379,pd1:2379,pd2:2379 121 | - --config=/tikv.toml 122 | - --log-file=/logs/tikv1.log 123 | depends_on: 124 | - "pd0" 125 | - "pd1" 126 | - "pd2" 127 | # sysctls: 128 | # net.core.somaxconn: 32768 129 | # ulimits: 130 | # nofile: 131 | # soft: 1000000 132 | # hard: 1000000 133 | restart: on-failure 134 | 135 | tikv2: 136 | image: pingcap/tikv:latest 137 | volumes: 138 | - ./config/tikv.toml:/tikv.toml:ro 139 | - ./data:/data 140 | - ./logs:/logs 141 | command: 142 | - --addr=0.0.0.0:20160 143 | - --advertise-addr=tikv2:20160 144 | - --data-dir=/data/tikv2 145 | - --pd=pd0:2379,pd1:2379,pd2:2379 146 | - --config=/tikv.toml 147 | - --log-file=/logs/tikv2.log 148 | depends_on: 149 | - "pd0" 150 | - "pd1" 151 | - "pd2" 152 | # sysctls: 153 | # net.core.somaxconn: 32768 154 | # ulimits: 155 | # nofile: 156 | # soft: 1000000 157 | # hard: 1000000 158 | restart: on-failure 159 | 160 | pump0: 161 | image: pingcap/tidb-binlog:latest 162 | volumes: 163 | - ./config/pump.toml:/pump.toml:ro 164 | - ./data:/data 165 | - ./logs:/logs 166 | command: 167 | - /pump 168 | - --addr=0.0.0.0:8250 169 | - --advertise-addr=pump0:8250 170 | - --data-dir=/data/pump0 171 | - --log-file=/logs/pump0.log 172 | - --node-id=pump0 173 | - --pd-urls=http://pd0:2379,http://pd1:2379,http://pd2:2379 174 | - --config=/pump.toml 175 | depends_on: 176 | - "pd0" 177 | - "pd1" 178 | - "pd2" 179 | restart: on-failure 180 | 181 | pump1: 182 | image: pingcap/tidb-binlog:latest 183 | volumes: 184 | - ./config/pump.toml:/pump.toml:ro 185 | - ./data:/data 186 | - ./logs:/logs 187 | command: 188 | - /pump 189 | - --addr=0.0.0.0:8250 190 | - --advertise-addr=pump1:8250 191 | - --data-dir=/data/pump1 192 | - --log-file=/logs/pump1.log 193 | - --node-id=pump1 194 | - --pd-urls=http://pd0:2379,http://pd1:2379,http://pd2:2379 195 | - --config=/pump.toml 196 | depends_on: 197 | - "pd0" 198 | - "pd1" 199 | - "pd2" 200 | restart: on-failure 201 | 202 | pump2: 203 | image: pingcap/tidb-binlog:latest 204 | volumes: 205 | - ./config/pump.toml:/pump.toml:ro 206 | - ./data:/data 207 | - ./logs:/logs 208 | command: 209 | - /pump 210 | - --addr=0.0.0.0:8250 211 | - --advertise-addr=pump2:8250 212 | - --data-dir=/data/pump2 213 | - --log-file=/logs/pump2.log 214 | - --node-id=pump2 215 | - 
--pd-urls=http://pd0:2379,http://pd1:2379,http://pd2:2379 216 | - --config=/pump.toml 217 | depends_on: 218 | - "pd0" 219 | - "pd1" 220 | - "pd2" 221 | restart: on-failure 222 | 223 | drainer: 224 | image: pingcap/tidb-binlog:latest 225 | volumes: 226 | - ./config/drainer.toml:/drainer.toml:ro 227 | - ./data:/data 228 | - ./logs:/logs 229 | command: 230 | - /drainer 231 | - --addr=0.0.0.0:8249 232 | - --data-dir=/data/data.drainer 233 | - --log-file=/logs/drainer.log 234 | - --pd-urls=http://pd0:2379,http://pd1:2379,http://pd2:2379 235 | - --config=/drainer.toml 236 | - --initial-commit-ts=0 237 | - --dest-db-type=kafka 238 | depends_on: 239 | - "pd0" 240 | - "pd1" 241 | - "pd2" 242 | - "kafka0" 243 | - "kafka1" 244 | - "kafka2" 245 | restart: on-failure 246 | 247 | zoo0: 248 | image: zookeeper:latest 249 | ports: 250 | - "2181:2181" 251 | environment: 252 | ZOO_MY_ID: 1 253 | ZOO_PORT: 2181 254 | ZOO_SERVERS: server.1=zoo0:2888:3888 server.2=zoo1:2888:3888 server.3=zoo2:2888:3888 255 | volumes: 256 | - ./data/zoo0/data:/data 257 | - ./data/zoo0/datalog:/datalog 258 | restart: on-failure 259 | 260 | zoo1: 261 | image: zookeeper:latest 262 | ports: 263 | - "2182:2182" 264 | environment: 265 | ZOO_MY_ID: 2 266 | ZOO_PORT: 2182 267 | ZOO_SERVERS: server.1=zoo0:2888:3888 server.2=zoo1:2888:3888 server.3=zoo2:2888:3888 268 | volumes: 269 | - ./data/zoo1/data:/data 270 | - ./data/zoo1/datalog:/datalog 271 | restart: on-failure 272 | 273 | zoo2: 274 | image: zookeeper:latest 275 | ports: 276 | - "2183:2183" 277 | environment: 278 | ZOO_MY_ID: 3 279 | ZOO_PORT: 2183 280 | ZOO_SERVERS: server.1=zoo0:2888:3888 server.2=zoo1:2888:3888 server.3=zoo2:2888:3888 281 | volumes: 282 | - ./data/zoo2/data:/data 283 | - ./data/zoo2/datalog:/datalog 284 | restart: on-failure 285 | 286 | kafka0: 287 | image: wurstmeister/kafka:2.12-2.1.1 288 | ports: 289 | - "9092:9092" 290 | environment: 291 | KAFKA_BROKER_ID: 1 292 | KAFKA_LOG_DIRS: /data/kafka-logs 293 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka0:9092 294 | KAFKA_LISTENERS: PLAINTEXT://0.0.0.0:9092 295 | KAFKA_ZOOKEEPER_CONNECT: zoo0:2181,zoo1:2182,zoo2:2183 296 | volumes: 297 | - ./data/kafka-logs/kafka0:/data/kafka-logs 298 | - ./logs/kafka0:/opt/kafka/logs 299 | - /var/run/docker.sock:/var/run/docker.sock 300 | depends_on: 301 | - "zoo0" 302 | - "zoo1" 303 | - "zoo2" 304 | restart: on-failure 305 | kafka1: 306 | image: wurstmeister/kafka:2.12-2.1.1 307 | ports: 308 | - "9093:9093" 309 | environment: 310 | KAFKA_BROKER_ID: 2 311 | KAFKA_LOG_DIRS: /data/kafka-logs 312 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka1:9093 313 | KAFKA_LISTENERS: PLAINTEXT://0.0.0.0:9093 314 | KAFKA_ZOOKEEPER_CONNECT: zoo0:2181,zoo1:2182,zoo2:2183 315 | volumes: 316 | - ./data/kafka-logs/kafka1:/data/kafka-logs 317 | - ./logs/kafka1:/opt/kafka/logs 318 | - /var/run/docker.sock:/var/run/docker.sock 319 | depends_on: 320 | - "zoo0" 321 | - "zoo1" 322 | - "zoo2" 323 | restart: on-failure 324 | kafka2: 325 | image: wurstmeister/kafka:2.12-2.1.1 326 | ports: 327 | - "9094:9094" 328 | environment: 329 | KAFKA_BROKER_ID: 3 330 | KAFKA_LOG_DIRS: /data/kafka-logs 331 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka2:9094 332 | KAFKA_LISTENERS: PLAINTEXT://0.0.0.0:9094 333 | KAFKA_ZOOKEEPER_CONNECT: zoo0:2181,zoo1:2182,zoo2:2183 334 | volumes: 335 | - ./data/kafka-logs/kafka2:/data/kafka-logs 336 | - ./logs/kafka2:/opt/kafka/logs 337 | - /var/run/docker.sock:/var/run/docker.sock 338 | depends_on: 339 | - "zoo0" 340 | - "zoo1" 341 | - "zoo2" 342 | restart: on-failure 343 | 344 | tidb: 
345 | image: pingcap/tidb:latest 346 | ports: 347 | - "4000:4000" 348 | - "10080:10080" 349 | volumes: 350 | - ./config/tidb.toml:/tidb.toml:ro 351 | - ./logs:/logs 352 | command: 353 | - --store=tikv 354 | - --path=pd0:2379,pd1:2379,pd2:2379 355 | - --config=/tidb.toml 356 | - --log-file=/logs/tidb.log 357 | - --advertise-address=tidb 358 | - --enable-binlog=true 359 | depends_on: 360 | - "tikv0" 361 | - "tikv1" 362 | - "tikv2" 363 | - "pump0" 364 | - "pump1" 365 | - "pump2" 366 | # sysctls: 367 | # net.core.somaxconn: 32768 368 | # ulimits: 369 | # nofile: 370 | # soft: 1000000 371 | # hard: 1000000 372 | restart: on-failure 373 | 374 | tispark-master: 375 | image: pingcap/tispark:v2.1.1 376 | command: 377 | - /opt/spark/sbin/start-master.sh 378 | volumes: 379 | - ./config/spark-defaults.conf:/opt/spark/conf/spark-defaults.conf:ro 380 | environment: 381 | SPARK_MASTER_PORT: 7077 382 | SPARK_MASTER_WEBUI_PORT: 8080 383 | ports: 384 | - "7077:7077" 385 | - "8080:8080" 386 | depends_on: 387 | - "tikv0" 388 | - "tikv1" 389 | - "tikv2" 390 | restart: on-failure 391 | tispark-slave0: 392 | image: pingcap/tispark:v2.1.1 393 | command: 394 | - /opt/spark/sbin/start-slave.sh 395 | - spark://tispark-master:7077 396 | volumes: 397 | - ./config/spark-defaults.conf:/opt/spark/conf/spark-defaults.conf:ro 398 | environment: 399 | SPARK_WORKER_WEBUI_PORT: 38081 400 | ports: 401 | - "38081:38081" 402 | depends_on: 403 | - tispark-master 404 | restart: on-failure 405 | 406 | tidb-vision: 407 | image: pingcap/tidb-vision:latest 408 | environment: 409 | PD_ENDPOINT: pd0:2379 410 | ports: 411 | - "8010:8010" 412 | restart: on-failure 413 | pushgateway: 414 | image: prom/pushgateway:v0.3.1 415 | command: 416 | - --log.level=error 417 | restart: on-failure 418 | 419 | prometheus: 420 | user: root 421 | image: prom/prometheus:v2.2.1 422 | command: 423 | - --log.level=error 424 | - --storage.tsdb.path=/data/prometheus 425 | - --config.file=/etc/prometheus/prometheus.yml 426 | ports: 427 | - "9090:9090" 428 | volumes: 429 | - ./config/prometheus.yml:/etc/prometheus/prometheus.yml:ro 430 | - ./config/pd.rules.yml:/etc/prometheus/pd.rules.yml:ro 431 | - ./config/tikv.rules.yml:/etc/prometheus/tikv.rules.yml:ro 432 | - ./config/tidb.rules.yml:/etc/prometheus/tidb.rules.yml:ro 433 | - ./data:/data 434 | restart: on-failure 435 | grafana: 436 | image: grafana/grafana:5.3.0 437 | user: "0" 438 | environment: 439 | GF_LOG_LEVEL: error 440 | GF_PATHS_PROVISIONING: /etc/grafana/provisioning 441 | GF_PATHS_CONFIG: /etc/grafana/grafana.ini 442 | ports: 443 | - "3000:3000" 444 | volumes: 445 | - ./config/grafana:/etc/grafana 446 | - ./config/dashboards:/tmp/dashboards 447 | - ./data/grafana:/var/lib/grafana 448 | restart: on-failure 449 | -------------------------------------------------------------------------------- /docker-compose-test.yml: -------------------------------------------------------------------------------- 1 | version: '2.1' 2 | 3 | services: 4 | tispark-tests: 5 | image: pingcap/tispark:latest 6 | volumes: 7 | - ./config/spark-defaults.conf:/opt/spark/conf/spark-defaults.conf:ro 8 | - ./tispark-tests/tests:/opt/spark/tests:ro 9 | 10 | networks: 11 | default: 12 | external: 13 | name: tidb-docker-compose_default 14 | -------------------------------------------------------------------------------- /docker-compose-tiflash-nightly.yml: -------------------------------------------------------------------------------- 1 | version: '2.1' 2 | 3 | services: 4 | pd0: 5 | image: pingcap/pd:nightly 6 | ports: 7 | - 
"2379" 8 | volumes: 9 | - ./config/pd-nightly-tiflash.toml:/pd.toml:ro 10 | - ./data:/data 11 | - ./logs:/logs 12 | command: 13 | - --name=pd0 14 | - --client-urls=http://0.0.0.0:2379 15 | - --peer-urls=http://0.0.0.0:2380 16 | - --advertise-client-urls=http://pd0:2379 17 | - --advertise-peer-urls=http://pd0:2380 18 | - --initial-cluster=pd0=http://pd0:2380 19 | - --data-dir=/data/pd 20 | - --config=/pd.toml 21 | - --log-file=/logs/pd.log 22 | restart: on-failure 23 | tikv: 24 | image: pingcap/tikv:nightly 25 | volumes: 26 | - ./data:/data 27 | - ./logs:/logs 28 | command: 29 | - --addr=0.0.0.0:20160 30 | - --advertise-addr=tikv:20160 31 | - --status-addr=tikv:20180 32 | - --data-dir=/data/tikv 33 | - --pd=pd0:2379 34 | - --log-file=/logs/tikv.log 35 | depends_on: 36 | - "pd0" 37 | restart: on-failure 38 | tidb: 39 | image: pingcap/tidb:nightly 40 | ports: 41 | - "4000:4000" 42 | - "10080:10080" 43 | volumes: 44 | - ./logs:/logs 45 | command: 46 | - --status=10080 47 | - --advertise-address=tidb 48 | - --store=tikv 49 | - --path=pd0:2379 50 | - --log-file=/logs/tidb.log 51 | depends_on: 52 | - "tikv" 53 | restart: on-failure 54 | tiflash: 55 | image: pingcap/tiflash:nightly 56 | volumes: 57 | - ./config/tiflash-nightly.toml:/tiflash.toml:ro 58 | - ./config/tiflash-learner-nightly.toml:/tiflash-learner.toml:ro 59 | - ./data:/data 60 | - ./logs:/logs 61 | command: 62 | - --config=/tiflash.toml 63 | depends_on: 64 | - "tikv" 65 | - "tidb" 66 | restart: on-failure 67 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2.1' 2 | 3 | services: 4 | pd0: 5 | image: pingcap/pd:latest 6 | ports: 7 | - "2379" 8 | volumes: 9 | - ./config/pd.toml:/pd.toml:ro 10 | - ./data:/data 11 | - ./logs:/logs 12 | command: 13 | - --name=pd0 14 | - --client-urls=http://0.0.0.0:2379 15 | - --peer-urls=http://0.0.0.0:2380 16 | - --advertise-client-urls=http://pd0:2379 17 | - --advertise-peer-urls=http://pd0:2380 18 | - --initial-cluster=pd0=http://pd0:2380,pd1=http://pd1:2380,pd2=http://pd2:2380 19 | - --data-dir=/data/pd0 20 | - --config=/pd.toml 21 | - --log-file=/logs/pd0.log 22 | restart: on-failure 23 | pd1: 24 | image: pingcap/pd:latest 25 | ports: 26 | - "2379" 27 | volumes: 28 | - ./config/pd.toml:/pd.toml:ro 29 | - ./data:/data 30 | - ./logs:/logs 31 | command: 32 | - --name=pd1 33 | - --client-urls=http://0.0.0.0:2379 34 | - --peer-urls=http://0.0.0.0:2380 35 | - --advertise-client-urls=http://pd1:2379 36 | - --advertise-peer-urls=http://pd1:2380 37 | - --initial-cluster=pd0=http://pd0:2380,pd1=http://pd1:2380,pd2=http://pd2:2380 38 | - --data-dir=/data/pd1 39 | - --config=/pd.toml 40 | - --log-file=/logs/pd1.log 41 | restart: on-failure 42 | pd2: 43 | image: pingcap/pd:latest 44 | ports: 45 | - "2379" 46 | volumes: 47 | - ./config/pd.toml:/pd.toml:ro 48 | - ./data:/data 49 | - ./logs:/logs 50 | command: 51 | - --name=pd2 52 | - --client-urls=http://0.0.0.0:2379 53 | - --peer-urls=http://0.0.0.0:2380 54 | - --advertise-client-urls=http://pd2:2379 55 | - --advertise-peer-urls=http://pd2:2380 56 | - --initial-cluster=pd0=http://pd0:2380,pd1=http://pd1:2380,pd2=http://pd2:2380 57 | - --data-dir=/data/pd2 58 | - --config=/pd.toml 59 | - --log-file=/logs/pd2.log 60 | restart: on-failure 61 | tikv0: 62 | image: pingcap/tikv:latest 63 | volumes: 64 | - ./config/tikv.toml:/tikv.toml:ro 65 | - ./data:/data 66 | - ./logs:/logs 67 | command: 68 | - --addr=0.0.0.0:20160 69 | 
- --advertise-addr=tikv0:20160 70 | - --data-dir=/data/tikv0 71 | - --pd=pd0:2379,pd1:2379,pd2:2379 72 | - --config=/tikv.toml 73 | - --log-file=/logs/tikv0.log 74 | depends_on: 75 | - "pd0" 76 | - "pd1" 77 | - "pd2" 78 | restart: on-failure 79 | tikv1: 80 | image: pingcap/tikv:latest 81 | volumes: 82 | - ./config/tikv.toml:/tikv.toml:ro 83 | - ./data:/data 84 | - ./logs:/logs 85 | command: 86 | - --addr=0.0.0.0:20160 87 | - --advertise-addr=tikv1:20160 88 | - --data-dir=/data/tikv1 89 | - --pd=pd0:2379,pd1:2379,pd2:2379 90 | - --config=/tikv.toml 91 | - --log-file=/logs/tikv1.log 92 | depends_on: 93 | - "pd0" 94 | - "pd1" 95 | - "pd2" 96 | restart: on-failure 97 | tikv2: 98 | image: pingcap/tikv:latest 99 | volumes: 100 | - ./config/tikv.toml:/tikv.toml:ro 101 | - ./data:/data 102 | - ./logs:/logs 103 | command: 104 | - --addr=0.0.0.0:20160 105 | - --advertise-addr=tikv2:20160 106 | - --data-dir=/data/tikv2 107 | - --pd=pd0:2379,pd1:2379,pd2:2379 108 | - --config=/tikv.toml 109 | - --log-file=/logs/tikv2.log 110 | depends_on: 111 | - "pd0" 112 | - "pd1" 113 | - "pd2" 114 | restart: on-failure 115 | 116 | tidb: 117 | image: pingcap/tidb:latest 118 | ports: 119 | - "4000:4000" 120 | - "10080:10080" 121 | volumes: 122 | - ./config/tidb.toml:/tidb.toml:ro 123 | - ./logs:/logs 124 | command: 125 | - --store=tikv 126 | - --path=pd0:2379,pd1:2379,pd2:2379 127 | - --config=/tidb.toml 128 | - --log-file=/logs/tidb.log 129 | - --advertise-address=tidb 130 | depends_on: 131 | - "tikv0" 132 | - "tikv1" 133 | - "tikv2" 134 | restart: on-failure 135 | tispark-master: 136 | image: pingcap/tispark:v2.1.1 137 | command: 138 | - /opt/spark/sbin/start-master.sh 139 | volumes: 140 | - ./config/spark-defaults.conf:/opt/spark/conf/spark-defaults.conf:ro 141 | environment: 142 | SPARK_MASTER_PORT: 7077 143 | SPARK_MASTER_WEBUI_PORT: 8080 144 | ports: 145 | - "7077:7077" 146 | - "8080:8080" 147 | depends_on: 148 | - "tikv0" 149 | - "tikv1" 150 | - "tikv2" 151 | restart: on-failure 152 | tispark-slave0: 153 | image: pingcap/tispark:v2.1.1 154 | command: 155 | - /opt/spark/sbin/start-slave.sh 156 | - spark://tispark-master:7077 157 | volumes: 158 | - ./config/spark-defaults.conf:/opt/spark/conf/spark-defaults.conf:ro 159 | environment: 160 | SPARK_WORKER_WEBUI_PORT: 38081 161 | ports: 162 | - "38081:38081" 163 | depends_on: 164 | - tispark-master 165 | restart: on-failure 166 | 167 | tidb-vision: 168 | image: pingcap/tidb-vision:latest 169 | environment: 170 | PD_ENDPOINT: pd0:2379 171 | ports: 172 | - "8010:8010" 173 | restart: on-failure 174 | 175 | # monitors 176 | pushgateway: 177 | image: prom/pushgateway:v0.3.1 178 | command: 179 | - --log.level=error 180 | restart: on-failure 181 | prometheus: 182 | user: root 183 | image: prom/prometheus:v2.2.1 184 | command: 185 | - --log.level=error 186 | - --storage.tsdb.path=/data/prometheus 187 | - --config.file=/etc/prometheus/prometheus.yml 188 | ports: 189 | - "9090:9090" 190 | volumes: 191 | - ./config/prometheus.yml:/etc/prometheus/prometheus.yml:ro 192 | - ./config/pd.rules.yml:/etc/prometheus/pd.rules.yml:ro 193 | - ./config/tikv.rules.yml:/etc/prometheus/tikv.rules.yml:ro 194 | - ./config/tidb.rules.yml:/etc/prometheus/tidb.rules.yml:ro 195 | - ./data:/data 196 | restart: on-failure 197 | grafana: 198 | image: grafana/grafana:6.0.1 199 | user: "0" 200 | environment: 201 | GF_LOG_LEVEL: error 202 | GF_PATHS_PROVISIONING: /etc/grafana/provisioning 203 | GF_PATHS_CONFIG: /etc/grafana/grafana.ini 204 | volumes: 205 | - ./config/grafana:/etc/grafana 206 | - 
./config/dashboards:/tmp/dashboards 207 | - ./data/grafana:/var/lib/grafana 208 | ports: 209 | - "3000:3000" 210 | restart: on-failure 211 | -------------------------------------------------------------------------------- /docker-swarm.yml: -------------------------------------------------------------------------------- 1 | version: '3.3' 2 | 3 | networks: 4 | default: 5 | driver: overlay 6 | attachable: true 7 | 8 | services: 9 | pd0: 10 | image: pingcap/pd:latest 11 | ports: 12 | - "2379" 13 | volumes: 14 | - ./config/pd.toml:/pd.toml:ro 15 | - ./data:/data 16 | - ./logs:/logs 17 | command: 18 | - --name=pd0 19 | - --client-urls=http://0.0.0.0:2379 20 | - --peer-urls=http://0.0.0.0:2380 21 | - --advertise-client-urls=http://pd0:2379 22 | - --advertise-peer-urls=http://pd0:2380 23 | - --initial-cluster=pd0=http://pd0:2380,pd1=http://pd1:2380,pd2=http://pd2:2380 24 | - --data-dir=/data/pd0 25 | - --config=/pd.toml 26 | - --log-file=/logs/pd0.log 27 | pd1: 28 | image: pingcap/pd:latest 29 | ports: 30 | - "2379" 31 | volumes: 32 | - ./config/pd.toml:/pd.toml:ro 33 | - ./data:/data 34 | - ./logs:/logs 35 | command: 36 | - --name=pd1 37 | - --client-urls=http://0.0.0.0:2379 38 | - --peer-urls=http://0.0.0.0:2380 39 | - --advertise-client-urls=http://pd1:2379 40 | - --advertise-peer-urls=http://pd1:2380 41 | - --initial-cluster=pd0=http://pd0:2380,pd1=http://pd1:2380,pd2=http://pd2:2380 42 | - --data-dir=/data/pd1 43 | - --config=/pd.toml 44 | - --log-file=/logs/pd1.log 45 | pd2: 46 | image: pingcap/pd:latest 47 | ports: 48 | - "2379" 49 | volumes: 50 | - ./config/pd.toml:/pd.toml:ro 51 | - ./data:/data 52 | - ./logs:/logs 53 | command: 54 | - --name=pd2 55 | - --client-urls=http://0.0.0.0:2379 56 | - --peer-urls=http://0.0.0.0:2380 57 | - --advertise-client-urls=http://pd2:2379 58 | - --advertise-peer-urls=http://pd2:2380 59 | - --initial-cluster=pd0=http://pd0:2380,pd1=http://pd1:2380,pd2=http://pd2:2380 60 | - --data-dir=/data/pd2 61 | - --config=/pd.toml 62 | - --log-file=/logs/pd2.log 63 | tikv: 64 | image: pingcap/tikv:latest 65 | ports: 66 | - target: 20160 67 | published: 20160 68 | environment: 69 | - TASK_SLOT={{.Task.Slot}} 70 | volumes: 71 | - ./config/tikv.toml:/tikv.toml:ro 72 | - ./data:/data 73 | - ./logs:/logs 74 | entrypoint: [ "/bin/sh", "-c", "/tikv-server --advertise-addr=$$HOSTNAME:20160 --addr=0.0.0.0:20160 --data-dir=/data/tikv$$TASK_SLOT --pd=pd0:2379,pd1:2379,pd2:2379 --config=/tikv.toml --log-file=/logs/tikv$$TASK_SLOT.log --log-level=info" ] 75 | depends_on: 76 | - "pd0" 77 | - "pd1" 78 | - "pd2" 79 | deploy: 80 | replicas: 3 81 | restart_policy: 82 | condition: on-failure 83 | 84 | tidb: 85 | image: pingcap/tidb:latest 86 | ports: 87 | - target: 4000 88 | published: 4000 89 | - target: 10080 90 | published: 10080 91 | environment: 92 | - TASK_SLOT={{.Task.Slot}} 93 | volumes: 94 | - ./config/tidb.toml:/tidb.toml:ro 95 | - ./logs:/logs 96 | entrypoint: [ "/bin/sh", "-c", "/tidb-server --advertise-address=$$HOSTNAME --store=tikv --path=pd0:2379,pd1:2379,pd2:2379 --config=/tidb.toml --log-file=/logs/tidb$$TASK_SLOT.log -L info" ] 97 | depends_on: 98 | - "tikv" 99 | deploy: 100 | replicas: 1 101 | 102 | tispark-master: 103 | image: pingcap/tispark:v2.1.1 104 | command: 105 | - /opt/spark/sbin/start-master.sh 106 | volumes: 107 | - ./config/spark-defaults.conf:/opt/spark/conf/spark-defaults.conf:ro 108 | environment: 109 | SPARK_MASTER_PORT: 7077 110 | SPARK_MASTER_WEBUI_PORT: 8080 111 | ports: 112 | - "7077:7077" 113 | - "8080:8080" 114 | depends_on: 115 | - "tikv" 116 
| deploy: 117 | replicas: 1 118 | tispark-slave: 119 | image: pingcap/tispark:v2.1.1 120 | command: 121 | - /opt/spark/sbin/start-slave.sh 122 | - spark://tispark-master:7077 123 | volumes: 124 | - ./config/spark-defaults.conf:/opt/spark/conf/spark-defaults.conf:ro 125 | environment: 126 | SPARK_WORKER_WEBUI_PORT: 38081 127 | ports: 128 | - "38081:38081" 129 | depends_on: 130 | - tispark-master 131 | deploy: 132 | replicas: 1 133 | 134 | tidb-vision: 135 | image: pingcap/tidb-vision:latest 136 | environment: 137 | PD_ENDPOINT: pd0:2379 138 | ports: 139 | - "8010:8010" 140 | 141 | # monitors 142 | pushgateway: 143 | image: prom/pushgateway:v0.3.1 144 | command: 145 | - --log.level=error 146 | prometheus: 147 | user: root 148 | image: prom/prometheus:v2.2.1 149 | command: 150 | - --log.level=error 151 | - --storage.tsdb.path=/data/prometheus 152 | - --config.file=/etc/prometheus/prometheus.yml 153 | ports: 154 | - "9090:9090" 155 | volumes: 156 | - ./config/prometheus.yml:/etc/prometheus/prometheus.yml:ro 157 | - ./config/pd.rules.yml:/etc/prometheus/pd.rules.yml:ro 158 | - ./config/tikv.rules.yml:/etc/prometheus/tikv.rules.yml:ro 159 | - ./config/tidb.rules.yml:/etc/prometheus/tidb.rules.yml:ro 160 | - ./data:/data 161 | grafana: 162 | image: grafana/grafana:6.0.1 163 | environment: 164 | GF_LOG_LEVEL: error 165 | GF_PATHS_PROVISIONING: /etc/grafana/provisioning 166 | GF_PATHS_CONFIG: /etc/grafana/grafana.ini 167 | volumes: 168 | - ./config/grafana:/etc/grafana 169 | - ./config/dashboards:/var/lib/grafana/dashboards 170 | ports: 171 | - "3000:3000" 172 | -------------------------------------------------------------------------------- /docker/debug/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM centos:7 2 | 3 | RUN yum update -y && yum install -y \ 4 | curl \ 5 | file \ 6 | gdb \ 7 | git \ 8 | iotop \ 9 | linux-perf \ 10 | mysql \ 11 | net-tools \ 12 | perf \ 13 | perl \ 14 | procps-ng \ 15 | psmisc \ 16 | strace \ 17 | sysstat \ 18 | tree \ 19 | tcpdump \ 20 | unzip \ 21 | vim \ 22 | wget \ 23 | which \ 24 | && yum clean all \ 25 | && rm -rf /var/cache/yum/* 26 | 27 | RUN wget -q http://download.pingcap.org/tidb-latest-linux-amd64.tar.gz \ 28 | && tar xzf tidb-latest-linux-amd64.tar.gz \ 29 | && mv tidb-latest-linux-amd64/bin/* /usr/local/bin/ \ 30 | && rm -rf tidb-latest-linux-amd64.tar.gz tidb-latest-linux-amd64 31 | 32 | RUN wget https://github.com/brendangregg/FlameGraph/archive/master.zip \ 33 | && unzip master.zip \ 34 | && mv FlameGraph-master /opt/FlameGraph \ 35 | && rm master.zip 36 | ADD run_flamegraph.sh /run_flamegraph.sh 37 | 38 | # used for go pprof 39 | ENV GOLANG_VERSION 1.10 40 | ENV GOLANG_DOWNLOAD_URL https://golang.org/dl/go$GOLANG_VERSION.linux-amd64.tar.gz 41 | ENV GOLANG_DOWNLOAD_SHA256 b5a64335f1490277b585832d1f6c7f8c6c11206cba5cd3f771dcb87b98ad1a33 42 | RUN curl -fsSL "$GOLANG_DOWNLOAD_URL" -o golang.tar.gz \ 43 | && echo "$GOLANG_DOWNLOAD_SHA256 golang.tar.gz" | sha256sum -c - \ 44 | && tar -C /usr/local -xzf golang.tar.gz \ 45 | && rm golang.tar.gz 46 | ENV GOPATH /go 47 | ENV GOROOT /usr/local/go 48 | ENV PATH $GOPATH/bin:$GOROOT/bin:$PATH 49 | 50 | ENTRYPOINT ["/bin/bash"] 51 | -------------------------------------------------------------------------------- /docker/debug/run_flamegraph.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | perf record -F 99 -p $1 -g -- sleep 60 6 | perf script > out.perf 7 | 
/opt/FlameGraph/stackcollapse-perf.pl out.perf > out.folded 8 | /opt/FlameGraph/flamegraph.pl out.folded > kernel.svg 9 | curl --upload-file ./kernel.svg https://transfer.sh/kernel.svg 10 | -------------------------------------------------------------------------------- /pd/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine:3.5 2 | 3 | ADD bin/pd-server /pd-server 4 | 5 | WORKDIR / 6 | 7 | EXPOSE 2379 2380 8 | 9 | ENTRYPOINT ["/pd-server"] 10 | -------------------------------------------------------------------------------- /tidb-binlog/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine:3.5 2 | 3 | ADD bin/pump /pump 4 | 5 | ADD bin/drainer /drainer 6 | 7 | RUN chmod +x /pump /drainer 8 | 9 | WORKDIR / 10 | 11 | EXPOSE 8249 8250 12 | -------------------------------------------------------------------------------- /tidb-vision/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:8 2 | 3 | ADD tidb-vision /home/node/tidb-vision 4 | 5 | WORKDIR /home/node/tidb-vision 6 | 7 | RUN npm install 8 | 9 | ENV PD_ENDPOINT=localhost:9000 10 | 11 | EXPOSE 8010 12 | 13 | CMD ["npm", "start"] 14 | -------------------------------------------------------------------------------- /tidb/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine:3.5 2 | 3 | ADD bin/tidb-server /tidb-server 4 | 5 | RUN chmod +x /tidb-server 6 | 7 | WORKDIR / 8 | 9 | EXPOSE 4000 10080 10 | 11 | ENTRYPOINT ["/tidb-server"] 12 | -------------------------------------------------------------------------------- /tikv/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM pingcap/alpine-glibc 2 | 3 | ADD bin/tikv-server /tikv-server 4 | 5 | RUN chmod +x /tikv-server 6 | 7 | WORKDIR / 8 | 9 | EXPOSE 20160 10 | 11 | ENTRYPOINT ["/tikv-server"] 12 | -------------------------------------------------------------------------------- /tispark/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM anapsix/alpine-java:8 2 | 3 | ENV SPARK_VERSION=2.4.3 \ 4 | HADOOP_VERSION=2.7 \ 5 | TISPARK_PYTHON_VERSION=2.0 \ 6 | SPARK_HOME=/opt/spark \ 7 | SPARK_NO_DAEMONIZE=true \ 8 | SPARK_MASTER_PORT=7077 \ 9 | SPARK_MASTER_HOST=0.0.0.0 \ 10 | SPARK_MASTER_WEBUI_PORT=8080 11 | 12 | ADD tispark-tests /opt/tispark-tests 13 | 14 | # the base image only contains the busybox versions of nohup and ps 15 | # spark scripts need nohup from coreutils and ps from procps 16 | # and we can use mysql-client to test the tidb connection 17 | RUN apk --no-cache add \ 18 | coreutils \ 19 | mysql-client \ 20 | procps \ 21 | python \ 22 | py-pip \ 23 | R 24 | 25 | RUN wget -q https://download.pingcap.org/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \ 26 | && tar zxf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz -C /opt/ \ 27 | && ln -s /opt/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} ${SPARK_HOME} \ 28 | && wget -q http://download.pingcap.org/tispark-assembly-latest-linux-amd64.tar.gz \ 29 | && tar zxf ./tispark-assembly-latest-linux-amd64.tar.gz -C /opt/ \ 30 | && cp /opt/assembly/target/tispark-assembly-*.jar ${SPARK_HOME}/jars \ 31 | && wget -q http://download.pingcap.org/tispark-sample-data.tar.gz \ 32 | && tar zxf tispark-sample-data.tar.gz -C ${SPARK_HOME}/data/ \ 33 | && rm -rf /opt/assembly/ spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
tispark-assembly-latest-linux-amd64.tar.gz tispark-sample-data.tar.gz 34 | 35 | ADD spark-${SPARK_VERSION}/session.py ${SPARK_HOME}/python/pyspark/sql/ 36 | ADD conf/log4j.properties /opt/spark/conf/log4j.properties 37 | 38 | ENV PYTHONPATH=${SPARK_HOME}/python/lib/py4j-0.10.4-src.zip:${SPARK_HOME}/python:$PYTHONPATH 39 | 40 | WORKDIR ${SPARK_HOME} 41 | -------------------------------------------------------------------------------- /tispark/conf/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Set everything to be logged to the console 19 | log4j.rootCategory=INFO, console 20 | log4j.appender.console=org.apache.log4j.ConsoleAppender 21 | log4j.appender.console.target=System.err 22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 23 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n 24 | 25 | # Set the default spark-shell log level to WARN. When running the spark-shell, the 26 | # log level for this class is used to overwrite the root logger's log level, so that 27 | # the user can have different defaults for the shell and regular Spark apps.
28 | log4j.logger.org.apache.spark.repl.Main=WARN 29 | 30 | # Settings to quiet third party logs that are too verbose 31 | log4j.logger.org.spark_project.jetty=WARN 32 | log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR 33 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO 34 | log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO 35 | log4j.logger.org.apache.parquet=ERROR 36 | log4j.logger.parquet=ERROR 37 | 38 | # SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support 39 | log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL 40 | log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR 41 | 42 | # tispark disable "WARN ObjectStore:568 - Failed to get database" 43 | log4j.logger.org.apache.hadoop.hive.metastore.ObjectStore=ERROR 44 | -------------------------------------------------------------------------------- /tispark/tispark-tests/tests/loaddata.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd /opt/spark/data/tispark-sample-data 4 | 5 | mysql -h tidb -P 4000 -u root < dss.ddl 6 | -------------------------------------------------------------------------------- /tispark/tispark-tests/tests/tests.py: -------------------------------------------------------------------------------- 1 | from pyspark.sql import SparkSession 2 | 3 | spark = SparkSession.builder.master("spark://tispark-master:7077").appName("TiSpark tests").getOrCreate() 4 | 5 | spark.sql("use TPCH_001") 6 | 7 | count = spark.sql("select count(*) as c from lineitem").first()['c'] 8 | 9 | assert 60175 == count 10 | -------------------------------------------------------------------------------- /tools/container_debug: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2018 PingCAP, Inc. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | #* --------------------------------------------------------------------- */ 17 | #* log configure */ 18 | #* --------------------------------------------------------------------- */ 19 | ### Define logging color 20 | COLOR_ORIGIN="\033[0m" 21 | COLOR_GREEN="\033[32m" 22 | COLOR_YELLOW="\033[33m" 23 | COLOR_RED="\033[31m" 24 | 25 | ### Define logging level 26 | LOGGER_LEVEL="3" 27 | 28 | ### Define common logger 29 | function logger() { 30 | cur_level=$1 31 | cur_type=$2 32 | cur_color=$3 33 | shift && shift && shift 34 | cur_msg=$* 35 | 36 | [[ ${LOGGER_LEVEL} -lt ${cur_level} ]] && return 0 37 | 38 | pre_fix="${cur_color}[${cur_type}][$(date +%F)][$(date +%T)]" 39 | pos_fix="${COLOR_ORIGIN}" 40 | echo -e "${pre_fix} ${cur_msg} ${pos_fix}" 41 | } 42 | 43 | ### Define notice logger 44 | function notice() { 45 | logger 3 "NOTICE" ${COLOR_GREEN} $* 46 | } 47 | 48 | ### Define warning logger 49 | function warning() { 50 | logger 2 "WARNING" ${COLOR_YELLOW} $* 51 | } 52 | 53 | ### Define fatal logger 54 | function fatal() { 55 | logger 1 "FATAL" ${COLOR_RED} $* 56 | exit 1 57 | } 58 | ########################################################################## 59 | 60 | function print_help() { 61 | echo "\ 62 | ${1:-Debug tool for container.} 63 | 64 | Usage: 65 | container_debug [OPTIONS] [ARG] 66 | 67 | Options: 68 | -i The container's identity, possible values are 'containerID' or 'containerName' 69 | -s The service name defined in docker-compose 70 | -w Run pprof via a web interface for a Go program 71 | -p The binary path of the debugged process in its own container 72 | -h Print help information 73 | 74 | When you enter the debug container, you can find the pid of the debugged process through the ps command, 75 | then you can find the binary of the debugged process through this path /proc/\${pid}/root/\${binary_path}. 76 | 77 | \${binary_path} represents the binary path of the debugged process in its own container. 78 | \${pid} represents the process id of the debugged process as seen in the debug container. 79 | " >&2 80 | exit 81 | } 82 | 83 | ###############################variable define################################## 84 | WORKSPACE=$(cd $(dirname $0)/..; pwd) 85 | DEBUG_IMAGE=${DEBUG_IMAGE:-uhub.service.ucloud.cn/pingcap/tidb-debug:latest} 86 | SUFFIX=$(uuidgen|cut -d'-' -f1|tr '[A-Z]' '[a-z]') 87 | DEBUG_CONTAINER_NAME=debug-${SUFFIX} 88 | TMP_FILE=$(mktemp /tmp/binary.XXXXXX) 89 | ################################################################################ 90 | 91 | if [[ $# -eq 0 ]] 92 | then 93 | print_help 94 | fi 95 | 96 | function cleanup() { 97 | notice "start to clean tmp file ${TMP_FILE}" 98 | [[ -f ${TMP_FILE} ]] && rm -f ${TMP_FILE} 99 | } 100 | 101 | ### register signal processing function 102 | trap cleanup EXIT 103 | 104 | ### change workspace 105 | cd $WORKSPACE 106 | 107 | optstring=":i:s:p:wh" 108 | 109 | while getopts "$optstring" opt; do 110 | case $opt in 111 | i) 112 | container_id=${OPTARG} 113 | ;; 114 | s) 115 | service_name=${OPTARG} 116 | ;; 117 | p) 118 | binary_path=${OPTARG} 119 | ;; 120 | w) 121 | web=true 122 | ;; 123 | h) 124 | print_help 125 | ;; 126 | \?) 127 | fatal "Invalid option: -$OPTARG" >&2 128 | ;; 129 | :) 130 | fatal "Option -$OPTARG requires an argument" >&2 131 | ;; 132 | esac 133 | done 134 | 135 | if [[ -z ${service_name} && -z ${container_id} ]] 136 | then 137 | fatal "please use the -s or -i option to select the target container" >&2 138 | elif [[ !
-z ${container_id} ]] 139 | then 140 | ### If both -s and -i options are specified, the -i option is preferred 141 | cid=${container_id} 142 | else 143 | cprefix=$(basename $(pwd)|tr -Cd '[A-Za-z0-9]'|tr '[A-Z]' '[a-z]') 144 | cid="${cprefix}_${service_name}_1" 145 | docker ps | grep ${cid} >/dev/null 146 | [[ $? -ne 0 ]] && fatal "docker-compose service ${service_name} not found, please confirm the correct docker-compose service name" >&2 147 | fi 148 | 149 | if [[ ! -z ${binary_path} ]] 150 | then 151 | binary_name=$(basename ${binary_path}) 152 | docker cp ${cid}:${binary_path} ${TMP_FILE} 153 | if [[ $? -ne 0 ]] 154 | then 155 | ### binary not found in the container, reset variable ${binary_name} 156 | binary_name= 157 | warning "${binary_path} not found in container ${cid}, please specify the correct binary path in the container" >&2 158 | fi 159 | fi 160 | 161 | if [[ ! -z ${web} ]] 162 | then 163 | ### start a web server for graphic visualization of Go program profiles 164 | 165 | ### generate a random web port 166 | ### TODO: test whether this port is already in use 167 | wport=${RANDOM} 168 | [[ ${wport} -lt 10000 ]] && wport=$((wport+10000)) 169 | 170 | ### get the container exposed port 171 | cport=$(docker port ${cid}|grep -E '[0-9]{5}'|awk -F: '{print $NF}') 172 | notice "starting a web server on localhost:${wport}" 173 | pprof -http=:${wport} ${TMP_FILE} http://localhost:${cport}/debug/pprof/profile 174 | else 175 | ### enter the debug container to debug the specified container 176 | docker_run_args=(-ti --rm --name=${DEBUG_CONTAINER_NAME}) 177 | docker_run_args+=(--pid=container:${cid}) 178 | docker_run_args+=(--network=container:${cid}) 179 | docker_run_args+=(--ipc=container:${cid}) 180 | docker_run_args+=(--cap-add=SYS_PTRACE) 181 | docker_run_args+=(--privileged=true) 182 | if [[ ! -z ${binary_name} && -e ${TMP_FILE} ]] 183 | then 184 | docker_run_args+=(-v ${TMP_FILE}:/${binary_name}) 185 | else 186 | notice "you can access the file system of the debugged container ${cid} through the path /proc/\${DEBUGGED_PROCESS_PID}/root" 187 | fi 188 | docker_run_args+=($DEBUG_IMAGE) 189 | docker run "${docker_run_args[@]}" 190 | fi 191 | --------------------------------------------------------------------------------
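
Example usage (not a file in this repository; a minimal sketch that assumes the cluster is started from the repository root so the default docker-compose project name matches, and that reuses the tikv0 and tidb service names from docker-compose.yml plus the /tikv-server and /tidb-server binary paths from the tikv and tidb Dockerfiles above):

    # bring up the cluster defined in docker-compose.yml
    docker-compose up -d

    # attach a debug container to the tikv0 service; container_debug copies /tikv-server
    # out of the running container so tools such as gdb and perf can resolve symbols
    ./tools/container_debug -s tikv0 -p /tikv-server

    # profile the tidb service instead: -w fetches /debug/pprof/profile through the
    # container's published status port and serves pprof's web UI on a random local port
    ./tools/container_debug -s tidb -p /tidb-server -w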