├── .gitignore ├── README.md ├── config │   ├── flink-jars │   │   ├── flink-connector-clickhouse-1.12.0.jar │   │   ├── flink-connector-jdbc_2.12-1.11.1.jar │   │   ├── flink-connector-mysql-cdc-1.1.0.jar │   │   ├── flink-tidb-connector-0.0.2-SNAPSHOT.jar │   │   └── mysql-connector-java-8.0.21.jar │   └── tidb-config │       ├── pd.toml │       ├── tidb.toml │       └── tikv.toml └── docker-compose.yml /.gitignore: -------------------------------------------------------------------------------- 1 | # IntelliJ IDEA Project 2 | /.idea 3 | *.iml 4 | 5 | # Maven Build 6 | /target 7 | !/target/flink-tidb-rdw.jar 8 | 9 | # macOS 10 | .DS_Store 11 | 12 | # logs 13 | /logs 14 | *.db 15 | *.dat -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Flink-TiDB-RDW 2 | 3 | A sample Flink + TiDB real-time data warehouse. 4 | 5 | [Tutorial Slides (in Chinese)](https://www.slidestalk.com/TiDB/FlinkTidbRdw) 6 | 7 | [Blog (in Chinese)](https://pingcap.com/blog-cn/when-tidb-and-flink-are-combined/) 8 | 9 | [Blog (in English)](https://pingcap.com/blog/flink-+-tidb-a-scale-out-real-time-data-warehouse-for-second-level-analytics/) 10 | 11 | Sincere thanks to [TiDB](https://docs.pingcap.com/zh/tidb/stable) and [Apache Flink](https://flink.apache.org/). 12 | 13 | ## How to use 14 | 15 | 1. Bootstrap 16 | 17 | ```bash 18 | # Clone Project 19 | git clone https://github.com/LittleFall/flink-tidb-rdw && cd ./flink-tidb-rdw/ 20 | 21 | # Reset Env 22 | docker-compose down -v; rm -rf logs 23 | 24 | # Startup 25 | docker-compose up -d 26 | ``` 27 | 28 | You can access the Flink dashboard at [localhost:8081](http://localhost:8081). 29 | 30 | 2. Create tables in MySQL and TiDB. 31 | 32 | ```bash 33 | docker-compose exec mysql mysql -uroot 34 | 35 | DROP DATABASE IF EXISTS test; 36 | CREATE DATABASE test; 37 | USE test; 38 | 39 | create table base ( 40 | base_id int primary key, 41 | base_location varchar(20) 42 | ); 43 | create table stuff( 44 | stuff_id int primary key, 45 | stuff_base_id int, 46 | stuff_name varchar(20) 47 | ); 48 | ``` 49 | 50 | ```bash 51 | docker-compose exec mysql mysql -htidb -uroot -P4000 52 | 53 | use test; 54 | create table wide_stuff( 55 | stuff_id int primary key, 56 | base_id int, 57 | base_location varchar(20), 58 | stuff_name varchar(20) 59 | ); 60 | ``` 61 | 62 | 63 | 3. Submit tasks in the Flink SQL Client.
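The connector jars under `config/flink-jars` are mounted into the jobmanager container at `./connector-lib` (see `docker-compose.yml`); as a quick sanity check before starting the client, you can list them: ```bash docker-compose exec jobmanager ls ./connector-lib ```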
64 | 65 | ```bash 66 | docker-compose exec jobmanager ./bin/sql-client.sh embedded -l ./connector-lib 67 | ``` 68 | 69 | Create the Flink tables: 70 | 71 | ```sql 72 | create table base ( 73 | base_id int primary key, 74 | base_location varchar(20) 75 | ) WITH ( 76 | 'connector' = 'mysql-cdc', 77 | 'hostname' = 'mysql', 78 | 'port' = '3306', 79 | 'username' = 'root', 80 | 'password' = '', 81 | 'database-name' = 'test', 82 | 'table-name' = 'base' 83 | ); 84 | 85 | 86 | create table stuff( 87 | stuff_id int primary key, 88 | stuff_base_id int, 89 | stuff_name varchar(20) 90 | ) WITH ( 91 | 'connector' = 'mysql-cdc', 92 | 'hostname' = 'mysql', 93 | 'port' = '3306', 94 | 'username' = 'root', 95 | 'password' = '', 96 | 'database-name' = 'test', 97 | 'table-name' = 'stuff' 98 | ); 99 | 100 | create table wide_stuff( 101 | stuff_id int primary key, 102 | base_id int, 103 | base_location varchar(20), 104 | stuff_name varchar(20) 105 | ) WITH ( 106 | 'connector' = 'jdbc', 107 | 'driver' = 'com.mysql.cj.jdbc.Driver', 108 | 'url' = 'jdbc:mysql://tidb:4000/test?rewriteBatchedStatements=true', 109 | 'table-name' = 'wide_stuff', 110 | 'username' = 'root', 111 | 'password' = '' 112 | ); 113 | 114 | create table print_base WITH ('connector' = 'print') LIKE base (EXCLUDING ALL); 115 | 116 | create table print_stuff WITH ('connector' = 'print') LIKE stuff (EXCLUDING ALL); 117 | 118 | create table print_wide_stuff WITH ('connector' = 'print') LIKE wide_stuff (EXCLUDING ALL); 119 | ``` 120 | 121 | Submit the tasks to the Flink cluster: 122 | 123 | ```sql 124 | insert into wide_stuff 125 | select stuff.stuff_id, base.base_id, base.base_location, stuff.stuff_name 126 | from stuff inner join base 127 | on stuff.stuff_base_id = base.base_id; 128 | 129 | insert into print_base select * from base; 130 | 131 | insert into print_stuff select * from stuff; 132 | 133 | insert into print_wide_stuff 134 | select stuff.stuff_id, base.base_id, base.base_location, stuff.stuff_name 135 | from stuff inner join base 136 | on stuff.stuff_base_id = base.base_id; 137 | ``` 138 | 139 | Then you can see four tasks at localhost:8081: 140 | - Print the changelog of the `base` table in MySQL to standard output. 141 | - Print the changelog of the `stuff` table in MySQL to standard output. 142 | - Join `base` and `stuff` into `wide_stuff` and print the changelog of the `wide_stuff` table to standard output. 143 | - Write the changelog of the `wide_stuff` table to TiDB. 144 | 145 | You can use `docker-compose logs -f taskmanager` to see standard output. 146 |
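If you prefer the command line, the Flink CLI bundled in the jobmanager image can also list the running jobs (a quick check equivalent to the dashboard view): ```bash docker-compose exec jobmanager ./bin/flink list -r ```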
147 | 4. Write data to MySQL for testing. 148 | 149 | ```bash 150 | docker-compose exec mysql mysql -uroot 151 | 152 | use test; 153 | insert into base values (1, 'bj'); 154 | insert into stuff values (1, 1, 'zhangsan'); 155 | insert into stuff values (2, 1, 'lisi'); 156 | insert into base values (2, 'sh'); 157 | insert into stuff values (3, 2, 'wangliu'); 158 | update stuff set stuff_name = 'wangwu' where stuff_id = 3; 159 | delete from stuff where stuff_name = 'lisi'; 160 | ``` 161 | 162 | See the results in standard output: 163 | 164 | ```bash 165 | taskmanager_1 | +I(1,bj) 166 | taskmanager_1 | +I(1,1,zhangsan) 167 | taskmanager_1 | +I(2,sh) 168 | taskmanager_1 | +I(2,1,lisi) 169 | taskmanager_1 | +I(3,2,wangliu) 170 | taskmanager_1 | -U(3,2,wangliu) 171 | taskmanager_1 | +U(3,2,wangwu) 172 | taskmanager_1 | +I(1,1,bj,zhangsan) 173 | taskmanager_1 | +I(2,1,bj,lisi) 174 | taskmanager_1 | +I(3,2,sh,wangliu) 175 | taskmanager_1 | -U(3,2,sh,wangliu) 176 | taskmanager_1 | +U(3,2,sh,wangwu) 177 | taskmanager_1 | -D(2,1,lisi) 178 | taskmanager_1 | -D(2,1,bj,lisi) 179 | ``` 180 | 181 | See the results in TiDB: 182 | 183 | ```bash 184 | docker-compose exec mysql mysql -htidb -uroot -P4000 -e"select * from test.wide_stuff"; 185 | 186 | +----------+---------+---------------+------------+ 187 | | stuff_id | base_id | base_location | stuff_name | 188 | +----------+---------+---------------+------------+ 189 | | 1 | 1 | bj | zhangsan | 190 | | 3 | 2 | sh | wangwu | 191 | +----------+---------+---------------+------------+ 192 | ``` 193 | 194 | ## Notes 195 | 196 | 1. It is recommended to increase the memory available to Docker Compose to 8 GB or above. 197 | 2. The Flink SQL client is designed for interactive execution. Currently, it does not support submitting multiple statements at a time. An alternative is Apache Zeppelin. 198 | 3. If you want to connect to services outside Docker, use `host.docker.internal` as the host. 199 | 4. If you use Kafka (the Kafka and ZooKeeper services are not enabled in the provided `docker-compose.yml`), you can use the following commands to check whether Kafka received the data.
200 | 201 | ```bash 202 | docker-compose exec mysql mysql -uroot -e"insert into test.base values (1, 'bj')"; 203 | docker-compose exec kafka /opt/kafka/bin/kafka-topics.sh --list --zookeeper zookeeper:2181 204 | docker-compose exec kafka /opt/kafka/bin/kafka-console-consumer.sh --bootstrap-server kafka:9092 --topic test.base --from-beginning 205 | 206 | docker-compose down -v && rm -rf ./logs && find ./config/canal-config -name "meta.dat"|xargs rm -f && docker-compose up -d 207 | ``` 208 | 209 | # Demos 210 | 211 | ```bash 212 | docker-compose exec mysql mysql -uroot # MySQL 213 | docker-compose exec mysql mysql -uroot -htidb -P4000 # TiDB 214 | docker-compose exec jobmanager ./bin/sql-client.sh embedded -l ./connector-lib # Flink SQL client 215 | ``` 216 | 217 | ## Demo1: Datagen to Print 218 | 219 | In the Flink SQL client: 220 | ```sql 221 | create table `source`(`a` int) with ('connector' = 'datagen', 'rows-per-second'='1'); 222 | create table `sink`(`a` int) with ('connector' = 'print'); 223 | insert into `sink` select * from `source`; 224 | ``` 225 | 226 | ## Demo2: MySQL to TiDB 227 | 228 | In MySQL: 229 | ```sql 230 | create database if not exists test; use test; 231 | drop table if exists username; 232 | create table username ( 233 | id int primary key, 234 | name varchar(10), 235 | create_time timestamp(6) NOT NULL DEFAULT CURRENT_TIMESTAMP(6) 236 | ); 237 | ``` 238 | 239 | In TiDB: 240 | ```sql 241 | create database if not exists test; use test; 242 | drop table if exists username; 243 | create table username ( 244 | id int primary key, 245 | name varchar(10), 246 | mysql_create_time timestamp(6) NULL, 247 | tidb_create_time timestamp(6) NOT NULL DEFAULT CURRENT_TIMESTAMP(6) 248 | ); 249 | ``` 250 | 251 | In Flink SQL: 252 | ```sql 253 | create table source ( 254 | id int primary key, 255 | name varchar(10), 256 | create_time timestamp(6) 257 | ) with ( 258 | 'connector' = 'mysql-cdc', 259 | 'hostname' = 'mysql', 'port' = '3306', 260 | 'username' = 'root', 'password' = '', 261 | 'database-name' = 'test', 'table-name' = 'username' 262 | ); 263 | 264 | 265 | -- https://github.com/ververica/flink-cdc-connectors/wiki/MySQL-CDC-Connector#connector-options 266 | 267 | create table sink ( 268 | id int primary key, 269 | name varchar(10), 270 | mysql_create_time timestamp(6) 271 | ) WITH ( 272 | 'connector' = 'jdbc', 'driver' = 'com.mysql.cj.jdbc.Driver', 273 | 'username' = 'root', 'password' = '', 274 | 'url' = 'jdbc:mysql://tidb:4000/test?rewriteBatchedStatements=true', 275 | 'table-name' = 'username', 276 | 'sink.buffer-flush.max-rows' = '1', 'sink.buffer-flush.interval' = '0' 277 | ); 278 | -- https://ci.apache.org/projects/flink/flink-docs-release-1.11/zh/dev/table/connectors/jdbc.html#connector-options 279 | 280 | insert into sink select * from source; 281 | ``` 282 | 283 | You can use the TiDB slow query log to check the statements actually executed in TiDB. 284 |
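Since `docker-compose.yml` mounts the TiDB log directory to `./logs/tidbs/` and passes `--log-slow-query=/logs/tidb_slow.log`, you can follow the slow log from the host while running the statements below (assuming the default volume mapping): ```bash tail -f logs/tidbs/tidb_slow.log ```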
285 | ```sql 286 | -- Set the TiDB slow log threshold to 0 to see the statements actually executed in TiDB. 287 | set tidb_slow_log_threshold = 0; 288 | 289 | -- Statements in MySQL 290 | insert into `username`(`id`, `name`) values (1, 'a'), (2, 'b'), (3, 'c'); 291 | update username set name='d' where id=2; select * from username; 292 | delete from username where id=1; select * from username; 293 | 294 | -- Statements actually executed in TiDB 295 | INSERT INTO `username` 296 | (`id`, `name`, `mysql_create_time`) VALUES (1, 'a', '2020-09-14 12:44:24.581219') 297 | ON DUPLICATE KEY UPDATE `id`=VALUES(`id`), `name`=VALUES(`name`), `mysql_create_time`=VALUES(`mysql_create_time`); 298 | 299 | 300 | INSERT INTO `username`(`id`, `name`) VALUES (1, 'c') ON DUPLICATE KEY UPDATE `id`=VALUES(`id`), `name`=VALUES(`name`); -- batched execution 301 | DELETE FROM `username` WHERE `id`=1; 302 | ``` 303 | ## Datagen to MySQL 304 | 305 | ```sql 306 | create table data_gen ( 307 | id int primary key, 308 | name varchar(10) 309 | ) with ( 310 | 'connector' = 'datagen', 'rows-per-second'='100000', 311 | 'fields.id.kind'='sequence', 'fields.id.start'='1', 'fields.id.end'='1000000', 312 | 'fields.name.length'='10' 313 | ); 314 | 315 | create table mysql_sink ( 316 | id int primary key, 317 | name varchar(10) 318 | ) WITH ( 319 | 'connector' = 'jdbc', 'driver' = 'com.mysql.cj.jdbc.Driver', 320 | 'username' = 'root', 'password' = '', 321 | 'url' = 'jdbc:mysql://host.docker.internal:4000/test?rewriteBatchedStatements=true', 322 | 'table-name' = 'username', 323 | 'sink.buffer-flush.max-rows' = '10000', 'sink.buffer-flush.interval' = '1' 324 | ); 325 | 326 | insert into mysql_sink (id, name) select * from data_gen; 327 | ``` 328 | 329 | ## Demo3: Stream-Stream Join 330 | 331 | In MySQL: 332 | ```sql 333 | create database if not exists test; use test; 334 | drop table if exists base; 335 | create table base ( 336 | id int primary key, 337 | location varchar(20) 338 | ); 339 | drop table if exists stuff; 340 | create table stuff( 341 | id int primary key, 342 | base_id int, 343 | name varchar(20) 344 | ); 345 | ``` 346 | 347 | In TiDB: 348 | ```sql 349 | create database if not exists test; use test; 350 | 351 | create table wide_stuff( 352 | stuff_id int primary key, 353 | base_id int, 354 | base_location varchar(20), 355 | stuff_name varchar(20) 356 | ); 357 | ``` 358 | 359 | In Flink SQL Client: 360 | ```sql 361 | create table base ( 362 | id int primary key, 363 | location varchar(20) 364 | ) with ( 365 | 'connector' = 'mysql-cdc', 366 | 'hostname' = 'mysql', 'port' = '3306', 367 | 'username' = 'root', 'password' = '', 368 | 'database-name' = 'test', 'table-name' = 'base' 369 | ); 370 | 371 | create table stuff( 372 | id int primary key, 373 | base_id int, 374 | name varchar(20) 375 | ) WITH ( 376 | 'connector' = 'mysql-cdc', 377 | 'hostname' = 'mysql', 'port' = '3306', 378 | 'username' = 'root', 'password' = '', 379 | 'database-name' = 'test', 'table-name' = 'stuff' 380 | ); 381 | 382 | create table wide_stuff( 383 | stuff_id int primary key, 384 | base_id int, 385 | base_location varchar(20), 386 | stuff_name varchar(20) 387 | ) WITH ( 388 | 'connector' = 'jdbc', 'driver' = 'com.mysql.cj.jdbc.Driver', 389 | 'username' = 'root', 'password' = '', 390 | 'url' = 'jdbc:mysql://tidb:4000/test?rewriteBatchedStatements=true', 391 | 'table-name' = 'wide_stuff', 392 | 'sink.buffer-flush.max-rows' = '1', 'sink.buffer-flush.interval' = '0' 393 | ); 394 | 395 | explain 396 | insert into wide_stuff 397 | select s.id, b.id, b.location, s.name 398 | from stuff s, base b 399 | where s.base_id = b.id; 400 | 401 | SET execution.result-mode=tableau;
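-- Note: this is a regular (non-temporal) join, so Flink keeps both input streams in state and emits a retraction (-U/-D) followed by the new row whenever either side changes; compare with the lookup join in Demo4, which queries `base` at processing time instead.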
402 | 403 | insert into wide_stuff 404 | select s.id, b.id, b.location, s.name 405 | from stuff s 406 | left join base b on s.base_id = b.id; 407 | ``` 408 | 409 | 410 | Test: 411 | 412 | ```sql 413 | insert into base values (1, 'bj'); 414 | insert into stuff values (1, 1, 'zhangsan'); 415 | insert into stuff values (2, 1, 'lisi'); 416 | insert into base values (2, 'sh'); 417 | insert into stuff values (3, 2, 'wangliu'); 418 | update stuff set name = 'wangwu' where id = 3; 419 | delete from stuff where name = 'lisi'; 420 | update base set location = 'gz' where location = 'bj'; 421 | ``` 422 | 423 | ## Demo4: Stream-Table Join 424 | 425 | The MySQL and TiDB commands are the same as above. 426 | 427 | In Flink SQL: 428 | ```sql 429 | create table stuff( 430 | id int primary key, 431 | base_id int, 432 | name varchar(20), 433 | proc_time as PROCTIME() 434 | ) WITH ( 435 | 'connector' = 'mysql-cdc', 436 | 'hostname' = 'mysql', 'port' = '3306', 437 | 'username' = 'root', 'password' = '', 438 | 'database-name' = 'test', 'table-name' = 'stuff' 439 | ); 440 | 441 | create table base ( 442 | id int primary key, 443 | location varchar(20) 444 | ) WITH ( 445 | 'connector' = 'jdbc', 'driver' = 'com.mysql.cj.jdbc.Driver', 446 | 'username' = 'root', 'password' = '', 447 | 'url' = 'jdbc:mysql://mysql:3306/test', 'table-name' = 'base', 448 | 'lookup.cache.max-rows' = '10000', 'lookup.cache.ttl' = '5s' 449 | ); 450 | 451 | create table wide_stuff( 452 | stuff_id int primary key, 453 | base_id int, 454 | base_location varchar(20), 455 | stuff_name varchar(20) 456 | ) WITH ( 457 | 'connector' = 'jdbc', 'driver' = 'com.mysql.cj.jdbc.Driver', 458 | 'username' = 'root', 'password' = '', 459 | 'url' = 'jdbc:mysql://tidb:4000/test?rewriteBatchedStatements=true', 460 | 'table-name' = 'wide_stuff', 461 | 'sink.buffer-flush.max-rows' = '10000', 'sink.buffer-flush.interval' = '1s' 462 | ); 463 | 464 | insert into wide_stuff 465 | select s.id, b.id, b.location, s.name 466 | from stuff as s 467 | join base FOR SYSTEM_TIME AS OF s.proc_time b on s.base_id = b.id; 468 | ``` 469 | 470 | Test: 471 | 472 | ```sql 473 | insert into stuff values (1, 1, 'zhangsan'); 474 | insert into base values (1, 'bj'); 475 | insert into stuff values (2, 1, 'lisi'); 476 | insert into stuff values (3, 1, 'wangliu'); 477 | update base set location = 'gz' where location = 'bj'; 478 | insert into stuff values (4, 1, 'zhaoliu'); 479 | update stuff set name = 'wangwu' where name = 'wangliu'; 480 | ``` 481 | -------------------------------------------------------------------------------- /config/flink-jars/flink-connector-clickhouse-1.12.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LittleFall/flink-tidb-rdw/7a86a021c2b9b943ea7635a3f088e13a9957a789/config/flink-jars/flink-connector-clickhouse-1.12.0.jar -------------------------------------------------------------------------------- /config/flink-jars/flink-connector-jdbc_2.12-1.11.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LittleFall/flink-tidb-rdw/7a86a021c2b9b943ea7635a3f088e13a9957a789/config/flink-jars/flink-connector-jdbc_2.12-1.11.1.jar -------------------------------------------------------------------------------- /config/flink-jars/flink-connector-mysql-cdc-1.1.0.jar: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/LittleFall/flink-tidb-rdw/7a86a021c2b9b943ea7635a3f088e13a9957a789/config/flink-jars/flink-connector-mysql-cdc-1.1.0.jar -------------------------------------------------------------------------------- /config/flink-jars/flink-tidb-connector-0.0.2-SNAPSHOT.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LittleFall/flink-tidb-rdw/7a86a021c2b9b943ea7635a3f088e13a9957a789/config/flink-jars/flink-tidb-connector-0.0.2-SNAPSHOT.jar -------------------------------------------------------------------------------- /config/flink-jars/mysql-connector-java-8.0.21.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LittleFall/flink-tidb-rdw/7a86a021c2b9b943ea7635a3f088e13a9957a789/config/flink-jars/mysql-connector-java-8.0.21.jar -------------------------------------------------------------------------------- /config/tidb-config/pd.toml: -------------------------------------------------------------------------------- 1 | # PD Configuration. 2 | 3 | name = "pd" 4 | data-dir = "default.pd" 5 | 6 | client-urls = "http://127.0.0.1:2379" 7 | # if not set, use ${client-urls} 8 | advertise-client-urls = "" 9 | 10 | peer-urls = "http://127.0.0.1:2380" 11 | # if not set, use ${peer-urls} 12 | advertise-peer-urls = "" 13 | 14 | initial-cluster = "pd=http://127.0.0.1:2380" 15 | initial-cluster-state = "new" 16 | 17 | lease = 3 18 | tso-save-interval = "3s" 19 | 20 | [security] 21 | # Path of file that contains list of trusted SSL CAs. if set, following four settings shouldn't be empty 22 | cacert-path = "" 23 | # Path of file that contains X509 certificate in PEM format. 24 | cert-path = "" 25 | # Path of file that contains X509 key in PEM format. 26 | key-path = "" 27 | 28 | [log] 29 | level = "error" 30 | 31 | # log format, one of json, text, console 32 | #format = "text" 33 | 34 | # disable automatic timestamps in output 35 | #disable-timestamp = false 36 | 37 | # file logging 38 | [log.file] 39 | #filename = "" 40 | # max log file size in MB 41 | #max-size = 300 42 | # max log file keep days 43 | #max-days = 28 44 | # maximum number of old log files to retain 45 | #max-backups = 7 46 | # rotate log by day 47 | #log-rotate = true 48 | 49 | [metric] 50 | # prometheus client push interval, set "0s" to disable prometheus. 51 | interval = "15s" 52 | # prometheus pushgateway address, leaves it empty will disable prometheus. 53 | address = "pushgateway:9091" 54 | 55 | [schedule] 56 | max-merge-region-size = 0 57 | split-merge-interval = "1h" 58 | max-snapshot-count = 3 59 | max-pending-peer-count = 16 60 | max-store-down-time = "30m" 61 | leader-schedule-limit = 4 62 | region-schedule-limit = 4 63 | replica-schedule-limit = 8 64 | merge-schedule-limit = 8 65 | tolerant-size-ratio = 5.0 66 | 67 | # customized schedulers, the format is as below 68 | # if empty, it will use balance-leader, balance-region, hot-region as default 69 | # [[schedule.schedulers]] 70 | # type = "evict-leader" 71 | # args = ["1"] 72 | 73 | [replication] 74 | # The number of replicas for each region. 75 | max-replicas = 3 76 | # The label keys specified the location of a store. 77 | # The placement priorities is implied by the order of label keys. 78 | # For example, ["zone", "rack"] means that we should place replicas to 79 | # different zones first, then to different racks if we don't have enough zones. 
80 | location-labels = [] 81 | 82 | [label-property] 83 | # Do not assign region leaders to stores that have these tags. 84 | # [[label-property.reject-leader]] 85 | # key = "zone" 86 | # value = "cn1 -------------------------------------------------------------------------------- /config/tidb-config/tidb.toml: -------------------------------------------------------------------------------- 1 | # TiDB Configuration. 2 | 3 | # TiDB server host. 4 | host = "0.0.0.0" 5 | 6 | # TiDB server port. 7 | port = 4000 8 | 9 | # Registered store name, [tikv, mocktikv] 10 | store = "mocktikv" 11 | 12 | # TiDB storage path. 13 | path = "/tmp/tidb" 14 | 15 | # The socket file to use for connection. 16 | socket = "" 17 | 18 | # Run ddl worker on this tidb-server. 19 | run-ddl = true 20 | 21 | # Schema lease duration, very dangerous to change only if you know what you do. 22 | lease = "0" 23 | 24 | # When create table, split a separated region for it. It is recommended to 25 | # turn off this option if there will be a large number of tables created. 26 | split-table = true 27 | 28 | # The limit of concurrent executed sessions. 29 | token-limit = 1000 30 | 31 | # Only print a log when out of memory quota. 32 | # Valid options: ["log", "cancel"] 33 | oom-action = "log" 34 | 35 | # Set the memory quota for a query in bytes. Default: 32GB 36 | mem-quota-query = 34359738368 37 | 38 | # Enable coprocessor streaming. 39 | enable-streaming = false 40 | 41 | # Set system variable 'lower_case_table_names' 42 | lower-case-table-names = 2 43 | 44 | [log] 45 | # Log level: debug, info, warn, error, fatal. 46 | level = "error" 47 | 48 | # Log format, one of json, text, console. 49 | format = "text" 50 | 51 | # Disable automatic timestamp in output 52 | disable-timestamp = false 53 | 54 | # Stores slow query log into separated files. 55 | slow-query-file = "" 56 | 57 | # Queries with execution time greater than this value will be logged. (Milliseconds) 58 | slow-threshold = 300 59 | 60 | # Queries with internal result greater than this value will be logged. 61 | expensive-threshold = 10000 62 | 63 | # Maximum query length recorded in log. 64 | query-log-max-len = 2048 65 | 66 | # File logging. 67 | [log.file] 68 | # Log file name. 69 | filename = "" 70 | 71 | # Max log file size in MB (upper limit to 4096MB). 72 | max-size = 300 73 | 74 | # Max log file keep days. No clean up by default. 75 | max-days = 0 76 | 77 | # Maximum number of old log files to retain. No clean up by default. 78 | max-backups = 0 79 | 80 | # Rotate log by day 81 | log-rotate = true 82 | 83 | [security] 84 | # Path of file that contains list of trusted SSL CAs for connection with mysql client. 85 | ssl-ca = "" 86 | 87 | # Path of file that contains X509 certificate in PEM format for connection with mysql client. 88 | ssl-cert = "" 89 | 90 | # Path of file that contains X509 key in PEM format for connection with mysql client. 91 | ssl-key = "" 92 | 93 | # Path of file that contains list of trusted SSL CAs for connection with cluster components. 94 | cluster-ssl-ca = "" 95 | 96 | # Path of file that contains X509 certificate in PEM format for connection with cluster components. 97 | cluster-ssl-cert = "" 98 | 99 | # Path of file that contains X509 key in PEM format for connection with cluster components. 100 | cluster-ssl-key = "" 101 | 102 | [status] 103 | # If enable status report HTTP service. 104 | report-status = true 105 | 106 | # TiDB status port. 
107 | status-port = 10080 108 | 109 | # Prometheus pushgateway address, leaves it empty will disable prometheus push. 110 | metrics-addr = "" 111 | 112 | # Prometheus client push interval in second, set \"0\" to disable prometheus push. 113 | metrics-interval = 15 114 | 115 | [performance] 116 | # Max CPUs to use, 0 use number of CPUs in the machine. 117 | max-procs = 0 118 | # StmtCountLimit limits the max count of statement inside a transaction. 119 | stmt-count-limit = 5000 120 | 121 | # Set keep alive option for tcp connection. 122 | tcp-keep-alive = true 123 | 124 | # The maximum number of retries when commit a transaction. 125 | retry-limit = 10 126 | 127 | # Whether support cartesian product. 128 | cross-join = true 129 | 130 | # Stats lease duration, which influences the time of analyze and stats load. 131 | stats-lease = "3s" 132 | 133 | # Run auto analyze worker on this tidb-server. 134 | run-auto-analyze = true 135 | 136 | # Probability to use the query feedback to update stats, 0 or 1 for always false/true. 137 | feedback-probability = 0.0 138 | 139 | # The max number of query feedback that cache in memory. 140 | query-feedback-limit = 1024 141 | 142 | # Pseudo stats will be used if the ratio between the modify count and 143 | # row count in statistics of a table is greater than it. 144 | pseudo-estimate-ratio = 0.7 145 | 146 | [proxy-protocol] 147 | # PROXY protocol acceptable client networks. 148 | # Empty string means disable PROXY protocol, * means all networks. 149 | networks = "" 150 | 151 | # PROXY protocol header read timeout, unit is second 152 | header-timeout = 5 153 | 154 | [plan-cache] 155 | enabled = false 156 | capacity = 2560 157 | shards = 256 158 | 159 | [prepared-plan-cache] 160 | enabled = false 161 | capacity = 100 162 | 163 | [opentracing] 164 | # Enable opentracing. 165 | enable = false 166 | 167 | # Whether to enable the rpc metrics. 168 | rpc-metrics = false 169 | 170 | [opentracing.sampler] 171 | # Type specifies the type of the sampler: const, probabilistic, rateLimiting, or remote 172 | type = "const" 173 | 174 | # Param is a value passed to the sampler. 175 | # Valid values for Param field are: 176 | # - for "const" sampler, 0 or 1 for always false/true respectively 177 | # - for "probabilistic" sampler, a probability between 0 and 1 178 | # - for "rateLimiting" sampler, the number of spans per second 179 | # - for "remote" sampler, param is the same as for "probabilistic" 180 | # and indicates the initial sampling rate before the actual one 181 | # is received from the mothership 182 | param = 1.0 183 | 184 | # SamplingServerURL is the address of jaeger-agent's HTTP sampling server 185 | sampling-server-url = "" 186 | 187 | # MaxOperations is the maximum number of operations that the sampler 188 | # will keep track of. If an operation is not tracked, a default probabilistic 189 | # sampler will be used rather than the per operation specific sampler. 190 | max-operations = 0 191 | 192 | # SamplingRefreshInterval controls how often the remotely controlled sampler will poll 193 | # jaeger-agent for the appropriate sampling strategy. 194 | sampling-refresh-interval = 0 195 | 196 | [opentracing.reporter] 197 | # QueueSize controls how many spans the reporter can keep in memory before it starts dropping 198 | # new spans. The queue is continuously drained by a background go-routine, as fast as spans 199 | # can be sent out of process. 
200 | queue-size = 0 201 | 202 | # BufferFlushInterval controls how often the buffer is force-flushed, even if it's not full. 203 | # It is generally not useful, as it only matters for very low traffic services. 204 | buffer-flush-interval = 0 205 | 206 | # LogSpans, when true, enables LoggingReporter that runs in parallel with the main reporter 207 | # and logs all submitted spans. Main Configuration.Logger must be initialized in the code 208 | # for this option to have any effect. 209 | log-spans = false 210 | 211 | # LocalAgentHostPort instructs reporter to send spans to jaeger-agent at this address 212 | local-agent-host-port = "" 213 | 214 | [tikv-client] 215 | # Max gRPC connections that will be established with each tikv-server. 216 | grpc-connection-count = 16 217 | 218 | # After a duration of this time in seconds if the client doesn't see any activity it pings 219 | # the server to see if the transport is still alive. 220 | grpc-keepalive-time = 10 221 | 222 | # After having pinged for keepalive check, the client waits for a duration of Timeout in seconds 223 | # and if no activity is seen even after that the connection is closed. 224 | grpc-keepalive-timeout = 3 225 | 226 | # max time for commit command, must be twice bigger than raft election timeout. 227 | commit-timeout = "41s" 228 | 229 | [binlog] 230 | 231 | # Socket file to write binlog. 232 | binlog-socket = "" 233 | 234 | # WriteTimeout specifies how long it will wait for writing binlog to pump. 235 | write-timeout = "15s" 236 | 237 | # If IgnoreError is true, when writting binlog meets error, TiDB would stop writting binlog, 238 | # but still provide service. 239 | ignore-error = false -------------------------------------------------------------------------------- /config/tidb-config/tikv.toml: -------------------------------------------------------------------------------- 1 | # TiKV config template 2 | # Human-readable big numbers: 3 | # File size(based on byte): KB, MB, GB, TB, PB 4 | # e.g.: 1_048_576 = "1MB" 5 | # Time(based on ms): ms, s, m, h 6 | # e.g.: 78_000 = "1.3m" 7 | 8 | # log level: trace, debug, info, warn, error, off. 9 | log-level = "error" 10 | # file to store log, write to stderr if it's empty. 11 | # log-file = "" 12 | 13 | [readpool.storage] 14 | # size of thread pool for high-priority operations 15 | # high-concurrency = 4 16 | # size of thread pool for normal-priority operations 17 | # normal-concurrency = 4 18 | # size of thread pool for low-priority operations 19 | # low-concurrency = 4 20 | # max running high-priority operations, reject if exceed 21 | # max-tasks-high = 8000 22 | # max running normal-priority operations, reject if exceed 23 | # max-tasks-normal = 8000 24 | # max running low-priority operations, reject if exceed 25 | # max-tasks-low = 8000 26 | # size of stack size for each thread pool 27 | # stack-size = "10MB" 28 | 29 | [readpool.coprocessor] 30 | # Notice: if CPU_NUM > 8, default thread pool size for coprocessors 31 | # will be set to CPU_NUM * 0.8. 32 | 33 | # high-concurrency = 8 34 | # normal-concurrency = 8 35 | # low-concurrency = 8 36 | # max-tasks-high = 16000 37 | # max-tasks-normal = 16000 38 | # max-tasks-low = 16000 39 | # stack-size = "10MB" 40 | 41 | [server] 42 | # set listening address. 43 | # addr = "127.0.0.1:20160" 44 | # set advertise listening address for client communication, if not set, use addr instead. 45 | # advertise-addr = "" 46 | # notify capacity, 40960 is suitable for about 7000 regions. 
47 | # notify-capacity = 40960 48 | # maximum number of messages can be processed in one tick. 49 | # messages-per-tick = 4096 50 | 51 | # compression type for grpc channel, available values are no, deflate and gzip. 52 | # grpc-compression-type = "no" 53 | # size of thread pool for grpc server. 54 | # grpc-concurrency = 4 55 | # The number of max concurrent streams/requests on a client connection. 56 | # grpc-concurrent-stream = 1024 57 | # The number of connections with each tikv server to send raft messages. 58 | # grpc-raft-conn-num = 10 59 | # Amount to read ahead on individual grpc streams. 60 | # grpc-stream-initial-window-size = "2MB" 61 | 62 | # How many snapshots can be sent concurrently. 63 | # concurrent-send-snap-limit = 32 64 | # How many snapshots can be recv concurrently. 65 | # concurrent-recv-snap-limit = 32 66 | 67 | # max count of tasks being handled, new tasks will be rejected. 68 | # end-point-max-tasks = 2000 69 | 70 | # max recursion level allowed when decoding dag expression 71 | # end-point-recursion-limit = 1000 72 | 73 | # max time to handle coprocessor request before timeout 74 | # end-point-request-max-handle-duration = "60s" 75 | 76 | # the max bytes that snapshot can be written to disk in one second, 77 | # should be set based on your disk performance 78 | # snap-max-write-bytes-per-sec = "100MB" 79 | 80 | # set attributes about this server, e.g. { zone = "us-west-1", disk = "ssd" }. 81 | # labels = {} 82 | 83 | [storage] 84 | # set the path to rocksdb directory. 85 | # data-dir = "/tmp/tikv/store" 86 | 87 | # notify capacity of scheduler's channel 88 | # scheduler-notify-capacity = 10240 89 | 90 | # maximum number of messages can be processed in one tick 91 | # scheduler-messages-per-tick = 1024 92 | 93 | # the number of slots in scheduler latches, concurrency control for write. 94 | # scheduler-concurrency = 2048000 95 | 96 | # scheduler's worker pool size, should increase it in heavy write cases, 97 | # also should less than total cpu cores. 98 | # scheduler-worker-pool-size = 4 99 | 100 | # When the pending write bytes exceeds this threshold, 101 | # the "scheduler too busy" error is displayed. 102 | # scheduler-pending-write-threshold = "100MB" 103 | 104 | [pd] 105 | # pd endpoints 106 | # endpoints = [] 107 | 108 | [metric] 109 | # the Prometheus client push interval. Setting the value to 0s stops Prometheus client from pushing. 110 | # interval = "15s" 111 | # the Prometheus pushgateway address. Leaving it empty stops Prometheus client from pushing. 112 | address = "pushgateway:9091" 113 | # the Prometheus client push job name. Note: A node id will automatically append, e.g., "tikv_1". 114 | # job = "tikv" 115 | 116 | [raftstore] 117 | # true (default value) for high reliability, this can prevent data loss when power failure. 118 | # sync-log = true 119 | 120 | # set the path to raftdb directory, default value is data-dir/raft 121 | # raftdb-path = "" 122 | 123 | # set store capacity, if no set, use disk capacity. 124 | # capacity = 0 125 | 126 | # notify capacity, 40960 is suitable for about 7000 regions. 127 | # notify-capacity = 40960 128 | 129 | # maximum number of messages can be processed in one tick. 130 | # messages-per-tick = 4096 131 | 132 | # Region heartbeat tick interval for reporting to pd. 133 | # pd-heartbeat-tick-interval = "60s" 134 | # Store heartbeat tick interval for reporting to pd. 
135 | # pd-store-heartbeat-tick-interval = "10s" 136 | 137 | # When region size changes exceeds region-split-check-diff, we should check 138 | # whether the region should be split or not. 139 | # region-split-check-diff = "6MB" 140 | 141 | # Interval to check region whether need to be split or not. 142 | # split-region-check-tick-interval = "10s" 143 | 144 | # When raft entry exceed the max size, reject to propose the entry. 145 | # raft-entry-max-size = "8MB" 146 | 147 | # Interval to gc unnecessary raft log. 148 | # raft-log-gc-tick-interval = "10s" 149 | # A threshold to gc stale raft log, must >= 1. 150 | # raft-log-gc-threshold = 50 151 | # When entry count exceed this value, gc will be forced trigger. 152 | # raft-log-gc-count-limit = 72000 153 | # When the approximate size of raft log entries exceed this value, gc will be forced trigger. 154 | # It's recommanded to set it to 3/4 of region-split-size. 155 | # raft-log-gc-size-limit = "72MB" 156 | 157 | # When a peer hasn't been active for max-peer-down-duration, 158 | # we will consider this peer to be down and report it to pd. 159 | # max-peer-down-duration = "5m" 160 | 161 | # Interval to check whether start manual compaction for a region, 162 | # region-compact-check-interval = "5m" 163 | # Number of regions for each time to check. 164 | # region-compact-check-step = 100 165 | # The minimum number of delete tombstones to trigger manual compaction. 166 | # region-compact-min-tombstones = 10000 167 | # Interval to check whether should start a manual compaction for lock column family, 168 | # if written bytes reach lock-cf-compact-threshold for lock column family, will fire 169 | # a manual compaction for lock column family. 170 | # lock-cf-compact-interval = "10m" 171 | # lock-cf-compact-bytes-threshold = "256MB" 172 | 173 | # Interval (s) to check region whether the data are consistent. 174 | # consistency-check-interval = 0 175 | 176 | # Use delete range to drop a large number of continuous keys. 177 | # use-delete-range = false 178 | 179 | # delay time before deleting a stale peer 180 | # clean-stale-peer-delay = "10m" 181 | 182 | # Interval to cleanup import sst files. 183 | # cleanup-import-sst-interval = "10m" 184 | 185 | [coprocessor] 186 | # When it is true, it will try to split a region with table prefix if 187 | # that region crosses tables. It is recommended to turn off this option 188 | # if there will be a large number of tables created. 189 | # split-region-on-table = true 190 | # When the region's size exceeds region-max-size, we will split the region 191 | # into two which the left region's size will be region-split-size or a little 192 | # bit smaller. 193 | # region-max-size = "144MB" 194 | # region-split-size = "96MB" 195 | 196 | [rocksdb] 197 | # Maximum number of concurrent background jobs (compactions and flushes) 198 | # max-background-jobs = 8 199 | 200 | # This value represents the maximum number of threads that will concurrently perform a 201 | # compaction job by breaking it into multiple, smaller ones that are run simultaneously. 202 | # Default: 1 (i.e. no subcompactions) 203 | # max-sub-compactions = 1 204 | 205 | # Number of open files that can be used by the DB. You may need to 206 | # increase this if your database has a large working set. Value -1 means 207 | # files opened are always kept open. You can estimate number of files based 208 | # on target_file_size_base and target_file_size_multiplier for level-based 209 | # compaction. 
210 | # If max-open-files = -1, RocksDB will prefetch index and filter blocks into 211 | # block cache at startup, so if your database has a large working set, it will 212 | # take several minutes to open the db. 213 | max-open-files = 1024 214 | 215 | # Max size of rocksdb's MANIFEST file. 216 | # For detailed explanation please refer to https://github.com/facebook/rocksdb/wiki/MANIFEST 217 | # max-manifest-file-size = "20MB" 218 | 219 | # If true, the database will be created if it is missing. 220 | # create-if-missing = true 221 | 222 | # rocksdb wal recovery mode 223 | # 0 : TolerateCorruptedTailRecords, tolerate incomplete record in trailing data on all logs; 224 | # 1 : AbsoluteConsistency, We don't expect to find any corruption in the WAL; 225 | # 2 : PointInTimeRecovery, Recover to point-in-time consistency; 226 | # 3 : SkipAnyCorruptedRecords, Recovery after a disaster; 227 | # wal-recovery-mode = 2 228 | 229 | # rocksdb write-ahead logs dir path 230 | # This specifies the absolute dir path for write-ahead logs (WAL). 231 | # If it is empty, the log files will be in the same dir as data. 232 | # When you set the path to rocksdb directory in memory like in /dev/shm, you may want to set 233 | # wal-dir to a directory on a persistent storage. 234 | # See https://github.com/facebook/rocksdb/wiki/How-to-persist-in-memory-RocksDB-database 235 | # wal-dir = "/tmp/tikv/store" 236 | 237 | # The following two fields affect how archived write-ahead logs will be deleted. 238 | # 1. If both set to 0, logs will be deleted asap and will not get into the archive. 239 | # 2. If wal-ttl-seconds is 0 and wal-size-limit is not 0, 240 | # WAL files will be checked every 10 min and if total size is greater 241 | # then wal-size-limit, they will be deleted starting with the 242 | # earliest until size_limit is met. All empty files will be deleted. 243 | # 3. If wal-ttl-seconds is not 0 and wal-size-limit is 0, then 244 | # WAL files will be checked every wal-ttl-seconds / 2 and those that 245 | # are older than wal-ttl-seconds will be deleted. 246 | # 4. If both are not 0, WAL files will be checked every 10 min and both 247 | # checks will be performed with ttl being first. 248 | # When you set the path to rocksdb directory in memory like in /dev/shm, you may want to set 249 | # wal-ttl-seconds to a value greater than 0 (like 86400) and backup your db on a regular basis. 250 | # See https://github.com/facebook/rocksdb/wiki/How-to-persist-in-memory-RocksDB-database 251 | # wal-ttl-seconds = 0 252 | # wal-size-limit = 0 253 | 254 | # rocksdb max total wal size 255 | # max-total-wal-size = "4GB" 256 | 257 | # Rocksdb Statistics provides cumulative stats over time. 258 | # Turn statistics on will introduce about 5%-10% overhead for RocksDB, 259 | # but it is worthy to know the internal status of RocksDB. 260 | # enable-statistics = true 261 | 262 | # Dump statistics periodically in information logs. 263 | # Same as rocksdb's default value (10 min). 
264 | # stats-dump-period = "10m" 265 | 266 | # Due to Rocksdb FAQ: https://github.com/facebook/rocksdb/wiki/RocksDB-FAQ, 267 | # If you want to use rocksdb on multi disks or spinning disks, you should set value at 268 | # least 2MB; 269 | # compaction-readahead-size = 0 270 | 271 | # This is the maximum buffer size that is used by WritableFileWrite 272 | # writable-file-max-buffer-size = "1MB" 273 | 274 | # Use O_DIRECT for both reads and writes in background flush and compactions 275 | # use-direct-io-for-flush-and-compaction = false 276 | 277 | # Limit the disk IO of compaction and flush. Compaction and flush can cause 278 | # terrible spikes if they exceed a certain threshold. Consider setting this to 279 | # 50% ~ 80% of the disk throughput for a more stable result. However, in heavy 280 | # write workload, limiting compaction and flush speed can cause write stalls too. 281 | # rate-bytes-per-sec = 0 282 | 283 | # Enable or disable the pipelined write 284 | # enable-pipelined-write = true 285 | 286 | # Allows OS to incrementally sync files to disk while they are being 287 | # written, asynchronously, in the background. 288 | # bytes-per-sync = "0MB" 289 | 290 | # Allows OS to incrementally sync WAL to disk while it is being written. 291 | # wal-bytes-per-sync = "0KB" 292 | 293 | # Specify the maximal size of the Rocksdb info log file. If the log file 294 | # is larger than `max_log_file_size`, a new info log file will be created. 295 | # If max_log_file_size == 0, all logs will be written to one log file. 296 | # Default: 1GB 297 | # info-log-max-size = "1GB" 298 | 299 | # Time for the Rocksdb info log file to roll (in seconds). 300 | # If specified with non-zero value, log file will be rolled 301 | # if it has been active longer than `log_file_time_to_roll`. 302 | # Default: 0 (disabled) 303 | # info-log-roll-time = "0" 304 | 305 | # Maximal Rocksdb info log files to be kept. 306 | # Default: 10 307 | # info-log-keep-log-file-num = 10 308 | 309 | # This specifies the Rocksdb info LOG dir. 310 | # If it is empty, the log files will be in the same dir as data. 311 | # If it is non empty, the log files will be in the specified dir, 312 | # and the db data dir's absolute path will be used as the log file 313 | # name's prefix. 314 | # Default: empty 315 | # info-log-dir = "" 316 | 317 | # Column Family default used to store actual data of the database. 318 | [rocksdb.defaultcf] 319 | # compression method (if any) is used to compress a block. 320 | # no: kNoCompression 321 | # snappy: kSnappyCompression 322 | # zlib: kZlibCompression 323 | # bzip2: kBZip2Compression 324 | # lz4: kLZ4Compression 325 | # lz4hc: kLZ4HCCompression 326 | # zstd: kZSTD 327 | 328 | # per level compression 329 | # compression-per-level = ["no", "no", "lz4", "lz4", "lz4", "zstd", "zstd"] 330 | 331 | # Approximate size of user data packed per block. Note that the 332 | # block size specified here corresponds to uncompressed data. 333 | # block-size = "64KB" 334 | 335 | # If you're doing point lookups you definitely want to turn bloom filters on, We use 336 | # bloom filters to avoid unnecessary disk reads. Default bits_per_key is 10, which 337 | # yields ~1% false positive rate. Larger bits_per_key values will reduce false positive 338 | # rate, but increase memory usage and space amplification. 
339 | # bloom-filter-bits-per-key = 10 340 | 341 | # false means one sst file one bloom filter, true means evry block has a corresponding bloom filter 342 | # block-based-bloom-filter = false 343 | 344 | # level0-file-num-compaction-trigger = 4 345 | 346 | # Soft limit on number of level-0 files. We start slowing down writes at this point. 347 | # level0-slowdown-writes-trigger = 20 348 | 349 | # Maximum number of level-0 files. We stop writes at this point. 350 | # level0-stop-writes-trigger = 36 351 | 352 | # Amount of data to build up in memory (backed by an unsorted log 353 | # on disk) before converting to a sorted on-disk file. 354 | # write-buffer-size = "128MB" 355 | 356 | # The maximum number of write buffers that are built up in memory. 357 | # max-write-buffer-number = 5 358 | 359 | # The minimum number of write buffers that will be merged together 360 | # before writing to storage. 361 | # min-write-buffer-number-to-merge = 1 362 | 363 | # Control maximum total data size for base level (level 1). 364 | # max-bytes-for-level-base = "512MB" 365 | 366 | # Target file size for compaction. 367 | # target-file-size-base = "8MB" 368 | 369 | # Max bytes for compaction.max_compaction_bytes 370 | # max-compaction-bytes = "2GB" 371 | 372 | # There are four different algorithms to pick files to compact. 373 | # 0 : ByCompensatedSize 374 | # 1 : OldestLargestSeqFirst 375 | # 2 : OldestSmallestSeqFirst 376 | # 3 : MinOverlappingRatio 377 | # compaction-pri = 3 378 | 379 | # block-cache used to cache uncompressed blocks, big block-cache can speed up read. 380 | # in normal cases should tune to 30%-50% system's total memory. 381 | # block-cache-size = "1GB" 382 | 383 | # Indicating if we'd put index/filter blocks to the block cache. 384 | # If not specified, each "table reader" object will pre-load index/filter block 385 | # during table initialization. 386 | # cache-index-and-filter-blocks = true 387 | 388 | # Pin level0 filter and index blocks in cache. 389 | # pin-l0-filter-and-index-blocks = true 390 | 391 | # Enable read amplication statistics. 392 | # value => memory usage (percentage of loaded blocks memory) 393 | # 1 => 12.50 % 394 | # 2 => 06.25 % 395 | # 4 => 03.12 % 396 | # 8 => 01.56 % 397 | # 16 => 00.78 % 398 | # read-amp-bytes-per-bit = 0 399 | 400 | # Pick target size of each level dynamically. 401 | # dynamic-level-bytes = true 402 | 403 | # Options for Column Family write 404 | # Column Family write used to store commit informations in MVCC model 405 | [rocksdb.writecf] 406 | # compression-per-level = ["no", "no", "lz4", "lz4", "lz4", "zstd", "zstd"] 407 | # block-size = "64KB" 408 | # write-buffer-size = "128MB" 409 | # max-write-buffer-number = 5 410 | # min-write-buffer-number-to-merge = 1 411 | # max-bytes-for-level-base = "512MB" 412 | # target-file-size-base = "8MB" 413 | 414 | # in normal cases should tune to 10%-30% system's total memory. 
415 | # block-cache-size = "256MB" 416 | # level0-file-num-compaction-trigger = 4 417 | # level0-slowdown-writes-trigger = 20 418 | # level0-stop-writes-trigger = 36 419 | # cache-index-and-filter-blocks = true 420 | # pin-l0-filter-and-index-blocks = true 421 | # compaction-pri = 3 422 | # read-amp-bytes-per-bit = 0 423 | # dynamic-level-bytes = true 424 | 425 | [rocksdb.lockcf] 426 | # compression-per-level = ["no", "no", "no", "no", "no", "no", "no"] 427 | # block-size = "16KB" 428 | # write-buffer-size = "128MB" 429 | # max-write-buffer-number = 5 430 | # min-write-buffer-number-to-merge = 1 431 | # max-bytes-for-level-base = "128MB" 432 | # target-file-size-base = "8MB" 433 | # block-cache-size = "256MB" 434 | # level0-file-num-compaction-trigger = 1 435 | # level0-slowdown-writes-trigger = 20 436 | # level0-stop-writes-trigger = 36 437 | # cache-index-and-filter-blocks = true 438 | # pin-l0-filter-and-index-blocks = true 439 | # compaction-pri = 0 440 | # read-amp-bytes-per-bit = 0 441 | # dynamic-level-bytes = true 442 | 443 | [raftdb] 444 | # max-sub-compactions = 1 445 | max-open-files = 1024 446 | # max-manifest-file-size = "20MB" 447 | # create-if-missing = true 448 | 449 | # enable-statistics = true 450 | # stats-dump-period = "10m" 451 | 452 | # compaction-readahead-size = 0 453 | # writable-file-max-buffer-size = "1MB" 454 | # use-direct-io-for-flush-and-compaction = false 455 | # enable-pipelined-write = true 456 | # allow-concurrent-memtable-write = false 457 | # bytes-per-sync = "0MB" 458 | # wal-bytes-per-sync = "0KB" 459 | 460 | # info-log-max-size = "1GB" 461 | # info-log-roll-time = "0" 462 | # info-log-keep-log-file-num = 10 463 | # info-log-dir = "" 464 | 465 | [raftdb.defaultcf] 466 | # compression-per-level = ["no", "no", "lz4", "lz4", "lz4", "zstd", "zstd"] 467 | # block-size = "64KB" 468 | # write-buffer-size = "128MB" 469 | # max-write-buffer-number = 5 470 | # min-write-buffer-number-to-merge = 1 471 | # max-bytes-for-level-base = "512MB" 472 | # target-file-size-base = "8MB" 473 | 474 | # should tune to 256MB~2GB. 475 | # block-cache-size = "256MB" 476 | # level0-file-num-compaction-trigger = 4 477 | # level0-slowdown-writes-trigger = 20 478 | # level0-stop-writes-trigger = 36 479 | # cache-index-and-filter-blocks = true 480 | # pin-l0-filter-and-index-blocks = true 481 | # compaction-pri = 0 482 | # read-amp-bytes-per-bit = 0 483 | # dynamic-level-bytes = true 484 | 485 | [security] 486 | # set the path for certificates. Empty string means disabling secure connectoins. 487 | # ca-path = "" 488 | # cert-path = "" 489 | # key-path = "" 490 | 491 | [import] 492 | # the directory to store importing kv data. 493 | # import-dir = "/tmp/tikv/import" 494 | # number of threads to handle RPC requests. 495 | # num-threads = 8 496 | # stream channel window size, stream will be blocked on channel full. 
497 | # stream-channel-window = 128 -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.3' 2 | services: 3 | 4 | jobmanager: 5 | image: flink:1.12.2-scala_2.12 6 | volumes: 7 | - ./logs/jobmanager/:/opt/flink/log/ 8 | - ./config/flink-jars/:/opt/flink/connector-lib/ 9 | - ./logs/flink-checkpoints/:/flink-checkpoints 10 | hostname: "jobmanager" 11 | expose: 12 | - "6123" 13 | ports: 14 | - "8081:8081" 15 | command: jobmanager 16 | environment: 17 | - | 18 | FLINK_PROPERTIES= 19 | jobmanager.rpc.address: jobmanager 20 | state.backend: rocksdb 21 | state.checkpoints.dir: file:///flink-checkpoints 22 | 23 | taskmanager: 24 | image: flink:1.12.2-scala_2.12 25 | volumes: 26 | - ./logs/taskmanager/:/opt/flink/log/ 27 | - ./logs/flink-checkpoints/:/flink-checkpoints 28 | expose: 29 | - "6121" 30 | - "6122" 31 | depends_on: 32 | - jobmanager 33 | command: taskmanager 34 | links: 35 | - jobmanager:jobmanager 36 | environment: 37 | - | 38 | FLINK_PROPERTIES= 39 | jobmanager.rpc.address: jobmanager 40 | taskmanager.numberOfTaskSlots: 8 41 | state.backend: rocksdb 42 | state.checkpoints.dir: file:///flink-checkpoints 43 | 44 | # zookeeper: 45 | # image: wurstmeister/zookeeper:3.4.6 46 | # ports: 47 | # - "2181:2181" 48 | 49 | mysql: 50 | image: mysql 51 | command: --default-authentication-plugin=mysql_native_password 52 | restart: always 53 | ports: 54 | - 3307:3306 55 | environment: 56 | MYSQL_ALLOW_EMPTY_PASSWORD: 'yes' 57 | security_opt: 58 | - seccomp:unconfined 59 | 60 | pd: 61 | image: pingcap/pd:latest 62 | ports: 63 | - "2379" 64 | volumes: 65 | - ./config/tidb-config/pd.toml:/pd.toml:ro 66 | - ./logs/tidbs:/logs/ 67 | command: 68 | - --name=pd 69 | - --client-urls=http://0.0.0.0:2379 70 | - --peer-urls=http://0.0.0.0:2380 71 | - --advertise-client-urls=http://pd:2379 72 | - --advertise-peer-urls=http://pd:2380 73 | - --initial-cluster=pd=http://pd:2380 74 | - --data-dir=/data/pd 75 | - --config=/pd.toml 76 | - --log-file=/logs/pd.log 77 | restart: on-failure 78 | 79 | tikv: 80 | image: pingcap/tikv:latest 81 | volumes: 82 | - ./config/tidb-config/tikv.toml:/tikv.toml:ro 83 | - ./logs/tidbs:/logs/ 84 | command: 85 | - --addr=0.0.0.0:20160 86 | - --advertise-addr=tikv:20160 87 | - --data-dir=/data/tikv 88 | - --pd=pd:2379 89 | - --config=/tikv.toml 90 | - --log-file=/logs/tikv.log 91 | depends_on: 92 | - "pd" 93 | restart: on-failure 94 | 95 | tidb: 96 | image: pingcap/tidb:latest 97 | ports: 98 | - "4000:4000" 99 | - "10080:10080" 100 | volumes: 101 | - ./config/tidb-config/tidb.toml:/tidb.toml:ro 102 | - ./logs/tidbs:/logs/ 103 | command: 104 | - --store=tikv 105 | - --path=pd:2379 106 | - --config=/tidb.toml 107 | - --log-file=/logs/tidb.log 108 | - --log-slow-query=/logs/tidb_slow.log 109 | - --advertise-address=tidb 110 | depends_on: 111 | - "tikv" 112 | restart: on-failure --------------------------------------------------------------------------------