├── .gitignore
├── LICENSE
├── README.md
├── pom.xml
└── src
├── main
└── java
│ ├── cn
│ └── net
│ │ └── communion
│ │ └── sync
│ │ ├── elasticsearch
│ │ └── Client.java
│ │ ├── entity
│ │ ├── JobInfo.java
│ │ └── Node.java
│ │ ├── helper
│ │ └── SysProps.java
│ │ ├── main
│ │ └── App.java
│ │ ├── quartz
│ │ ├── JobScheduler.java
│ │ └── Listener.java
│ │ ├── service
│ │ └── JdbcConnector.java
│ │ └── task
│ │ └── Task.java
│ ├── log4j.properties
│ └── spring
│ ├── root.xml
│ └── xml
│ ├── client.xml
│ ├── db.xml
│ ├── jobs.xml
│ └── nodes.xml
└── test
└── java
└── cn
└── net
└── communion
└── sync
└── AppTest.java
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | .settings
3 | .classpath
4 | .project
5 | *.log
6 | sys.properties
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2016 GongDexing
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## elasticsearch-mysql简单实用的同步工具
2 | 简单实用的同步工具,实现mysql数据库中数据定期同步到elasticsearch,只需简单的配置,便能达到非凡的效果。
3 |
4 | ## 配置说明
5 | 主要配置在spring/xml目录下面的四个xml文件中
6 | - **client.xml**:配置cluster.name(集群名称),集群名称要和elasticsearch保持一致,可以参考elasticsearch.yml文件
7 | - **db.xml**:配置mysql数据库的连接方式,数据库连接池使用的是Druid,主要是设置url、username、password,其他采用默认配置即可
8 | - **jobs.xml**:配置数据同步的任务,可以配置多个job,每个job包括:name(任务名称,不要出现重复)、index(elasticsearch的索引)、type(elasticsearch的type)、cron(任务的调度机制)、sql(查询mysql的sql语句)、step(分页查询的每页数量limit x, **step**)、params(sql语句的参数,用于实现增量同步)、paramTypes(参数类型)
9 | - **nodes.xml**:配置同步到的elasticsearch节点,可以配置多个,每个节点包括:ip(节点的ip地址)、port(节点的端口,一般为9300)
10 |
11 | ## 编译运行
12 | > git clone git@192.168.1.244:hainan-bigdata/elasticsearch-mysql.git
13 | > cd elasticsearch-mysql
14 | > mvn clean package
15 | > java -jar target/elasticsearch-mysql-0.0.1-SNAPSHOT.jar
16 |
17 | ## 测试
18 | > $ curl -XGET "127.0.0.1:9200/data/comment/_search?pretty"
19 |
20 | ```js
21 | {
22 | "took" : 34,
23 | "timed_out" : false,
24 | "_shards" : {
25 | "total" : 5,
26 | "successful" : 5,
27 | "failed" : 0
28 | },
29 | "hits" : {
30 | "total" : 39,
31 | "max_score" : 1.0,
32 | "hits" : [
33 | {
34 | "_index" : "data",
35 | "_type" : "comment",
36 | ...
37 | ```
38 |
39 | ## 实现原理
40 | ### 增量同步
41 | 为了提高同步的效率并降低对数据库的压力,建议在配置时都采用增量同步的方式,前提是数据表中设置有与时间相关的字段,根据该字段每次同步只会同步新的数据,而不是把已经同步过的数据再同步一遍。
42 | ##### 举例说明
43 | ```
44 |
52 | ```
53 | 在job2的配置中设置sql的条件为 **time > ?** ,而 **?** 指的是 **sys.lastTime2** ,这样每次同步都只会同步上次同步后更新的数据。**sys.lastTime2** 的名称可以随意更改,但是不要出现重复。每次同步完以后,会将开始同步的时间作为 **value**、**sys.lastTime2** 作为 **key** 更新到sys.properties文件中。
54 | > sys.properties文件内容举例
55 |
56 | ```
57 | #last job finish at
58 | #Wed Dec 21 15:08:10 CST 2016
59 | sys.lastTime2=2016-12-21T15\:08\:10.003
60 | sys.lastTime1=2016-12-21T15\:08\:10.002
61 | ```
62 |
63 | 因此,要想实现全量同步也非常简单,只需将sys.properties文件删除即可, 当程序没有检测到sys.properties文件,便会将所有的数据同步到elasticsearch中。
64 | ### 调度机制
65 | 采用quartz实现任务调度,最小的粒度可以到秒级,涉及quartz相关的代码主要在JobScheduler.java和Listener.java两个文件中
66 | > JobScheduler部分代码片段
67 |
68 | ```java
69 | public JobScheduler pushJobs(Collection infos) {
70 | infos.forEach(info -> {
71 | JobDetail job = newJob(Task.class).withIdentity(info.getName(), "jobs").build();
72 | job.getJobDataMap().put("jobInfo", info);
73 | CronTrigger trigger = newTrigger().withIdentity(info.getName(), "triggers")
74 | .withSchedule(cronSchedule(info.getCron())).build();
75 | try {
76 | scheduler.scheduleJob(job, trigger);
77 | } catch (SchedulerException e) {
78 | e.printStackTrace();
79 | }
80 | });
81 | return this;
82 | }
83 |
84 | public boolean isRunning(String jobKey) {
85 | try {
86 | for (JobExecutionContext context : scheduler.getCurrentlyExecutingJobs()) {
87 | if (context.getJobDetail().getKey().getName().equals(jobKey)) {
88 | return true;
89 | }
90 | }
91 | } catch (SchedulerException e) {
92 | logger.info("get jobs status failed");
93 | e.printStackTrace();
94 | }
95 | return false;
96 | }
97 |
98 | public void start() throws SchedulerException {
99 | scheduler.start();
100 | }
101 | ```
102 |
103 | 代码说明:
104 | - **pushJobs()** 实现将jobs.xml配置的job添加进quartz的scheduler中
105 | - **isRunning()** 根据job.name检测某个任务是否处于运行状态,对于调度间隔比较短或者同步时间比较长的任务,可能出现新的任务已经开始然而上个任务还未执行完成,这时新的任务会直接被取消执行
106 | - **start()** 启动任务调度
107 |
108 |
109 | > Listener部分代码片段
110 |
111 | ```java
112 | @Override
113 | public void jobToBeExecuted(JobExecutionContext context) {
114 | String jobKey = context.getJobDetail().getKey().getName();
115 | boolean isCancel = JobScheduler.getInstance().isRunning(jobKey);
116 | context.getJobDetail().getJobDataMap().put("cancel", isCancel);
117 | }
118 | ```
119 | 代码说明:
120 | - 每一个任务执行之前均会调用 **jobToBeExecuted()** ,在该方法中调用 **JobScheduler** 的 **isRunning** 并且设置 **isCancel** 值,如果 **isCancel** 为 **true** 便会取消任务的执行。
121 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
3 | 4.0.0
4 |
5 | cn.net.communion
6 | elasticsearch-mysql
7 | 0.0.1-SNAPSHOT
8 | jar
9 |
10 | elasticsearch-mysql
11 | http://maven.apache.org
12 |
13 |
14 | UTF-8
15 |
16 |
17 |
18 |
19 | junit
20 | junit
21 | 3.8.1
22 | test
23 |
24 |
25 | org.springframework
26 | spring-core
27 | 4.3.4.RELEASE
28 |
29 |
30 |
31 | org.springframework
32 | spring-context
33 | 4.3.4.RELEASE
34 |
35 |
36 | org.springframework
37 | spring-jdbc
38 | 4.3.4.RELEASE
39 |
40 |
41 | org.springframework
42 | spring-beans
43 | 4.3.4.RELEASE
44 |
45 |
46 | mysql
47 | mysql-connector-java
48 | 5.1.22
49 |
50 |
51 | org.quartz-scheduler
52 | quartz
53 | 2.1.3
54 |
55 |
56 | log4j
57 | log4j
58 | 1.2.14
59 |
60 |
61 | org.apache.logging.log4j
62 | log4j-to-slf4j
63 | 2.7
64 |
65 |
66 | org.slf4j
67 | slf4j-api
68 | 1.7.21
69 |
70 |
71 | org.slf4j
72 | slf4j-simple
73 | 1.7.21
74 |
75 |
76 | com.alibaba
77 | druid
78 | 1.0.27
79 |
80 |
81 | org.elasticsearch.client
82 | transport
83 | 5.1.1
84 |
85 |
86 | org.elasticsearch
87 | elasticsearch
88 | 5.1.1
89 |
90 |
91 |
92 |
93 |
94 |
95 | org.apache.maven.plugins
96 | maven-jar-plugin
97 |
98 |
99 |
100 | true
101 | lib/
102 | cn.net.communion.sync.main.App
103 |
104 |
105 |
106 |
107 |
108 | org.apache.maven.plugins
109 | maven-dependency-plugin
110 |
111 |
112 | copy
113 | package
114 |
115 | copy-dependencies
116 |
117 |
118 | ${project.build.directory}/lib
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 | src/main/java
127 |
128 | **/*.properties
129 | **/*.xml
130 |
131 | true
132 |
133 |
134 |
135 |
136 |
--------------------------------------------------------------------------------
/src/main/java/cn/net/communion/sync/elasticsearch/Client.java:
--------------------------------------------------------------------------------
1 | package cn.net.communion.sync.elasticsearch;
2 |
3 | import java.net.InetAddress;
4 | import java.net.UnknownHostException;
5 | import java.util.List;
6 | import java.util.Map;
7 | import java.util.Set;
8 |
9 | import org.apache.log4j.Logger;
10 | import org.elasticsearch.action.bulk.BulkRequestBuilder;
11 | import org.elasticsearch.action.search.SearchResponse;
12 | import org.elasticsearch.client.transport.TransportClient;
13 | import org.elasticsearch.common.settings.Settings;
14 | import org.elasticsearch.common.transport.InetSocketTransportAddress;
15 | import org.elasticsearch.transport.client.PreBuiltTransportClient;
16 |
17 | import cn.net.communion.sync.entity.Node;
18 |
19 | public class Client {
20 | private static TransportClient client;
21 | private static Logger logger = Logger.getLogger(Client.class);
22 |
/**
 * Creates the transport client for the given cluster and registers every
 * configured node with it.
 *
 * <p>The created client is stored in the static {@code client} field, so
 * constructing another instance replaces any previously stored client.
 *
 * @param clusterName value written to the {@code cluster.name} setting
 * @param nodes       nodes whose ip/port pairs are added as transport addresses
 */
private Client(String clusterName, Set<Node> nodes) {
    Settings settings = Settings.builder().put("client.transport.sniff", true)
            .put("cluster.name", clusterName).build();
    client = new PreBuiltTransportClient(settings);
    for (Node node : nodes) {
        try {
            client.addTransportAddress(new InetSocketTransportAddress(
                    InetAddress.getByName(node.getIp()), node.getPort()));
        } catch (UnknownHostException e) {
            // An unresolvable node is skipped so the remaining nodes are still
            // registered; the failure goes to the log instead of stderr.
            logger.error("unable to resolve elasticsearch node "
                    + node.getIp() + ":" + node.getPort(), e);
        }
    }
}
36 |
37 | public static boolean bulkIndex(String index, String type, List