├── .gitignore
├── .travis.yml
├── Dockerfile
├── LICENSE
├── Makefile
├── README.md
├── clear_vendor.sh
├── cmd
│   └── go-mysql-elasticsearch
│       └── main.go
├── elastic
│   ├── client.go
│   └── client_test.go
├── etc
│   └── river.toml
├── go.mod
├── go.sum
└── river
    ├── config.go
    ├── master.go
    ├── river.go
    ├── river_extra_test.go
    ├── river_test.go
    ├── rule.go
    ├── status.go
    └── sync.go

/.gitignore:
--------------------------------------------------------------------------------
1 | bin

--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: go
 2 | 
 3 | go:
 4 |   - "1.11"
 5 | 
 6 | services:
 7 |   - elasticsearch
 8 | 
 9 | addons:
10 |   apt:
11 |     sources:
12 |       - mysql-5.7-trusty
13 |     packages:
14 |       - mysql-server
15 |       - mysql-client
16 | 
17 | before_install:
18 |   - sudo mysql -e "use mysql; update user set authentication_string=PASSWORD('') where User='root'; update user set plugin='mysql_native_password';FLUSH PRIVILEGES;"
19 |   - sudo mysql_upgrade
20 | 
21 |   # stop mysql and switch to row-based binlog format
22 |   - "sudo service mysql stop || true"
23 |   - "echo '[mysqld]' | sudo tee /etc/mysql/conf.d/replication.cnf"
24 |   - "echo 'server-id=1' | sudo tee -a /etc/mysql/conf.d/replication.cnf"
25 |   - "echo 'log-bin=mysql-bin' | sudo tee -a /etc/mysql/conf.d/replication.cnf"
26 |   - "echo 'binlog-format = row' | sudo tee -a /etc/mysql/conf.d/replication.cnf"
27 | 
28 |   # Start mysql (tolerate errors so we can still dump the logs)
29 |   - "sudo service mysql start || true"
30 |   - "sudo tail -1000 /var/log/syslog"
31 | 
32 |   - mysql -e "CREATE DATABASE IF NOT EXISTS test;" -uroot
33 | 
34 | script:
35 |   - go test --race ./...
36 | 
37 | env:
38 |   - GO111MODULE=on

--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM golang:alpine
 2 | 
 3 | MAINTAINER siddontang
 4 | 
 5 | RUN apk add --no-cache tini mariadb-client
 6 | 
 7 | ADD . /go/src/github.com/siddontang/go-mysql-elasticsearch
 8 | 
 9 | RUN cd /go/src/github.com/siddontang/go-mysql-elasticsearch/ && \
10 |     go build -o bin/go-mysql-elasticsearch ./cmd/go-mysql-elasticsearch && \
11 |     cp -f ./bin/go-mysql-elasticsearch /go/bin/go-mysql-elasticsearch
12 | 
13 | ENTRYPOINT ["/sbin/tini","--","go-mysql-elasticsearch"]

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015 siddontang
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.

--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | all: build
 2 | 
 3 | build: build-elasticsearch
 4 | 
 5 | build-elasticsearch:
 6 | 	GO111MODULE=on go build -o bin/go-mysql-elasticsearch ./cmd/go-mysql-elasticsearch
 7 | 
 8 | test:
 9 | 	GO111MODULE=on go test -timeout 1m --race ./...
10 | 
11 | clean:
12 | 	GO111MODULE=on go clean -i ./...
13 | 	@rm -rf bin

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | go-mysql-elasticsearch is a service that automatically syncs your MySQL data into Elasticsearch.
 2 | 
 3 | It uses `mysqldump` to fetch the original data first, then syncs data incrementally with the binlog.
 4 | 
 5 | ## Install
 6 | 
 7 | + Install Go (1.9+) and set your [GOPATH](https://golang.org/doc/code.html#GOPATH)
 8 | + `go get github.com/siddontang/go-mysql-elasticsearch`, it will print some messages in the console, you can skip them. :-)
 9 | + cd `$GOPATH/src/github.com/siddontang/go-mysql-elasticsearch`
10 | + `make`
11 | 
12 | ## How to use?
13 | 
14 | + Create a table in MySQL.
15 | + Create the associated Elasticsearch index, document type and mappings if possible; if not, Elasticsearch will create these automatically.
16 | + Create a base config, see the example config [river.toml](./etc/river.toml).
17 | + Set the MySQL source in the config file, see [Source](#source) below.
18 | + Customize the MySQL to Elasticsearch mapping rules in the config file, see [Rule](#rule) below.
19 | + Start `./bin/go-mysql-elasticsearch -config=./etc/river.toml` and enjoy it.
20 | 
21 | ## Notice
22 | 
23 | + MySQL supported version < 8.0
24 | + ES supported version < 6.0
25 | + binlog format must be **row**.
26 | + binlog row image must be **full** for MySQL; you may lose some field data if you update PK data in MySQL with a minimal or noblob binlog row image. MariaDB only supports the full row image.
27 | + Cannot alter table format at runtime.
28 | + A MySQL table which will be synced should have a PK (primary key); a multi-column PK is allowed now, e.g. if the PK is (a, b), we will use "a:b" as the key. The PK data will be used as the "id" in Elasticsearch. You can also configure the id's constituent parts with other columns.
29 | + You should create the associated mappings in Elasticsearch first; I don't think using the default mapping is a wise decision, you must know how to search accurately.
30 | + `mysqldump` must exist on the same node as go-mysql-elasticsearch; if not, go-mysql-elasticsearch will try to sync the binlog only.
31 | + Don't change too many rows at the same time in one SQL statement.
32 | 
33 | ## Source
34 | 
35 | In go-mysql-elasticsearch, you must decide which tables you want to sync into Elasticsearch in the source config.
36 | 
37 | The format in the config file is below:
38 | 
39 | ```
40 | [[source]]
41 | schema = "test"
42 | tables = ["t1", "t2"]
43 | 
44 | [[source]]
45 | schema = "test_1"
46 | tables = ["t3", "t4"]
47 | ```
48 | 
49 | `schema` is the database name, and `tables` includes the tables that need to be synced.
50 | 
51 | If you want to sync **all tables in a database**, you can use an **asterisk(\*)**.
 52 | ```
 53 | [[source]]
 54 | schema = "test"
 55 | tables = ["*"]
 56 | 
 57 | # When using an asterisk, it is not allowed to sync multiple tables
 58 | # tables = ["*", "table"]
 59 | ```
 60 | 
 61 | ## Rule
 62 | 
 63 | By default, go-mysql-elasticsearch will use the MySQL table name as the Elasticsearch index and type name, and the MySQL table field name as the Elasticsearch field name.
 64 | e.g. for a table named blog, the default index and type in Elasticsearch are both named blog; for a table field named title,
 65 | the default field name is also title.
 66 | 
 67 | Notice: go-mysql-elasticsearch will use the lower-case name for the ES index and type. E.g. if your table is named BLOG, the ES index and type are both named blog.
 68 | 
 69 | Rules let you change this name mapping. The rule format in the config file is below:
 70 | 
 71 | ```
 72 | [[rule]]
 73 | schema = "test"
 74 | table = "t1"
 75 | index = "t"
 76 | type = "t"
 77 | parent = "parent_id"
 78 | id = ["id"]
 79 | 
 80 | [rule.field]
 81 | mysql = "title"
 82 | elastic = "my_title"
 83 | ```
 84 | 
 85 | In the example above, we will use a new index and type both named "t" instead of the default "t1", and use "my_title" instead of the field name "title".
 86 | 
 87 | ## Rule field types
 88 | 
 89 | In order to map a MySQL column to a different Elasticsearch type, you can define the field type as follows:
 90 | 
 91 | ```
 92 | [[rule]]
 93 | schema = "test"
 94 | table = "t1"
 95 | index = "t"
 96 | type = "t"
 97 | 
 98 | [rule.field]
 99 | # This will map column title to Elasticsearch field my_title
100 | title="my_title"
101 | 
102 | # This will map column title to Elasticsearch field my_title and use the array type
103 | title="my_title,list"
104 | 
105 | # This will map column title to Elasticsearch field title and use the array type
106 | title=",list"
107 | 
108 | # If the created_time field type is "int" and you want to convert it to the "date" type in ES, you can do it as below
109 | created_time=",date"
110 | ```
111 | 
112 | The "list" modifier translates a MySQL string field like "a,b,c" into an Elasticsearch array type '["a", "b", "c"]'; this is especially useful if you need to filter on those fields in Elasticsearch.
113 | 
114 | ## Wildcard table
115 | 
116 | go-mysql-elasticsearch only lets you determine which tables are to be synced, but sometimes, if you split a big table into multiple sub tables, like 1024 tables named table_0000, table_0001, ... table_1023, it is very hard to write a rule for every table.
117 | 
118 | go-mysql-elasticsearch supports using wildcard tables, e.g:
119 | 
120 | ```
121 | [[source]]
122 | schema = "test"
123 | tables = ["test_river_[0-9]{4}"]
124 | 
125 | [[rule]]
126 | schema = "test"
127 | table = "test_river_[0-9]{4}"
128 | index = "river"
129 | type = "river"
130 | ```
131 | 
132 | "test_river_[0-9]{4}" is a wildcard table definition, which represents "test_river_0000" to "test_river_9999"; the table name in the rule must be the same wildcard.
133 | 
134 | In the above example, if you have 1024 sub tables, all of them will be synced into Elasticsearch with index "river" and type "river".
135 | 
136 | ## Parent-Child Relationship
137 | 
138 | One-to-many join ([parent-child relationship](https://www.elastic.co/guide/en/elasticsearch/guide/current/parent-child.html) in Elasticsearch) is supported. Simply specify the field name for the `parent` property.
139 | 
140 | ```
141 | [[rule]]
142 | schema = "test"
143 | table = "t1"
144 | index = "t"
145 | type = "t"
146 | parent = "parent_id"
147 | ```
148 | 
149 | Note: you should [set up the relationship](https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-parent-field.html) by creating the mapping manually.
150 | 
151 | ## Filter fields
152 | 
153 | You can use `filter` to sync only the specified fields, like:
154 | 
155 | ```
156 | [[rule]]
157 | schema = "test"
158 | table = "tfilter"
159 | index = "test"
160 | type = "tfilter"
161 | 
162 | # Only sync following columns
163 | filter = ["id", "name"]
164 | ```
165 | 
166 | In the above example, we will only sync the MySQL table tfilter's columns `id` and `name` to Elasticsearch.
167 | 
168 | ## Ignore table without a primary key
169 | When you sync a table without a primary key, you will see the error message below.
170 | ```
171 | schema.table must have a PK for a column
172 | ```
173 | You can ignore these tables in the configuration like:
174 | ```
175 | # Ignore table without a primary key
176 | skip_no_pk_table = true
177 | ```
178 | 
179 | ## Elasticsearch Pipeline
180 | You can use an [Ingest Node Pipeline](https://www.elastic.co/guide/en/elasticsearch/reference/current/ingest.html) to pre-process documents before indexing, for things like JSON string decoding, merging fields and more.
181 | 
182 | ```
183 | [[rule]]
184 | schema = "test"
185 | table = "t1"
186 | index = "t"
187 | type = "_doc"
188 | 
189 | # pipeline id
190 | pipeline = "my-pipeline-id"
191 | ```
192 | Note: you should [create the pipeline](https://www.elastic.co/guide/en/elasticsearch/reference/current/put-pipeline-api.html) manually; this requires Elasticsearch >= 5.0.
193 | 
194 | ## Why not other rivers?
195 | 
196 | Although there are some other MySQL rivers for Elasticsearch, like [elasticsearch-river-jdbc](https://github.com/jprante/elasticsearch-river-jdbc) and [elasticsearch-river-mysql](https://github.com/scharron/elasticsearch-river-mysql), I still want to build a new one with Go. Why?
197 | 
198 | + Customization: I want to decide which tables to sync, the associated index and type names, and even the field names in Elasticsearch.
199 | + Incremental update with binlog, which can resume from the last sync position when the service starts again.
200 | + A common sync framework, not only for Elasticsearch but also for others, like memcached, redis, etc...
201 | + Wildcard table support: we have many sub tables like table_0000 - table_1023, but want to use a single Elasticsearch index and type.
202 | 
203 | ## Todo
204 | 
205 | + MySQL 8
206 | + ES 6
207 | + Statistics.
208 | 
209 | ## Donate
210 | 
211 | If you like the project and want to buy me a cola, you can do so through:
212 | 
213 | |PayPal|微信|
214 | |------|---|
215 | |[![](https://www.paypalobjects.com/webstatic/paypalme/images/pp_logo_small.png)](https://paypal.me/siddontang)|![](https://github.com/siddontang/blog/blob/master/donate/weixin.png)|
216 | 
217 | ## Feedback
218 | 
219 | go-mysql-elasticsearch is still in development, and we will try to use it in production later. Any feedback is very welcome.
220 | 221 | Email: siddontang@gmail.com 222 | -------------------------------------------------------------------------------- /clear_vendor.sh: -------------------------------------------------------------------------------- 1 | find vendor \( -type f -or -type l \) -not -name "*.go" -not -name "LICENSE" -not -name "*.s" -not -name "PATENTS" -not -name "*.h" -not -name "*.c" | xargs -I {} rm {} 2 | # delete all test files 3 | find vendor -type f -name "*_generated.go" | xargs -I {} rm {} 4 | find vendor -type f -name "*_test.go" | xargs -I {} rm {} 5 | find vendor -type d -name "_vendor" | xargs -I {} rm -rf {} 6 | find vendor -type d -empty | xargs -I {} rm -rf {} -------------------------------------------------------------------------------- /cmd/go-mysql-elasticsearch/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "os" 6 | "os/signal" 7 | "runtime" 8 | "syscall" 9 | 10 | "github.com/juju/errors" 11 | "github.com/siddontang/go-log/log" 12 | "github.com/siddontang/go-mysql-elasticsearch/river" 13 | ) 14 | 15 | var configFile = flag.String("config", "./etc/river.toml", "go-mysql-elasticsearch config file") 16 | var my_addr = flag.String("my_addr", "", "MySQL addr") 17 | var my_user = flag.String("my_user", "", "MySQL user") 18 | var my_pass = flag.String("my_pass", "", "MySQL password") 19 | var es_addr = flag.String("es_addr", "", "Elasticsearch addr") 20 | var data_dir = flag.String("data_dir", "", "path for go-mysql-elasticsearch to save data") 21 | var server_id = flag.Int("server_id", 0, "MySQL server id, as a pseudo slave") 22 | var flavor = flag.String("flavor", "", "flavor: mysql or mariadb") 23 | var execution = flag.String("exec", "", "mysqldump execution path") 24 | var logLevel = flag.String("log_level", "info", "log level") 25 | 26 | func main() { 27 | runtime.GOMAXPROCS(runtime.NumCPU()) 28 | flag.Parse() 29 | 30 | log.SetLevelByName(*logLevel) 31 | 32 | sc := make(chan os.Signal, 1) 33 | signal.Notify(sc, 34 | os.Kill, 35 | os.Interrupt, 36 | syscall.SIGHUP, 37 | syscall.SIGINT, 38 | syscall.SIGTERM, 39 | syscall.SIGQUIT) 40 | 41 | cfg, err := river.NewConfigWithFile(*configFile) 42 | if err != nil { 43 | println(errors.ErrorStack(err)) 44 | return 45 | } 46 | 47 | if len(*my_addr) > 0 { 48 | cfg.MyAddr = *my_addr 49 | } 50 | 51 | if len(*my_user) > 0 { 52 | cfg.MyUser = *my_user 53 | } 54 | 55 | if len(*my_pass) > 0 { 56 | cfg.MyPassword = *my_pass 57 | } 58 | 59 | if *server_id > 0 { 60 | cfg.ServerID = uint32(*server_id) 61 | } 62 | 63 | if len(*es_addr) > 0 { 64 | cfg.ESAddr = *es_addr 65 | } 66 | 67 | if len(*data_dir) > 0 { 68 | cfg.DataDir = *data_dir 69 | } 70 | 71 | if len(*flavor) > 0 { 72 | cfg.Flavor = *flavor 73 | } 74 | 75 | if len(*execution) > 0 { 76 | cfg.DumpExec = *execution 77 | } 78 | 79 | r, err := river.NewRiver(cfg) 80 | if err != nil { 81 | println(errors.ErrorStack(err)) 82 | return 83 | } 84 | 85 | done := make(chan struct{}, 1) 86 | go func() { 87 | r.Run() 88 | done <- struct{}{} 89 | }() 90 | 91 | select { 92 | case n := <-sc: 93 | log.Infof("receive signal %v, closing", n) 94 | case <-r.Ctx().Done(): 95 | log.Infof("context is done with %v, closing", r.Ctx().Err()) 96 | } 97 | 98 | r.Close() 99 | <-done 100 | } 101 | -------------------------------------------------------------------------------- /elastic/client.go: -------------------------------------------------------------------------------- 1 | package elastic 2 | 3 | import ( 4 | "bytes" 5 | 
"crypto/tls" 6 | "encoding/json" 7 | "fmt" 8 | "io/ioutil" 9 | "net/http" 10 | "net/url" 11 | 12 | "github.com/juju/errors" 13 | ) 14 | 15 | // Client is the client to communicate with ES. 16 | // Although there are many Elasticsearch clients with Go, I still want to implement one by myself. 17 | // Because we only need some very simple usages. 18 | type Client struct { 19 | Protocol string 20 | Addr string 21 | User string 22 | Password string 23 | 24 | c *http.Client 25 | } 26 | 27 | // ClientConfig is the configuration for the client. 28 | type ClientConfig struct { 29 | HTTPS bool 30 | Addr string 31 | User string 32 | Password string 33 | } 34 | 35 | // NewClient creates the Cient with configuration. 36 | func NewClient(conf *ClientConfig) *Client { 37 | c := new(Client) 38 | 39 | c.Addr = conf.Addr 40 | c.User = conf.User 41 | c.Password = conf.Password 42 | 43 | if conf.HTTPS { 44 | c.Protocol = "https" 45 | tr := &http.Transport{ 46 | TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, 47 | } 48 | c.c = &http.Client{Transport: tr} 49 | } else { 50 | c.Protocol = "http" 51 | c.c = &http.Client{} 52 | } 53 | 54 | return c 55 | } 56 | 57 | // ResponseItem is the ES item in the response. 58 | type ResponseItem struct { 59 | ID string `json:"_id"` 60 | Index string `json:"_index"` 61 | Type string `json:"_type"` 62 | Version int `json:"_version"` 63 | Found bool `json:"found"` 64 | Source map[string]interface{} `json:"_source"` 65 | } 66 | 67 | // Response is the ES response 68 | type Response struct { 69 | Code int 70 | ResponseItem 71 | } 72 | 73 | // See http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/bulk.html 74 | const ( 75 | ActionCreate = "create" 76 | ActionUpdate = "update" 77 | ActionDelete = "delete" 78 | ActionIndex = "index" 79 | ) 80 | 81 | // BulkRequest is used to send multi request in batch. 82 | type BulkRequest struct { 83 | Action string 84 | Index string 85 | Type string 86 | ID string 87 | Parent string 88 | Pipeline string 89 | 90 | Data map[string]interface{} 91 | } 92 | 93 | func (r *BulkRequest) bulk(buf *bytes.Buffer) error { 94 | meta := make(map[string]map[string]string) 95 | metaData := make(map[string]string) 96 | if len(r.Index) > 0 { 97 | metaData["_index"] = r.Index 98 | } 99 | if len(r.Type) > 0 { 100 | metaData["_type"] = r.Type 101 | } 102 | 103 | if len(r.ID) > 0 { 104 | metaData["_id"] = r.ID 105 | } 106 | if len(r.Parent) > 0 { 107 | metaData["_parent"] = r.Parent 108 | } 109 | if len(r.Pipeline) > 0 { 110 | metaData["pipeline"] = r.Pipeline 111 | } 112 | 113 | meta[r.Action] = metaData 114 | 115 | data, err := json.Marshal(meta) 116 | if err != nil { 117 | return errors.Trace(err) 118 | } 119 | 120 | buf.Write(data) 121 | buf.WriteByte('\n') 122 | 123 | switch r.Action { 124 | case ActionDelete: 125 | //nothing to do 126 | case ActionUpdate: 127 | doc := map[string]interface{}{ 128 | "doc": r.Data, 129 | } 130 | data, err = json.Marshal(doc) 131 | if err != nil { 132 | return errors.Trace(err) 133 | } 134 | 135 | buf.Write(data) 136 | buf.WriteByte('\n') 137 | default: 138 | //for create and index 139 | data, err = json.Marshal(r.Data) 140 | if err != nil { 141 | return errors.Trace(err) 142 | } 143 | 144 | buf.Write(data) 145 | buf.WriteByte('\n') 146 | } 147 | 148 | return nil 149 | } 150 | 151 | // BulkResponse is the response for the bulk request. 
152 | type BulkResponse struct {
153 | 	Code   int
154 | 	Took   int  `json:"took"`
155 | 	Errors bool `json:"errors"`
156 | 
157 | 	Items []map[string]*BulkResponseItem `json:"items"`
158 | }
159 | 
160 | // BulkResponseItem is the item in the bulk response.
161 | type BulkResponseItem struct {
162 | 	Index   string          `json:"_index"`
163 | 	Type    string          `json:"_type"`
164 | 	ID      string          `json:"_id"`
165 | 	Version int             `json:"_version"`
166 | 	Status  int             `json:"status"`
167 | 	Error   json.RawMessage `json:"error"`
168 | 	Found   bool            `json:"found"`
169 | }
170 | 
171 | // MappingResponse is the response for the mapping request.
172 | type MappingResponse struct {
173 | 	Code    int
174 | 	Mapping Mapping
175 | }
176 | 
177 | // Mapping represents an ES mapping.
178 | type Mapping map[string]struct {
179 | 	Mappings map[string]struct {
180 | 		Properties map[string]struct {
181 | 			Type   string      `json:"type"`
182 | 			Fields interface{} `json:"fields"`
183 | 		} `json:"properties"`
184 | 	} `json:"mappings"`
185 | }
186 | 
187 | // DoRequest sends a request with body to ES.
188 | func (c *Client) DoRequest(method string, url string, body *bytes.Buffer) (*http.Response, error) {
189 | 	req, err := http.NewRequest(method, url, body)
190 | 	if err != nil {
191 | 		return nil, errors.Trace(err)
192 | 	}
193 | 	req.Header.Add("Content-Type", "application/json")
194 | 	if len(c.User) > 0 && len(c.Password) > 0 {
195 | 		req.SetBasicAuth(c.User, c.Password)
196 | 	}
197 | 	resp, err := c.c.Do(req)
198 | 
199 | 	return resp, err
200 | }
201 | 
202 | // Do sends the request with body to ES.
203 | func (c *Client) Do(method string, url string, body map[string]interface{}) (*Response, error) {
204 | 	bodyData, err := json.Marshal(body)
205 | 	if err != nil {
206 | 		return nil, errors.Trace(err)
207 | 	}
208 | 
209 | 	buf := bytes.NewBuffer(bodyData)
210 | 	if body == nil {
211 | 		buf = bytes.NewBuffer(nil)
212 | 	}
213 | 
214 | 	resp, err := c.DoRequest(method, url, buf)
215 | 	if err != nil {
216 | 		return nil, errors.Trace(err)
217 | 	}
218 | 
219 | 	defer resp.Body.Close()
220 | 
221 | 	ret := new(Response)
222 | 	ret.Code = resp.StatusCode
223 | 
224 | 	data, err := ioutil.ReadAll(resp.Body)
225 | 	if err != nil {
226 | 		return nil, errors.Trace(err)
227 | 	}
228 | 
229 | 	if len(data) > 0 {
230 | 		err = json.Unmarshal(data, &ret.ResponseItem)
231 | 	}
232 | 
233 | 	return ret, errors.Trace(err)
234 | }
235 | 
236 | // DoBulk sends the bulk request to ES.
237 | func (c *Client) DoBulk(url string, items []*BulkRequest) (*BulkResponse, error) {
238 | 	var buf bytes.Buffer
239 | 
240 | 	for _, item := range items {
241 | 		if err := item.bulk(&buf); err != nil {
242 | 			return nil, errors.Trace(err)
243 | 		}
244 | 	}
245 | 
246 | 	resp, err := c.DoRequest("POST", url, &buf)
247 | 	if err != nil {
248 | 		return nil, errors.Trace(err)
249 | 	}
250 | 
251 | 	defer resp.Body.Close()
252 | 
253 | 	ret := new(BulkResponse)
254 | 	ret.Code = resp.StatusCode
255 | 
256 | 	data, err := ioutil.ReadAll(resp.Body)
257 | 	if err != nil {
258 | 		return nil, errors.Trace(err)
259 | 	}
260 | 
261 | 	if len(data) > 0 {
262 | 		err = json.Unmarshal(data, &ret)
263 | 	}
264 | 
265 | 	return ret, errors.Trace(err)
266 | }
267 | 
268 | // CreateMapping creates an ES mapping.
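// It first sends a HEAD request to check whether the index exists, creates the
// index with a PUT if it gets a 404, and then POSTs the mapping to
// /<index>/<docType>/_mapping.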
269 | func (c *Client) CreateMapping(index string, docType string, mapping map[string]interface{}) error { 270 | reqURL := fmt.Sprintf("%s://%s/%s", c.Protocol, c.Addr, 271 | url.QueryEscape(index)) 272 | 273 | r, err := c.Do("HEAD", reqURL, nil) 274 | if err != nil { 275 | return errors.Trace(err) 276 | } 277 | 278 | // if index doesn't exist, will get 404 not found, create index first 279 | if r.Code == http.StatusNotFound { 280 | _, err = c.Do("PUT", reqURL, nil) 281 | 282 | if err != nil { 283 | return errors.Trace(err) 284 | } 285 | } else if r.Code != http.StatusOK { 286 | return errors.Errorf("Error: %s, code: %d", http.StatusText(r.Code), r.Code) 287 | } 288 | 289 | reqURL = fmt.Sprintf("%s://%s/%s/%s/_mapping", c.Protocol, c.Addr, 290 | url.QueryEscape(index), 291 | url.QueryEscape(docType)) 292 | 293 | _, err = c.Do("POST", reqURL, mapping) 294 | return errors.Trace(err) 295 | } 296 | 297 | // GetMapping gets the mapping. 298 | func (c *Client) GetMapping(index string, docType string) (*MappingResponse, error) { 299 | reqURL := fmt.Sprintf("%s://%s/%s/%s/_mapping", c.Protocol, c.Addr, 300 | url.QueryEscape(index), 301 | url.QueryEscape(docType)) 302 | buf := bytes.NewBuffer(nil) 303 | resp, err := c.DoRequest("GET", reqURL, buf) 304 | 305 | if err != nil { 306 | return nil, errors.Trace(err) 307 | } 308 | 309 | defer resp.Body.Close() 310 | 311 | data, err := ioutil.ReadAll(resp.Body) 312 | if err != nil { 313 | return nil, errors.Trace(err) 314 | } 315 | 316 | ret := new(MappingResponse) 317 | err = json.Unmarshal(data, &ret.Mapping) 318 | if err != nil { 319 | return nil, errors.Trace(err) 320 | } 321 | 322 | ret.Code = resp.StatusCode 323 | return ret, errors.Trace(err) 324 | } 325 | 326 | // DeleteIndex deletes the index. 327 | func (c *Client) DeleteIndex(index string) error { 328 | reqURL := fmt.Sprintf("%s://%s/%s", c.Protocol, c.Addr, 329 | url.QueryEscape(index)) 330 | 331 | r, err := c.Do("DELETE", reqURL, nil) 332 | if err != nil { 333 | return errors.Trace(err) 334 | } 335 | 336 | if r.Code == http.StatusOK || r.Code == http.StatusNotFound { 337 | return nil 338 | } 339 | 340 | return errors.Errorf("Error: %s, code: %d", http.StatusText(r.Code), r.Code) 341 | } 342 | 343 | // Get gets the item by id. 344 | func (c *Client) Get(index string, docType string, id string) (*Response, error) { 345 | reqURL := fmt.Sprintf("%s://%s/%s/%s/%s", c.Protocol, c.Addr, 346 | url.QueryEscape(index), 347 | url.QueryEscape(docType), 348 | url.QueryEscape(id)) 349 | 350 | return c.Do("GET", reqURL, nil) 351 | } 352 | 353 | // Update creates or updates the data 354 | func (c *Client) Update(index string, docType string, id string, data map[string]interface{}) error { 355 | reqURL := fmt.Sprintf("%s://%s/%s/%s/%s", c.Protocol, c.Addr, 356 | url.QueryEscape(index), 357 | url.QueryEscape(docType), 358 | url.QueryEscape(id)) 359 | 360 | r, err := c.Do("PUT", reqURL, data) 361 | if err != nil { 362 | return errors.Trace(err) 363 | } 364 | 365 | if r.Code == http.StatusOK || r.Code == http.StatusCreated { 366 | return nil 367 | } 368 | 369 | return errors.Errorf("Error: %s, code: %d", http.StatusText(r.Code), r.Code) 370 | } 371 | 372 | // Exists checks whether id exists or not. 
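// It sends a HEAD request to the document URL and treats a 200 status code as
// "the document exists"; any other status is reported as not existing.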
373 | func (c *Client) Exists(index string, docType string, id string) (bool, error) { 374 | reqURL := fmt.Sprintf("%s://%s/%s/%s/%s", c.Protocol, c.Addr, 375 | url.QueryEscape(index), 376 | url.QueryEscape(docType), 377 | url.QueryEscape(id)) 378 | 379 | r, err := c.Do("HEAD", reqURL, nil) 380 | if err != nil { 381 | return false, err 382 | } 383 | 384 | return r.Code == http.StatusOK, nil 385 | } 386 | 387 | // Delete deletes the item by id. 388 | func (c *Client) Delete(index string, docType string, id string) error { 389 | reqURL := fmt.Sprintf("%s://%s/%s/%s/%s", c.Protocol, c.Addr, 390 | url.QueryEscape(index), 391 | url.QueryEscape(docType), 392 | url.QueryEscape(id)) 393 | 394 | r, err := c.Do("DELETE", reqURL, nil) 395 | if err != nil { 396 | return errors.Trace(err) 397 | } 398 | 399 | if r.Code == http.StatusOK || r.Code == http.StatusNotFound { 400 | return nil 401 | } 402 | 403 | return errors.Errorf("Error: %s, code: %d", http.StatusText(r.Code), r.Code) 404 | } 405 | 406 | // Bulk sends the bulk request. 407 | // only support parent in 'Bulk' related apis 408 | func (c *Client) Bulk(items []*BulkRequest) (*BulkResponse, error) { 409 | reqURL := fmt.Sprintf("%s://%s/_bulk", c.Protocol, c.Addr) 410 | 411 | return c.DoBulk(reqURL, items) 412 | } 413 | 414 | // IndexBulk sends the bulk request for index. 415 | func (c *Client) IndexBulk(index string, items []*BulkRequest) (*BulkResponse, error) { 416 | reqURL := fmt.Sprintf("%s://%s/%s/_bulk", c.Protocol, c.Addr, 417 | url.QueryEscape(index)) 418 | 419 | return c.DoBulk(reqURL, items) 420 | } 421 | 422 | // IndexTypeBulk sends the bulk request for index and doc type. 423 | func (c *Client) IndexTypeBulk(index string, docType string, items []*BulkRequest) (*BulkResponse, error) { 424 | reqURL := fmt.Sprintf("%s://%s/%s/%s/_bulk", c.Protocol, c.Addr, 425 | url.QueryEscape(index), 426 | url.QueryEscape(docType)) 427 | 428 | return c.DoBulk(reqURL, items) 429 | } 430 | -------------------------------------------------------------------------------- /elastic/client_test.go: -------------------------------------------------------------------------------- 1 | package elastic 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "testing" 7 | 8 | . 
"github.com/pingcap/check" 9 | ) 10 | 11 | var host = flag.String("host", "127.0.0.1", "Elasticsearch host") 12 | var port = flag.Int("port", 9200, "Elasticsearch port") 13 | 14 | func Test(t *testing.T) { 15 | TestingT(t) 16 | } 17 | 18 | type elasticTestSuite struct { 19 | c *Client 20 | } 21 | 22 | var _ = Suite(&elasticTestSuite{}) 23 | 24 | func (s *elasticTestSuite) SetUpSuite(c *C) { 25 | cfg := new(ClientConfig) 26 | cfg.Addr = fmt.Sprintf("%s:%d", *host, *port) 27 | cfg.User = "" 28 | cfg.Password = "" 29 | s.c = NewClient(cfg) 30 | } 31 | 32 | func (s *elasticTestSuite) TearDownSuite(c *C) { 33 | 34 | } 35 | 36 | func makeTestData(arg1 string, arg2 string) map[string]interface{} { 37 | m := make(map[string]interface{}) 38 | m["name"] = arg1 39 | m["content"] = arg2 40 | 41 | return m 42 | } 43 | 44 | func (s *elasticTestSuite) TestSimple(c *C) { 45 | index := "dummy" 46 | docType := "blog" 47 | 48 | //key1 := "name" 49 | //key2 := "content" 50 | 51 | err := s.c.Update(index, docType, "1", makeTestData("abc", "hello world")) 52 | c.Assert(err, IsNil) 53 | 54 | exists, err := s.c.Exists(index, docType, "1") 55 | c.Assert(err, IsNil) 56 | c.Assert(exists, Equals, true) 57 | 58 | r, err := s.c.Get(index, docType, "1") 59 | c.Assert(err, IsNil) 60 | c.Assert(r.Code, Equals, 200) 61 | c.Assert(r.ID, Equals, "1") 62 | 63 | err = s.c.Delete(index, docType, "1") 64 | c.Assert(err, IsNil) 65 | 66 | exists, err = s.c.Exists(index, docType, "1") 67 | c.Assert(err, IsNil) 68 | c.Assert(exists, Equals, false) 69 | 70 | items := make([]*BulkRequest, 10) 71 | 72 | for i := 0; i < 10; i++ { 73 | id := fmt.Sprintf("%d", i) 74 | req := new(BulkRequest) 75 | req.Action = ActionIndex 76 | req.ID = id 77 | req.Data = makeTestData(fmt.Sprintf("abc %d", i), fmt.Sprintf("hello world %d", i)) 78 | items[i] = req 79 | } 80 | 81 | resp, err := s.c.IndexTypeBulk(index, docType, items) 82 | c.Assert(err, IsNil) 83 | c.Assert(resp.Code, Equals, 200) 84 | c.Assert(resp.Errors, Equals, false) 85 | 86 | for i := 0; i < 10; i++ { 87 | id := fmt.Sprintf("%d", i) 88 | req := new(BulkRequest) 89 | req.Action = ActionDelete 90 | req.ID = id 91 | items[i] = req 92 | } 93 | 94 | resp, err = s.c.IndexTypeBulk(index, docType, items) 95 | c.Assert(err, IsNil) 96 | c.Assert(resp.Code, Equals, 200) 97 | c.Assert(resp.Errors, Equals, false) 98 | } 99 | 100 | // this requires a parent setting in _mapping 101 | func (s *elasticTestSuite) TestParent(c *C) { 102 | index := "dummy" 103 | docType := "comment" 104 | ParentType := "parent" 105 | 106 | mapping := map[string]interface{}{ 107 | docType: map[string]interface{}{ 108 | "_parent": map[string]string{"type": ParentType}, 109 | }, 110 | } 111 | err := s.c.CreateMapping(index, docType, mapping) 112 | c.Assert(err, IsNil) 113 | 114 | items := make([]*BulkRequest, 10) 115 | 116 | for i := 0; i < 10; i++ { 117 | id := fmt.Sprintf("%d", i) 118 | req := new(BulkRequest) 119 | req.Action = ActionIndex 120 | req.ID = id 121 | req.Data = makeTestData(fmt.Sprintf("abc %d", i), fmt.Sprintf("hello world %d", i)) 122 | req.Parent = "1" 123 | items[i] = req 124 | } 125 | 126 | resp, err := s.c.IndexTypeBulk(index, docType, items) 127 | c.Assert(err, IsNil) 128 | c.Assert(resp.Code, Equals, 200) 129 | c.Assert(resp.Errors, Equals, false) 130 | 131 | for i := 0; i < 10; i++ { 132 | id := fmt.Sprintf("%d", i) 133 | req := new(BulkRequest) 134 | req.Index = index 135 | req.Type = docType 136 | req.Action = ActionDelete 137 | req.ID = id 138 | req.Parent = "1" 139 | items[i] = req 140 | } 141 | 
	resp, err = s.c.Bulk(items)
142 | 	c.Assert(err, IsNil)
143 | 	c.Assert(resp.Code, Equals, 200)
144 | 	c.Assert(resp.Errors, Equals, false)
145 | }

--------------------------------------------------------------------------------
/etc/river.toml:
--------------------------------------------------------------------------------
  1 | # MySQL address, user and password
  2 | # user must have replication privilege in MySQL.
  3 | my_addr = "127.0.0.1:3306"
  4 | my_user = "root"
  5 | my_pass = ""
  6 | my_charset = "utf8"
  7 | 
  8 | # Set true when Elasticsearch uses HTTPS
  9 | #es_https = false
 10 | # Elasticsearch address
 11 | es_addr = "127.0.0.1:9200"
 12 | # Elasticsearch user and password, maybe set by shield, nginx, or x-pack
 13 | es_user = ""
 14 | es_pass = ""
 15 | 
 16 | # Path to store data, like master.info. We need this to support resuming
 17 | # sync from a breakpoint; if not set or empty, the position will not be saved.
 18 | # TODO: support other storage, like etcd.
 19 | data_dir = "./var"
 20 | 
 21 | # Inner HTTP status address
 22 | stat_addr = "127.0.0.1:12800"
 23 | 
 24 | # pseudo server id like a slave
 25 | server_id = 1001
 26 | 
 27 | # mysql or mariadb
 28 | flavor = "mysql"
 29 | 
 30 | # mysqldump execution path
 31 | # if not set or empty, ignore mysqldump.
 32 | mysqldump = "mysqldump"
 33 | 
 34 | # if we have no privilege to use mysqldump with --master-data,
 35 | # we must skip it.
 36 | #skip_master_data = false
 37 | 
 38 | # minimal number of items to be inserted in one bulk
 39 | bulk_size = 128
 40 | 
 41 | # force flush the pending requests if we don't have enough items >= bulk_size
 42 | flush_bulk_time = "200ms"
 43 | 
 44 | # Ignore table without primary key
 45 | skip_no_pk_table = false
 46 | 
 47 | # MySQL data source
 48 | [[source]]
 49 | schema = "test"
 50 | 
 51 | # Only below tables will be synced into Elasticsearch.
 52 | # "t_[0-9]{4}" is a wildcard table format, you can use it if you have many sub tables, like table_0000 - table_1023
 53 | # I don't think it is necessary to sync all tables in a database.
 54 | tables = ["t", "t_[0-9]{4}", "tfield", "tfilter"]
 55 | 
 56 | # Below is for special rule mapping
 57 | 
 58 | # Very simple example
 59 | #
 60 | # desc t;
 61 | # +-------+--------------+------+-----+---------+-------+
 62 | # | Field | Type         | Null | Key | Default | Extra |
 63 | # +-------+--------------+------+-----+---------+-------+
 64 | # | id    | int(11)      | NO   | PRI | NULL    |       |
 65 | # | name  | varchar(256) | YES  |     | NULL    |       |
 66 | # +-------+--------------+------+-----+---------+-------+
 67 | #
 68 | # The table `t` will be synced to ES index `test` and type `t`.
 69 | [[rule]]
 70 | schema = "test"
 71 | table = "t"
 72 | index = "test"
 73 | type = "t"
 74 | 
 75 | # Wildcard table rule, the wildcard table must be in source tables
 76 | # All tables which match the wildcard format will be synced to ES index `test` and type `t`.
 77 | # In this example, all tables must have the same schema as the above table `t`;
 78 | [[rule]]
 79 | schema = "test"
 80 | table = "t_[0-9]{4}"
 81 | index = "test"
 82 | type = "t"
 83 | 
 84 | # Simple field rule
 85 | #
 86 | # desc tfield;
 87 | # +----------+--------------+------+-----+---------+-------+
 88 | # | Field    | Type         | Null | Key | Default | Extra |
 89 | # +----------+--------------+------+-----+---------+-------+
 90 | # | id       | int(11)      | NO   | PRI | NULL    |       |
 91 | # | tags     | varchar(256) | YES  |     | NULL    |       |
 92 | # | keywords | varchar(256) | YES  |     | NULL    |       |
 93 | # +----------+--------------+------+-----+---------+-------+
 94 | #
 95 | [[rule]]
 96 | schema = "test"
 97 | table = "tfield"
 98 | index = "test"
 99 | type = "tfield"
100 | 
101 | [rule.field]
102 | # Map column `id` to ES field `es_id`
103 | id="es_id"
104 | # Map column `tags` to ES field `es_tags` with array type
105 | tags="es_tags,list"
106 | # Map column `keywords` to ES with array type
107 | keywords=",list"
108 | 
109 | # Filter rule
110 | #
111 | # desc tfilter;
112 | # +-------+--------------+------+-----+---------+-------+
113 | # | Field | Type         | Null | Key | Default | Extra |
114 | # +-------+--------------+------+-----+---------+-------+
115 | # | id    | int(11)      | NO   | PRI | NULL    |       |
116 | # | c1    | int(11)      | YES  |     | 0       |       |
117 | # | c2    | int(11)      | YES  |     | 0       |       |
118 | # | name  | varchar(256) | YES  |     | NULL    |       |
119 | # +-------+--------------+------+-----+---------+-------+
120 | #
121 | [[rule]]
122 | schema = "test"
123 | table = "tfilter"
124 | index = "test"
125 | type = "tfilter"
126 | 
127 | # Only sync following columns
128 | filter = ["id", "name"]
129 | 
130 | # id rule
131 | #
132 | # desc tid_[0-9]{4};
133 | # +----------+--------------+------+-----+---------+-------+
134 | # | Field    | Type         | Null | Key | Default | Extra |
135 | # +----------+--------------+------+-----+---------+-------+
136 | # | id       | int(11)      | NO   | PRI | NULL    |       |
137 | # | tag      | varchar(256) | YES  |     | NULL    |       |
138 | # | desc     | varchar(256) | YES  |     | NULL    |       |
139 | # +----------+--------------+------+-----+---------+-------+
140 | #
141 | [[rule]]
142 | schema = "test"
143 | table = "tid_[0-9]{4}"
144 | index = "test"
145 | type = "t"
146 | # The ES doc's id will be `id`:`tag`
147 | # It is useful for merging multiple tables into one type while these tables have the same PK
148 | id = ["id", "tag"]

--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
 1 | module github.com/siddontang/go-mysql-elasticsearch
 2 | 
 3 | require (
 4 | 	github.com/BurntSushi/toml v0.3.1
 5 | 	github.com/juju/errors v0.0.0-20190207033735-e65537c515d7
 6 | 	github.com/siddontang/go v0.0.0-20180604090527-bdc77568d726
 7 | 	github.com/siddontang/go-log v0.0.0-20180807004314-8d05993dda07
 8 | 	github.com/siddontang/go-mysql v0.0.0-20190303113352-670f74e8daf5
 9 | )

--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
 1 | github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
 2 | github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
 3 | github.com/juju/errors v0.0.0-20190207033735-e65537c515d7 h1:dMIPRDg6gi7CUp0Kj2+HxqJ5kTr1iAdzsXYIrLCNSmU=
 4 | github.com/juju/errors v0.0.0-20190207033735-e65537c515d7/go.mod h1:W54LbzXuIE0boCoNJfwqpmkKJ1O4TCTZMetAt6jGk7Q=
 5 | github.com/pingcap/errors v0.11.0
h1:DCJQB8jrHbQ1VVlMFIrbj2ApScNNotVmkSNplu2yUt4= 6 | github.com/pingcap/errors v0.11.0/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= 7 | github.com/satori/go.uuid v1.2.0 h1:0uYX9dsZ2yD7q2RtLRtPSdGDWzjeM3TbMJP9utgA0ww= 8 | github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= 9 | github.com/shopspring/decimal v0.0.0-20180709203117-cd690d0c9e24 h1:pntxY8Ary0t43dCZ5dqY4YTJCObLY1kIXl0uzMv+7DE= 10 | github.com/shopspring/decimal v0.0.0-20180709203117-cd690d0c9e24/go.mod h1:M+9NzErvs504Cn4c5DxATwIqPbtswREoFCre64PpcG4= 11 | github.com/siddontang/go v0.0.0-20180604090527-bdc77568d726 h1:xT+JlYxNGqyT+XcU8iUrN18JYed2TvG9yN5ULG2jATM= 12 | github.com/siddontang/go v0.0.0-20180604090527-bdc77568d726/go.mod h1:3yhqj7WBBfRhbBlzyOC3gUxftwsU0u8gqevxwIHQpMw= 13 | github.com/siddontang/go-log v0.0.0-20180807004314-8d05993dda07 h1:oI+RNwuC9jF2g2lP0u0cVEEZrc/AYBCuFdvwrLWM/6Q= 14 | github.com/siddontang/go-log v0.0.0-20180807004314-8d05993dda07/go.mod h1:yFdBgwXP24JziuRl2NMUahT7nGLNOKi1SIiFxMttVD4= 15 | github.com/siddontang/go-mysql v0.0.0-20190123011128-88e9cd7f6643 h1:yzg8+Cip1iDhy6GGS1zKflqOybgRc4xp82eYwQrP+DU= 16 | github.com/siddontang/go-mysql v0.0.0-20190123011128-88e9cd7f6643/go.mod h1:/b8ZcWjAShCcHp2dWpjb1vTlNyiG03UeHEQr2jteOpI= 17 | github.com/siddontang/go-mysql v0.0.0-20190303113352-670f74e8daf5 h1:5Nr7spTeY+ziXzqk/9p+GLnvH4rIjp9BX+aRaYDbR44= 18 | github.com/siddontang/go-mysql v0.0.0-20190303113352-670f74e8daf5/go.mod h1:/b8ZcWjAShCcHp2dWpjb1vTlNyiG03UeHEQr2jteOpI= 19 | -------------------------------------------------------------------------------- /river/config.go: -------------------------------------------------------------------------------- 1 | package river 2 | 3 | import ( 4 | "io/ioutil" 5 | "time" 6 | 7 | "github.com/BurntSushi/toml" 8 | "github.com/juju/errors" 9 | ) 10 | 11 | // SourceConfig is the configs for source 12 | type SourceConfig struct { 13 | Schema string `toml:"schema"` 14 | Tables []string `toml:"tables"` 15 | } 16 | 17 | // Config is the configuration 18 | type Config struct { 19 | MyAddr string `toml:"my_addr"` 20 | MyUser string `toml:"my_user"` 21 | MyPassword string `toml:"my_pass"` 22 | MyCharset string `toml:"my_charset"` 23 | 24 | ESHttps bool `toml:"es_https"` 25 | ESAddr string `toml:"es_addr"` 26 | ESUser string `toml:"es_user"` 27 | ESPassword string `toml:"es_pass"` 28 | 29 | StatAddr string `toml:"stat_addr"` 30 | 31 | ServerID uint32 `toml:"server_id"` 32 | Flavor string `toml:"flavor"` 33 | DataDir string `toml:"data_dir"` 34 | 35 | DumpExec string `toml:"mysqldump"` 36 | SkipMasterData bool `toml:"skip_master_data"` 37 | 38 | Sources []SourceConfig `toml:"source"` 39 | 40 | Rules []*Rule `toml:"rule"` 41 | 42 | BulkSize int `toml:"bulk_size"` 43 | 44 | FlushBulkTime TomlDuration `toml:"flush_bulk_time"` 45 | 46 | SkipNoPkTable bool `toml:"skip_no_pk_table"` 47 | } 48 | 49 | // NewConfigWithFile creates a Config from file. 50 | func NewConfigWithFile(name string) (*Config, error) { 51 | data, err := ioutil.ReadFile(name) 52 | if err != nil { 53 | return nil, errors.Trace(err) 54 | } 55 | 56 | return NewConfig(string(data)) 57 | } 58 | 59 | // NewConfig creates a Config from data. 60 | func NewConfig(data string) (*Config, error) { 61 | var c Config 62 | 63 | _, err := toml.Decode(data, &c) 64 | if err != nil { 65 | return nil, errors.Trace(err) 66 | } 67 | 68 | return &c, nil 69 | } 70 | 71 | // TomlDuration supports time codec for TOML format. 
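// For example, a config value such as flush_bulk_time = "200ms" in river.toml
// is decoded into a time.Duration by UnmarshalText below via time.ParseDuration.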
 72 | type TomlDuration struct {
 73 | 	time.Duration
 74 | }
 75 | 
 76 | // UnmarshalText implements TOML UnmarshalText
 77 | func (d *TomlDuration) UnmarshalText(text []byte) error {
 78 | 	var err error
 79 | 	d.Duration, err = time.ParseDuration(string(text))
 80 | 	return err
 81 | }

--------------------------------------------------------------------------------
/river/master.go:
--------------------------------------------------------------------------------
  1 | package river
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"os"
  6 | 	"path"
  7 | 	"sync"
  8 | 	"time"
  9 | 
 10 | 	"github.com/BurntSushi/toml"
 11 | 	"github.com/juju/errors"
 12 | 	"github.com/siddontang/go-log/log"
 13 | 	"github.com/siddontang/go-mysql/mysql"
 14 | 	"github.com/siddontang/go/ioutil2"
 15 | )
 16 | 
 17 | type masterInfo struct {
 18 | 	sync.RWMutex
 19 | 
 20 | 	Name string `toml:"bin_name"`
 21 | 	Pos  uint32 `toml:"bin_pos"`
 22 | 
 23 | 	filePath     string
 24 | 	lastSaveTime time.Time
 25 | }
 26 | 
 27 | func loadMasterInfo(dataDir string) (*masterInfo, error) {
 28 | 	var m masterInfo
 29 | 
 30 | 	if len(dataDir) == 0 {
 31 | 		return &m, nil
 32 | 	}
 33 | 
 34 | 	m.filePath = path.Join(dataDir, "master.info")
 35 | 	m.lastSaveTime = time.Now()
 36 | 
 37 | 	if err := os.MkdirAll(dataDir, 0755); err != nil {
 38 | 		return nil, errors.Trace(err)
 39 | 	}
 40 | 
 41 | 	f, err := os.Open(m.filePath)
 42 | 	if err != nil && !os.IsNotExist(errors.Cause(err)) {
 43 | 		return nil, errors.Trace(err)
 44 | 	} else if os.IsNotExist(errors.Cause(err)) {
 45 | 		return &m, nil
 46 | 	}
 47 | 	defer f.Close()
 48 | 
 49 | 	_, err = toml.DecodeReader(f, &m)
 50 | 	return &m, errors.Trace(err)
 51 | }
 52 | 
 53 | func (m *masterInfo) Save(pos mysql.Position) error {
 54 | 	log.Infof("save position %s", pos)
 55 | 
 56 | 	m.Lock()
 57 | 	defer m.Unlock()
 58 | 
 59 | 	m.Name = pos.Name
 60 | 	m.Pos = pos.Pos
 61 | 
 62 | 	if len(m.filePath) == 0 {
 63 | 		return nil
 64 | 	}
 65 | 
 66 | 	n := time.Now()
 67 | 	if n.Sub(m.lastSaveTime) < time.Second {
 68 | 		return nil
 69 | 	}
 70 | 
 71 | 	m.lastSaveTime = n
 72 | 	var buf bytes.Buffer
 73 | 	e := toml.NewEncoder(&buf)
 74 | 
 75 | 	e.Encode(m)
 76 | 
 77 | 	var err error
 78 | 	if err = ioutil2.WriteFileAtomic(m.filePath, buf.Bytes(), 0644); err != nil {
 79 | 		log.Errorf("canal save master info to file %s err %v", m.filePath, err)
 80 | 	}
 81 | 
 82 | 	return errors.Trace(err)
 83 | }
 84 | 
 85 | func (m *masterInfo) Position() mysql.Position {
 86 | 	m.RLock()
 87 | 	defer m.RUnlock()
 88 | 
 89 | 	return mysql.Position{
 90 | 		Name: m.Name,
 91 | 		Pos:  m.Pos,
 92 | 	}
 93 | }
 94 | 
 95 | func (m *masterInfo) Close() error {
 96 | 	pos := m.Position()
 97 | 
 98 | 	return m.Save(pos)
 99 | }

--------------------------------------------------------------------------------
/river/river.go:
--------------------------------------------------------------------------------
  1 | package river
  2 | 
  3 | import (
  4 | 	"context"
  5 | 	"fmt"
  6 | 	"regexp"
  7 | 	"strings"
  8 | 	"sync"
  9 | 
 10 | 	"github.com/juju/errors"
 11 | 	"github.com/siddontang/go-log/log"
 12 | 	"github.com/siddontang/go-mysql-elasticsearch/elastic"
 13 | 	"github.com/siddontang/go-mysql/canal"
 14 | )
 15 | 
 16 | // ErrRuleNotExist is the error if the rule is not defined.
 17 | var ErrRuleNotExist = errors.New("rule does not exist")
 18 | 
 19 | // River is a pluggable service within Elasticsearch pulling data then indexing it into Elasticsearch.
 20 | // We use this definition here too, although it may not run within Elasticsearch.
 21 | // Maybe later I can implement an actual river in Elasticsearch, but I must learn java.
:-) 22 | type River struct { 23 | c *Config 24 | 25 | canal *canal.Canal 26 | 27 | rules map[string]*Rule 28 | 29 | ctx context.Context 30 | cancel context.CancelFunc 31 | 32 | wg sync.WaitGroup 33 | 34 | es *elastic.Client 35 | 36 | st *stat 37 | 38 | master *masterInfo 39 | 40 | syncCh chan interface{} 41 | } 42 | 43 | // NewRiver creates the River from config 44 | func NewRiver(c *Config) (*River, error) { 45 | r := new(River) 46 | 47 | r.c = c 48 | r.rules = make(map[string]*Rule) 49 | r.syncCh = make(chan interface{}, 4096) 50 | r.ctx, r.cancel = context.WithCancel(context.Background()) 51 | 52 | var err error 53 | if r.master, err = loadMasterInfo(c.DataDir); err != nil { 54 | return nil, errors.Trace(err) 55 | } 56 | 57 | if err = r.newCanal(); err != nil { 58 | return nil, errors.Trace(err) 59 | } 60 | 61 | if err = r.prepareRule(); err != nil { 62 | return nil, errors.Trace(err) 63 | } 64 | 65 | if err = r.prepareCanal(); err != nil { 66 | return nil, errors.Trace(err) 67 | } 68 | 69 | // We must use binlog full row image 70 | if err = r.canal.CheckBinlogRowImage("FULL"); err != nil { 71 | return nil, errors.Trace(err) 72 | } 73 | 74 | cfg := new(elastic.ClientConfig) 75 | cfg.Addr = r.c.ESAddr 76 | cfg.User = r.c.ESUser 77 | cfg.Password = r.c.ESPassword 78 | cfg.HTTPS = r.c.ESHttps 79 | r.es = elastic.NewClient(cfg) 80 | 81 | r.st = &stat{r: r} 82 | go r.st.Run(r.c.StatAddr) 83 | 84 | return r, nil 85 | } 86 | 87 | func (r *River) newCanal() error { 88 | cfg := canal.NewDefaultConfig() 89 | cfg.Addr = r.c.MyAddr 90 | cfg.User = r.c.MyUser 91 | cfg.Password = r.c.MyPassword 92 | cfg.Charset = r.c.MyCharset 93 | cfg.Flavor = r.c.Flavor 94 | 95 | cfg.ServerID = r.c.ServerID 96 | cfg.Dump.ExecutionPath = r.c.DumpExec 97 | cfg.Dump.DiscardErr = false 98 | cfg.Dump.SkipMasterData = r.c.SkipMasterData 99 | 100 | for _, s := range r.c.Sources { 101 | for _, t := range s.Tables { 102 | cfg.IncludeTableRegex = append(cfg.IncludeTableRegex, s.Schema+"\\."+t) 103 | } 104 | } 105 | 106 | var err error 107 | r.canal, err = canal.NewCanal(cfg) 108 | return errors.Trace(err) 109 | } 110 | 111 | func (r *River) prepareCanal() error { 112 | var db string 113 | dbs := map[string]struct{}{} 114 | tables := make([]string, 0, len(r.rules)) 115 | for _, rule := range r.rules { 116 | db = rule.Schema 117 | dbs[rule.Schema] = struct{}{} 118 | tables = append(tables, rule.Table) 119 | } 120 | 121 | if len(dbs) == 1 { 122 | // one db, we can shrink using table 123 | r.canal.AddDumpTables(db, tables...) 124 | } else { 125 | // many dbs, can only assign databases to dump 126 | keys := make([]string, 0, len(dbs)) 127 | for key := range dbs { 128 | keys = append(keys, key) 129 | } 130 | 131 | r.canal.AddDumpDatabases(keys...) 
132 | } 133 | 134 | r.canal.SetEventHandler(&eventHandler{r}) 135 | 136 | return nil 137 | } 138 | 139 | func (r *River) newRule(schema, table string) error { 140 | key := ruleKey(schema, table) 141 | 142 | if _, ok := r.rules[key]; ok { 143 | return errors.Errorf("duplicate source %s, %s defined in config", schema, table) 144 | } 145 | 146 | r.rules[key] = newDefaultRule(schema, table) 147 | return nil 148 | } 149 | 150 | func (r *River) updateRule(schema, table string) error { 151 | rule, ok := r.rules[ruleKey(schema, table)] 152 | if !ok { 153 | return ErrRuleNotExist 154 | } 155 | 156 | tableInfo, err := r.canal.GetTable(schema, table) 157 | if err != nil { 158 | return errors.Trace(err) 159 | } 160 | 161 | rule.TableInfo = tableInfo 162 | 163 | return nil 164 | } 165 | 166 | func (r *River) parseSource() (map[string][]string, error) { 167 | wildTables := make(map[string][]string, len(r.c.Sources)) 168 | 169 | // first, check sources 170 | for _, s := range r.c.Sources { 171 | if !isValidTables(s.Tables) { 172 | return nil, errors.Errorf("wildcard * is not allowed for multiple tables") 173 | } 174 | 175 | for _, table := range s.Tables { 176 | if len(s.Schema) == 0 { 177 | return nil, errors.Errorf("empty schema not allowed for source") 178 | } 179 | 180 | if regexp.QuoteMeta(table) != table { 181 | if _, ok := wildTables[ruleKey(s.Schema, table)]; ok { 182 | return nil, errors.Errorf("duplicate wildcard table defined for %s.%s", s.Schema, table) 183 | } 184 | 185 | tables := []string{} 186 | 187 | sql := fmt.Sprintf(`SELECT table_name FROM information_schema.tables WHERE 188 | table_name RLIKE "%s" AND table_schema = "%s";`, buildTable(table), s.Schema) 189 | 190 | res, err := r.canal.Execute(sql) 191 | if err != nil { 192 | return nil, errors.Trace(err) 193 | } 194 | 195 | for i := 0; i < res.Resultset.RowNumber(); i++ { 196 | f, _ := res.GetString(i, 0) 197 | err := r.newRule(s.Schema, f) 198 | if err != nil { 199 | return nil, errors.Trace(err) 200 | } 201 | 202 | tables = append(tables, f) 203 | } 204 | 205 | wildTables[ruleKey(s.Schema, table)] = tables 206 | } else { 207 | err := r.newRule(s.Schema, table) 208 | if err != nil { 209 | return nil, errors.Trace(err) 210 | } 211 | } 212 | } 213 | } 214 | 215 | if len(r.rules) == 0 { 216 | return nil, errors.Errorf("no source data defined") 217 | } 218 | 219 | return wildTables, nil 220 | } 221 | 222 | func (r *River) prepareRule() error { 223 | wildtables, err := r.parseSource() 224 | if err != nil { 225 | return errors.Trace(err) 226 | } 227 | 228 | if r.c.Rules != nil { 229 | // then, set custom mapping rule 230 | for _, rule := range r.c.Rules { 231 | if len(rule.Schema) == 0 { 232 | return errors.Errorf("empty schema not allowed for rule") 233 | } 234 | 235 | if regexp.QuoteMeta(rule.Table) != rule.Table { 236 | //wildcard table 237 | tables, ok := wildtables[ruleKey(rule.Schema, rule.Table)] 238 | if !ok { 239 | return errors.Errorf("wildcard table for %s.%s is not defined in source", rule.Schema, rule.Table) 240 | } 241 | 242 | if len(rule.Index) == 0 { 243 | return errors.Errorf("wildcard table rule %s.%s must have a index, can not empty", rule.Schema, rule.Table) 244 | } 245 | 246 | rule.prepare() 247 | 248 | for _, table := range tables { 249 | rr := r.rules[ruleKey(rule.Schema, table)] 250 | rr.Index = rule.Index 251 | rr.Type = rule.Type 252 | rr.Parent = rule.Parent 253 | rr.ID = rule.ID 254 | rr.FieldMapping = rule.FieldMapping 255 | } 256 | } else { 257 | key := ruleKey(rule.Schema, rule.Table) 258 | if _, ok := 
r.rules[key]; !ok { 259 | return errors.Errorf("rule %s, %s not defined in source", rule.Schema, rule.Table) 260 | } 261 | rule.prepare() 262 | r.rules[key] = rule 263 | } 264 | } 265 | } 266 | 267 | rules := make(map[string]*Rule) 268 | for key, rule := range r.rules { 269 | if rule.TableInfo, err = r.canal.GetTable(rule.Schema, rule.Table); err != nil { 270 | return errors.Trace(err) 271 | } 272 | 273 | if len(rule.TableInfo.PKColumns) == 0 { 274 | if !r.c.SkipNoPkTable { 275 | return errors.Errorf("%s.%s must have a PK for a column", rule.Schema, rule.Table) 276 | } 277 | 278 | log.Errorf("ignored table without a primary key: %s\n", rule.TableInfo.Name) 279 | } else { 280 | rules[key] = rule 281 | } 282 | } 283 | r.rules = rules 284 | 285 | return nil 286 | } 287 | 288 | func ruleKey(schema string, table string) string { 289 | return strings.ToLower(fmt.Sprintf("%s:%s", schema, table)) 290 | } 291 | 292 | // Run syncs the data from MySQL and inserts to ES. 293 | func (r *River) Run() error { 294 | r.wg.Add(1) 295 | go r.syncLoop() 296 | 297 | pos := r.master.Position() 298 | if err := r.canal.RunFrom(pos); err != nil { 299 | log.Errorf("start canal err %v", err) 300 | return errors.Trace(err) 301 | } 302 | 303 | return nil 304 | } 305 | 306 | // Ctx returns the internal context for outside use. 307 | func (r *River) Ctx() context.Context { 308 | return r.ctx 309 | } 310 | 311 | // Close closes the River 312 | func (r *River) Close() { 313 | log.Infof("closing river") 314 | 315 | r.cancel() 316 | 317 | r.canal.Close() 318 | 319 | r.master.Close() 320 | 321 | r.wg.Wait() 322 | } 323 | 324 | func isValidTables(tables []string) bool { 325 | if len(tables) > 1 { 326 | for _, table := range tables { 327 | if table == "*" { 328 | return false 329 | } 330 | } 331 | } 332 | return true 333 | } 334 | 335 | func buildTable(table string) string { 336 | if table == "*" { 337 | return "." + table 338 | } 339 | return table 340 | } 341 | -------------------------------------------------------------------------------- /river/river_extra_test.go: -------------------------------------------------------------------------------- 1 | package river 2 | 3 | import ( 4 | "fmt" 5 | "net/http" 6 | "net/url" 7 | "os" 8 | "time" 9 | 10 | . 
"github.com/pingcap/check" 11 | ) 12 | 13 | func (s *riverTestSuite) setupExtra(c *C) (r *River) { 14 | var err error 15 | 16 | schema := ` 17 | CREATE TABLE IF NOT EXISTS %s ( 18 | id INT, 19 | title VARCHAR(256), 20 | pid INT, 21 | PRIMARY KEY(id)) ENGINE=INNODB; 22 | ` 23 | 24 | s.testExecute(c, "DROP TABLE IF EXISTS test_river_extra") 25 | s.testExecute(c, fmt.Sprintf(schema, "test_river_extra")) 26 | 27 | schema = ` 28 | CREATE TABLE IF NOT EXISTS %s ( 29 | id INT, 30 | PRIMARY KEY(id)) ENGINE=INNODB; 31 | ` 32 | 33 | s.testExecute(c, "DROP TABLE IF EXISTS test_river_parent") 34 | s.testExecute(c, fmt.Sprintf(schema, "test_river_parent")) 35 | 36 | cfg := new(Config) 37 | cfg.MyAddr = *myAddr 38 | cfg.MyUser = "root" 39 | cfg.MyPassword = "" 40 | cfg.ESAddr = *esAddr 41 | 42 | cfg.ServerID = 1001 43 | cfg.Flavor = "mysql" 44 | 45 | cfg.DataDir = "/tmp/test_river_extra" 46 | cfg.DumpExec = "mysqldump" 47 | 48 | cfg.StatAddr = "127.0.0.1:12800" 49 | cfg.BulkSize = 1 50 | cfg.FlushBulkTime = TomlDuration{3 * time.Millisecond} 51 | 52 | os.RemoveAll(cfg.DataDir) 53 | 54 | cfg.Sources = []SourceConfig{SourceConfig{Schema: "test", Tables: []string{"test_river_extra", "test_river_parent"}}} 55 | 56 | cfg.Rules = []*Rule{ 57 | &Rule{Schema: "test", 58 | Table: "test_river_parent", 59 | Index: "river", 60 | Type: "river_extra_parent"}, 61 | &Rule{Schema: "test", 62 | Table: "test_river_extra", 63 | Index: "river", 64 | Type: "river_extra", 65 | Parent: "pid"}} 66 | 67 | r, err = NewRiver(cfg) 68 | c.Assert(err, IsNil) 69 | 70 | mapping := map[string]interface{}{ 71 | "river_extra": map[string]interface{}{ 72 | "_parent": map[string]string{"type": "river_extra_parent"}, 73 | }, 74 | } 75 | 76 | r.es.CreateMapping("river", "river_extra", mapping) 77 | 78 | return r 79 | } 80 | 81 | func (s *riverTestSuite) testPrepareExtraData(c *C) { 82 | s.testExecute(c, "INSERT INTO test_river_parent (id) VALUES (?)", 1) 83 | s.testExecute(c, "INSERT INTO test_river_extra (id, title, pid) VALUES (?, ?, ?)", 1, "first", 1) 84 | s.testExecute(c, "INSERT INTO test_river_extra (id, title, pid) VALUES (?, ?, ?)", 2, "second", 1) 85 | s.testExecute(c, "INSERT INTO test_river_extra (id, title, pid) VALUES (?, ?, ?)", 3, "third", 1) 86 | s.testExecute(c, "INSERT INTO test_river_extra (id, title, pid) VALUES (?, ?, ?)", 4, "fourth", 1) 87 | } 88 | 89 | func (s *riverTestSuite) testElasticExtraExists(c *C, id string, parent string, exist bool) { 90 | index := "river" 91 | docType := "river_extra" 92 | 93 | reqURL := fmt.Sprintf("http://%s/%s/%s/%s?parent=%s", s.r.es.Addr, 94 | url.QueryEscape(index), 95 | url.QueryEscape(docType), 96 | url.QueryEscape(id), 97 | url.QueryEscape(parent)) 98 | 99 | r, err := s.r.es.Do("HEAD", reqURL, nil) 100 | c.Assert(err, IsNil) 101 | 102 | if exist { 103 | c.Assert(r.Code, Equals, http.StatusOK) 104 | } else { 105 | c.Assert(r.Code, Equals, http.StatusNotFound) 106 | } 107 | } 108 | 109 | func (s *riverTestSuite) TestRiverWithParent(c *C) { 110 | river := s.setupExtra(c) 111 | 112 | defer river.Close() 113 | 114 | s.testPrepareExtraData(c) 115 | 116 | go func() { river.Run() }() 117 | 118 | testWaitSyncDone(c, river) 119 | 120 | s.testElasticExtraExists(c, "1", "1", true) 121 | 122 | s.testExecute(c, "DELETE FROM test_river_extra WHERE id = ?", 1) 123 | testWaitSyncDone(c, river) 124 | 125 | s.testElasticExtraExists(c, "1", "1", false) 126 | } 127 | -------------------------------------------------------------------------------- /river/river_test.go: 
-------------------------------------------------------------------------------- 1 | package river 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "os" 7 | "testing" 8 | "time" 9 | 10 | . "github.com/pingcap/check" 11 | "github.com/siddontang/go-mysql-elasticsearch/elastic" 12 | "github.com/siddontang/go-mysql/client" 13 | "github.com/siddontang/go-mysql/mysql" 14 | ) 15 | 16 | var myAddr = flag.String("my_addr", "127.0.0.1:3306", "MySQL addr") 17 | var esAddr = flag.String("es_addr", "127.0.0.1:9200", "Elasticsearch addr") 18 | var dateTimeStr = time.Now().Format(mysql.TimeFormat) 19 | var dateStr = time.Now().Format(mysqlDateFormat) 20 | 21 | func Test(t *testing.T) { 22 | TestingT(t) 23 | } 24 | 25 | type riverTestSuite struct { 26 | c *client.Conn 27 | r *River 28 | } 29 | 30 | var _ = Suite(&riverTestSuite{}) 31 | 32 | func (s *riverTestSuite) SetUpSuite(c *C) { 33 | var err error 34 | s.c, err = client.Connect(*myAddr, "root", "", "test") 35 | c.Assert(err, IsNil) 36 | 37 | s.testExecute(c, "SET SESSION binlog_format = 'ROW'") 38 | 39 | schema := ` 40 | CREATE TABLE IF NOT EXISTS %s ( 41 | id INT, 42 | title VARCHAR(256), 43 | content VARCHAR(256), 44 | mylist VARCHAR(256), 45 | mydate INT(10), 46 | tenum ENUM("e1", "e2", "e3"), 47 | tset SET("a", "b", "c"), 48 | tbit BIT(1) default 1, 49 | tdatetime DATETIME DEFAULT NULL, 50 | tdate DATE DEFAULT NULL, 51 | ip INT UNSIGNED DEFAULT 0, 52 | PRIMARY KEY(id)) ENGINE=INNODB; 53 | ` 54 | 55 | schemaJSON := ` 56 | CREATE TABLE IF NOT EXISTS %s ( 57 | id INT, 58 | info JSON, 59 | PRIMARY KEY(id)) ENGINE=INNODB; 60 | ` 61 | 62 | s.testExecute(c, "DROP TABLE IF EXISTS test_river") 63 | s.testExecute(c, "DROP TABLE IF EXISTS test_for_id") 64 | s.testExecute(c, "DROP TABLE IF EXISTS test_for_json") 65 | s.testExecute(c, fmt.Sprintf(schema, "test_river")) 66 | s.testExecute(c, fmt.Sprintf(schema, "test_for_id")) 67 | s.testExecute(c, fmt.Sprintf(schemaJSON, "test_for_json")) 68 | 69 | for i := 0; i < 10; i++ { 70 | table := fmt.Sprintf("test_river_%04d", i) 71 | s.testExecute(c, fmt.Sprintf("DROP TABLE IF EXISTS %s", table)) 72 | s.testExecute(c, fmt.Sprintf(schema, table)) 73 | } 74 | 75 | cfg := new(Config) 76 | cfg.MyAddr = *myAddr 77 | cfg.MyUser = "root" 78 | cfg.MyPassword = "" 79 | cfg.MyCharset = "utf8" 80 | cfg.ESAddr = *esAddr 81 | 82 | cfg.ServerID = 1001 83 | cfg.Flavor = "mysql" 84 | 85 | cfg.DataDir = "/tmp/test_river" 86 | cfg.DumpExec = "mysqldump" 87 | 88 | cfg.StatAddr = "127.0.0.1:12800" 89 | cfg.BulkSize = 1 90 | cfg.FlushBulkTime = TomlDuration{3 * time.Millisecond} 91 | 92 | os.RemoveAll(cfg.DataDir) 93 | 94 | cfg.Sources = []SourceConfig{SourceConfig{Schema: "test", Tables: []string{"test_river", "test_river_[0-9]{4}", "test_for_id", "test_for_json"}}} 95 | 96 | cfg.Rules = []*Rule{ 97 | &Rule{Schema: "test", 98 | Table: "test_river", 99 | Index: "river", 100 | Type: "river", 101 | FieldMapping: map[string]string{"title": "es_title", "mylist": "es_mylist,list", "mydate": ",date"}, 102 | }, 103 | 104 | &Rule{Schema: "test", 105 | Table: "test_for_id", 106 | Index: "river", 107 | Type: "river", 108 | ID: []string{"id", "title"}, 109 | FieldMapping: map[string]string{"title": "es_title", "mylist": "es_mylist,list", "mydate": ",date"}, 110 | }, 111 | 112 | &Rule{Schema: "test", 113 | Table: "test_river_[0-9]{4}", 114 | Index: "river", 115 | Type: "river", 116 | FieldMapping: map[string]string{"title": "es_title", "mylist": "es_mylist,list", "mydate": ",date"}, 117 | }, 118 | 119 | &Rule{Schema: "test", 120 | Table: "test_for_json", 
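// no field mapping here: the JSON column keeps its name and is unmarshalled into an object before indexing (see makeReqColumnData in sync.go)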
121 | Index: "river", 122 | Type: "river", 123 | }, 124 | } 125 | 126 | s.r, err = NewRiver(cfg) 127 | c.Assert(err, IsNil) 128 | 129 | err = s.r.es.DeleteIndex("river") 130 | c.Assert(err, IsNil) 131 | } 132 | 133 | func (s *riverTestSuite) TearDownSuite(c *C) { 134 | if s.c != nil { 135 | s.c.Close() 136 | } 137 | 138 | if s.r != nil { 139 | s.r.Close() 140 | } 141 | } 142 | 143 | func (s *riverTestSuite) TestConfig(c *C) { 144 | str := ` 145 | my_addr = "127.0.0.1:3306" 146 | my_user = "root" 147 | my_pass = "" 148 | my_charset = "utf8" 149 | es_addr = "127.0.0.1:9200" 150 | es_user = "" 151 | es_pass = "" 152 | data_dir = "./var" 153 | 154 | [[source]] 155 | schema = "test" 156 | 157 | tables = ["test_river", "test_river_[0-9]{4}", "test_for_id", "test_for_json"] 158 | 159 | [[rule]] 160 | schema = "test" 161 | table = "test_river" 162 | index = "river" 163 | type = "river" 164 | parent = "pid" 165 | 166 | [rule.field] 167 | title = "es_title" 168 | mylist = "es_mylist,list" 169 | mydate = ",date" 170 | 171 | 172 | [[rule]] 173 | schema = "test" 174 | table = "test_for_id" 175 | index = "river" 176 | type = "river" 177 | parent = "pid" 178 | id = ["id", "title"] 179 | [rule.field] 180 | title = "es_title" 181 | mylist = "es_mylist,list" 182 | mydate = ",date" 183 | 184 | 185 | [[rule]] 186 | schema = "test" 187 | table = "test_river_[0-9]{4}" 188 | index = "river" 189 | type = "river" 190 | 191 | [rule.field] 192 | title = "es_title" 193 | mylist = "es_mylist,list" 194 | mydate = ",date" 195 | 196 | [[rule]] 197 | schema = "test" 198 | table = "test_for_json" 199 | index = "river" 200 | type = "river" 201 | ` 202 | 203 | cfg, err := NewConfig(str) 204 | c.Assert(err, IsNil) 205 | c.Assert(cfg.Sources, HasLen, 1) 206 | c.Assert(cfg.Sources[0].Tables, HasLen, 4) 207 | c.Assert(cfg.Rules, HasLen, 4) 208 | } 209 | 210 | func (s *riverTestSuite) testExecute(c *C, query string, args ...interface{}) { 211 | c.Logf("query %s, args: %v", query, args) 212 | _, err := s.c.Execute(query, args...) 
213 | c.Assert(err, IsNil) 214 | } 215 | 216 | func (s *riverTestSuite) testPrepareData(c *C) { 217 | s.testExecute(c, "INSERT INTO test_river (id, title, content, tenum, tset) VALUES (?, ?, ?, ?, ?)", 1, "first", "hello go 1", "e1", "a,b") 218 | s.testExecute(c, "INSERT INTO test_river (id, title, content, tenum, tset) VALUES (?, ?, ?, ?, ?)", 2, "second", "hello mysql 2", "e2", "b,c") 219 | s.testExecute(c, "INSERT INTO test_river (id, title, content, tenum, tset) VALUES (?, ?, ?, ?, ?)", 3, "third", "hello elasticsearch 3", "e3", "c") 220 | s.testExecute(c, "INSERT INTO test_river (id, title, content, tenum, tset, tbit) VALUES (?, ?, ?, ?, ?, ?)", 4, "fourth", "hello go-mysql-elasticsearch 4", "e1", "a,b,c", 0) 221 | s.testExecute(c, "INSERT INTO test_for_id (id, title, content, tenum, tset) VALUES (?, ?, ?, ?, ?)", 1, "first", "hello go 1", "e1", "a,b") 222 | s.testExecute(c, "INSERT INTO test_for_json (id, info) VALUES (?, ?)", 9200, "{\"first\": \"a\", \"second\": \"b\"}") 223 | 224 | for i := 0; i < 10; i++ { 225 | table := fmt.Sprintf("test_river_%04d", i) 226 | s.testExecute(c, fmt.Sprintf("INSERT INTO %s (id, title, content, tenum, tset) VALUES (?, ?, ?, ?, ?)", table), 5+i, "abc", "hello", "e1", "a,b,c") 227 | } 228 | 229 | s.testExecute(c, "INSERT INTO test_river (id, title, content, tenum, tset, tdatetime, mydate, tdate) VALUES (?, ?, ?, ?, ?, ?, ?, ?)", 16, "test datetime", "hello go 16", "e1", "a,b", dateTimeStr, 1458131094, dateStr) 230 | 231 | s.testExecute(c, "SET sql_mode = '';") // clear sql_mode to allow empty dates 232 | s.testExecute(c, "INSERT INTO test_river (id, title, content, tenum, tset, tdatetime, mydate, tdate) VALUES (?, ?, ?, ?, ?, ?, ?, ?)", 20, "test empty datetime", "date test 20", "e1", "a,b", "0000-00-00 00:00:00", 0, "0000-00-00") 233 | 234 | // test ip 235 | s.testExecute(c, "INSERT INTO test_river (id, ip) VALUES (?, ?)", 17, 0) 236 | } 237 | 238 | func (s *riverTestSuite) testElasticGet(c *C, id string) *elastic.Response { 239 | index := "river" 240 | docType := "river" 241 | 242 | r, err := s.r.es.Get(index, docType, id) 243 | c.Assert(err, IsNil) 244 | 245 | return r 246 | } 247 | 248 | func (s *riverTestSuite) testElasticMapping(c *C) *elastic.MappingResponse { 249 | index := "river" 250 | docType := "river" 251 | 252 | r, err := s.r.es.GetMapping(index, docType) 253 | c.Assert(err, IsNil) 254 | 255 | c.Assert(r.Mapping[index].Mappings[docType].Properties["tdatetime"].Type, Equals, "date") 256 | c.Assert(r.Mapping[index].Mappings[docType].Properties["tdate"].Type, Equals, "date") 257 | c.Assert(r.Mapping[index].Mappings[docType].Properties["mydate"].Type, Equals, "date") 258 | return r 259 | } 260 | 261 | func testWaitSyncDone(c *C, r *River) { 262 | <-r.canal.WaitDumpDone() 263 | 264 | err := r.canal.CatchMasterPos(10 * time.Second) 265 | c.Assert(err, IsNil) 266 | 267 | for i := 0; i < 1000; i++ { 268 | if len(r.syncCh) == 0 { 269 | return 270 | } 271 | 272 | time.Sleep(10 * time.Millisecond) 273 | } 274 | 275 | c.Fatalf("wait 10s but still have %d items to be synced", len(r.syncCh)) 276 | } 277 | 278 | func (s *riverTestSuite) TestRiver(c *C) { 279 | s.testPrepareData(c) 280 | 281 | go func() { s.r.Run() }() 282 | 283 | testWaitSyncDone(c, s.r) 284 | 285 | var mr *elastic.MappingResponse 286 | mr = s.testElasticMapping(c) 287 | c.Assert(mr.Code, Equals, 200) 288 | 289 | var r *elastic.Response 290 | r = s.testElasticGet(c, "1") 291 | c.Assert(r.Found, IsTrue) 292 | c.Assert(r.Source["tenum"], Equals, "e1") 293 | c.Assert(r.Source["tset"], Equals, 
"a,b") 294 | 295 | r = s.testElasticGet(c, "1:first") 296 | c.Assert(r.Found, IsTrue) 297 | 298 | r = s.testElasticGet(c, "9200") 299 | c.Assert(r.Found, IsTrue) 300 | switch v := r.Source["info"].(type) { 301 | case map[string]interface{}: 302 | c.Assert(v["first"], Equals, "a") 303 | c.Assert(v["second"], Equals, "b") 304 | default: 305 | c.Assert(v, IsNil) 306 | c.Assert(true, IsFalse) 307 | } 308 | 309 | r = s.testElasticGet(c, "100") 310 | c.Assert(r.Found, IsFalse) 311 | 312 | for i := 0; i < 10; i++ { 313 | r = s.testElasticGet(c, fmt.Sprintf("%d", 5+i)) 314 | c.Assert(r.Found, IsTrue) 315 | c.Assert(r.Source["es_title"], Equals, "abc") 316 | } 317 | 318 | s.testExecute(c, "UPDATE test_river SET title = ?, tenum = ?, tset = ?, mylist = ? WHERE id = ?", "second 2", "e3", "a,b,c", "a,b,c", 2) 319 | s.testExecute(c, "DELETE FROM test_river WHERE id = ?", 1) 320 | s.testExecute(c, "UPDATE test_river SET title = ?, id = ? WHERE id = ?", "second 30", 30, 3) 321 | 322 | // so we can insert invalid data 323 | s.testExecute(c, `SET SESSION sql_mode="NO_ENGINE_SUBSTITUTION";`) 324 | 325 | // bad insert 326 | s.testExecute(c, "UPDATE test_river SET title = ?, tenum = ?, tset = ? WHERE id = ?", "second 2", "e5", "a,b,c,d", 4) 327 | 328 | for i := 0; i < 10; i++ { 329 | table := fmt.Sprintf("test_river_%04d", i) 330 | s.testExecute(c, fmt.Sprintf("UPDATE %s SET title = ? WHERE id = ?", table), "hello", 5+i) 331 | } 332 | 333 | // test ip 334 | s.testExecute(c, "UPDATE test_river set ip = ? WHERE id = ?", 3748168280, 17) 335 | 336 | testWaitSyncDone(c, s.r) 337 | 338 | r = s.testElasticGet(c, "1") 339 | c.Assert(r.Found, IsFalse) 340 | 341 | r = s.testElasticGet(c, "2") 342 | c.Assert(r.Found, IsTrue) 343 | c.Assert(r.Source["es_title"], Equals, "second 2") 344 | c.Assert(r.Source["tenum"], Equals, "e3") 345 | c.Assert(r.Source["tset"], Equals, "a,b,c") 346 | c.Assert(r.Source["es_mylist"], DeepEquals, []interface{}{"a", "b", "c"}) 347 | c.Assert(r.Source["tbit"], Equals, float64(1)) 348 | 349 | r = s.testElasticGet(c, "4") 350 | c.Assert(r.Found, IsTrue) 351 | c.Assert(r.Source["tenum"], Equals, "") 352 | c.Assert(r.Source["tset"], Equals, "a,b,c") 353 | c.Assert(r.Source["tbit"], Equals, float64(0)) 354 | 355 | r = s.testElasticGet(c, "3") 356 | c.Assert(r.Found, IsFalse) 357 | 358 | r = s.testElasticGet(c, "30") 359 | c.Assert(r.Found, IsTrue) 360 | c.Assert(r.Source["es_title"], Equals, "second 30") 361 | 362 | for i := 0; i < 10; i++ { 363 | r = s.testElasticGet(c, fmt.Sprintf("%d", 5+i)) 364 | c.Assert(r.Found, IsTrue) 365 | c.Assert(r.Source["es_title"], Equals, "hello") 366 | } 367 | 368 | r = s.testElasticGet(c, "16") 369 | c.Assert(r.Found, IsTrue) 370 | tdt, _ := time.Parse(time.RFC3339, r.Source["tdatetime"].(string)) 371 | c.Assert(tdt.Format(mysql.TimeFormat), Equals, dateTimeStr) 372 | c.Assert(r.Source["tdate"], Equals, dateStr) 373 | 374 | r = s.testElasticGet(c, "20") 375 | c.Assert(r.Found, IsTrue) 376 | c.Assert(r.Source["tdate"], Equals, nil) 377 | c.Assert(r.Source["tdatetime"], Equals, nil) 378 | 379 | // test ip 380 | r = s.testElasticGet(c, "17") 381 | c.Assert(r.Found, IsTrue) 382 | c.Assert(r.Source["ip"], Equals, float64(3748168280)) 383 | 384 | // alter table 385 | s.testExecute(c, "ALTER TABLE test_river ADD COLUMN new INT(10)") 386 | s.testExecute(c, "INSERT INTO test_river (id, title, content, tenum, tset, new) VALUES (?, ?, ?, ?, ?, ?)", 1000, "abc", "hello", "e1", "a,b,c", 1) 387 | s.testExecute(c, "ALTER TABLE test_river DROP COLUMN new") 388 | s.testExecute(c, 
"INSERT INTO test_river (id, title, content, tenum, tset) VALUES (?, ?, ?, ?, ?)", 1001, "abc", "hello", "e1", "a,b,c") 389 | 390 | testWaitSyncDone(c, s.r) 391 | 392 | r = s.testElasticGet(c, "1000") 393 | c.Assert(r.Found, IsTrue) 394 | c.Assert(r.Source["new"], Equals, float64(1)) 395 | 396 | r = s.testElasticGet(c, "1001") 397 | c.Assert(r.Found, IsTrue) 398 | _, ok := r.Source["new"] 399 | c.Assert(ok, IsFalse) 400 | } 401 | 402 | func TestTableValidation(t *testing.T) { 403 | tables := []struct { 404 | Tables []string 405 | Expect bool 406 | }{ 407 | {[]string{"*"}, true}, 408 | {[]string{"table", "table2"}, true}, 409 | {[]string{"*", "table"}, false}, 410 | } 411 | 412 | for _, table := range tables { 413 | if isValidTables(table.Tables) != table.Expect { 414 | t.Errorf("Tables: %s, Expected: is %t, but: was %t", table.Tables, table.Expect, isValidTables(table.Tables)) 415 | } 416 | } 417 | } 418 | 419 | func TestBuildTable(t *testing.T) { 420 | tables := []struct { 421 | Table string 422 | Expect string 423 | }{ 424 | {"*", ".*"}, 425 | {"table2", "table2"}, 426 | } 427 | 428 | for _, table := range tables { 429 | if buildTable(table.Table) != table.Expect { 430 | t.Errorf("Table: %s, Expected: is \"%s\", but: was \"%s\"", table.Table, table.Expect, buildTable(table.Table)) 431 | } 432 | } 433 | } 434 | -------------------------------------------------------------------------------- /river/rule.go: -------------------------------------------------------------------------------- 1 | package river 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/siddontang/go-mysql/schema" 7 | ) 8 | 9 | // Rule is the rule for how to sync data from MySQL to ES. 10 | // If you want to sync MySQL data into elasticsearch, you must set a rule to let use know how to do it. 11 | // The mapping rule may thi: schema + table <-> index + document type. 12 | // schema and table is for MySQL, index and document type is for Elasticsearch. 13 | type Rule struct { 14 | Schema string `toml:"schema"` 15 | Table string `toml:"table"` 16 | Index string `toml:"index"` 17 | Type string `toml:"type"` 18 | Parent string `toml:"parent"` 19 | ID []string `toml:"id"` 20 | 21 | // Default, a MySQL table field name is mapped to Elasticsearch field name. 22 | // Sometimes, you want to use different name, e.g, the MySQL file name is title, 23 | // but in Elasticsearch, you want to name it my_title. 
24 | FieldMapping map[string]string `toml:"field"` 25 | 26 | // MySQL table information 27 | TableInfo *schema.Table 28 | 29 | // only the MySQL fields listed in Filter will be synced; by default all fields are synced 30 | Filter []string `toml:"filter"` 31 | 32 | // Elasticsearch pipeline 33 | // To pre-process documents before indexing 34 | Pipeline string `toml:"pipeline"` 35 | } 36 | 37 | func newDefaultRule(schema string, table string) *Rule { 38 | r := new(Rule) 39 | 40 | r.Schema = schema 41 | r.Table = table 42 | 43 | lowerTable := strings.ToLower(table) 44 | r.Index = lowerTable 45 | r.Type = lowerTable 46 | 47 | r.FieldMapping = make(map[string]string) 48 | 49 | return r 50 | } 51 | 52 | func (r *Rule) prepare() error { 53 | if r.FieldMapping == nil { 54 | r.FieldMapping = make(map[string]string) 55 | } 56 | 57 | if len(r.Index) == 0 { 58 | r.Index = r.Table 59 | } 60 | 61 | if len(r.Type) == 0 { 62 | r.Type = r.Index 63 | } 64 | 65 | // ES must use a lower-case Type 66 | // Here we lower-case the Index as well 67 | r.Index = strings.ToLower(r.Index) 68 | r.Type = strings.ToLower(r.Type) 69 | 70 | return nil 71 | } 72 | 73 | // CheckFilter checks whether the field should be synced. 74 | func (r *Rule) CheckFilter(field string) bool { 75 | if r.Filter == nil { 76 | return true 77 | } 78 | 79 | for _, f := range r.Filter { 80 | if f == field { 81 | return true 82 | } 83 | } 84 | return false 85 | } 86 | -------------------------------------------------------------------------------- /river/status.go: -------------------------------------------------------------------------------- 1 | package river 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "net" 7 | "net/http" 8 | "net/http/pprof" 9 | 10 | "github.com/siddontang/go-log/log" 11 | "github.com/siddontang/go/sync2" 12 | ) 13 | 14 | type stat struct { 15 | r *River 16 | 17 | l net.Listener 18 | 19 | InsertNum sync2.AtomicInt64 20 | UpdateNum sync2.AtomicInt64 21 | DeleteNum sync2.AtomicInt64 22 | } 23 | 24 | func (s *stat) ServeHTTP(w http.ResponseWriter, r *http.Request) { 25 | var buf bytes.Buffer 26 | 27 | rr, err := s.r.canal.Execute("SHOW MASTER STATUS") 28 | if err != nil { 29 | w.WriteHeader(http.StatusInternalServerError) 30 | w.Write([]byte(fmt.Sprintf("execute sql error %v", err))) 31 | return 32 | } 33 | 34 | binName, _ := rr.GetString(0, 0) 35 | binPos, _ := rr.GetUint(0, 1) 36 | 37 | pos := s.r.canal.SyncedPosition() 38 | 39 | buf.WriteString(fmt.Sprintf("server_current_binlog:(%s, %d)\n", binName, binPos)) 40 | buf.WriteString(fmt.Sprintf("read_binlog:%s\n", pos)) 41 | 42 | buf.WriteString(fmt.Sprintf("insert_num:%d\n", s.InsertNum.Get())) 43 | buf.WriteString(fmt.Sprintf("update_num:%d\n", s.UpdateNum.Get())) 44 | buf.WriteString(fmt.Sprintf("delete_num:%d\n", s.DeleteNum.Get())) 45 | 46 | w.Write(buf.Bytes()) 47 | } 48 | 49 | func (s *stat) Run(addr string) { 50 | if len(addr) == 0 { 51 | return 52 | } 53 | log.Infof("run status http server %s", addr) 54 | var err error 55 | s.l, err = net.Listen("tcp", addr) 56 | if err != nil { 57 | log.Errorf("listen stat addr %s err %v", addr, err) 58 | return 59 | } 60 | 61 | srv := http.Server{} 62 | mux := http.NewServeMux() 63 | mux.Handle("/stat", s) 64 | mux.Handle("/debug/pprof/", http.HandlerFunc(pprof.Index)) 65 | srv.Handler = mux 66 | 67 | srv.Serve(s.l) 68 | } 69 | 70 | func (s *stat) Close() { 71 | if s.l != nil { 72 | s.l.Close() 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /river/sync.go: 
-------------------------------------------------------------------------------- 1 | package river 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "fmt" 7 | "reflect" 8 | "strings" 9 | "time" 10 | 11 | "github.com/juju/errors" 12 | "github.com/siddontang/go-log/log" 13 | "github.com/siddontang/go-mysql-elasticsearch/elastic" 14 | "github.com/siddontang/go-mysql/canal" 15 | "github.com/siddontang/go-mysql/mysql" 16 | "github.com/siddontang/go-mysql/replication" 17 | "github.com/siddontang/go-mysql/schema" 18 | ) 19 | 20 | const ( 21 | syncInsertDoc = iota 22 | syncDeleteDoc 23 | syncUpdateDoc 24 | ) 25 | 26 | const ( 27 | fieldTypeList = "list" 28 | // for the mysql int type to es date type 29 | // set the [rule.field] created_time = ",date" 30 | fieldTypeDate = "date" 31 | ) 32 | 33 | const mysqlDateFormat = "2006-01-02" 34 | 35 | type posSaver struct { 36 | pos mysql.Position 37 | force bool 38 | } 39 | 40 | type eventHandler struct { 41 | r *River 42 | } 43 | 44 | func (h *eventHandler) OnRotate(e *replication.RotateEvent) error { 45 | pos := mysql.Position{ 46 | Name: string(e.NextLogName), 47 | Pos: uint32(e.Position), 48 | } 49 | 50 | h.r.syncCh <- posSaver{pos, true} 51 | 52 | return h.r.ctx.Err() 53 | } 54 | 55 | func (h *eventHandler) OnTableChanged(schema, table string) error { 56 | err := h.r.updateRule(schema, table) 57 | if err != nil && err != ErrRuleNotExist { 58 | return errors.Trace(err) 59 | } 60 | return nil 61 | } 62 | 63 | func (h *eventHandler) OnDDL(nextPos mysql.Position, _ *replication.QueryEvent) error { 64 | h.r.syncCh <- posSaver{nextPos, true} 65 | return h.r.ctx.Err() 66 | } 67 | 68 | func (h *eventHandler) OnXID(nextPos mysql.Position) error { 69 | h.r.syncCh <- posSaver{nextPos, false} 70 | return h.r.ctx.Err() 71 | } 72 | 73 | func (h *eventHandler) OnRow(e *canal.RowsEvent) error { 74 | rule, ok := h.r.rules[ruleKey(e.Table.Schema, e.Table.Name)] 75 | if !ok { 76 | return nil 77 | } 78 | 79 | var reqs []*elastic.BulkRequest 80 | var err error 81 | switch e.Action { 82 | case canal.InsertAction: 83 | reqs, err = h.r.makeInsertRequest(rule, e.Rows) 84 | case canal.DeleteAction: 85 | reqs, err = h.r.makeDeleteRequest(rule, e.Rows) 86 | case canal.UpdateAction: 87 | reqs, err = h.r.makeUpdateRequest(rule, e.Rows) 88 | default: 89 | err = errors.Errorf("invalid rows action %s", e.Action) 90 | } 91 | 92 | if err != nil { 93 | h.r.cancel() 94 | return errors.Errorf("make %s ES request err %v, close sync", e.Action, err) 95 | } 96 | 97 | h.r.syncCh <- reqs 98 | 99 | return h.r.ctx.Err() 100 | } 101 | 102 | func (h *eventHandler) OnGTID(gtid mysql.GTIDSet) error { 103 | return nil 104 | } 105 | 106 | func (h *eventHandler) OnPosSynced(pos mysql.Position, force bool) error { 107 | return nil 108 | } 109 | 110 | func (h *eventHandler) String() string { 111 | return "ESRiverEventHandler" 112 | } 113 | 114 | func (r *River) syncLoop() { 115 | bulkSize := r.c.BulkSize 116 | if bulkSize == 0 { 117 | bulkSize = 128 118 | } 119 | 120 | interval := r.c.FlushBulkTime.Duration 121 | if interval == 0 { 122 | interval = 200 * time.Millisecond 123 | } 124 | 125 | ticker := time.NewTicker(interval) 126 | defer ticker.Stop() 127 | defer r.wg.Done() 128 | 129 | lastSavedTime := time.Now() 130 | reqs := make([]*elastic.BulkRequest, 0, 1024) 131 | 132 | var pos mysql.Position 133 | 134 | for { 135 | needFlush := false 136 | needSavePos := false 137 | 138 | select { 139 | case v := <-r.syncCh: 140 | switch v := v.(type) { 141 | case posSaver: 142 | now := time.Now() 143 | if 
v.force || now.Sub(lastSavedTime) > 3*time.Second { 144 | lastSavedTime = now 145 | needFlush = true 146 | needSavePos = true 147 | pos = v.pos 148 | } 149 | case []*elastic.BulkRequest: 150 | reqs = append(reqs, v...) 151 | needFlush = len(reqs) >= bulkSize 152 | } 153 | case <-ticker.C: 154 | needFlush = true 155 | case <-r.ctx.Done(): 156 | return 157 | } 158 | 159 | if needFlush { 160 | // TODO: retry some times? 161 | if err := r.doBulk(reqs); err != nil { 162 | log.Errorf("do ES bulk err %v, close sync", err) 163 | r.cancel() 164 | return 165 | } 166 | reqs = reqs[0:0] 167 | } 168 | 169 | if needSavePos { 170 | if err := r.master.Save(pos); err != nil { 171 | log.Errorf("save sync position %s err %v, close sync", pos, err) 172 | r.cancel() 173 | return 174 | } 175 | } 176 | } 177 | } 178 | 179 | // for insert and delete 180 | func (r *River) makeRequest(rule *Rule, action string, rows [][]interface{}) ([]*elastic.BulkRequest, error) { 181 | reqs := make([]*elastic.BulkRequest, 0, len(rows)) 182 | 183 | for _, values := range rows { 184 | id, err := r.getDocID(rule, values) 185 | if err != nil { 186 | return nil, errors.Trace(err) 187 | } 188 | 189 | parentID := "" 190 | if len(rule.Parent) > 0 { 191 | if parentID, err = r.getParentID(rule, values, rule.Parent); err != nil { 192 | return nil, errors.Trace(err) 193 | } 194 | } 195 | 196 | req := &elastic.BulkRequest{Index: rule.Index, Type: rule.Type, ID: id, Parent: parentID, Pipeline: rule.Pipeline} 197 | 198 | if action == canal.DeleteAction { 199 | req.Action = elastic.ActionDelete 200 | r.st.DeleteNum.Add(1) 201 | } else { 202 | r.makeInsertReqData(req, rule, values) 203 | r.st.InsertNum.Add(1) 204 | } 205 | 206 | reqs = append(reqs, req) 207 | } 208 | 209 | return reqs, nil 210 | } 211 | 212 | func (r *River) makeInsertRequest(rule *Rule, rows [][]interface{}) ([]*elastic.BulkRequest, error) { 213 | return r.makeRequest(rule, canal.InsertAction, rows) 214 | } 215 | 216 | func (r *River) makeDeleteRequest(rule *Rule, rows [][]interface{}) ([]*elastic.BulkRequest, error) { 217 | return r.makeRequest(rule, canal.DeleteAction, rows) 218 | } 219 | 220 | func (r *River) makeUpdateRequest(rule *Rule, rows [][]interface{}) ([]*elastic.BulkRequest, error) { 221 | if len(rows)%2 != 0 { 222 | return nil, errors.Errorf("invalid update rows event, must have 2x rows, but %d", len(rows)) 223 | } 224 | 225 | reqs := make([]*elastic.BulkRequest, 0, len(rows)) 226 | 227 | for i := 0; i < len(rows); i += 2 { 228 | beforeID, err := r.getDocID(rule, rows[i]) 229 | if err != nil { 230 | return nil, errors.Trace(err) 231 | } 232 | 233 | afterID, err := r.getDocID(rule, rows[i+1]) 234 | 235 | if err != nil { 236 | return nil, errors.Trace(err) 237 | } 238 | 239 | beforeParentID, afterParentID := "", "" 240 | if len(rule.Parent) > 0 { 241 | if beforeParentID, err = r.getParentID(rule, rows[i], rule.Parent); err != nil { 242 | return nil, errors.Trace(err) 243 | } 244 | if afterParentID, err = r.getParentID(rule, rows[i+1], rule.Parent); err != nil { 245 | return nil, errors.Trace(err) 246 | } 247 | } 248 | 249 | req := &elastic.BulkRequest{Index: rule.Index, Type: rule.Type, ID: beforeID, Parent: beforeParentID} 250 | 251 | 252 | if beforeID != afterID || beforeParentID != afterParentID { 253 | req.Action = elastic.ActionDelete 254 | reqs = append(reqs, req) 255 | 256 | req = &elastic.BulkRequest{Index: rule.Index, Type: rule.Type, ID: afterID, Parent: afterParentID, Pipeline: rule.Pipeline} 257 | r.makeInsertReqData(req, rule, rows[i+1]) 258 | 259 | 
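// the document ID or parent changed, so it cannot be updated in place: the old document was deleted and the new row re-indexed above, counting one delete and one insert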
r.st.DeleteNum.Add(1) 260 | r.st.InsertNum.Add(1) 261 | } else { 262 | // try to delete the old document first 263 | req.Action = elastic.ActionDelete 264 | reqs = append(reqs, req) 265 | // then re-index the new row 266 | req = &elastic.BulkRequest{Index: rule.Index, Type: rule.Type, ID: afterID, Parent: afterParentID, Pipeline: rule.Pipeline} 267 | r.makeInsertReqData(req, rule, rows[i+1]) 268 | 269 | // if len(rule.Pipeline) > 0 { 270 | // // Pipelines can only be specified on index action 271 | // r.makeInsertReqData(req, rule, rows[i+1]) 272 | // // Make sure action is index, not create 273 | // req.Action = elastic.ActionIndex 274 | // req.Pipeline = rule.Pipeline 275 | // } else { 276 | // r.makeUpdateReqData(req, rule, rows[i], rows[i+1]) 277 | // } 278 | r.st.UpdateNum.Add(1) 279 | } 280 | 281 | reqs = append(reqs, req) 282 | } 283 | 284 | return reqs, nil 285 | } 286 | 287 | func (r *River) makeReqColumnData(col *schema.TableColumn, value interface{}) interface{} { 288 | switch col.Type { 289 | case schema.TYPE_ENUM: 290 | switch value := value.(type) { 291 | case int64: 292 | // for binlog, ENUM may be int64, but for dump, enum is string 293 | eNum := value - 1 294 | if eNum < 0 || eNum >= int64(len(col.EnumValues)) { 295 | // we inserted an invalid enum value before, so return empty 296 | log.Warnf("invalid binlog enum index %d, for enum %v", eNum, col.EnumValues) 297 | return "" 298 | } 299 | 300 | return col.EnumValues[eNum] 301 | } 302 | case schema.TYPE_SET: 303 | switch value := value.(type) { 304 | case int64: 305 | // for binlog, SET may be int64, but for dump, SET is string 306 | bitmask := value 307 | sets := make([]string, 0, len(col.SetValues)) 308 | for i, s := range col.SetValues { 309 | if bitmask&int64(1<<uint(i)) > 0 { 310 | sets = append(sets, s) 311 | } 312 | } 313 | return strings.Join(sets, ",") 314 | } 315 | case schema.TYPE_BIT: 316 | switch value := value.(type) { 317 | case string: 318 | // for binlog, BIT is int64, but for dump, BIT is string 319 | // for dump 0x01 is for 1, \0 is for 0 320 | if value == "\x01" { 321 | return int64(1) 322 | } 323 | 324 | return int64(0) 325 | } 326 | case schema.TYPE_STRING: 327 | switch value := value.(type) { 328 | case []byte: 329 | return string(value[:]) 330 | } 331 | case schema.TYPE_JSON: 332 | var f interface{} 333 | var err error 334 | switch v := value.(type) { 335 | case string: 336 | err = json.Unmarshal([]byte(v), &f) 337 | case []byte: 338 | err = json.Unmarshal(v, &f) 339 | } 340 | if err == nil && f != nil { 341 | return f 342 | } 343 | case schema.TYPE_DATETIME, schema.TYPE_TIMESTAMP: 344 | switch v := value.(type) { 345 | case string: 346 | vt, err := time.ParseInLocation(mysql.TimeFormat, string(v), time.Local) 347 | if err != nil || vt.IsZero() { // failed to parse date or zero date 348 | return nil 349 | } 350 | return vt.Format(time.RFC3339) 351 | } 352 | case schema.TYPE_DATE: 353 | switch v := value.(type) { 354 | case string: 355 | vt, err := time.Parse(mysqlDateFormat, string(v)) 356 | if err != nil || vt.IsZero() { // failed to parse date or zero date 357 | return nil 358 | } 359 | return vt.Format(mysqlDateFormat) 360 | } 361 | } 362 | 363 | return value 364 | } 365 | 366 | func (r *River) getFieldParts(k string, v string) (string, string, string) { 367 | composedField := strings.Split(v, ",") 368 | 369 | mysql := k 370 | elastic := composedField[0] 371 | fieldType := "" 372 | 373 | if len(elastic) == 0 { 374 | elastic = mysql 375 | } 376 | if len(composedField) == 2 { 377 | fieldType = composedField[1] 378 | } 379 | 380 | return mysql, elastic, fieldType 381 
| } 382 | 383 | func (r *River) makeInsertReqData(req *elastic.BulkRequest, rule *Rule, values []interface{}) { 384 | req.Data = make(map[string]interface{}, len(values)) 385 | req.Action = elastic.ActionIndex 386 | 387 | for i, c := range rule.TableInfo.Columns { 388 | if !rule.CheckFilter(c.Name) { 389 | continue 390 | } 391 | mapped := false 392 | for k, v := range rule.FieldMapping { 393 | mysql, elastic, fieldType := r.getFieldParts(k, v) 394 | if mysql == c.Name { 395 | mapped = true 396 | req.Data[elastic] = r.getFieldValue(&c, fieldType, values[i]) 397 | } 398 | } 399 | if !mapped { 400 | req.Data[c.Name] = r.makeReqColumnData(&c, values[i]) 401 | } 402 | } 403 | } 404 | 405 | func (r *River) makeUpdateReqData(req *elastic.BulkRequest, rule *Rule, 406 | beforeValues []interface{}, afterValues []interface{}) { 407 | req.Data = make(map[string]interface{}, len(beforeValues)) 408 | 409 | // maybe dangerous if something went wrong with the delete before? 410 | req.Action = elastic.ActionUpdate 411 | 412 | for i, c := range rule.TableInfo.Columns { 413 | mapped := false 414 | if !rule.CheckFilter(c.Name) { 415 | continue 416 | } 417 | if reflect.DeepEqual(beforeValues[i], afterValues[i]) { 418 | // nothing changed 419 | continue 420 | } 421 | for k, v := range rule.FieldMapping { 422 | mysql, elastic, fieldType := r.getFieldParts(k, v) 423 | if mysql == c.Name { 424 | mapped = true 425 | req.Data[elastic] = r.getFieldValue(&c, fieldType, afterValues[i]) 426 | } 427 | } 428 | if !mapped { 429 | req.Data[c.Name] = r.makeReqColumnData(&c, afterValues[i]) 430 | } 431 | 432 | } 433 | } 434 | 435 | // If id is not configured in the toml file, get the primary key values of the row and format them into a string; the PK values must not be nil. 436 | // Otherwise, get the configured ID columns of the row and format them into a string. 437 | func (r *River) getDocID(rule *Rule, row []interface{}) (string, error) { 438 | var ( 439 | ids []interface{} 440 | err error 441 | ) 442 | if rule.ID == nil { 443 | ids, err = rule.TableInfo.GetPKValues(row) 444 | if err != nil { 445 | return "", err 446 | } 447 | } else { 448 | ids = make([]interface{}, 0, len(rule.ID)) 449 | for _, column := range rule.ID { 450 | value, err := rule.TableInfo.GetColumnValue(column, row) 451 | if err != nil { 452 | return "", err 453 | } 454 | ids = append(ids, value) 455 | } 456 | } 457 | 458 | var buf bytes.Buffer 459 | 460 | sep := "" 461 | for i, value := range ids { 462 | if value == nil { 463 | return "", errors.Errorf("the %d-th id or PK value is nil", i) 464 | } 465 | 466 | buf.WriteString(fmt.Sprintf("%s%v", sep, value)) 467 | sep = ":" 468 | } 469 | 470 | return buf.String(), nil 471 | } 472 | 473 | func (r *River) getParentID(rule *Rule, row []interface{}, columnName string) (string, error) { 474 | index := rule.TableInfo.FindColumn(columnName) 475 | if index < 0 { 476 | return "", errors.Errorf("parent id not found %s(%s)", rule.TableInfo.Name, columnName) 477 | } 478 | 479 | return fmt.Sprint(row[index]), nil 480 | } 481 | 482 | func (r *River) doBulk(reqs []*elastic.BulkRequest) error { 483 | if len(reqs) == 0 { 484 | return nil 485 | } 486 | 487 | if resp, err := r.es.Bulk(reqs); err != nil { 488 | log.Errorf("sync docs err %v after binlog %s", err, r.canal.SyncedPosition()) 489 | return errors.Trace(err) 490 | } else if resp.Code/100 == 2 || resp.Errors { 491 | for i := 0; i < len(resp.Items); i++ { 492 | for action, item := range resp.Items[i] { 493 | if len(item.Error) > 0 { 494 | log.Errorf("%s index: %s, type: %s, id: %s, status: %d, error: %s", 495 | 
action, item.Index, item.Type, item.ID, item.Status, item.Error) 496 | } 497 | } 498 | } 499 | } 500 | 501 | return nil 502 | } 503 | 504 | // getFieldValue gets the MySQL field value and converts it to the value ES expects 505 | func (r *River) getFieldValue(col *schema.TableColumn, fieldType string, value interface{}) interface{} { 506 | var fieldValue interface{} 507 | switch fieldType { 508 | case fieldTypeList: 509 | v := r.makeReqColumnData(col, value) 510 | if str, ok := v.(string); ok { 511 | fieldValue = strings.Split(str, ",") 512 | } else { 513 | fieldValue = v 514 | } 515 | 516 | case fieldTypeDate: 517 | if col.Type == schema.TYPE_NUMBER { 518 | // work on a copy of the column so the shared table metadata keeps TYPE_NUMBER for later rows 519 | c := *col 520 | c.Type = schema.TYPE_DATETIME 521 | 522 | v := reflect.ValueOf(value) 523 | switch v.Kind() { 524 | case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: 525 | fieldValue = r.makeReqColumnData(&c, time.Unix(v.Int(), 0).Format(mysql.TimeFormat)) 526 | } 527 | } 528 | } 529 | 530 | if fieldValue == nil { 531 | fieldValue = r.makeReqColumnData(col, value) 532 | } 533 | return fieldValue 534 | } 535 | --------------------------------------------------------------------------------
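To make the `,date` field mapping above concrete: for a numeric column mapped with `",date"`, `getFieldValue` formats the epoch seconds as a MySQL datetime string, and `makeReqColumnData` then parses that string and emits RFC3339 for Elasticsearch. Below is a minimal, self-contained sketch of that two-step conversion; it assumes `mysql.TimeFormat` is the `"2006-01-02 15:04:05"` layout and borrows the epoch value from `river_test.go`.

```
package main

import (
	"fmt"
	"time"
)

// mysqlTimeFormat mirrors mysql.TimeFormat from go-mysql (assumed layout).
const mysqlTimeFormat = "2006-01-02 15:04:05"

func main() {
	epoch := int64(1458131094) // the mydate value inserted by river_test.go

	// step 1: getFieldValue formats the int as a MySQL datetime string
	asDatetime := time.Unix(epoch, 0).Format(mysqlTimeFormat)

	// step 2: makeReqColumnData parses it back and emits RFC3339 for ES
	vt, err := time.ParseInLocation(mysqlTimeFormat, asDatetime, time.Local)
	if err != nil || vt.IsZero() {
		fmt.Println("unparseable or zero date -> indexed as null")
		return
	}
	fmt.Println(asDatetime, "->", vt.Format(time.RFC3339))
}
```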