├── .gitignore
├── .travis.yml
├── Dockerfile
├── LICENSE
├── Makefile
├── README.md
├── clear_vendor.sh
├── cmd
│   └── go-mysql-elasticsearch
│       └── main.go
├── elastic
│   ├── client.go
│   └── client_test.go
├── etc
│   └── river.toml
├── go.mod
├── go.sum
└── river
    ├── config.go
    ├── master.go
    ├── river.go
    ├── river_extra_test.go
    ├── river_test.go
    ├── rule.go
    ├── status.go
    └── sync.go

/.gitignore:
--------------------------------------------------------------------------------
1 | bin

--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: go
 2 | 
 3 | go:
 4 |   - "1.11"
 5 | 
 6 | services:
 7 |   - elasticsearch
 8 | 
 9 | addons:
10 |   apt:
11 |     sources:
12 |       - mysql-5.7-trusty
13 |     packages:
14 |       - mysql-server
15 |       - mysql-client
16 | 
17 | before_install:
18 |   - sudo mysql -e "use mysql; update user set authentication_string=PASSWORD('') where User='root'; update user set plugin='mysql_native_password';FLUSH PRIVILEGES;"
19 |   - sudo mysql_upgrade
20 | 
21 |   # stop mysql and switch to row-based binlog format
22 |   - "sudo service mysql stop || true"
23 |   - "echo '[mysqld]' | sudo tee /etc/mysql/conf.d/replication.cnf"
24 |   - "echo 'server-id=1' | sudo tee -a /etc/mysql/conf.d/replication.cnf"
25 |   - "echo 'log-bin=mysql-bin' | sudo tee -a /etc/mysql/conf.d/replication.cnf"
26 |   - "echo 'binlog-format = row' | sudo tee -a /etc/mysql/conf.d/replication.cnf"
27 | 
28 |   # Start mysql (tolerate errors so we can still dump the logs)
29 |   - "sudo service mysql start || true"
30 |   - "sudo tail -1000 /var/log/syslog"
31 | 
32 |   - mysql -e "CREATE DATABASE IF NOT EXISTS test;" -uroot
33 | 
34 | script:
35 |   - go test --race ./...
36 | 
37 | env:
38 |   - GO111MODULE=on

--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM golang:alpine
 2 | 
 3 | MAINTAINER siddontang
 4 | 
 5 | RUN apk add --no-cache tini mariadb-client
 6 | 
 7 | ADD . /go/src/github.com/siddontang/go-mysql-elasticsearch
 8 | 
 9 | RUN cd /go/src/github.com/siddontang/go-mysql-elasticsearch/ && \
10 |     go build -o bin/go-mysql-elasticsearch ./cmd/go-mysql-elasticsearch && \
11 |     cp -f ./bin/go-mysql-elasticsearch /go/bin/go-mysql-elasticsearch
12 | 
13 | ENTRYPOINT ["/sbin/tini","--","go-mysql-elasticsearch"]

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015 siddontang
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.

--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | all: build
 2 | 
 3 | build: build-elasticsearch
 4 | 
 5 | build-elasticsearch:
 6 | 	GO111MODULE=on go build -o bin/go-mysql-elasticsearch ./cmd/go-mysql-elasticsearch
 7 | 
 8 | test:
 9 | 	GO111MODULE=on go test -timeout 1m --race ./...
10 | 
11 | clean:
12 | 	GO111MODULE=on go clean -i ./...
13 | 	@rm -rf bin

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | go-mysql-elasticsearch is a service that automatically syncs your MySQL data into Elasticsearch.
 2 | 
 3 | It uses `mysqldump` to fetch the original data first, then syncs data incrementally with the binlog.
 4 | 
 5 | ## Install
 6 | 
 7 | + Install Go (1.9+) and set your [GOPATH](https://golang.org/doc/code.html#GOPATH)
 8 | + `go get github.com/siddontang/go-mysql-elasticsearch`, it will print some messages in the console, you can skip them. :-)
 9 | + cd `$GOPATH/src/github.com/siddontang/go-mysql-elasticsearch`
10 | + `make`
11 | 
12 | ## How to use?
13 | 
14 | + Create a table in MySQL.
15 | + Create the associated Elasticsearch index, document type and mappings if possible; if not, Elasticsearch will create these automatically.
16 | + Create a base config, see the example config [river.toml](./etc/river.toml).
17 | + Set the MySQL source in the config file, see [Source](#source) below.
18 | + Customize the MySQL to Elasticsearch mapping rules in the config file, see [Rule](#rule) below.
19 | + Start `./bin/go-mysql-elasticsearch -config=./etc/river.toml` and enjoy it.
20 | 
21 | ## Notice
22 | 
23 | + MySQL supported version < 8.0
24 | + ES supported version < 6.0
25 | + binlog format must be **row**.
26 | + binlog row image must be **full** for MySQL; you may lose some field data if you update PK data in MySQL with a minimal or noblob binlog row image. MariaDB only supports the full row image.
27 | + Cannot alter table format at runtime.
28 | + A MySQL table which will be synced should have a PK (primary key); a multi-column PK is allowed now, e.g. if the PK is (a, b), we will use "a:b" as the key. The PK data will be used as the "id" in Elasticsearch. You can also configure the id's constituent parts with other columns.
29 | + You should create the associated mappings in Elasticsearch first; I don't think using the default mapping is a wise decision, you must know how to search accurately.
30 | + `mysqldump` must exist on the same node as go-mysql-elasticsearch; if not, go-mysql-elasticsearch will try to sync the binlog only.
31 | + Don't change too many rows at the same time in one SQL statement.
32 | 
33 | ## Source
34 | 
35 | In go-mysql-elasticsearch, you must decide which tables you want to sync into Elasticsearch in the source config.
36 | 
37 | The format in the config file is below:
38 | 
39 | ```
40 | [[source]]
41 | schema = "test"
42 | tables = ["t1", "t2"]
43 | 
44 | [[source]]
45 | schema = "test_1"
46 | tables = ["t3", "t4"]
47 | ```
48 | 
49 | `schema` is the database name, and `tables` includes the tables that need to be synced.
50 | 
51 | If you want to sync **all tables in a database**, you can use an **asterisk(\*)**.
 52 | ```
 53 | [[source]]
 54 | schema = "test"
 55 | tables = ["*"]
 56 | 
 57 | # When using an asterisk, it is not allowed to sync multiple tables
 58 | # tables = ["*", "table"]
 59 | ```
 60 | 
 61 | ## Rule
 62 | 
 63 | By default, go-mysql-elasticsearch will use the MySQL table name as the Elasticsearch index and type name, and the MySQL table field name as the Elasticsearch field name.
 64 | e.g. for a table named blog, the default index and type in Elasticsearch are both named blog; for a table field named title,
 65 | the default field name is also title.
 66 | 
 67 | Notice: go-mysql-elasticsearch will use the lower-case name for the ES index and type. E.g. if your table is named BLOG, the ES index and type are both named blog.
 68 | 
 69 | Rules let you change this name mapping. The rule format in the config file is below:
 70 | 
 71 | ```
 72 | [[rule]]
 73 | schema = "test"
 74 | table = "t1"
 75 | index = "t"
 76 | type = "t"
 77 | parent = "parent_id"
 78 | id = ["id"]
 79 | 
 80 | [rule.field]
 81 | mysql = "title"
 82 | elastic = "my_title"
 83 | ```
 84 | 
 85 | In the example above, we will use a new index and type both named "t" instead of the default "t1", and use "my_title" instead of the field name "title".
 86 | 
 87 | ## Rule field types
 88 | 
 89 | In order to map a MySQL column to a different Elasticsearch type, you can define the field type as follows:
 90 | 
 91 | ```
 92 | [[rule]]
 93 | schema = "test"
 94 | table = "t1"
 95 | index = "t"
 96 | type = "t"
 97 | 
 98 | [rule.field]
 99 | # This will map column title to Elasticsearch field my_title
100 | title="my_title"
101 | 
102 | # This will map column title to Elasticsearch field my_title and use the array type
103 | title="my_title,list"
104 | 
105 | # This will map column title to Elasticsearch field title and use the array type
106 | title=",list"
107 | 
108 | # If the created_time field type is "int" and you want to convert it to the "date" type in ES, you can do it as below
109 | created_time=",date"
110 | ```
111 | 
112 | The "list" modifier translates a MySQL string field like "a,b,c" into an Elasticsearch array type '["a", "b", "c"]'; this is especially useful if you need to filter on those fields in Elasticsearch.
113 | 
114 | ## Wildcard table
115 | 
116 | go-mysql-elasticsearch only lets you determine which tables are to be synced, but sometimes, if you split a big table into multiple sub tables, like 1024 tables named table_0000, table_0001, ... table_1023, it is very hard to write a rule for every table.
117 | 
118 | go-mysql-elasticsearch supports using wildcard tables, e.g:
119 | 
120 | ```
121 | [[source]]
122 | schema = "test"
123 | tables = ["test_river_[0-9]{4}"]
124 | 
125 | [[rule]]
126 | schema = "test"
127 | table = "test_river_[0-9]{4}"
128 | index = "river"
129 | type = "river"
130 | ```
131 | 
132 | "test_river_[0-9]{4}" is a wildcard table definition, which represents "test_river_0000" to "test_river_9999"; the table name in the rule must be the same wildcard.
133 | 
134 | In the above example, if you have 1024 sub tables, all of them will be synced into Elasticsearch with index "river" and type "river".
135 | 
136 | ## Parent-Child Relationship
137 | 
138 | One-to-many join ([parent-child relationship](https://www.elastic.co/guide/en/elasticsearch/guide/current/parent-child.html) in Elasticsearch) is supported. Simply specify the field name for the `parent` property.
139 | 
140 | ```
141 | [[rule]]
142 | schema = "test"
143 | table = "t1"
144 | index = "t"
145 | type = "t"
146 | parent = "parent_id"
147 | ```
148 | 
149 | Note: you should [set up the relationship](https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-parent-field.html) by creating the mapping manually.
150 | 
151 | ## Filter fields
152 | 
153 | You can use `filter` to sync only the specified fields, like:
154 | 
155 | ```
156 | [[rule]]
157 | schema = "test"
158 | table = "tfilter"
159 | index = "test"
160 | type = "tfilter"
161 | 
162 | # Only sync following columns
163 | filter = ["id", "name"]
164 | ```
165 | 
166 | In the above example, we will only sync the MySQL table tfilter's columns `id` and `name` to Elasticsearch.
167 | 
168 | ## Ignore table without a primary key
169 | When you sync a table without a primary key, you will see the error message below.
170 | ```
171 | schema.table must have a PK for a column
172 | ```
173 | You can ignore these tables in the configuration like:
174 | ```
175 | # Ignore table without a primary key
176 | skip_no_pk_table = true
177 | ```
178 | 
179 | ## Elasticsearch Pipeline
180 | You can use an [Ingest Node Pipeline](https://www.elastic.co/guide/en/elasticsearch/reference/current/ingest.html) to pre-process documents before indexing, for things like JSON string decoding, merging fields and more.
181 | 
182 | ```
183 | [[rule]]
184 | schema = "test"
185 | table = "t1"
186 | index = "t"
187 | type = "_doc"
188 | 
189 | # pipeline id
190 | pipeline = "my-pipeline-id"
191 | ```
192 | Note: you should [create the pipeline](https://www.elastic.co/guide/en/elasticsearch/reference/current/put-pipeline-api.html) manually; this requires Elasticsearch >= 5.0.
193 | 
194 | ## Why not other rivers?
195 | 
196 | Although there are some other MySQL rivers for Elasticsearch, like [elasticsearch-river-jdbc](https://github.com/jprante/elasticsearch-river-jdbc) and [elasticsearch-river-mysql](https://github.com/scharron/elasticsearch-river-mysql), I still want to build a new one with Go. Why?
197 | 
198 | + Customization: I want to decide which tables to sync, the associated index and type names, and even the field names in Elasticsearch.
199 | + Incremental update with binlog, which can resume from the last sync position when the service starts again.
200 | + A common sync framework, not only for Elasticsearch but also for others, like memcached, redis, etc...
201 | + Wildcard table support: we have many sub tables like table_0000 - table_1023, but want to use a single Elasticsearch index and type.
202 | 
203 | ## Todo
204 | 
205 | + MySQL 8
206 | + ES 6
207 | + Statistics.
208 | 
209 | ## Donate
210 | 
211 | If you like the project and want to buy me a cola, you can do so through:
212 | 
213 | |PayPal|微信|
214 | |------|---|
215 | |[![](https://www.paypalobjects.com/webstatic/paypalme/images/pp_logo_small.png)](https://paypal.me/siddontang)|![](https://github.com/siddontang/blog/blob/master/donate/weixin.png)|
216 | 
217 | ## Feedback
218 | 
219 | go-mysql-elasticsearch is still in development, and we will try to use it in production later. Any feedback is very welcome.
220 | 221 | Email: siddontang@gmail.com 222 | -------------------------------------------------------------------------------- /clear_vendor.sh: -------------------------------------------------------------------------------- 1 | find vendor \( -type f -or -type l \) -not -name "*.go" -not -name "LICENSE" -not -name "*.s" -not -name "PATENTS" -not -name "*.h" -not -name "*.c" | xargs -I {} rm {} 2 | # delete all test files 3 | find vendor -type f -name "*_generated.go" | xargs -I {} rm {} 4 | find vendor -type f -name "*_test.go" | xargs -I {} rm {} 5 | find vendor -type d -name "_vendor" | xargs -I {} rm -rf {} 6 | find vendor -type d -empty | xargs -I {} rm -rf {} -------------------------------------------------------------------------------- /cmd/go-mysql-elasticsearch/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "os" 6 | "os/signal" 7 | "runtime" 8 | "syscall" 9 | 10 | "github.com/juju/errors" 11 | "github.com/siddontang/go-log/log" 12 | "github.com/siddontang/go-mysql-elasticsearch/river" 13 | ) 14 | 15 | var configFile = flag.String("config", "./etc/river.toml", "go-mysql-elasticsearch config file") 16 | var my_addr = flag.String("my_addr", "", "MySQL addr") 17 | var my_user = flag.String("my_user", "", "MySQL user") 18 | var my_pass = flag.String("my_pass", "", "MySQL password") 19 | var es_addr = flag.String("es_addr", "", "Elasticsearch addr") 20 | var data_dir = flag.String("data_dir", "", "path for go-mysql-elasticsearch to save data") 21 | var server_id = flag.Int("server_id", 0, "MySQL server id, as a pseudo slave") 22 | var flavor = flag.String("flavor", "", "flavor: mysql or mariadb") 23 | var execution = flag.String("exec", "", "mysqldump execution path") 24 | var logLevel = flag.String("log_level", "info", "log level") 25 | 26 | func main() { 27 | runtime.GOMAXPROCS(runtime.NumCPU()) 28 | flag.Parse() 29 | 30 | log.SetLevelByName(*logLevel) 31 | 32 | sc := make(chan os.Signal, 1) 33 | signal.Notify(sc, 34 | os.Kill, 35 | os.Interrupt, 36 | syscall.SIGHUP, 37 | syscall.SIGINT, 38 | syscall.SIGTERM, 39 | syscall.SIGQUIT) 40 | 41 | cfg, err := river.NewConfigWithFile(*configFile) 42 | if err != nil { 43 | println(errors.ErrorStack(err)) 44 | return 45 | } 46 | 47 | if len(*my_addr) > 0 { 48 | cfg.MyAddr = *my_addr 49 | } 50 | 51 | if len(*my_user) > 0 { 52 | cfg.MyUser = *my_user 53 | } 54 | 55 | if len(*my_pass) > 0 { 56 | cfg.MyPassword = *my_pass 57 | } 58 | 59 | if *server_id > 0 { 60 | cfg.ServerID = uint32(*server_id) 61 | } 62 | 63 | if len(*es_addr) > 0 { 64 | cfg.ESAddr = *es_addr 65 | } 66 | 67 | if len(*data_dir) > 0 { 68 | cfg.DataDir = *data_dir 69 | } 70 | 71 | if len(*flavor) > 0 { 72 | cfg.Flavor = *flavor 73 | } 74 | 75 | if len(*execution) > 0 { 76 | cfg.DumpExec = *execution 77 | } 78 | 79 | r, err := river.NewRiver(cfg) 80 | if err != nil { 81 | println(errors.ErrorStack(err)) 82 | return 83 | } 84 | 85 | done := make(chan struct{}, 1) 86 | go func() { 87 | r.Run() 88 | done <- struct{}{} 89 | }() 90 | 91 | select { 92 | case n := <-sc: 93 | log.Infof("receive signal %v, closing", n) 94 | case <-r.Ctx().Done(): 95 | log.Infof("context is done with %v, closing", r.Ctx().Err()) 96 | } 97 | 98 | r.Close() 99 | <-done 100 | } 101 | -------------------------------------------------------------------------------- /elastic/client.go: -------------------------------------------------------------------------------- 1 | package elastic 2 | 3 | import ( 4 | "bytes" 5 | 
"crypto/tls" 6 | "encoding/json" 7 | "fmt" 8 | "io/ioutil" 9 | "net/http" 10 | "net/url" 11 | 12 | "github.com/juju/errors" 13 | ) 14 | 15 | // Client is the client to communicate with ES. 16 | // Although there are many Elasticsearch clients with Go, I still want to implement one by myself. 17 | // Because we only need some very simple usages. 18 | type Client struct { 19 | Protocol string 20 | Addr string 21 | User string 22 | Password string 23 | 24 | c *http.Client 25 | } 26 | 27 | // ClientConfig is the configuration for the client. 28 | type ClientConfig struct { 29 | HTTPS bool 30 | Addr string 31 | User string 32 | Password string 33 | } 34 | 35 | // NewClient creates the Cient with configuration. 36 | func NewClient(conf *ClientConfig) *Client { 37 | c := new(Client) 38 | 39 | c.Addr = conf.Addr 40 | c.User = conf.User 41 | c.Password = conf.Password 42 | 43 | if conf.HTTPS { 44 | c.Protocol = "https" 45 | tr := &http.Transport{ 46 | TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, 47 | } 48 | c.c = &http.Client{Transport: tr} 49 | } else { 50 | c.Protocol = "http" 51 | c.c = &http.Client{} 52 | } 53 | 54 | return c 55 | } 56 | 57 | // ResponseItem is the ES item in the response. 58 | type ResponseItem struct { 59 | ID string `json:"_id"` 60 | Index string `json:"_index"` 61 | Type string `json:"_type"` 62 | Version int `json:"_version"` 63 | Found bool `json:"found"` 64 | Source map[string]interface{} `json:"_source"` 65 | } 66 | 67 | // Response is the ES response 68 | type Response struct { 69 | Code int 70 | ResponseItem 71 | } 72 | 73 | // See http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/bulk.html 74 | const ( 75 | ActionCreate = "create" 76 | ActionUpdate = "update" 77 | ActionDelete = "delete" 78 | ActionIndex = "index" 79 | ) 80 | 81 | // BulkRequest is used to send multi request in batch. 82 | type BulkRequest struct { 83 | Action string 84 | Index string 85 | Type string 86 | ID string 87 | Parent string 88 | Pipeline string 89 | 90 | Data map[string]interface{} 91 | } 92 | 93 | func (r *BulkRequest) bulk(buf *bytes.Buffer) error { 94 | meta := make(map[string]map[string]string) 95 | metaData := make(map[string]string) 96 | if len(r.Index) > 0 { 97 | metaData["_index"] = r.Index 98 | } 99 | if len(r.Type) > 0 { 100 | metaData["_type"] = r.Type 101 | } 102 | 103 | if len(r.ID) > 0 { 104 | metaData["_id"] = r.ID 105 | } 106 | if len(r.Parent) > 0 { 107 | metaData["_parent"] = r.Parent 108 | } 109 | if len(r.Pipeline) > 0 { 110 | metaData["pipeline"] = r.Pipeline 111 | } 112 | 113 | meta[r.Action] = metaData 114 | 115 | data, err := json.Marshal(meta) 116 | if err != nil { 117 | return errors.Trace(err) 118 | } 119 | 120 | buf.Write(data) 121 | buf.WriteByte('\n') 122 | 123 | switch r.Action { 124 | case ActionDelete: 125 | //nothing to do 126 | case ActionUpdate: 127 | doc := map[string]interface{}{ 128 | "doc": r.Data, 129 | } 130 | data, err = json.Marshal(doc) 131 | if err != nil { 132 | return errors.Trace(err) 133 | } 134 | 135 | buf.Write(data) 136 | buf.WriteByte('\n') 137 | default: 138 | //for create and index 139 | data, err = json.Marshal(r.Data) 140 | if err != nil { 141 | return errors.Trace(err) 142 | } 143 | 144 | buf.Write(data) 145 | buf.WriteByte('\n') 146 | } 147 | 148 | return nil 149 | } 150 | 151 | // BulkResponse is the response for the bulk request. 
152 | type BulkResponse struct {
153 | 	Code   int
154 | 	Took   int  `json:"took"`
155 | 	Errors bool `json:"errors"`
156 | 
157 | 	Items []map[string]*BulkResponseItem `json:"items"`
158 | }
159 | 
160 | // BulkResponseItem is the item in the bulk response.
161 | type BulkResponseItem struct {
162 | 	Index   string          `json:"_index"`
163 | 	Type    string          `json:"_type"`
164 | 	ID      string          `json:"_id"`
165 | 	Version int             `json:"_version"`
166 | 	Status  int             `json:"status"`
167 | 	Error   json.RawMessage `json:"error"`
168 | 	Found   bool            `json:"found"`
169 | }
170 | 
171 | // MappingResponse is the response for the mapping request.
172 | type MappingResponse struct {
173 | 	Code    int
174 | 	Mapping Mapping
175 | }
176 | 
177 | // Mapping represents an ES mapping.
178 | type Mapping map[string]struct {
179 | 	Mappings map[string]struct {
180 | 		Properties map[string]struct {
181 | 			Type   string      `json:"type"`
182 | 			Fields interface{} `json:"fields"`
183 | 		} `json:"properties"`
184 | 	} `json:"mappings"`
185 | }
186 | 
187 | // DoRequest sends a request with body to ES.
188 | func (c *Client) DoRequest(method string, url string, body *bytes.Buffer) (*http.Response, error) {
189 | 	req, err := http.NewRequest(method, url, body)
190 | 	if err != nil {
191 | 		return nil, errors.Trace(err)
192 | 	}
193 | 	req.Header.Add("Content-Type", "application/json")
194 | 	if len(c.User) > 0 && len(c.Password) > 0 {
195 | 		req.SetBasicAuth(c.User, c.Password)
196 | 	}
197 | 	resp, err := c.c.Do(req)
198 | 
199 | 	return resp, err
200 | }
201 | 
202 | // Do sends the request with body to ES.
203 | func (c *Client) Do(method string, url string, body map[string]interface{}) (*Response, error) {
204 | 	bodyData, err := json.Marshal(body)
205 | 	if err != nil {
206 | 		return nil, errors.Trace(err)
207 | 	}
208 | 
209 | 	buf := bytes.NewBuffer(bodyData)
210 | 	if body == nil {
211 | 		buf = bytes.NewBuffer(nil)
212 | 	}
213 | 
214 | 	resp, err := c.DoRequest(method, url, buf)
215 | 	if err != nil {
216 | 		return nil, errors.Trace(err)
217 | 	}
218 | 
219 | 	defer resp.Body.Close()
220 | 
221 | 	ret := new(Response)
222 | 	ret.Code = resp.StatusCode
223 | 
224 | 	data, err := ioutil.ReadAll(resp.Body)
225 | 	if err != nil {
226 | 		return nil, errors.Trace(err)
227 | 	}
228 | 
229 | 	if len(data) > 0 {
230 | 		err = json.Unmarshal(data, &ret.ResponseItem)
231 | 	}
232 | 
233 | 	return ret, errors.Trace(err)
234 | }
235 | 
236 | // DoBulk sends the bulk request to ES.
237 | func (c *Client) DoBulk(url string, items []*BulkRequest) (*BulkResponse, error) {
238 | 	var buf bytes.Buffer
239 | 
240 | 	for _, item := range items {
241 | 		if err := item.bulk(&buf); err != nil {
242 | 			return nil, errors.Trace(err)
243 | 		}
244 | 	}
245 | 
246 | 	resp, err := c.DoRequest("POST", url, &buf)
247 | 	if err != nil {
248 | 		return nil, errors.Trace(err)
249 | 	}
250 | 
251 | 	defer resp.Body.Close()
252 | 
253 | 	ret := new(BulkResponse)
254 | 	ret.Code = resp.StatusCode
255 | 
256 | 	data, err := ioutil.ReadAll(resp.Body)
257 | 	if err != nil {
258 | 		return nil, errors.Trace(err)
259 | 	}
260 | 
261 | 	if len(data) > 0 {
262 | 		err = json.Unmarshal(data, &ret)
263 | 	}
264 | 
265 | 	return ret, errors.Trace(err)
266 | }
267 | 
268 | // CreateMapping creates an ES mapping.
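// It first sends a HEAD request to check whether the index exists, creates the
// index with a PUT if it gets a 404, and then POSTs the mapping to
// /<index>/<docType>/_mapping.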
269 | func (c *Client) CreateMapping(index string, docType string, mapping map[string]interface{}) error { 270 | reqURL := fmt.Sprintf("%s://%s/%s", c.Protocol, c.Addr, 271 | url.QueryEscape(index)) 272 | 273 | r, err := c.Do("HEAD", reqURL, nil) 274 | if err != nil { 275 | return errors.Trace(err) 276 | } 277 | 278 | // if index doesn't exist, will get 404 not found, create index first 279 | if r.Code == http.StatusNotFound { 280 | _, err = c.Do("PUT", reqURL, nil) 281 | 282 | if err != nil { 283 | return errors.Trace(err) 284 | } 285 | } else if r.Code != http.StatusOK { 286 | return errors.Errorf("Error: %s, code: %d", http.StatusText(r.Code), r.Code) 287 | } 288 | 289 | reqURL = fmt.Sprintf("%s://%s/%s/%s/_mapping", c.Protocol, c.Addr, 290 | url.QueryEscape(index), 291 | url.QueryEscape(docType)) 292 | 293 | _, err = c.Do("POST", reqURL, mapping) 294 | return errors.Trace(err) 295 | } 296 | 297 | // GetMapping gets the mapping. 298 | func (c *Client) GetMapping(index string, docType string) (*MappingResponse, error) { 299 | reqURL := fmt.Sprintf("%s://%s/%s/%s/_mapping", c.Protocol, c.Addr, 300 | url.QueryEscape(index), 301 | url.QueryEscape(docType)) 302 | buf := bytes.NewBuffer(nil) 303 | resp, err := c.DoRequest("GET", reqURL, buf) 304 | 305 | if err != nil { 306 | return nil, errors.Trace(err) 307 | } 308 | 309 | defer resp.Body.Close() 310 | 311 | data, err := ioutil.ReadAll(resp.Body) 312 | if err != nil { 313 | return nil, errors.Trace(err) 314 | } 315 | 316 | ret := new(MappingResponse) 317 | err = json.Unmarshal(data, &ret.Mapping) 318 | if err != nil { 319 | return nil, errors.Trace(err) 320 | } 321 | 322 | ret.Code = resp.StatusCode 323 | return ret, errors.Trace(err) 324 | } 325 | 326 | // DeleteIndex deletes the index. 327 | func (c *Client) DeleteIndex(index string) error { 328 | reqURL := fmt.Sprintf("%s://%s/%s", c.Protocol, c.Addr, 329 | url.QueryEscape(index)) 330 | 331 | r, err := c.Do("DELETE", reqURL, nil) 332 | if err != nil { 333 | return errors.Trace(err) 334 | } 335 | 336 | if r.Code == http.StatusOK || r.Code == http.StatusNotFound { 337 | return nil 338 | } 339 | 340 | return errors.Errorf("Error: %s, code: %d", http.StatusText(r.Code), r.Code) 341 | } 342 | 343 | // Get gets the item by id. 344 | func (c *Client) Get(index string, docType string, id string) (*Response, error) { 345 | reqURL := fmt.Sprintf("%s://%s/%s/%s/%s", c.Protocol, c.Addr, 346 | url.QueryEscape(index), 347 | url.QueryEscape(docType), 348 | url.QueryEscape(id)) 349 | 350 | return c.Do("GET", reqURL, nil) 351 | } 352 | 353 | // Update creates or updates the data 354 | func (c *Client) Update(index string, docType string, id string, data map[string]interface{}) error { 355 | reqURL := fmt.Sprintf("%s://%s/%s/%s/%s", c.Protocol, c.Addr, 356 | url.QueryEscape(index), 357 | url.QueryEscape(docType), 358 | url.QueryEscape(id)) 359 | 360 | r, err := c.Do("PUT", reqURL, data) 361 | if err != nil { 362 | return errors.Trace(err) 363 | } 364 | 365 | if r.Code == http.StatusOK || r.Code == http.StatusCreated { 366 | return nil 367 | } 368 | 369 | return errors.Errorf("Error: %s, code: %d", http.StatusText(r.Code), r.Code) 370 | } 371 | 372 | // Exists checks whether id exists or not. 
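// It sends a HEAD request to the document URL and treats a 200 status code as
// "the document exists"; any other status is reported as not existing.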
373 | func (c *Client) Exists(index string, docType string, id string) (bool, error) { 374 | reqURL := fmt.Sprintf("%s://%s/%s/%s/%s", c.Protocol, c.Addr, 375 | url.QueryEscape(index), 376 | url.QueryEscape(docType), 377 | url.QueryEscape(id)) 378 | 379 | r, err := c.Do("HEAD", reqURL, nil) 380 | if err != nil { 381 | return false, err 382 | } 383 | 384 | return r.Code == http.StatusOK, nil 385 | } 386 | 387 | // Delete deletes the item by id. 388 | func (c *Client) Delete(index string, docType string, id string) error { 389 | reqURL := fmt.Sprintf("%s://%s/%s/%s/%s", c.Protocol, c.Addr, 390 | url.QueryEscape(index), 391 | url.QueryEscape(docType), 392 | url.QueryEscape(id)) 393 | 394 | r, err := c.Do("DELETE", reqURL, nil) 395 | if err != nil { 396 | return errors.Trace(err) 397 | } 398 | 399 | if r.Code == http.StatusOK || r.Code == http.StatusNotFound { 400 | return nil 401 | } 402 | 403 | return errors.Errorf("Error: %s, code: %d", http.StatusText(r.Code), r.Code) 404 | } 405 | 406 | // Bulk sends the bulk request. 407 | // only support parent in 'Bulk' related apis 408 | func (c *Client) Bulk(items []*BulkRequest) (*BulkResponse, error) { 409 | reqURL := fmt.Sprintf("%s://%s/_bulk", c.Protocol, c.Addr) 410 | 411 | return c.DoBulk(reqURL, items) 412 | } 413 | 414 | // IndexBulk sends the bulk request for index. 415 | func (c *Client) IndexBulk(index string, items []*BulkRequest) (*BulkResponse, error) { 416 | reqURL := fmt.Sprintf("%s://%s/%s/_bulk", c.Protocol, c.Addr, 417 | url.QueryEscape(index)) 418 | 419 | return c.DoBulk(reqURL, items) 420 | } 421 | 422 | // IndexTypeBulk sends the bulk request for index and doc type. 423 | func (c *Client) IndexTypeBulk(index string, docType string, items []*BulkRequest) (*BulkResponse, error) { 424 | reqURL := fmt.Sprintf("%s://%s/%s/%s/_bulk", c.Protocol, c.Addr, 425 | url.QueryEscape(index), 426 | url.QueryEscape(docType)) 427 | 428 | return c.DoBulk(reqURL, items) 429 | } 430 | -------------------------------------------------------------------------------- /elastic/client_test.go: -------------------------------------------------------------------------------- 1 | package elastic 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "testing" 7 | 8 | . 
"github.com/pingcap/check" 9 | ) 10 | 11 | var host = flag.String("host", "127.0.0.1", "Elasticsearch host") 12 | var port = flag.Int("port", 9200, "Elasticsearch port") 13 | 14 | func Test(t *testing.T) { 15 | TestingT(t) 16 | } 17 | 18 | type elasticTestSuite struct { 19 | c *Client 20 | } 21 | 22 | var _ = Suite(&elasticTestSuite{}) 23 | 24 | func (s *elasticTestSuite) SetUpSuite(c *C) { 25 | cfg := new(ClientConfig) 26 | cfg.Addr = fmt.Sprintf("%s:%d", *host, *port) 27 | cfg.User = "" 28 | cfg.Password = "" 29 | s.c = NewClient(cfg) 30 | } 31 | 32 | func (s *elasticTestSuite) TearDownSuite(c *C) { 33 | 34 | } 35 | 36 | func makeTestData(arg1 string, arg2 string) map[string]interface{} { 37 | m := make(map[string]interface{}) 38 | m["name"] = arg1 39 | m["content"] = arg2 40 | 41 | return m 42 | } 43 | 44 | func (s *elasticTestSuite) TestSimple(c *C) { 45 | index := "dummy" 46 | docType := "blog" 47 | 48 | //key1 := "name" 49 | //key2 := "content" 50 | 51 | err := s.c.Update(index, docType, "1", makeTestData("abc", "hello world")) 52 | c.Assert(err, IsNil) 53 | 54 | exists, err := s.c.Exists(index, docType, "1") 55 | c.Assert(err, IsNil) 56 | c.Assert(exists, Equals, true) 57 | 58 | r, err := s.c.Get(index, docType, "1") 59 | c.Assert(err, IsNil) 60 | c.Assert(r.Code, Equals, 200) 61 | c.Assert(r.ID, Equals, "1") 62 | 63 | err = s.c.Delete(index, docType, "1") 64 | c.Assert(err, IsNil) 65 | 66 | exists, err = s.c.Exists(index, docType, "1") 67 | c.Assert(err, IsNil) 68 | c.Assert(exists, Equals, false) 69 | 70 | items := make([]*BulkRequest, 10) 71 | 72 | for i := 0; i < 10; i++ { 73 | id := fmt.Sprintf("%d", i) 74 | req := new(BulkRequest) 75 | req.Action = ActionIndex 76 | req.ID = id 77 | req.Data = makeTestData(fmt.Sprintf("abc %d", i), fmt.Sprintf("hello world %d", i)) 78 | items[i] = req 79 | } 80 | 81 | resp, err := s.c.IndexTypeBulk(index, docType, items) 82 | c.Assert(err, IsNil) 83 | c.Assert(resp.Code, Equals, 200) 84 | c.Assert(resp.Errors, Equals, false) 85 | 86 | for i := 0; i < 10; i++ { 87 | id := fmt.Sprintf("%d", i) 88 | req := new(BulkRequest) 89 | req.Action = ActionDelete 90 | req.ID = id 91 | items[i] = req 92 | } 93 | 94 | resp, err = s.c.IndexTypeBulk(index, docType, items) 95 | c.Assert(err, IsNil) 96 | c.Assert(resp.Code, Equals, 200) 97 | c.Assert(resp.Errors, Equals, false) 98 | } 99 | 100 | // this requires a parent setting in _mapping 101 | func (s *elasticTestSuite) TestParent(c *C) { 102 | index := "dummy" 103 | docType := "comment" 104 | ParentType := "parent" 105 | 106 | mapping := map[string]interface{}{ 107 | docType: map[string]interface{}{ 108 | "_parent": map[string]string{"type": ParentType}, 109 | }, 110 | } 111 | err := s.c.CreateMapping(index, docType, mapping) 112 | c.Assert(err, IsNil) 113 | 114 | items := make([]*BulkRequest, 10) 115 | 116 | for i := 0; i < 10; i++ { 117 | id := fmt.Sprintf("%d", i) 118 | req := new(BulkRequest) 119 | req.Action = ActionIndex 120 | req.ID = id 121 | req.Data = makeTestData(fmt.Sprintf("abc %d", i), fmt.Sprintf("hello world %d", i)) 122 | req.Parent = "1" 123 | items[i] = req 124 | } 125 | 126 | resp, err := s.c.IndexTypeBulk(index, docType, items) 127 | c.Assert(err, IsNil) 128 | c.Assert(resp.Code, Equals, 200) 129 | c.Assert(resp.Errors, Equals, false) 130 | 131 | for i := 0; i < 10; i++ { 132 | id := fmt.Sprintf("%d", i) 133 | req := new(BulkRequest) 134 | req.Index = index 135 | req.Type = docType 136 | req.Action = ActionDelete 137 | req.ID = id 138 | req.Parent = "1" 139 | items[i] = req 140 | } 141 | 
	resp, err = s.c.Bulk(items)
142 | 	c.Assert(err, IsNil)
143 | 	c.Assert(resp.Code, Equals, 200)
144 | 	c.Assert(resp.Errors, Equals, false)
145 | }

--------------------------------------------------------------------------------
/etc/river.toml:
--------------------------------------------------------------------------------
  1 | # MySQL address, user and password
  2 | # user must have replication privilege in MySQL.
  3 | my_addr = "127.0.0.1:3306"
  4 | my_user = "root"
  5 | my_pass = ""
  6 | my_charset = "utf8"
  7 | 
  8 | # Set true when Elasticsearch uses HTTPS
  9 | #es_https = false
 10 | # Elasticsearch address
 11 | es_addr = "127.0.0.1:9200"
 12 | # Elasticsearch user and password, maybe set by shield, nginx, or x-pack
 13 | es_user = ""
 14 | es_pass = ""
 15 | 
 16 | # Path to store data, like master.info. We need this to support resuming
 17 | # sync from a breakpoint; if not set or empty, the position will not be saved.
 18 | # TODO: support other storage, like etcd.
 19 | data_dir = "./var"
 20 | 
 21 | # Inner HTTP status address
 22 | stat_addr = "127.0.0.1:12800"
 23 | 
 24 | # pseudo server id like a slave
 25 | server_id = 1001
 26 | 
 27 | # mysql or mariadb
 28 | flavor = "mysql"
 29 | 
 30 | # mysqldump execution path
 31 | # if not set or empty, ignore mysqldump.
 32 | mysqldump = "mysqldump"
 33 | 
 34 | # if we have no privilege to use mysqldump with --master-data,
 35 | # we must skip it.
 36 | #skip_master_data = false
 37 | 
 38 | # minimal number of items to be inserted in one bulk
 39 | bulk_size = 128
 40 | 
 41 | # force flush the pending requests if we don't have enough items >= bulk_size
 42 | flush_bulk_time = "200ms"
 43 | 
 44 | # Ignore table without primary key
 45 | skip_no_pk_table = false
 46 | 
 47 | # MySQL data source
 48 | [[source]]
 49 | schema = "test"
 50 | 
 51 | # Only below tables will be synced into Elasticsearch.
 52 | # "t_[0-9]{4}" is a wildcard table format, you can use it if you have many sub tables, like table_0000 - table_1023
 53 | # I don't think it is necessary to sync all tables in a database.
 54 | tables = ["t", "t_[0-9]{4}", "tfield", "tfilter"]
 55 | 
 56 | # Below is for special rule mapping
 57 | 
 58 | # Very simple example
 59 | #
 60 | # desc t;
 61 | # +-------+--------------+------+-----+---------+-------+
 62 | # | Field | Type         | Null | Key | Default | Extra |
 63 | # +-------+--------------+------+-----+---------+-------+
 64 | # | id    | int(11)      | NO   | PRI | NULL    |       |
 65 | # | name  | varchar(256) | YES  |     | NULL    |       |
 66 | # +-------+--------------+------+-----+---------+-------+
 67 | #
 68 | # The table `t` will be synced to ES index `test` and type `t`.
 69 | [[rule]]
 70 | schema = "test"
 71 | table = "t"
 72 | index = "test"
 73 | type = "t"
 74 | 
 75 | # Wildcard table rule, the wildcard table must be in source tables
 76 | # All tables which match the wildcard format will be synced to ES index `test` and type `t`.
 77 | # In this example, all tables must have the same schema as the above table `t`;
 78 | [[rule]]
 79 | schema = "test"
 80 | table = "t_[0-9]{4}"
 81 | index = "test"
 82 | type = "t"
 83 | 
 84 | # Simple field rule
 85 | #
 86 | # desc tfield;
 87 | # +----------+--------------+------+-----+---------+-------+
 88 | # | Field    | Type         | Null | Key | Default | Extra |
 89 | # +----------+--------------+------+-----+---------+-------+
 90 | # | id       | int(11)      | NO   | PRI | NULL    |       |
 91 | # | tags     | varchar(256) | YES  |     | NULL    |       |
 92 | # | keywords | varchar(256) | YES  |     | NULL    |       |
 93 | # +----------+--------------+------+-----+---------+-------+
 94 | #
 95 | [[rule]]
 96 | schema = "test"
 97 | table = "tfield"
 98 | index = "test"
 99 | type = "tfield"
100 | 
101 | [rule.field]
102 | # Map column `id` to ES field `es_id`
103 | id="es_id"
104 | # Map column `tags` to ES field `es_tags` with array type
105 | tags="es_tags,list"
106 | # Map column `keywords` to ES with array type
107 | keywords=",list"
108 | 
109 | # Filter rule
110 | #
111 | # desc tfilter;
112 | # +-------+--------------+------+-----+---------+-------+
113 | # | Field | Type         | Null | Key | Default | Extra |
114 | # +-------+--------------+------+-----+---------+-------+
115 | # | id    | int(11)      | NO   | PRI | NULL    |       |
116 | # | c1    | int(11)      | YES  |     | 0       |       |
117 | # | c2    | int(11)      | YES  |     | 0       |       |
118 | # | name  | varchar(256) | YES  |     | NULL    |       |
119 | # +-------+--------------+------+-----+---------+-------+
120 | #
121 | [[rule]]
122 | schema = "test"
123 | table = "tfilter"
124 | index = "test"
125 | type = "tfilter"
126 | 
127 | # Only sync following columns
128 | filter = ["id", "name"]
129 | 
130 | # id rule
131 | #
132 | # desc tid_[0-9]{4};
133 | # +----------+--------------+------+-----+---------+-------+
134 | # | Field    | Type         | Null | Key | Default | Extra |
135 | # +----------+--------------+------+-----+---------+-------+
136 | # | id       | int(11)      | NO   | PRI | NULL    |       |
137 | # | tag      | varchar(256) | YES  |     | NULL    |       |
138 | # | desc     | varchar(256) | YES  |     | NULL    |       |
139 | # +----------+--------------+------+-----+---------+-------+
140 | #
141 | [[rule]]
142 | schema = "test"
143 | table = "tid_[0-9]{4}"
144 | index = "test"
145 | type = "t"
146 | # The ES doc's id will be `id`:`tag`
147 | # It is useful for merging multiple tables into one type while these tables have the same PK
148 | id = ["id", "tag"]

--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
 1 | module github.com/siddontang/go-mysql-elasticsearch
 2 | 
 3 | require (
 4 | 	github.com/BurntSushi/toml v0.3.1
 5 | 	github.com/juju/errors v0.0.0-20190207033735-e65537c515d7
 6 | 	github.com/siddontang/go v0.0.0-20180604090527-bdc77568d726
 7 | 	github.com/siddontang/go-log v0.0.0-20180807004314-8d05993dda07
 8 | 	github.com/siddontang/go-mysql v0.0.0-20190303113352-670f74e8daf5
 9 | )

--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
 1 | github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
 2 | github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
 3 | github.com/juju/errors v0.0.0-20190207033735-e65537c515d7 h1:dMIPRDg6gi7CUp0Kj2+HxqJ5kTr1iAdzsXYIrLCNSmU=
 4 | github.com/juju/errors v0.0.0-20190207033735-e65537c515d7/go.mod h1:W54LbzXuIE0boCoNJfwqpmkKJ1O4TCTZMetAt6jGk7Q=
 5 | github.com/pingcap/errors v0.11.0
h1:DCJQB8jrHbQ1VVlMFIrbj2ApScNNotVmkSNplu2yUt4= 6 | github.com/pingcap/errors v0.11.0/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= 7 | github.com/satori/go.uuid v1.2.0 h1:0uYX9dsZ2yD7q2RtLRtPSdGDWzjeM3TbMJP9utgA0ww= 8 | github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= 9 | github.com/shopspring/decimal v0.0.0-20180709203117-cd690d0c9e24 h1:pntxY8Ary0t43dCZ5dqY4YTJCObLY1kIXl0uzMv+7DE= 10 | github.com/shopspring/decimal v0.0.0-20180709203117-cd690d0c9e24/go.mod h1:M+9NzErvs504Cn4c5DxATwIqPbtswREoFCre64PpcG4= 11 | github.com/siddontang/go v0.0.0-20180604090527-bdc77568d726 h1:xT+JlYxNGqyT+XcU8iUrN18JYed2TvG9yN5ULG2jATM= 12 | github.com/siddontang/go v0.0.0-20180604090527-bdc77568d726/go.mod h1:3yhqj7WBBfRhbBlzyOC3gUxftwsU0u8gqevxwIHQpMw= 13 | github.com/siddontang/go-log v0.0.0-20180807004314-8d05993dda07 h1:oI+RNwuC9jF2g2lP0u0cVEEZrc/AYBCuFdvwrLWM/6Q= 14 | github.com/siddontang/go-log v0.0.0-20180807004314-8d05993dda07/go.mod h1:yFdBgwXP24JziuRl2NMUahT7nGLNOKi1SIiFxMttVD4= 15 | github.com/siddontang/go-mysql v0.0.0-20190123011128-88e9cd7f6643 h1:yzg8+Cip1iDhy6GGS1zKflqOybgRc4xp82eYwQrP+DU= 16 | github.com/siddontang/go-mysql v0.0.0-20190123011128-88e9cd7f6643/go.mod h1:/b8ZcWjAShCcHp2dWpjb1vTlNyiG03UeHEQr2jteOpI= 17 | github.com/siddontang/go-mysql v0.0.0-20190303113352-670f74e8daf5 h1:5Nr7spTeY+ziXzqk/9p+GLnvH4rIjp9BX+aRaYDbR44= 18 | github.com/siddontang/go-mysql v0.0.0-20190303113352-670f74e8daf5/go.mod h1:/b8ZcWjAShCcHp2dWpjb1vTlNyiG03UeHEQr2jteOpI= 19 | -------------------------------------------------------------------------------- /river/config.go: -------------------------------------------------------------------------------- 1 | package river 2 | 3 | import ( 4 | "io/ioutil" 5 | "time" 6 | 7 | "github.com/BurntSushi/toml" 8 | "github.com/juju/errors" 9 | ) 10 | 11 | // SourceConfig is the configs for source 12 | type SourceConfig struct { 13 | Schema string `toml:"schema"` 14 | Tables []string `toml:"tables"` 15 | } 16 | 17 | // Config is the configuration 18 | type Config struct { 19 | MyAddr string `toml:"my_addr"` 20 | MyUser string `toml:"my_user"` 21 | MyPassword string `toml:"my_pass"` 22 | MyCharset string `toml:"my_charset"` 23 | 24 | ESHttps bool `toml:"es_https"` 25 | ESAddr string `toml:"es_addr"` 26 | ESUser string `toml:"es_user"` 27 | ESPassword string `toml:"es_pass"` 28 | 29 | StatAddr string `toml:"stat_addr"` 30 | 31 | ServerID uint32 `toml:"server_id"` 32 | Flavor string `toml:"flavor"` 33 | DataDir string `toml:"data_dir"` 34 | 35 | DumpExec string `toml:"mysqldump"` 36 | SkipMasterData bool `toml:"skip_master_data"` 37 | 38 | Sources []SourceConfig `toml:"source"` 39 | 40 | Rules []*Rule `toml:"rule"` 41 | 42 | BulkSize int `toml:"bulk_size"` 43 | 44 | FlushBulkTime TomlDuration `toml:"flush_bulk_time"` 45 | 46 | SkipNoPkTable bool `toml:"skip_no_pk_table"` 47 | } 48 | 49 | // NewConfigWithFile creates a Config from file. 50 | func NewConfigWithFile(name string) (*Config, error) { 51 | data, err := ioutil.ReadFile(name) 52 | if err != nil { 53 | return nil, errors.Trace(err) 54 | } 55 | 56 | return NewConfig(string(data)) 57 | } 58 | 59 | // NewConfig creates a Config from data. 60 | func NewConfig(data string) (*Config, error) { 61 | var c Config 62 | 63 | _, err := toml.Decode(data, &c) 64 | if err != nil { 65 | return nil, errors.Trace(err) 66 | } 67 | 68 | return &c, nil 69 | } 70 | 71 | // TomlDuration supports time codec for TOML format. 
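// For example, a config value such as flush_bulk_time = "200ms" in river.toml
// is decoded into a time.Duration by UnmarshalText below via time.ParseDuration.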
 72 | type TomlDuration struct {
 73 | 	time.Duration
 74 | }
 75 | 
 76 | // UnmarshalText implements TOML UnmarshalText
 77 | func (d *TomlDuration) UnmarshalText(text []byte) error {
 78 | 	var err error
 79 | 	d.Duration, err = time.ParseDuration(string(text))
 80 | 	return err
 81 | }

--------------------------------------------------------------------------------
/river/master.go:
--------------------------------------------------------------------------------
  1 | package river
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"os"
  6 | 	"path"
  7 | 	"sync"
  8 | 	"time"
  9 | 
 10 | 	"github.com/BurntSushi/toml"
 11 | 	"github.com/juju/errors"
 12 | 	"github.com/siddontang/go-log/log"
 13 | 	"github.com/siddontang/go-mysql/mysql"
 14 | 	"github.com/siddontang/go/ioutil2"
 15 | )
 16 | 
 17 | type masterInfo struct {
 18 | 	sync.RWMutex
 19 | 
 20 | 	Name string `toml:"bin_name"`
 21 | 	Pos  uint32 `toml:"bin_pos"`
 22 | 
 23 | 	filePath     string
 24 | 	lastSaveTime time.Time
 25 | }
 26 | 
 27 | func loadMasterInfo(dataDir string) (*masterInfo, error) {
 28 | 	var m masterInfo
 29 | 
 30 | 	if len(dataDir) == 0 {
 31 | 		return &m, nil
 32 | 	}
 33 | 
 34 | 	m.filePath = path.Join(dataDir, "master.info")
 35 | 	m.lastSaveTime = time.Now()
 36 | 
 37 | 	if err := os.MkdirAll(dataDir, 0755); err != nil {
 38 | 		return nil, errors.Trace(err)
 39 | 	}
 40 | 
 41 | 	f, err := os.Open(m.filePath)
 42 | 	if err != nil && !os.IsNotExist(errors.Cause(err)) {
 43 | 		return nil, errors.Trace(err)
 44 | 	} else if os.IsNotExist(errors.Cause(err)) {
 45 | 		return &m, nil
 46 | 	}
 47 | 	defer f.Close()
 48 | 
 49 | 	_, err = toml.DecodeReader(f, &m)
 50 | 	return &m, errors.Trace(err)
 51 | }
 52 | 
 53 | func (m *masterInfo) Save(pos mysql.Position) error {
 54 | 	log.Infof("save position %s", pos)
 55 | 
 56 | 	m.Lock()
 57 | 	defer m.Unlock()
 58 | 
 59 | 	m.Name = pos.Name
 60 | 	m.Pos = pos.Pos
 61 | 
 62 | 	if len(m.filePath) == 0 {
 63 | 		return nil
 64 | 	}
 65 | 
 66 | 	n := time.Now()
 67 | 	if n.Sub(m.lastSaveTime) < time.Second {
 68 | 		return nil
 69 | 	}
 70 | 
 71 | 	m.lastSaveTime = n
 72 | 	var buf bytes.Buffer
 73 | 	e := toml.NewEncoder(&buf)
 74 | 
 75 | 	e.Encode(m)
 76 | 
 77 | 	var err error
 78 | 	if err = ioutil2.WriteFileAtomic(m.filePath, buf.Bytes(), 0644); err != nil {
 79 | 		log.Errorf("canal save master info to file %s err %v", m.filePath, err)
 80 | 	}
 81 | 
 82 | 	return errors.Trace(err)
 83 | }
 84 | 
 85 | func (m *masterInfo) Position() mysql.Position {
 86 | 	m.RLock()
 87 | 	defer m.RUnlock()
 88 | 
 89 | 	return mysql.Position{
 90 | 		Name: m.Name,
 91 | 		Pos:  m.Pos,
 92 | 	}
 93 | }
 94 | 
 95 | func (m *masterInfo) Close() error {
 96 | 	pos := m.Position()
 97 | 
 98 | 	return m.Save(pos)
 99 | }

--------------------------------------------------------------------------------
/river/river.go:
--------------------------------------------------------------------------------
  1 | package river
  2 | 
  3 | import (
  4 | 	"context"
  5 | 	"fmt"
  6 | 	"regexp"
  7 | 	"strings"
  8 | 	"sync"
  9 | 
 10 | 	"github.com/juju/errors"
 11 | 	"github.com/siddontang/go-log/log"
 12 | 	"github.com/siddontang/go-mysql-elasticsearch/elastic"
 13 | 	"github.com/siddontang/go-mysql/canal"
 14 | )
 15 | 
 16 | // ErrRuleNotExist is the error if the rule is not defined.
 17 | var ErrRuleNotExist = errors.New("rule does not exist")
 18 | 
 19 | // River is a pluggable service within Elasticsearch pulling data then indexing it into Elasticsearch.
 20 | // We use this definition here too, although it may not run within Elasticsearch.
 21 | // Maybe later I can implement an actual river in Elasticsearch, but I must learn java.
:-) 22 | type River struct { 23 | c *Config 24 | 25 | canal *canal.Canal 26 | 27 | rules map[string]*Rule 28 | 29 | ctx context.Context 30 | cancel context.CancelFunc 31 | 32 | wg sync.WaitGroup 33 | 34 | es *elastic.Client 35 | 36 | st *stat 37 | 38 | master *masterInfo 39 | 40 | syncCh chan interface{} 41 | } 42 | 43 | // NewRiver creates the River from config 44 | func NewRiver(c *Config) (*River, error) { 45 | r := new(River) 46 | 47 | r.c = c 48 | r.rules = make(map[string]*Rule) 49 | r.syncCh = make(chan interface{}, 4096) 50 | r.ctx, r.cancel = context.WithCancel(context.Background()) 51 | 52 | var err error 53 | if r.master, err = loadMasterInfo(c.DataDir); err != nil { 54 | return nil, errors.Trace(err) 55 | } 56 | 57 | if err = r.newCanal(); err != nil { 58 | return nil, errors.Trace(err) 59 | } 60 | 61 | if err = r.prepareRule(); err != nil { 62 | return nil, errors.Trace(err) 63 | } 64 | 65 | if err = r.prepareCanal(); err != nil { 66 | return nil, errors.Trace(err) 67 | } 68 | 69 | // We must use binlog full row image 70 | if err = r.canal.CheckBinlogRowImage("FULL"); err != nil { 71 | return nil, errors.Trace(err) 72 | } 73 | 74 | cfg := new(elastic.ClientConfig) 75 | cfg.Addr = r.c.ESAddr 76 | cfg.User = r.c.ESUser 77 | cfg.Password = r.c.ESPassword 78 | cfg.HTTPS = r.c.ESHttps 79 | r.es = elastic.NewClient(cfg) 80 | 81 | r.st = &stat{r: r} 82 | go r.st.Run(r.c.StatAddr) 83 | 84 | return r, nil 85 | } 86 | 87 | func (r *River) newCanal() error { 88 | cfg := canal.NewDefaultConfig() 89 | cfg.Addr = r.c.MyAddr 90 | cfg.User = r.c.MyUser 91 | cfg.Password = r.c.MyPassword 92 | cfg.Charset = r.c.MyCharset 93 | cfg.Flavor = r.c.Flavor 94 | 95 | cfg.ServerID = r.c.ServerID 96 | cfg.Dump.ExecutionPath = r.c.DumpExec 97 | cfg.Dump.DiscardErr = false 98 | cfg.Dump.SkipMasterData = r.c.SkipMasterData 99 | 100 | for _, s := range r.c.Sources { 101 | for _, t := range s.Tables { 102 | cfg.IncludeTableRegex = append(cfg.IncludeTableRegex, s.Schema+"\\."+t) 103 | } 104 | } 105 | 106 | var err error 107 | r.canal, err = canal.NewCanal(cfg) 108 | return errors.Trace(err) 109 | } 110 | 111 | func (r *River) prepareCanal() error { 112 | var db string 113 | dbs := map[string]struct{}{} 114 | tables := make([]string, 0, len(r.rules)) 115 | for _, rule := range r.rules { 116 | db = rule.Schema 117 | dbs[rule.Schema] = struct{}{} 118 | tables = append(tables, rule.Table) 119 | } 120 | 121 | if len(dbs) == 1 { 122 | // one db, we can shrink using table 123 | r.canal.AddDumpTables(db, tables...) 124 | } else { 125 | // many dbs, can only assign databases to dump 126 | keys := make([]string, 0, len(dbs)) 127 | for key := range dbs { 128 | keys = append(keys, key) 129 | } 130 | 131 | r.canal.AddDumpDatabases(keys...) 
132 | } 133 | 134 | r.canal.SetEventHandler(&eventHandler{r}) 135 | 136 | return nil 137 | } 138 | 139 | func (r *River) newRule(schema, table string) error { 140 | key := ruleKey(schema, table) 141 | 142 | if _, ok := r.rules[key]; ok { 143 | return errors.Errorf("duplicate source %s, %s defined in config", schema, table) 144 | } 145 | 146 | r.rules[key] = newDefaultRule(schema, table) 147 | return nil 148 | } 149 | 150 | func (r *River) updateRule(schema, table string) error { 151 | rule, ok := r.rules[ruleKey(schema, table)] 152 | if !ok { 153 | return ErrRuleNotExist 154 | } 155 | 156 | tableInfo, err := r.canal.GetTable(schema, table) 157 | if err != nil { 158 | return errors.Trace(err) 159 | } 160 | 161 | rule.TableInfo = tableInfo 162 | 163 | return nil 164 | } 165 | 166 | func (r *River) parseSource() (map[string][]string, error) { 167 | wildTables := make(map[string][]string, len(r.c.Sources)) 168 | 169 | // first, check sources 170 | for _, s := range r.c.Sources { 171 | if !isValidTables(s.Tables) { 172 | return nil, errors.Errorf("wildcard * is not allowed for multiple tables") 173 | } 174 | 175 | for _, table := range s.Tables { 176 | if len(s.Schema) == 0 { 177 | return nil, errors.Errorf("empty schema not allowed for source") 178 | } 179 | 180 | if regexp.QuoteMeta(table) != table { 181 | if _, ok := wildTables[ruleKey(s.Schema, table)]; ok { 182 | return nil, errors.Errorf("duplicate wildcard table defined for %s.%s", s.Schema, table) 183 | } 184 | 185 | tables := []string{} 186 | 187 | sql := fmt.Sprintf(`SELECT table_name FROM information_schema.tables WHERE 188 | table_name RLIKE "%s" AND table_schema = "%s";`, buildTable(table), s.Schema) 189 | 190 | res, err := r.canal.Execute(sql) 191 | if err != nil { 192 | return nil, errors.Trace(err) 193 | } 194 | 195 | for i := 0; i < res.Resultset.RowNumber(); i++ { 196 | f, _ := res.GetString(i, 0) 197 | err := r.newRule(s.Schema, f) 198 | if err != nil { 199 | return nil, errors.Trace(err) 200 | } 201 | 202 | tables = append(tables, f) 203 | } 204 | 205 | wildTables[ruleKey(s.Schema, table)] = tables 206 | } else { 207 | err := r.newRule(s.Schema, table) 208 | if err != nil { 209 | return nil, errors.Trace(err) 210 | } 211 | } 212 | } 213 | } 214 | 215 | if len(r.rules) == 0 { 216 | return nil, errors.Errorf("no source data defined") 217 | } 218 | 219 | return wildTables, nil 220 | } 221 | 222 | func (r *River) prepareRule() error { 223 | wildtables, err := r.parseSource() 224 | if err != nil { 225 | return errors.Trace(err) 226 | } 227 | 228 | if r.c.Rules != nil { 229 | // then, set custom mapping rule 230 | for _, rule := range r.c.Rules { 231 | if len(rule.Schema) == 0 { 232 | return errors.Errorf("empty schema not allowed for rule") 233 | } 234 | 235 | if regexp.QuoteMeta(rule.Table) != rule.Table { 236 | //wildcard table 237 | tables, ok := wildtables[ruleKey(rule.Schema, rule.Table)] 238 | if !ok { 239 | return errors.Errorf("wildcard table for %s.%s is not defined in source", rule.Schema, rule.Table) 240 | } 241 | 242 | if len(rule.Index) == 0 { 243 | return errors.Errorf("wildcard table rule %s.%s must have a index, can not empty", rule.Schema, rule.Table) 244 | } 245 | 246 | rule.prepare() 247 | 248 | for _, table := range tables { 249 | rr := r.rules[ruleKey(rule.Schema, table)] 250 | rr.Index = rule.Index 251 | rr.Type = rule.Type 252 | rr.Parent = rule.Parent 253 | rr.ID = rule.ID 254 | rr.FieldMapping = rule.FieldMapping 255 | } 256 | } else { 257 | key := ruleKey(rule.Schema, rule.Table) 258 | if _, ok := 
r.rules[key]; !ok { 259 | return errors.Errorf("rule %s, %s not defined in source", rule.Schema, rule.Table) 260 | } 261 | rule.prepare() 262 | r.rules[key] = rule 263 | } 264 | } 265 | } 266 | 267 | rules := make(map[string]*Rule) 268 | for key, rule := range r.rules { 269 | if rule.TableInfo, err = r.canal.GetTable(rule.Schema, rule.Table); err != nil { 270 | return errors.Trace(err) 271 | } 272 | 273 | if len(rule.TableInfo.PKColumns) == 0 { 274 | if !r.c.SkipNoPkTable { 275 | return errors.Errorf("%s.%s must have a PK for a column", rule.Schema, rule.Table) 276 | } 277 | 278 | log.Errorf("ignored table without a primary key: %s\n", rule.TableInfo.Name) 279 | } else { 280 | rules[key] = rule 281 | } 282 | } 283 | r.rules = rules 284 | 285 | return nil 286 | } 287 | 288 | func ruleKey(schema string, table string) string { 289 | return strings.ToLower(fmt.Sprintf("%s:%s", schema, table)) 290 | } 291 | 292 | // Run syncs the data from MySQL and inserts to ES. 293 | func (r *River) Run() error { 294 | r.wg.Add(1) 295 | go r.syncLoop() 296 | 297 | pos := r.master.Position() 298 | if err := r.canal.RunFrom(pos); err != nil { 299 | log.Errorf("start canal err %v", err) 300 | return errors.Trace(err) 301 | } 302 | 303 | return nil 304 | } 305 | 306 | // Ctx returns the internal context for outside use. 307 | func (r *River) Ctx() context.Context { 308 | return r.ctx 309 | } 310 | 311 | // Close closes the River 312 | func (r *River) Close() { 313 | log.Infof("closing river") 314 | 315 | r.cancel() 316 | 317 | r.canal.Close() 318 | 319 | r.master.Close() 320 | 321 | r.wg.Wait() 322 | } 323 | 324 | func isValidTables(tables []string) bool { 325 | if len(tables) > 1 { 326 | for _, table := range tables { 327 | if table == "*" { 328 | return false 329 | } 330 | } 331 | } 332 | return true 333 | } 334 | 335 | func buildTable(table string) string { 336 | if table == "*" { 337 | return "." + table 338 | } 339 | return table 340 | } 341 | -------------------------------------------------------------------------------- /river/river_extra_test.go: -------------------------------------------------------------------------------- 1 | package river 2 | 3 | import ( 4 | "fmt" 5 | "net/http" 6 | "net/url" 7 | "os" 8 | "time" 9 | 10 | . 
"github.com/pingcap/check" 11 | ) 12 | 13 | func (s *riverTestSuite) setupExtra(c *C) (r *River) { 14 | var err error 15 | 16 | schema := ` 17 | CREATE TABLE IF NOT EXISTS %s ( 18 | id INT, 19 | title VARCHAR(256), 20 | pid INT, 21 | PRIMARY KEY(id)) ENGINE=INNODB; 22 | ` 23 | 24 | s.testExecute(c, "DROP TABLE IF EXISTS test_river_extra") 25 | s.testExecute(c, fmt.Sprintf(schema, "test_river_extra")) 26 | 27 | schema = ` 28 | CREATE TABLE IF NOT EXISTS %s ( 29 | id INT, 30 | PRIMARY KEY(id)) ENGINE=INNODB; 31 | ` 32 | 33 | s.testExecute(c, "DROP TABLE IF EXISTS test_river_parent") 34 | s.testExecute(c, fmt.Sprintf(schema, "test_river_parent")) 35 | 36 | cfg := new(Config) 37 | cfg.MyAddr = *myAddr 38 | cfg.MyUser = "root" 39 | cfg.MyPassword = "" 40 | cfg.ESAddr = *esAddr 41 | 42 | cfg.ServerID = 1001 43 | cfg.Flavor = "mysql" 44 | 45 | cfg.DataDir = "/tmp/test_river_extra" 46 | cfg.DumpExec = "mysqldump" 47 | 48 | cfg.StatAddr = "127.0.0.1:12800" 49 | cfg.BulkSize = 1 50 | cfg.FlushBulkTime = TomlDuration{3 * time.Millisecond} 51 | 52 | os.RemoveAll(cfg.DataDir) 53 | 54 | cfg.Sources = []SourceConfig{SourceConfig{Schema: "test", Tables: []string{"test_river_extra", "test_river_parent"}}} 55 | 56 | cfg.Rules = []*Rule{ 57 | &Rule{Schema: "test", 58 | Table: "test_river_parent", 59 | Index: "river", 60 | Type: "river_extra_parent"}, 61 | &Rule{Schema: "test", 62 | Table: "test_river_extra", 63 | Index: "river", 64 | Type: "river_extra", 65 | Parent: "pid"}} 66 | 67 | r, err = NewRiver(cfg) 68 | c.Assert(err, IsNil) 69 | 70 | mapping := map[string]interface{}{ 71 | "river_extra": map[string]interface{}{ 72 | "_parent": map[string]string{"type": "river_extra_parent"}, 73 | }, 74 | } 75 | 76 | r.es.CreateMapping("river", "river_extra", mapping) 77 | 78 | return r 79 | } 80 | 81 | func (s *riverTestSuite) testPrepareExtraData(c *C) { 82 | s.testExecute(c, "INSERT INTO test_river_parent (id) VALUES (?)", 1) 83 | s.testExecute(c, "INSERT INTO test_river_extra (id, title, pid) VALUES (?, ?, ?)", 1, "first", 1) 84 | s.testExecute(c, "INSERT INTO test_river_extra (id, title, pid) VALUES (?, ?, ?)", 2, "second", 1) 85 | s.testExecute(c, "INSERT INTO test_river_extra (id, title, pid) VALUES (?, ?, ?)", 3, "third", 1) 86 | s.testExecute(c, "INSERT INTO test_river_extra (id, title, pid) VALUES (?, ?, ?)", 4, "fourth", 1) 87 | } 88 | 89 | func (s *riverTestSuite) testElasticExtraExists(c *C, id string, parent string, exist bool) { 90 | index := "river" 91 | docType := "river_extra" 92 | 93 | reqURL := fmt.Sprintf("http://%s/%s/%s/%s?parent=%s", s.r.es.Addr, 94 | url.QueryEscape(index), 95 | url.QueryEscape(docType), 96 | url.QueryEscape(id), 97 | url.QueryEscape(parent)) 98 | 99 | r, err := s.r.es.Do("HEAD", reqURL, nil) 100 | c.Assert(err, IsNil) 101 | 102 | if exist { 103 | c.Assert(r.Code, Equals, http.StatusOK) 104 | } else { 105 | c.Assert(r.Code, Equals, http.StatusNotFound) 106 | } 107 | } 108 | 109 | func (s *riverTestSuite) TestRiverWithParent(c *C) { 110 | river := s.setupExtra(c) 111 | 112 | defer river.Close() 113 | 114 | s.testPrepareExtraData(c) 115 | 116 | go func() { river.Run() }() 117 | 118 | testWaitSyncDone(c, river) 119 | 120 | s.testElasticExtraExists(c, "1", "1", true) 121 | 122 | s.testExecute(c, "DELETE FROM test_river_extra WHERE id = ?", 1) 123 | testWaitSyncDone(c, river) 124 | 125 | s.testElasticExtraExists(c, "1", "1", false) 126 | } 127 | -------------------------------------------------------------------------------- /river/river_test.go: 
-------------------------------------------------------------------------------- 1 | package river 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "os" 7 | "testing" 8 | "time" 9 | 10 | . "github.com/pingcap/check" 11 | "github.com/siddontang/go-mysql-elasticsearch/elastic" 12 | "github.com/siddontang/go-mysql/client" 13 | "github.com/siddontang/go-mysql/mysql" 14 | ) 15 | 16 | var myAddr = flag.String("my_addr", "127.0.0.1:3306", "MySQL addr") 17 | var esAddr = flag.String("es_addr", "127.0.0.1:9200", "Elasticsearch addr") 18 | var dateTimeStr = time.Now().Format(mysql.TimeFormat) 19 | var dateStr = time.Now().Format(mysqlDateFormat) 20 | 21 | func Test(t *testing.T) { 22 | TestingT(t) 23 | } 24 | 25 | type riverTestSuite struct { 26 | c *client.Conn 27 | r *River 28 | } 29 | 30 | var _ = Suite(&riverTestSuite{}) 31 | 32 | func (s *riverTestSuite) SetUpSuite(c *C) { 33 | var err error 34 | s.c, err = client.Connect(*myAddr, "root", "", "test") 35 | c.Assert(err, IsNil) 36 | 37 | s.testExecute(c, "SET SESSION binlog_format = 'ROW'") 38 | 39 | schema := ` 40 | CREATE TABLE IF NOT EXISTS %s ( 41 | id INT, 42 | title VARCHAR(256), 43 | content VARCHAR(256), 44 | mylist VARCHAR(256), 45 | mydate INT(10), 46 | tenum ENUM("e1", "e2", "e3"), 47 | tset SET("a", "b", "c"), 48 | tbit BIT(1) default 1, 49 | tdatetime DATETIME DEFAULT NULL, 50 | tdate DATE DEFAULT NULL, 51 | ip INT UNSIGNED DEFAULT 0, 52 | PRIMARY KEY(id)) ENGINE=INNODB; 53 | ` 54 | 55 | schemaJSON := ` 56 | CREATE TABLE IF NOT EXISTS %s ( 57 | id INT, 58 | info JSON, 59 | PRIMARY KEY(id)) ENGINE=INNODB; 60 | ` 61 | 62 | s.testExecute(c, "DROP TABLE IF EXISTS test_river") 63 | s.testExecute(c, "DROP TABLE IF EXISTS test_for_id") 64 | s.testExecute(c, "DROP TABLE IF EXISTS test_for_json") 65 | s.testExecute(c, fmt.Sprintf(schema, "test_river")) 66 | s.testExecute(c, fmt.Sprintf(schema, "test_for_id")) 67 | s.testExecute(c, fmt.Sprintf(schemaJSON, "test_for_json")) 68 | 69 | for i := 0; i < 10; i++ { 70 | table := fmt.Sprintf("test_river_%04d", i) 71 | s.testExecute(c, fmt.Sprintf("DROP TABLE IF EXISTS %s", table)) 72 | s.testExecute(c, fmt.Sprintf(schema, table)) 73 | } 74 | 75 | cfg := new(Config) 76 | cfg.MyAddr = *myAddr 77 | cfg.MyUser = "root" 78 | cfg.MyPassword = "" 79 | cfg.MyCharset = "utf8" 80 | cfg.ESAddr = *esAddr 81 | 82 | cfg.ServerID = 1001 83 | cfg.Flavor = "mysql" 84 | 85 | cfg.DataDir = "/tmp/test_river" 86 | cfg.DumpExec = "mysqldump" 87 | 88 | cfg.StatAddr = "127.0.0.1:12800" 89 | cfg.BulkSize = 1 90 | cfg.FlushBulkTime = TomlDuration{3 * time.Millisecond} 91 | 92 | os.RemoveAll(cfg.DataDir) 93 | 94 | cfg.Sources = []SourceConfig{SourceConfig{Schema: "test", Tables: []string{"test_river", "test_river_[0-9]{4}", "test_for_id", "test_for_json"}}} 95 | 96 | cfg.Rules = []*Rule{ 97 | &Rule{Schema: "test", 98 | Table: "test_river", 99 | Index: "river", 100 | Type: "river", 101 | FieldMapping: map[string]string{"title": "es_title", "mylist": "es_mylist,list", "mydate": ",date"}, 102 | }, 103 | 104 | &Rule{Schema: "test", 105 | Table: "test_for_id", 106 | Index: "river", 107 | Type: "river", 108 | ID: []string{"id", "title"}, 109 | FieldMapping: map[string]string{"title": "es_title", "mylist": "es_mylist,list", "mydate": ",date"}, 110 | }, 111 | 112 | &Rule{Schema: "test", 113 | Table: "test_river_[0-9]{4}", 114 | Index: "river", 115 | Type: "river", 116 | FieldMapping: map[string]string{"title": "es_title", "mylist": "es_mylist,list", "mydate": ",date"}, 117 | }, 118 | 119 | &Rule{Schema: "test", 120 | Table: "test_for_json", 
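// no field mapping here: the JSON column keeps its name and is unmarshalled into an object before indexing (see makeReqColumnData in sync.go)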
121 | Index: "river", 122 | Type: "river", 123 | }, 124 | } 125 | 126 | s.r, err = NewRiver(cfg) 127 | c.Assert(err, IsNil) 128 | 129 | err = s.r.es.DeleteIndex("river") 130 | c.Assert(err, IsNil) 131 | } 132 | 133 | func (s *riverTestSuite) TearDownSuite(c *C) { 134 | if s.c != nil { 135 | s.c.Close() 136 | } 137 | 138 | if s.r != nil { 139 | s.r.Close() 140 | } 141 | } 142 | 143 | func (s *riverTestSuite) TestConfig(c *C) { 144 | str := ` 145 | my_addr = "127.0.0.1:3306" 146 | my_user = "root" 147 | my_pass = "" 148 | my_charset = "utf8" 149 | es_addr = "127.0.0.1:9200" 150 | es_user = "" 151 | es_pass = "" 152 | data_dir = "./var" 153 | 154 | [[source]] 155 | schema = "test" 156 | 157 | tables = ["test_river", "test_river_[0-9]{4}", "test_for_id", "test_for_json"] 158 | 159 | [[rule]] 160 | schema = "test" 161 | table = "test_river" 162 | index = "river" 163 | type = "river" 164 | parent = "pid" 165 | 166 | [rule.field] 167 | title = "es_title" 168 | mylist = "es_mylist,list" 169 | mydate = ",date" 170 | 171 | 172 | [[rule]] 173 | schema = "test" 174 | table = "test_for_id" 175 | index = "river" 176 | type = "river" 177 | parent = "pid" 178 | id = ["id", "title"] 179 | [rule.field] 180 | title = "es_title" 181 | mylist = "es_mylist,list" 182 | mydate = ",date" 183 | 184 | 185 | [[rule]] 186 | schema = "test" 187 | table = "test_river_[0-9]{4}" 188 | index = "river" 189 | type = "river" 190 | 191 | [rule.field] 192 | title = "es_title" 193 | mylist = "es_mylist,list" 194 | mydate = ",date" 195 | 196 | [[rule]] 197 | schema = "test" 198 | table = "test_for_json" 199 | index = "river" 200 | type = "river" 201 | ` 202 | 203 | cfg, err := NewConfig(str) 204 | c.Assert(err, IsNil) 205 | c.Assert(cfg.Sources, HasLen, 1) 206 | c.Assert(cfg.Sources[0].Tables, HasLen, 4) 207 | c.Assert(cfg.Rules, HasLen, 4) 208 | } 209 | 210 | func (s *riverTestSuite) testExecute(c *C, query string, args ...interface{}) { 211 | c.Logf("query %s, args: %v", query, args) 212 | _, err := s.c.Execute(query, args...) 
213 | c.Assert(err, IsNil) 214 | } 215 | 216 | func (s *riverTestSuite) testPrepareData(c *C) { 217 | s.testExecute(c, "INSERT INTO test_river (id, title, content, tenum, tset) VALUES (?, ?, ?, ?, ?)", 1, "first", "hello go 1", "e1", "a,b") 218 | s.testExecute(c, "INSERT INTO test_river (id, title, content, tenum, tset) VALUES (?, ?, ?, ?, ?)", 2, "second", "hello mysql 2", "e2", "b,c") 219 | s.testExecute(c, "INSERT INTO test_river (id, title, content, tenum, tset) VALUES (?, ?, ?, ?, ?)", 3, "third", "hello elasticsearch 3", "e3", "c") 220 | s.testExecute(c, "INSERT INTO test_river (id, title, content, tenum, tset, tbit) VALUES (?, ?, ?, ?, ?, ?)", 4, "fourth", "hello go-mysql-elasticsearch 4", "e1", "a,b,c", 0) 221 | s.testExecute(c, "INSERT INTO test_for_id (id, title, content, tenum, tset) VALUES (?, ?, ?, ?, ?)", 1, "first", "hello go 1", "e1", "a,b") 222 | s.testExecute(c, "INSERT INTO test_for_json (id, info) VALUES (?, ?)", 9200, "{\"first\": \"a\", \"second\": \"b\"}") 223 | 224 | for i := 0; i < 10; i++ { 225 | table := fmt.Sprintf("test_river_%04d", i) 226 | s.testExecute(c, fmt.Sprintf("INSERT INTO %s (id, title, content, tenum, tset) VALUES (?, ?, ?, ?, ?)", table), 5+i, "abc", "hello", "e1", "a,b,c") 227 | } 228 | 229 | s.testExecute(c, "INSERT INTO test_river (id, title, content, tenum, tset, tdatetime, mydate, tdate) VALUES (?, ?, ?, ?, ?, ?, ?, ?)", 16, "test datetime", "hello go 16", "e1", "a,b", dateTimeStr, 1458131094, dateStr) 230 | 231 | s.testExecute(c, "SET sql_mode = '';") // clear sql_mode to allow empty dates 232 | s.testExecute(c, "INSERT INTO test_river (id, title, content, tenum, tset, tdatetime, mydate, tdate) VALUES (?, ?, ?, ?, ?, ?, ?, ?)", 20, "test empty datetime", "date test 20", "e1", "a,b", "0000-00-00 00:00:00", 0, "0000-00-00") 233 | 234 | // test ip 235 | s.testExecute(c, "INSERT INTO test_river (id, ip) VALUES (?, ?)", 17, 0) 236 | } 237 | 238 | func (s *riverTestSuite) testElasticGet(c *C, id string) *elastic.Response { 239 | index := "river" 240 | docType := "river" 241 | 242 | r, err := s.r.es.Get(index, docType, id) 243 | c.Assert(err, IsNil) 244 | 245 | return r 246 | } 247 | 248 | func (s *riverTestSuite) testElasticMapping(c *C) *elastic.MappingResponse { 249 | index := "river" 250 | docType := "river" 251 | 252 | r, err := s.r.es.GetMapping(index, docType) 253 | c.Assert(err, IsNil) 254 | 255 | c.Assert(r.Mapping[index].Mappings[docType].Properties["tdatetime"].Type, Equals, "date") 256 | c.Assert(r.Mapping[index].Mappings[docType].Properties["tdate"].Type, Equals, "date") 257 | c.Assert(r.Mapping[index].Mappings[docType].Properties["mydate"].Type, Equals, "date") 258 | return r 259 | } 260 | 261 | func testWaitSyncDone(c *C, r *River) { 262 | <-r.canal.WaitDumpDone() 263 | 264 | err := r.canal.CatchMasterPos(10 * time.Second) 265 | c.Assert(err, IsNil) 266 | 267 | for i := 0; i < 1000; i++ { 268 | if len(r.syncCh) == 0 { 269 | return 270 | } 271 | 272 | time.Sleep(10 * time.Millisecond) 273 | } 274 | 275 | c.Fatalf("wait 10s but still have %d items to be synced", len(r.syncCh)) 276 | } 277 | 278 | func (s *riverTestSuite) TestRiver(c *C) { 279 | s.testPrepareData(c) 280 | 281 | go func() { s.r.Run() }() 282 | 283 | testWaitSyncDone(c, s.r) 284 | 285 | var mr *elastic.MappingResponse 286 | mr = s.testElasticMapping(c) 287 | c.Assert(mr.Code, Equals, 200) 288 | 289 | var r *elastic.Response 290 | r = s.testElasticGet(c, "1") 291 | c.Assert(r.Found, IsTrue) 292 | c.Assert(r.Source["tenum"], Equals, "e1") 293 | c.Assert(r.Source["tset"], Equals, 
"a,b") 294 | 295 | r = s.testElasticGet(c, "1:first") 296 | c.Assert(r.Found, IsTrue) 297 | 298 | r = s.testElasticGet(c, "9200") 299 | c.Assert(r.Found, IsTrue) 300 | switch v := r.Source["info"].(type) { 301 | case map[string]interface{}: 302 | c.Assert(v["first"], Equals, "a") 303 | c.Assert(v["second"], Equals, "b") 304 | default: 305 | c.Assert(v, IsNil) 306 | c.Assert(true, IsFalse) 307 | } 308 | 309 | r = s.testElasticGet(c, "100") 310 | c.Assert(r.Found, IsFalse) 311 | 312 | for i := 0; i < 10; i++ { 313 | r = s.testElasticGet(c, fmt.Sprintf("%d", 5+i)) 314 | c.Assert(r.Found, IsTrue) 315 | c.Assert(r.Source["es_title"], Equals, "abc") 316 | } 317 | 318 | s.testExecute(c, "UPDATE test_river SET title = ?, tenum = ?, tset = ?, mylist = ? WHERE id = ?", "second 2", "e3", "a,b,c", "a,b,c", 2) 319 | s.testExecute(c, "DELETE FROM test_river WHERE id = ?", 1) 320 | s.testExecute(c, "UPDATE test_river SET title = ?, id = ? WHERE id = ?", "second 30", 30, 3) 321 | 322 | // so we can insert invalid data 323 | s.testExecute(c, `SET SESSION sql_mode="NO_ENGINE_SUBSTITUTION";`) 324 | 325 | // bad insert 326 | s.testExecute(c, "UPDATE test_river SET title = ?, tenum = ?, tset = ? WHERE id = ?", "second 2", "e5", "a,b,c,d", 4) 327 | 328 | for i := 0; i < 10; i++ { 329 | table := fmt.Sprintf("test_river_%04d", i) 330 | s.testExecute(c, fmt.Sprintf("UPDATE %s SET title = ? WHERE id = ?", table), "hello", 5+i) 331 | } 332 | 333 | // test ip 334 | s.testExecute(c, "UPDATE test_river set ip = ? WHERE id = ?", 3748168280, 17) 335 | 336 | testWaitSyncDone(c, s.r) 337 | 338 | r = s.testElasticGet(c, "1") 339 | c.Assert(r.Found, IsFalse) 340 | 341 | r = s.testElasticGet(c, "2") 342 | c.Assert(r.Found, IsTrue) 343 | c.Assert(r.Source["es_title"], Equals, "second 2") 344 | c.Assert(r.Source["tenum"], Equals, "e3") 345 | c.Assert(r.Source["tset"], Equals, "a,b,c") 346 | c.Assert(r.Source["es_mylist"], DeepEquals, []interface{}{"a", "b", "c"}) 347 | c.Assert(r.Source["tbit"], Equals, float64(1)) 348 | 349 | r = s.testElasticGet(c, "4") 350 | c.Assert(r.Found, IsTrue) 351 | c.Assert(r.Source["tenum"], Equals, "") 352 | c.Assert(r.Source["tset"], Equals, "a,b,c") 353 | c.Assert(r.Source["tbit"], Equals, float64(0)) 354 | 355 | r = s.testElasticGet(c, "3") 356 | c.Assert(r.Found, IsFalse) 357 | 358 | r = s.testElasticGet(c, "30") 359 | c.Assert(r.Found, IsTrue) 360 | c.Assert(r.Source["es_title"], Equals, "second 30") 361 | 362 | for i := 0; i < 10; i++ { 363 | r = s.testElasticGet(c, fmt.Sprintf("%d", 5+i)) 364 | c.Assert(r.Found, IsTrue) 365 | c.Assert(r.Source["es_title"], Equals, "hello") 366 | } 367 | 368 | r = s.testElasticGet(c, "16") 369 | c.Assert(r.Found, IsTrue) 370 | tdt, _ := time.Parse(time.RFC3339, r.Source["tdatetime"].(string)) 371 | c.Assert(tdt.Format(mysql.TimeFormat), Equals, dateTimeStr) 372 | c.Assert(r.Source["tdate"], Equals, dateStr) 373 | 374 | r = s.testElasticGet(c, "20") 375 | c.Assert(r.Found, IsTrue) 376 | c.Assert(r.Source["tdate"], Equals, nil) 377 | c.Assert(r.Source["tdatetime"], Equals, nil) 378 | 379 | // test ip 380 | r = s.testElasticGet(c, "17") 381 | c.Assert(r.Found, IsTrue) 382 | c.Assert(r.Source["ip"], Equals, float64(3748168280)) 383 | 384 | // alter table 385 | s.testExecute(c, "ALTER TABLE test_river ADD COLUMN new INT(10)") 386 | s.testExecute(c, "INSERT INTO test_river (id, title, content, tenum, tset, new) VALUES (?, ?, ?, ?, ?, ?)", 1000, "abc", "hello", "e1", "a,b,c", 1) 387 | s.testExecute(c, "ALTER TABLE test_river DROP COLUMN new") 388 | s.testExecute(c, 
"INSERT INTO test_river (id, title, content, tenum, tset) VALUES (?, ?, ?, ?, ?)", 1001, "abc", "hello", "e1", "a,b,c") 389 | 390 | testWaitSyncDone(c, s.r) 391 | 392 | r = s.testElasticGet(c, "1000") 393 | c.Assert(r.Found, IsTrue) 394 | c.Assert(r.Source["new"], Equals, float64(1)) 395 | 396 | r = s.testElasticGet(c, "1001") 397 | c.Assert(r.Found, IsTrue) 398 | _, ok := r.Source["new"] 399 | c.Assert(ok, IsFalse) 400 | } 401 | 402 | func TestTableValidation(t *testing.T) { 403 | tables := []struct { 404 | Tables []string 405 | Expect bool 406 | }{ 407 | {[]string{"*"}, true}, 408 | {[]string{"table", "table2"}, true}, 409 | {[]string{"*", "table"}, false}, 410 | } 411 | 412 | for _, table := range tables { 413 | if isValidTables(table.Tables) != table.Expect { 414 | t.Errorf("Tables: %s, Expected: is %t, but: was %t", table.Tables, table.Expect, isValidTables(table.Tables)) 415 | } 416 | } 417 | } 418 | 419 | func TestBuildTable(t *testing.T) { 420 | tables := []struct { 421 | Table string 422 | Expect string 423 | }{ 424 | {"*", ".*"}, 425 | {"table2", "table2"}, 426 | } 427 | 428 | for _, table := range tables { 429 | if buildTable(table.Table) != table.Expect { 430 | t.Errorf("Table: %s, Expected: is \"%s\", but: was \"%s\"", table.Table, table.Expect, buildTable(table.Table)) 431 | } 432 | } 433 | } 434 | -------------------------------------------------------------------------------- /river/rule.go: -------------------------------------------------------------------------------- 1 | package river 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/siddontang/go-mysql/schema" 7 | ) 8 | 9 | // Rule is the rule for how to sync data from MySQL to ES. 10 | // If you want to sync MySQL data into elasticsearch, you must set a rule to let use know how to do it. 11 | // The mapping rule may thi: schema + table <-> index + document type. 12 | // schema and table is for MySQL, index and document type is for Elasticsearch. 13 | type Rule struct { 14 | Schema string `toml:"schema"` 15 | Table string `toml:"table"` 16 | Index string `toml:"index"` 17 | Type string `toml:"type"` 18 | Parent string `toml:"parent"` 19 | ID []string `toml:"id"` 20 | 21 | // Default, a MySQL table field name is mapped to Elasticsearch field name. 22 | // Sometimes, you want to use different name, e.g, the MySQL file name is title, 23 | // but in Elasticsearch, you want to name it my_title. 
24 | FieldMapping map[string]string `toml:"field"` 25 | 26 | // MySQL table information 27 | TableInfo *schema.Table 28 | 29 | // only the MySQL fields listed in Filter will be synced; by default all fields are synced 30 | Filter []string `toml:"filter"` 31 | 32 | // Elasticsearch pipeline 33 | // To pre-process documents before indexing 34 | Pipeline string `toml:"pipeline"` 35 | } 36 | 37 | func newDefaultRule(schema string, table string) *Rule { 38 | r := new(Rule) 39 | 40 | r.Schema = schema 41 | r.Table = table 42 | 43 | lowerTable := strings.ToLower(table) 44 | r.Index = lowerTable 45 | r.Type = lowerTable 46 | 47 | r.FieldMapping = make(map[string]string) 48 | 49 | return r 50 | } 51 | 52 | func (r *Rule) prepare() error { 53 | if r.FieldMapping == nil { 54 | r.FieldMapping = make(map[string]string) 55 | } 56 | 57 | if len(r.Index) == 0 { 58 | r.Index = r.Table 59 | } 60 | 61 | if len(r.Type) == 0 { 62 | r.Type = r.Index 63 | } 64 | 65 | // ES must use a lower-case Type 66 | // Here we lower-case the Index as well 67 | r.Index = strings.ToLower(r.Index) 68 | r.Type = strings.ToLower(r.Type) 69 | 70 | return nil 71 | } 72 | 73 | // CheckFilter checks whether the field should be synced. 74 | func (r *Rule) CheckFilter(field string) bool { 75 | if r.Filter == nil { 76 | return true 77 | } 78 | 79 | for _, f := range r.Filter { 80 | if f == field { 81 | return true 82 | } 83 | } 84 | return false 85 | } 86 | -------------------------------------------------------------------------------- /river/status.go: -------------------------------------------------------------------------------- 1 | package river 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "net" 7 | "net/http" 8 | "net/http/pprof" 9 | 10 | "github.com/siddontang/go-log/log" 11 | "github.com/siddontang/go/sync2" 12 | ) 13 | 14 | type stat struct { 15 | r *River 16 | 17 | l net.Listener 18 | 19 | InsertNum sync2.AtomicInt64 20 | UpdateNum sync2.AtomicInt64 21 | DeleteNum sync2.AtomicInt64 22 | } 23 | 24 | func (s *stat) ServeHTTP(w http.ResponseWriter, r *http.Request) { 25 | var buf bytes.Buffer 26 | 27 | rr, err := s.r.canal.Execute("SHOW MASTER STATUS") 28 | if err != nil { 29 | w.WriteHeader(http.StatusInternalServerError) 30 | w.Write([]byte(fmt.Sprintf("execute sql error %v", err))) 31 | return 32 | } 33 | 34 | binName, _ := rr.GetString(0, 0) 35 | binPos, _ := rr.GetUint(0, 1) 36 | 37 | pos := s.r.canal.SyncedPosition() 38 | 39 | buf.WriteString(fmt.Sprintf("server_current_binlog:(%s, %d)\n", binName, binPos)) 40 | buf.WriteString(fmt.Sprintf("read_binlog:%s\n", pos)) 41 | 42 | buf.WriteString(fmt.Sprintf("insert_num:%d\n", s.InsertNum.Get())) 43 | buf.WriteString(fmt.Sprintf("update_num:%d\n", s.UpdateNum.Get())) 44 | buf.WriteString(fmt.Sprintf("delete_num:%d\n", s.DeleteNum.Get())) 45 | 46 | w.Write(buf.Bytes()) 47 | } 48 | 49 | func (s *stat) Run(addr string) { 50 | if len(addr) == 0 { 51 | return 52 | } 53 | log.Infof("run status http server %s", addr) 54 | var err error 55 | s.l, err = net.Listen("tcp", addr) 56 | if err != nil { 57 | log.Errorf("listen stat addr %s err %v", addr, err) 58 | return 59 | } 60 | 61 | srv := http.Server{} 62 | mux := http.NewServeMux() 63 | mux.Handle("/stat", s) 64 | mux.Handle("/debug/pprof/", http.HandlerFunc(pprof.Index)) 65 | srv.Handler = mux 66 | 67 | srv.Serve(s.l) 68 | } 69 | 70 | func (s *stat) Close() { 71 | if s.l != nil { 72 | s.l.Close() 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /river/sync.go: 
-------------------------------------------------------------------------------- 1 | package river 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "fmt" 7 | "reflect" 8 | "strings" 9 | "time" 10 | 11 | "github.com/juju/errors" 12 | "github.com/siddontang/go-log/log" 13 | "github.com/siddontang/go-mysql-elasticsearch/elastic" 14 | "github.com/siddontang/go-mysql/canal" 15 | "github.com/siddontang/go-mysql/mysql" 16 | "github.com/siddontang/go-mysql/replication" 17 | "github.com/siddontang/go-mysql/schema" 18 | ) 19 | 20 | const ( 21 | syncInsertDoc = iota 22 | syncDeleteDoc 23 | syncUpdateDoc 24 | ) 25 | 26 | const ( 27 | fieldTypeList = "list" 28 | // for the mysql int type to es date type 29 | // set the [rule.field] created_time = ",date" 30 | fieldTypeDate = "date" 31 | ) 32 | 33 | const mysqlDateFormat = "2006-01-02" 34 | 35 | type posSaver struct { 36 | pos mysql.Position 37 | force bool 38 | } 39 | 40 | type eventHandler struct { 41 | r *River 42 | } 43 | 44 | func (h *eventHandler) OnRotate(e *replication.RotateEvent) error { 45 | pos := mysql.Position{ 46 | Name: string(e.NextLogName), 47 | Pos: uint32(e.Position), 48 | } 49 | 50 | h.r.syncCh <- posSaver{pos, true} 51 | 52 | return h.r.ctx.Err() 53 | } 54 | 55 | func (h *eventHandler) OnTableChanged(schema, table string) error { 56 | err := h.r.updateRule(schema, table) 57 | if err != nil && err != ErrRuleNotExist { 58 | return errors.Trace(err) 59 | } 60 | return nil 61 | } 62 | 63 | func (h *eventHandler) OnDDL(nextPos mysql.Position, _ *replication.QueryEvent) error { 64 | h.r.syncCh <- posSaver{nextPos, true} 65 | return h.r.ctx.Err() 66 | } 67 | 68 | func (h *eventHandler) OnXID(nextPos mysql.Position) error { 69 | h.r.syncCh <- posSaver{nextPos, false} 70 | return h.r.ctx.Err() 71 | } 72 | 73 | func (h *eventHandler) OnRow(e *canal.RowsEvent) error { 74 | rule, ok := h.r.rules[ruleKey(e.Table.Schema, e.Table.Name)] 75 | if !ok { 76 | return nil 77 | } 78 | 79 | var reqs []*elastic.BulkRequest 80 | var err error 81 | switch e.Action { 82 | case canal.InsertAction: 83 | reqs, err = h.r.makeInsertRequest(rule, e.Rows) 84 | case canal.DeleteAction: 85 | reqs, err = h.r.makeDeleteRequest(rule, e.Rows) 86 | case canal.UpdateAction: 87 | reqs, err = h.r.makeUpdateRequest(rule, e.Rows) 88 | default: 89 | err = errors.Errorf("invalid rows action %s", e.Action) 90 | } 91 | 92 | if err != nil { 93 | h.r.cancel() 94 | return errors.Errorf("make %s ES request err %v, close sync", e.Action, err) 95 | } 96 | 97 | h.r.syncCh <- reqs 98 | 99 | return h.r.ctx.Err() 100 | } 101 | 102 | func (h *eventHandler) OnGTID(gtid mysql.GTIDSet) error { 103 | return nil 104 | } 105 | 106 | func (h *eventHandler) OnPosSynced(pos mysql.Position, force bool) error { 107 | return nil 108 | } 109 | 110 | func (h *eventHandler) String() string { 111 | return "ESRiverEventHandler" 112 | } 113 | 114 | func (r *River) syncLoop() { 115 | bulkSize := r.c.BulkSize 116 | if bulkSize == 0 { 117 | bulkSize = 128 118 | } 119 | 120 | interval := r.c.FlushBulkTime.Duration 121 | if interval == 0 { 122 | interval = 200 * time.Millisecond 123 | } 124 | 125 | ticker := time.NewTicker(interval) 126 | defer ticker.Stop() 127 | defer r.wg.Done() 128 | 129 | lastSavedTime := time.Now() 130 | reqs := make([]*elastic.BulkRequest, 0, 1024) 131 | 132 | var pos mysql.Position 133 | 134 | for { 135 | needFlush := false 136 | needSavePos := false 137 | 138 | select { 139 | case v := <-r.syncCh: 140 | switch v := v.(type) { 141 | case posSaver: 142 | now := time.Now() 143 | if 
v.force || now.Sub(lastSavedTime) > 3*time.Second { 144 | lastSavedTime = now 145 | needFlush = true 146 | needSavePos = true 147 | pos = v.pos 148 | } 149 | case []*elastic.BulkRequest: 150 | reqs = append(reqs, v...) 151 | needFlush = len(reqs) >= bulkSize 152 | } 153 | case <-ticker.C: 154 | needFlush = true 155 | case <-r.ctx.Done(): 156 | return 157 | } 158 | 159 | if needFlush { 160 | // TODO: retry some times? 161 | if err := r.doBulk(reqs); err != nil { 162 | log.Errorf("do ES bulk err %v, close sync", err) 163 | r.cancel() 164 | return 165 | } 166 | reqs = reqs[0:0] 167 | } 168 | 169 | if needSavePos { 170 | if err := r.master.Save(pos); err != nil { 171 | log.Errorf("save sync position %s err %v, close sync", pos, err) 172 | r.cancel() 173 | return 174 | } 175 | } 176 | } 177 | } 178 | 179 | // for insert and delete 180 | func (r *River) makeRequest(rule *Rule, action string, rows [][]interface{}) ([]*elastic.BulkRequest, error) { 181 | reqs := make([]*elastic.BulkRequest, 0, len(rows)) 182 | 183 | for _, values := range rows { 184 | id, err := r.getDocID(rule, values) 185 | if err != nil { 186 | return nil, errors.Trace(err) 187 | } 188 | 189 | parentID := "" 190 | if len(rule.Parent) > 0 { 191 | if parentID, err = r.getParentID(rule, values, rule.Parent); err != nil { 192 | return nil, errors.Trace(err) 193 | } 194 | } 195 | 196 | req := &elastic.BulkRequest{Index: rule.Index, Type: rule.Type, ID: id, Parent: parentID, Pipeline: rule.Pipeline} 197 | 198 | if action == canal.DeleteAction { 199 | req.Action = elastic.ActionDelete 200 | r.st.DeleteNum.Add(1) 201 | } else { 202 | r.makeInsertReqData(req, rule, values) 203 | r.st.InsertNum.Add(1) 204 | } 205 | 206 | reqs = append(reqs, req) 207 | } 208 | 209 | return reqs, nil 210 | } 211 | 212 | func (r *River) makeInsertRequest(rule *Rule, rows [][]interface{}) ([]*elastic.BulkRequest, error) { 213 | return r.makeRequest(rule, canal.InsertAction, rows) 214 | } 215 | 216 | func (r *River) makeDeleteRequest(rule *Rule, rows [][]interface{}) ([]*elastic.BulkRequest, error) { 217 | return r.makeRequest(rule, canal.DeleteAction, rows) 218 | } 219 | 220 | func (r *River) makeUpdateRequest(rule *Rule, rows [][]interface{}) ([]*elastic.BulkRequest, error) { 221 | if len(rows)%2 != 0 { 222 | return nil, errors.Errorf("invalid update rows event, must have 2x rows, but %d", len(rows)) 223 | } 224 | 225 | reqs := make([]*elastic.BulkRequest, 0, len(rows)) 226 | 227 | for i := 0; i < len(rows); i += 2 { 228 | beforeID, err := r.getDocID(rule, rows[i]) 229 | if err != nil { 230 | return nil, errors.Trace(err) 231 | } 232 | 233 | afterID, err := r.getDocID(rule, rows[i+1]) 234 | 235 | if err != nil { 236 | return nil, errors.Trace(err) 237 | } 238 | 239 | beforeParentID, afterParentID := "", "" 240 | if len(rule.Parent) > 0 { 241 | if beforeParentID, err = r.getParentID(rule, rows[i], rule.Parent); err != nil { 242 | return nil, errors.Trace(err) 243 | } 244 | if afterParentID, err = r.getParentID(rule, rows[i+1], rule.Parent); err != nil { 245 | return nil, errors.Trace(err) 246 | } 247 | } 248 | 249 | req := &elastic.BulkRequest{Index: rule.Index, Type: rule.Type, ID: beforeID, Parent: beforeParentID} 250 | 251 | 252 | if beforeID != afterID || beforeParentID != afterParentID { 253 | req.Action = elastic.ActionDelete 254 | reqs = append(reqs, req) 255 | 256 | req = &elastic.BulkRequest{Index: rule.Index, Type: rule.Type, ID: afterID, Parent: afterParentID, Pipeline: rule.Pipeline} 257 | r.makeInsertReqData(req, rule, rows[i+1]) 258 | 259 | 
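// the document ID or parent changed, so it cannot be updated in place: the old document was deleted and the new row re-indexed above, counting one delete and one insert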
r.st.DeleteNum.Add(1) 260 | r.st.InsertNum.Add(1) 261 | } else { 262 | // try to delete the old document first 263 | req.Action = elastic.ActionDelete 264 | reqs = append(reqs, req) 265 | // then re-index the new row 266 | req = &elastic.BulkRequest{Index: rule.Index, Type: rule.Type, ID: afterID, Parent: afterParentID, Pipeline: rule.Pipeline} 267 | r.makeInsertReqData(req, rule, rows[i+1]) 268 | 269 | // if len(rule.Pipeline) > 0 { 270 | // // Pipelines can only be specified on index action 271 | // r.makeInsertReqData(req, rule, rows[i+1]) 272 | // // Make sure action is index, not create 273 | // req.Action = elastic.ActionIndex 274 | // req.Pipeline = rule.Pipeline 275 | // } else { 276 | // r.makeUpdateReqData(req, rule, rows[i], rows[i+1]) 277 | // } 278 | r.st.UpdateNum.Add(1) 279 | } 280 | 281 | reqs = append(reqs, req) 282 | } 283 | 284 | return reqs, nil 285 | } 286 | 287 | func (r *River) makeReqColumnData(col *schema.TableColumn, value interface{}) interface{} { 288 | switch col.Type { 289 | case schema.TYPE_ENUM: 290 | switch value := value.(type) { 291 | case int64: 292 | // for binlog, ENUM may be int64, but for dump, enum is string 293 | eNum := value - 1 294 | if eNum < 0 || eNum >= int64(len(col.EnumValues)) { 295 | // we inserted an invalid enum value before, so return empty 296 | log.Warnf("invalid binlog enum index %d, for enum %v", eNum, col.EnumValues) 297 | return "" 298 | } 299 | 300 | return col.EnumValues[eNum] 301 | } 302 | case schema.TYPE_SET: 303 | switch value := value.(type) { 304 | case int64: 305 | // for binlog, SET may be int64, but for dump, SET is string 306 | bitmask := value 307 | sets := make([]string, 0, len(col.SetValues)) 308 | for i, s := range col.SetValues { 309 | if bitmask&int64(1<<uint(i)) > 0 { 310 | sets = append(sets, s) 311 | } 312 | } 313 | return strings.Join(sets, ",") 314 | } 315 | case schema.TYPE_BIT: 316 | switch value := value.(type) { 317 | case string: 318 | // for binlog, BIT is int64, but for dump, BIT is string 319 | // for dump 0x01 is for 1, \0 is for 0 320 | if value == "\x01" { 321 | return int64(1) 322 | } 323 | 324 | return int64(0) 325 | } 326 | case schema.TYPE_STRING: 327 | switch value := value.(type) { 328 | case []byte: 329 | return string(value[:]) 330 | } 331 | case schema.TYPE_JSON: 332 | var f interface{} 333 | var err error 334 | switch v := value.(type) { 335 | case string: 336 | err = json.Unmarshal([]byte(v), &f) 337 | case []byte: 338 | err = json.Unmarshal(v, &f) 339 | } 340 | if err == nil && f != nil { 341 | return f 342 | } 343 | case schema.TYPE_DATETIME, schema.TYPE_TIMESTAMP: 344 | switch v := value.(type) { 345 | case string: 346 | vt, err := time.ParseInLocation(mysql.TimeFormat, string(v), time.Local) 347 | if err != nil || vt.IsZero() { // failed to parse date or zero date 348 | return nil 349 | } 350 | return vt.Format(time.RFC3339) 351 | } 352 | case schema.TYPE_DATE: 353 | switch v := value.(type) { 354 | case string: 355 | vt, err := time.Parse(mysqlDateFormat, string(v)) 356 | if err != nil || vt.IsZero() { // failed to parse date or zero date 357 | return nil 358 | } 359 | return vt.Format(mysqlDateFormat) 360 | } 361 | } 362 | 363 | return value 364 | } 365 | 366 | func (r *River) getFieldParts(k string, v string) (string, string, string) { 367 | composedField := strings.Split(v, ",") 368 | 369 | mysql := k 370 | elastic := composedField[0] 371 | fieldType := "" 372 | 373 | if len(elastic) == 0 { 374 | elastic = mysql 375 | } 376 | if len(composedField) == 2 { 377 | fieldType = composedField[1] 378 | } 379 | 380 | return mysql, elastic, fieldType 381 
| } 382 | 383 | func (r *River) makeInsertReqData(req *elastic.BulkRequest, rule *Rule, values []interface{}) { 384 | req.Data = make(map[string]interface{}, len(values)) 385 | req.Action = elastic.ActionIndex 386 | 387 | for i, c := range rule.TableInfo.Columns { 388 | if !rule.CheckFilter(c.Name) { 389 | continue 390 | } 391 | mapped := false 392 | for k, v := range rule.FieldMapping { 393 | mysql, elastic, fieldType := r.getFieldParts(k, v) 394 | if mysql == c.Name { 395 | mapped = true 396 | req.Data[elastic] = r.getFieldValue(&c, fieldType, values[i]) 397 | } 398 | } 399 | if !mapped { 400 | req.Data[c.Name] = r.makeReqColumnData(&c, values[i]) 401 | } 402 | } 403 | } 404 | 405 | func (r *River) makeUpdateReqData(req *elastic.BulkRequest, rule *Rule, 406 | beforeValues []interface{}, afterValues []interface{}) { 407 | req.Data = make(map[string]interface{}, len(beforeValues)) 408 | 409 | // maybe dangerous if something went wrong with the delete before? 410 | req.Action = elastic.ActionUpdate 411 | 412 | for i, c := range rule.TableInfo.Columns { 413 | mapped := false 414 | if !rule.CheckFilter(c.Name) { 415 | continue 416 | } 417 | if reflect.DeepEqual(beforeValues[i], afterValues[i]) { 418 | // nothing changed 419 | continue 420 | } 421 | for k, v := range rule.FieldMapping { 422 | mysql, elastic, fieldType := r.getFieldParts(k, v) 423 | if mysql == c.Name { 424 | mapped = true 425 | req.Data[elastic] = r.getFieldValue(&c, fieldType, afterValues[i]) 426 | } 427 | } 428 | if !mapped { 429 | req.Data[c.Name] = r.makeReqColumnData(&c, afterValues[i]) 430 | } 431 | 432 | } 433 | } 434 | 435 | // If id is not configured in the toml file, get the primary key values of the row and format them into a string; the PK values must not be nil. 436 | // Otherwise, get the configured ID columns of the row and format them into a string. 437 | func (r *River) getDocID(rule *Rule, row []interface{}) (string, error) { 438 | var ( 439 | ids []interface{} 440 | err error 441 | ) 442 | if rule.ID == nil { 443 | ids, err = rule.TableInfo.GetPKValues(row) 444 | if err != nil { 445 | return "", err 446 | } 447 | } else { 448 | ids = make([]interface{}, 0, len(rule.ID)) 449 | for _, column := range rule.ID { 450 | value, err := rule.TableInfo.GetColumnValue(column, row) 451 | if err != nil { 452 | return "", err 453 | } 454 | ids = append(ids, value) 455 | } 456 | } 457 | 458 | var buf bytes.Buffer 459 | 460 | sep := "" 461 | for i, value := range ids { 462 | if value == nil { 463 | return "", errors.Errorf("the %d-th id or PK value is nil", i) 464 | } 465 | 466 | buf.WriteString(fmt.Sprintf("%s%v", sep, value)) 467 | sep = ":" 468 | } 469 | 470 | return buf.String(), nil 471 | } 472 | 473 | func (r *River) getParentID(rule *Rule, row []interface{}, columnName string) (string, error) { 474 | index := rule.TableInfo.FindColumn(columnName) 475 | if index < 0 { 476 | return "", errors.Errorf("parent id not found %s(%s)", rule.TableInfo.Name, columnName) 477 | } 478 | 479 | return fmt.Sprint(row[index]), nil 480 | } 481 | 482 | func (r *River) doBulk(reqs []*elastic.BulkRequest) error { 483 | if len(reqs) == 0 { 484 | return nil 485 | } 486 | 487 | if resp, err := r.es.Bulk(reqs); err != nil { 488 | log.Errorf("sync docs err %v after binlog %s", err, r.canal.SyncedPosition()) 489 | return errors.Trace(err) 490 | } else if resp.Code/100 == 2 || resp.Errors { 491 | for i := 0; i < len(resp.Items); i++ { 492 | for action, item := range resp.Items[i] { 493 | if len(item.Error) > 0 { 494 | log.Errorf("%s index: %s, type: %s, id: %s, status: %d, error: %s", 495 | 
action, item.Index, item.Type, item.ID, item.Status, item.Error) 496 | } 497 | } 498 | } 499 | } 500 | 501 | return nil 502 | } 503 | 504 | // getFieldValue gets the MySQL field value and converts it to the value ES expects 505 | func (r *River) getFieldValue(col *schema.TableColumn, fieldType string, value interface{}) interface{} { 506 | var fieldValue interface{} 507 | switch fieldType { 508 | case fieldTypeList: 509 | v := r.makeReqColumnData(col, value) 510 | if str, ok := v.(string); ok { 511 | fieldValue = strings.Split(str, ",") 512 | } else { 513 | fieldValue = v 514 | } 515 | 516 | case fieldTypeDate: 517 | if col.Type == schema.TYPE_NUMBER { 518 | // work on a copy of the column so the shared table metadata keeps TYPE_NUMBER for later rows 519 | c := *col 520 | c.Type = schema.TYPE_DATETIME 521 | 522 | v := reflect.ValueOf(value) 523 | switch v.Kind() { 524 | case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: 525 | fieldValue = r.makeReqColumnData(&c, time.Unix(v.Int(), 0).Format(mysql.TimeFormat)) 526 | } 527 | } 528 | } 529 | 530 | if fieldValue == nil { 531 | fieldValue = r.makeReqColumnData(col, value) 532 | } 533 | return fieldValue 534 | } 535 | --------------------------------------------------------------------------------
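To make the `,date` field mapping above concrete: for a numeric column mapped with `",date"`, `getFieldValue` formats the epoch seconds as a MySQL datetime string, and `makeReqColumnData` then parses that string and emits RFC3339 for Elasticsearch. Below is a minimal, self-contained sketch of that two-step conversion; it assumes `mysql.TimeFormat` is the `"2006-01-02 15:04:05"` layout and borrows the epoch value from `river_test.go`.

```
package main

import (
	"fmt"
	"time"
)

// mysqlTimeFormat mirrors mysql.TimeFormat from go-mysql (assumed layout).
const mysqlTimeFormat = "2006-01-02 15:04:05"

func main() {
	epoch := int64(1458131094) // the mydate value inserted by river_test.go

	// step 1: getFieldValue formats the int as a MySQL datetime string
	asDatetime := time.Unix(epoch, 0).Format(mysqlTimeFormat)

	// step 2: makeReqColumnData parses it back and emits RFC3339 for ES
	vt, err := time.ParseInLocation(mysqlTimeFormat, asDatetime, time.Local)
	if err != nil || vt.IsZero() {
		fmt.Println("unparseable or zero date -> indexed as null")
		return
	}
	fmt.Println(asDatetime, "->", vt.Format(time.RFC3339))
}
```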