├── .github
│   └── workflows
│       ├── gh-pages.yml
│       ├── lint.yml
│       └── tests.yml
├── .gitignore
├── .goreleaser.yml
├── .pre-commit-config.yaml
├── CHANGELOG.md
├── Dockerfile
├── Dockerfile_goreleaser
├── LICENSE
├── Makefile
├── README.md
├── clickhouse_sinker-dashboard.json
├── cmd
│   ├── clickhouse_sinker
│   │   └── main.go
│   ├── kafka_gen_log
│   │   ├── main.go
│   │   ├── marshal_amd64.go
│   │   └── marshal_arm64.go
│   ├── kafka_gen_metric
│   │   ├── main.go
│   │   ├── marshal_amd64.go
│   │   └── marshal_arm64.go
│   ├── kafka_gen_prom
│   │   └── main.go
│   └── nacos_publish_config
│       └── main.go
├── config
│   └── config.go
├── config_manager
│   ├── lags.go
│   ├── nacos.go
│   └── rcm.go
├── dev
│   ├── deploy_gh_pages.sh
│   └── license.header
├── discovery
│   ├── discovery.go
│   └── discovery_test.go
├── docker-compose.yml
├── docker
│   ├── metrika.xml
│   ├── test_auto_schema.hjson
│   ├── test_dynamic_schema.hjson
│   ├── test_fixed_schema.hjson
│   ├── test_prom_metric.data
│   └── test_prom_metric.hjson
├── docs
│   ├── .vuepress
│   │   ├── config.js
│   │   └── public
│   │       ├── favicon.ico
│   │       └── logo_320px.svg
│   ├── README.md
│   ├── configuration
│   │   ├── config.md
│   │   └── flag.md
│   ├── dev
│   │   ├── design.md
│   │   └── introduction.md
│   └── guide
│       ├── install.md
│       └── run.md
├── go.metrictest.sh
├── go.mod
├── go.sum
├── go.test.sh
├── health
│   └── health.go
├── input
│   ├── kafka_franz.go
│   └── poller.go
├── model
│   ├── message.go
│   ├── metric.go
│   └── value.go
├── output
│   ├── clickhouse.go
│   └── clickhouse_util.go
├── package.json
├── parser
│   ├── csv.go
│   ├── fastjson.go
│   ├── gjson.go
│   ├── parser.go
│   └── parser_test.go
├── pool
│   ├── ck_cli.go
│   └── conn.go
├── statistics
│   └── statistics.go
├── task
│   ├── consumer.go
│   ├── sharding.go
│   ├── sinker.go
│   └── task.go
├── test
│   ├── kafka.client.keystore.jks
│   └── kafka.client.truststore.jks
└── util
    ├── aes.go
    ├── app.go
    ├── common.go
    ├── common_test.go
    ├── gosypt.go
    ├── net.go
    ├── recordpoolsize.go
    ├── workerpool.go
    └── workerpool_test.go
/.github/workflows/gh-pages.yml:
--------------------------------------------------------------------------------
1 | name: gh-pages
2 |
3 | on:
4 |   push:
5 |     branches: [master]
6 | 
7 | jobs:
8 |   build:
9 |     runs-on: ubuntu-20.04
10 |     steps:
11 |       - uses: actions/checkout@v2
12 |       - uses: actions/setup-node@v1
13 |         with:
14 |           node-version: "15.x"
15 |       - run: "[[ -z $(git show HEAD -- docs) ]] || npm install"
16 |       - run: "[[ -z $(git show HEAD -- docs) ]] || npm run docs:build"
17 |       - run: "[[ -z $(git show HEAD -- docs) ]] || dev/deploy_gh_pages.sh"
18 |         env:
19 |           TOKEN_GITHUB: ${{ secrets.TOKEN_GITHUB }}
20 |
--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
1 | # https://github.com/golangci/golangci-lint-action
2 | name: lint
3 | on:
4 |   push:
5 |     tags: [ v* ]
6 |     branches:
7 |       - master
8 |       - main
9 |   pull_request:
10 |     branches:
11 |       - master
12 |       - main
13 | jobs:
14 |   golangci-lint:
15 |     runs-on: ubuntu-latest
16 |     steps:
17 |       - uses: actions/setup-go@v4
18 |         with:
19 |           go-version: '1.20'
20 |       - uses: actions/checkout@v3
21 |       - name: golangci-lint
22 |         uses: golangci/golangci-lint-action@v3
23 |         with:
24 |           # Optional: version of golangci-lint to use in form of v1.2 or v1.2.3 or `latest` to use the latest version
25 |           version: v1.51.2
26 | 
27 |           # Optional: working directory, useful for monorepos
28 |           # working-directory: somedir
29 | 
30 |           # Optional: golangci-lint command line arguments.
31 |           # args: --issues-exit-code=0
32 |           args: --timeout=3m
33 | 
34 |           # Optional: show only new issues if it's a pull request. The default value is `false`.
35 |           # only-new-issues: true
36 | 
37 |           # Optional: if set to true then all caching functionality will be completely disabled,
38 |           # takes precedence over all other caching options.
39 |           # skip-cache: true
40 | 
41 |           # Optional: if set to true then the action doesn't cache or restore ~/go/pkg.
42 |           # skip-pkg-cache: true
43 | 
44 |           # Optional: if set to true then the action doesn't cache or restore ~/.cache/go-build.
45 |           # skip-build-cache: true
46 |
--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | name: tests
2 |
3 | on:
4 |   push:
5 |     branches: [ master ]
6 |   pull_request:
7 |     branches: [ master ]
8 | 
9 | jobs:
10 | 
11 |   tests:
12 |     name: tests
13 |     runs-on: ubuntu-latest
14 |     steps:
15 | 
16 |       - name: Set up Go 1.x
17 |         uses: actions/setup-go@v4
18 |         with:
19 |           go-version: '1.21'
20 |         id: go
21 | 
22 |       - name: Check out code into the Go module directory
23 |         uses: actions/checkout@v2
24 | 
25 |       - name: Start docker-compose
26 |         run: |
27 |           docker-compose --version
28 |           docker-compose up -d
29 | 
30 |       - name: Build
31 |         run: make build
32 | 
33 |       - name: unittest
34 |         run: make gotest
35 | 
36 |       - name: benchtest
37 |         run: make benchtest
38 | 
39 |       - name: systest
40 |         run: make systest
41 | 
42 |       - name: Stop docker-compose
43 |         run: docker-compose down
44 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | tmp
2 | sh
3 | .vscode
4 | node_modules/
5 | package-lock.json
6 | nacos_cache
7 | .DS_Store
8 | *.log
9 | .idea
10 | *.bin
11 | *.out
12 | *.pem
13 | *.p12
14 | dist
15 | vendor
16 | bin/
17 |
18 |
--------------------------------------------------------------------------------
/.goreleaser.yml:
--------------------------------------------------------------------------------
1 | # This is an example goreleaser.yaml file with some sane defaults.
2 | # Make sure to check the documentation at http://goreleaser.com
3 | before:
4 |   hooks:
5 |     # You may remove this if you don't use go modules.
6 |     - go mod download
7 |     # you may remove this if you don't need go generate
8 |     - go generate ./...
9 | builds:
10 |   - id: clickhouse_sinker
11 |     env:
12 |       - CGO_ENABLED=0
13 |     goos:
14 |       - linux
15 |     goarch:
16 |       - amd64
17 |       - arm64
18 |     main: cmd/clickhouse_sinker/main.go
19 |     binary: clickhouse_sinker
20 |   - id: nacos_publish_config
21 |     env:
22 |       - CGO_ENABLED=0
23 |     goos:
24 |       - linux
25 |     goarch:
26 |       - amd64
27 |       - arm64
28 |     main: cmd/nacos_publish_config/main.go
29 |     binary: nacos_publish_config
30 | archives:
31 |   - replacements:
32 |       darwin: Darwin
33 |       linux: Linux
34 |       windows: Windows
35 |       386: i386
36 |       amd64: x86_64
37 | checksum:
38 |   name_template: 'checksums.txt'
39 | snapshot:
40 |   name_template: "{{ .Tag }}-next"
41 | changelog:
42 |   sort: asc
43 |   filters:
44 |     exclude:
45 |       - '^docs:'
46 |       - '^test:'
47 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | ---
2 |
3 | repos:
4 |   - repo: git://github.com/dnephin/pre-commit-golang
5 |     rev: HEAD
6 |     hooks:
7 |       - id: go-fmt
8 |       - id: go-imports
9 |       - id: golangci-lint
10 |       - id: go-unit-tests
11 |       - id: go-mod-tidy
12 |       - id: go-build
13 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | #### Version 3.0.8 (2023-08-xx)
4 |
5 | Improvements:
6 |
7 | - "too many simultaneous queries" from ClickHouse will no longer be a fatal error; instead, sinker will keep retrying until the write operation completes
8 | - Output the offset range per batch
9 |
10 | New Features:
11 |
12 | - Introduced a task-level configuration property "field", which accepts a valid JSON string and allows additional
13 |   fields to be appended to each input message.
14 |
15 | #### Version 3.0.7 (2023-07-19)
16 |
17 | Improvements:
18 |
19 | - when running sinker in cluster mode, a failure to calculate the lag of a certain task should not block the entire rebalance work. Instead, the affected task should not be assigned to any running sinker instance.
20 | - sinker will not crash when handling an incompatible map type.
21 |
22 | #### Version 3.0.6 (2023-05-19)
23 |
24 | Improvements:
25 |
26 | - When identifying new columns, also update the schema of tables that are part of a logical cluster
27 | - Allow writing metrics of non-numeric type
28 |
29 | #### Version 3.0.5 (2023-05-12)
30 |
31 | New Features:
32 |
33 | - Introduced two new configuration properties to Sinker:
34 |   - "ReloadSeriesMapInterval", which allows you to manage how frequently the cached seriesMap is reloaded.
35 |   - "ActiveSeriesRange", which allows you to manage the amount of cached information.
36 |   Refer to the "config.md" file for more info.
37 | - Add a new http endpoint "/debug/vars" for viewing contents of seriesMap
38 |
39 | Deprecations:
40 |
41 | - DialTimeout option is deprecated.
42 |
43 | #### Version 3.0.4 (2023-04-18)
44 |
45 | Improvements:
46 |
47 | - Automatically end sinker if it has been inactive for the last 10 minutes
48 |
49 | #### Version 3.0.3 (2023-03-30)
50 |
51 | Improvements:
52 |
53 | - Use a writing pool per shard to avoid ErrAcquireConnTimeout
54 | - Do not create a kafka client every time lags are calculated
55 | - Support configuring PlainloginModule in kafka.security section
56 |
57 | Bug Fixes:
58 |
59 | - Avoid the program getting stuck when facing a fatal panic
60 |
61 | #### Version 3.0.2 (2023-03-13)
62 |
63 | Improvements:
64 |
65 | - update sinker grafana dashboard
66 | - combine nacos log into sinker log
67 | - update dmseries map when applying new config, reload the records from the series table every day
68 | - avoid recreating dist tables, alter the table schema instead
69 | - update clickhouse_sinker_consume_lags metric every 10 secs
70 |
71 | #### Version 3.0.1 (2023-03-03)
72 |
73 | Bug Fixes:
74 |
75 | - Fix nacos publish config error "BUG: got different config"
76 | - Fix changing the "TimeUnit" config property not triggering a config reload
77 | - Fix illegal "TimeZone" values resulting in a sinker crash
78 | - Fix wrong parsing result of Decimal type [909](https://github.com/ClickHouse/clickhouse-go/pull/909)
79 |
80 | Improvements:
81 |
82 | - Metrics from GoCollector and ProcessCollector are now being pushed to metric-push-gateway-addrs
83 | - Terminate the program immediately when receiving another exit signal
84 | - Limit the fetch size and poll size based on the BufferSize config property
85 |
86 | #### Version 3.0.0 (2023-02-07)
87 |
88 | New Features:
89 |
90 | - Add support for ingesting multi-value metrics; the metric table will be expanded accordingly
91 | - Allow specifying the series table name
92 | - Allow customization of DatabaseName at the task level
93 |
94 | Improvements:
95 |
96 | - Group tasks by the consumerGroup property to reduce the number of kafka clients, see design.md for details
97 |
98 | Deprecation:
99 |
100 | - Kafka-go and Sarama are no longer internal options for sinker
101 |
102 |
103 | #### Version 2.6.9 (2023-02-07)
104 |
105 | Improvements:
106 |
107 | - Ignore the SIGHUP signal, so that firing up sinker with nohup works correctly
108 | - Stop retrying when facing an offsets commit error; leave it to a future commit to sync the offsets
109 | - An offsets commit error should not result in a process abort
110 |
111 | #### Version 2.6.8 (2022-12-10)
112 |
113 | New Features:
114 |
115 | - Add clickhouse Map type support
116 | - Small updates to allow TLS connections for AWS MSK, etc.
117 | ([169](https://github.com/housepower/clickhouse_sinker/pull/169))
118 |
119 | Bug Fixes:
120 |
121 | - Fix ClickHouse.Init goroutine leak
122 |
123 | #### Version 2.6.7 (2022-12-07)
124 |
125 | Improvements:
126 |
127 | - Add new sinker metrics to show the wrSeriesQuota status
128 | - Always allow writing new series to avoid data mismatch between the series and metrics tables
129 |
130 | #### Version 2.6.6 (2022-12-05)
131 |
132 | Bug Fixes:
133 |
134 | - Reset wrSeries in a timely manner to avoid failures when writing metric data to clickhouse
135 |
136 | #### Version 2.6.5 (2022-11-30)
137 |
138 | Bug Fixes:
139 |
140 | - Fix the 'segmentation violation' in ch-go package
141 | - Fix the create table error 'table already exists' when trying to create a distributed table
142 |
143 | #### Previous releases
144 |
145 | See https://github.com/housepower/clickhouse_sinker/releases
146 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM golang:1.21-alpine3.19 AS builder
2 |
3 | ADD . /app
4 | WORKDIR /app
5 | RUN go env -w GOPROXY=https://goproxy.cn,direct
6 | RUN make build
7 |
8 | FROM alpine:3.19
9 | RUN apk --no-cache add ca-certificates tzdata && \
10 | echo "UTC" > /etc/timezone
11 | COPY --from=builder /app/bin/clickhouse_sinker /usr/local/bin/clickhouse_sinker
12 | COPY --from=builder /app/bin/nacos_publish_config /usr/local/bin/nacos_publish_config
13 | COPY --from=builder /app/bin/kafka_gen_log /usr/local/bin/kafka_gen_log
14 | COPY --from=builder /app/bin/kafka_gen_metric /usr/local/bin/kafka_gen_metric
15 |
16 | # clickhouse_sinker gets config from local file "/etc/clickhouse_sinker.hjson" by default.
17 | # Customize behavior with the following env variables:
18 | # - V
19 | # - LOG_LEVEL
20 | # - LOG_PATHS
21 | # - HTTP_PORT
22 | # - HTTP_HOST
23 | # - METRIC_PUSH_GATEWAY_ADDRS
24 | # - PUSH_INTERVAL
25 | # - LOCAL_CFG_FILE
26 | # - NACOS_ADDR
27 | # - NACOS_USERNAME
28 | # - NACOS_PASSWORD
29 | # - NACOS_NAMESPACE_ID
30 | # - NACOS_GROUP
31 | # - NACOS_DATAID
32 | # - NACOS_SERVICE_NAME
33 | # - CLICKHOUSE_USERNAME
34 | # - CLICKHOUSE_PASSWORD
35 | # - KAFKA_USERNAME
36 | # - KAFKA_PASSWORD
37 | # - KAFKA_GSSAPI_USERNAME
38 | # - KAFKA_GSSAPI_PASSWORD
39 | # See cmd/clickhouse_sinker/main.go for details.
40 |
41 | ENTRYPOINT ["/usr/local/bin/clickhouse_sinker"]
42 |
--------------------------------------------------------------------------------
/Dockerfile_goreleaser:
--------------------------------------------------------------------------------
1 | FROM alpine:3.19
2 | RUN sed -i 's/dl-cdn.alpinelinux.org/mirrors.aliyun.com/g' /etc/apk/repositories && \
3 | apk --no-cache add ca-certificates tzdata && \
4 | echo "UTC" > /etc/timezone
5 | ADD dist/clickhouse_sinker_linux_amd64_v1/clickhouse_sinker /usr/local/bin/clickhouse_sinker
6 | ADD dist/nacos_publish_config_linux_amd64_v1/nacos_publish_config /usr/local/bin/nacos_publish_config
7 |
8 | # clickhouse_sinker gets config from local file "/etc/clickhouse_sinker.hjson" by default.
9 | # Customize behavior with the following env variables:
10 | # - V
11 | # - LOG_LEVEL
12 | # - LOG_PATHS
13 | # - HTTP_PORT
14 | # - HTTP_HOST
15 | # - METRIC_PUSH_GATEWAY_ADDRS
16 | # - PUSH_INTERVAL
17 | # - LOCAL_CFG_FILE
18 | # - NACOS_ADDR
19 | # - NACOS_USERNAME
20 | # - NACOS_PASSWORD
21 | # - NACOS_NAMESPACE_ID
22 | # - NACOS_GROUP
23 | # - NACOS_DATAID
24 | # - NACOS_SERVICE_NAME
25 | # - CLICKHOUSE_USERNAME
26 | # - CLICKHOUSE_PASSWORD
27 | # - KAFKA_USERNAME
28 | # - KAFKA_PASSWORD
29 | # - KAFKA_GSSAPI_USERNAME
30 | # - KAFKA_GSSAPI_PASSWORD
31 | # See cmd/clickhouse_sinker/main.go for details.
32 |
33 | ENTRYPOINT ["/usr/local/bin/clickhouse_sinker"]
34 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | VERSION=$(shell git describe --tags --dirty)
2 | SINKER_LDFLAGS += -X "main.version=$(VERSION)"
3 | SINKER_LDFLAGS += -X "main.date=$(shell date --iso-8601=s)"
4 | SINKER_LDFLAGS += -X "main.commit=$(shell git rev-parse HEAD)"
5 | SINKER_LDFLAGS += -X "main.builtBy=$(shell echo `whoami`@`hostname`)"
6 | DEFAULT_CFG_PATH = /etc/clickhouse_sinker.hjson
7 | IMG_TAGGED = hub.eoitek.net/aimeter/clickhouse_sinker:${VERSION}
8 | IMG_LATEST = hub.eoitek.net/aimeter/clickhouse_sinker:latest
9 | export GOPROXY=https://goproxy.cn,direct
10 |
11 | GO := CGO_ENABLED=0 go
12 | GOBUILD := $(GO) build $(BUILD_FLAG)
13 |
14 |
15 | .PHONY: pre
16 | pre:
17 | 	go mod tidy
18 | 
19 | .PHONY: build
20 | build: pre
21 | 	$(GOBUILD) -ldflags '$(SINKER_LDFLAGS)' -o bin/ ./...
22 | 
23 | .PHONY: debug
24 | debug: pre
25 | 	$(GOBUILD) -ldflags '$(SINKER_LDFLAGS)' -gcflags "all=-N -l" -o bin/ ./...
26 | 
27 | .PHONY: benchtest
28 | benchtest: pre
29 | 	go test -bench=. ./...
30 | 
31 | .PHONY: systest
32 | systest: build
33 | 	bash go.test.sh
34 | 	bash go.metrictest.sh
35 | 
36 | .PHONY: gotest
37 | gotest: pre
38 | 	go test -v ./... -coverprofile=coverage.out -covermode count
39 | 	go tool cover -func coverage.out
40 | 
41 | .PHONY: lint
42 | lint:
43 | 	golangci-lint run -D errcheck,govet,gosimple
44 | 
45 | .PHONY: run
46 | run: pre
47 | 	go run cmd/clickhouse_sinker/main.go --local-cfg-file docker/test_dynamic_schema.hjson
48 | 
49 | .PHONY: release
50 | release:
51 | 	goreleaser release --skip-publish --clean
52 | 
53 | .PHONY: docker-build
54 | docker-build: release
55 | 	docker build . -t clickhouse_sinker:${VERSION} -f Dockerfile_goreleaser
56 | 	docker tag clickhouse_sinker:${VERSION} ${IMG_TAGGED}
57 | 	docker tag clickhouse_sinker:${VERSION} ${IMG_LATEST}
58 | 	docker rmi clickhouse_sinker:${VERSION}
59 | 
60 | .PHONY: docker-push
61 | docker-push:
62 | 	docker push ${IMG_TAGGED}
63 | 	docker push ${IMG_LATEST}
64 | 
65 | .PHONY: docker-run
66 | docker-run:
67 | 	docker run -d -v ${DEFAULT_CFG_PATH}:${DEFAULT_CFG_PATH} ${IMG_LATEST}
68 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # clickhouse_sinker
2 |
3 | [](https://travis-ci.com/housepower/clickhouse_sinker)
4 | [](https://goreportcard.com/report/github.com/housepower/clickhouse_sinker)
5 |
6 | clickhouse_sinker is a sinker program that transfers Kafka messages into [ClickHouse](https://github.com/clickhouse/clickhouse).
7 |
8 | [Get Started](https://housepower.github.io/clickhouse_sinker/)
9 |
10 | Refer to the [docs](https://housepower.github.io/clickhouse_sinker/dev/introduction.html#features) to see how it works.
--------------------------------------------------------------------------------
/cmd/kafka_gen_log/marshal_amd64.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "github.com/bytedance/sonic"
5 | )
6 |
7 | func JSONMarshal(obj interface{}) ([]byte, error) {
8 | return sonic.Marshal(obj)
9 | }
10 |
--------------------------------------------------------------------------------
/cmd/kafka_gen_log/marshal_arm64.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "encoding/json"
5 | )
6 |
7 | func JSONMarshal(obj interface{}) ([]byte, error) {
8 | return json.Marshal(obj)
9 | }
10 |
--------------------------------------------------------------------------------
/cmd/kafka_gen_metric/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | /*
4 | CREATE TABLE sensor_dt_result_online ON CLUSTER abc (
5 | `@time` DateTime,
6 | `@ItemGUID` String,
7 | `@MetricName` LowCardinality(String),
8 | `@AlgName` LowCardinality(String),
9 | value Float64,
10 | upper Float64,
11 | lower Float64,
12 | yhat_upper Float64,
13 | yhat_lower Float64,
14 | yhat_flag Int32,
15 | total_anomaly Int64,
16 | anomaly Float32,
17 | abnormal_type Int16,
18 | abnormality Int16,
19 | container_id Int64,
20 | hard_upper Float64,
21 | hard_lower Float64,
22 | hard_anomaly Int64,
23 | shift_tag Int32,
24 | season_tag Int32,
25 | spike_tag Int32,
26 | is_missing Int32
27 | ) ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{database}/{table}/{shard}', '{replica}')
28 | PARTITION BY toYYYYMMDD(`@time`)
29 | ORDER BY (`@ItemGUID`, `@MetricName`, `@time`);
30 |
31 | CREATE TABLE dist_sensor_dt_result_online ON CLUSTER abc AS sensor_dt_result_online ENGINE = Distributed(abc, default, sensor_dt_result_online);
32 |
33 | */
34 |
35 | import (
36 | "context"
37 | "flag"
38 | "fmt"
39 | "math/rand"
40 | "os"
41 | "os/signal"
42 | "strings"
43 | "sync/atomic"
44 | "syscall"
45 | "time"
46 |
47 | "github.com/google/gops/agent"
48 | "github.com/housepower/clickhouse_sinker/util"
49 | "github.com/thanos-io/thanos/pkg/errors"
50 | "github.com/twmb/franz-go/pkg/kgo"
51 | "go.uber.org/zap"
52 | )
53 |
54 | const (
55 | BusinessNum = 10
56 | InstanceNum = 100
57 | )
58 |
59 | var (
60 | KafkaBrokers string
61 | KafkaTopic string
62 |
63 | ListMetricName = []string{"CPU", "RAM", "IOPS"}
64 | ListArgName = []string{
65 | "DecisionTrees",
66 | "NaiveBayesClassification",
67 | "OrdinaryLeastSquaresRegression",
68 | "LogisticRegression",
69 | "SupportVectorMachines",
70 | "EnsembleMethods",
71 | "ClusteringAlgorithms",
72 | "PrincipalComponentAnalysis",
73 | "SingularValueDecomposition",
74 | "IndependentComponentAnalysis"}
75 |
76 | gLines int64
77 | gSize int64
78 | )
79 |
80 | type Metric struct {
81 | Time time.Time `json:"@time"` //seconds since epoch
82 | ItemGUID string `json:"@item_guid"`
83 | MetricName string `json:"@metric_name"`
84 | AlgName string `json:"@alg_name"`
85 | Value float64 `json:"value"`
86 | Upper float64 `json:"upper"`
87 | Lower float64 `json:"lower"`
88 | YhatUpper float64 `json:"yhat_upper"`
89 | YhatLower float64 `json:"yhat_lower"`
90 | YhatFlag int32 `json:"yhat_flag"`
91 | TotalAnomaly int64 `json:"total_anomaly"`
92 | Anomaly float64 `json:"anomaly"`
93 | AbnormalType int16 `json:"abnormal_type"`
94 | Abnormality int16 `json:"abnormality"`
95 | ContainerID int64 `json:"container_id"`
96 | HardUpper float64 `json:"hard_upper"`
97 | HardLower float64 `json:"hard_lower"`
98 | HardAnomaly int64 `json:"hard_anomaly"`
99 | ShiftTag int32 `json:"shift_tag"`
100 | SeasonTag int32 `json:"season_tag"`
101 | SpikeTag int32 `json:"spike_tag"`
102 | IsMissing int32 `json:"is_missing"`
103 | }
104 |
105 | func randElement(list []string) string {
106 | off := rand.Intn(len(list))
107 | return list[off]
108 | }
109 |
110 | func generate() {
111 | toRound := time.Now().Add(time.Duration(-30*24) * time.Hour)
112 | // refers to time.Time.Truncate
113 | rounded := time.Date(toRound.Year(), toRound.Month(), toRound.Day(), 0, 0, 0, 0, toRound.Location())
114 |
115 | wp := util.NewWorkerPool(10, 10000)
116 | opts := []kgo.Opt{
117 | kgo.SeedBrokers(strings.Split(KafkaBrokers, ",")...),
118 | }
119 | var err error
120 | var cl *kgo.Client
121 | if cl, err = kgo.NewClient(opts...); err != nil {
122 | util.Logger.Fatal("kgo.NewClient failed", zap.Error(err))
123 | }
124 | defer cl.Close()
125 |
126 | ctx := context.Background()
127 | produceCb := func(rec *kgo.Record, err error) {
128 | if err != nil {
129 | util.Logger.Fatal("kgo.Client.Produce failed", zap.Error(err))
130 | }
131 | atomic.AddInt64(&gLines, int64(1))
132 | atomic.AddInt64(&gSize, int64(len(rec.Value)))
133 | }
134 |
135 | for day := 0; ; day++ {
136 | tsDay := rounded.Add(time.Duration(24*day) * time.Hour)
137 | for step := 0; step < 24*60*60; step++ {
138 | timestamp := tsDay.Add(time.Duration(step) * time.Second)
139 | for bus := 0; bus < BusinessNum; bus++ {
140 | for ins := 0; ins < InstanceNum; ins++ {
141 | metric := Metric{
142 | Time: timestamp,
143 | ItemGUID: fmt.Sprintf("bus%03d_ins%03d", bus, ins),
144 | MetricName: randElement(ListMetricName),
145 | AlgName: randElement(ListArgName),
146 | Value: float64(rand.Intn(100)),
147 | Upper: float64(100.0),
148 | Lower: float64(60.0),
149 | YhatUpper: float64(100.0),
150 | YhatLower: float64(60.0),
151 | YhatFlag: rand.Int31n(65535),
152 | TotalAnomaly: rand.Int63n(65535),
153 | Anomaly: float64(rand.Intn(100)) / float64(100),
154 | AbnormalType: int16(rand.Intn(1000)),
155 | Abnormality: int16(rand.Intn(1000)),
156 | ContainerID: rand.Int63n(65535),
157 | HardUpper: float64(100),
158 | HardLower: float64(60),
159 | HardAnomaly: int64(rand.Intn(65535)),
160 | ShiftTag: int32(rand.Intn(65535)),
161 | SeasonTag: int32(rand.Intn(65535)),
162 | SpikeTag: int32(rand.Intn(65535)),
163 | IsMissing: int32(rand.Intn(2)),
164 | }
165 |
166 | _ = wp.Submit(func() {
167 | var b []byte
168 | if b, err = JSONMarshal(&metric); err != nil {
169 | err = errors.Wrapf(err, "")
170 | util.Logger.Fatal("got error", zap.Error(err))
171 | }
172 | cl.Produce(ctx, &kgo.Record{
173 | Topic: KafkaTopic,
174 | Key: []byte(metric.ItemGUID),
175 | Value: b,
176 | }, produceCb)
177 | })
178 | }
179 | }
180 | }
181 | }
182 | }
183 |
184 | func main() {
185 | util.InitLogger([]string{"stdout"})
186 | flag.Usage = func() {
187 | usage := fmt.Sprintf(`Usage of %s
188 | %s kafka_brokers topic
189 | This util fills some fields with random content, serializes it, and sends it to kafka.
190 | kafka_brokers: for example, 192.168.110.8:9092,192.168.110.12:9092,192.168.110.16:9092
191 | topic: for example, sensor_dt_result_online`, os.Args[0], os.Args[0])
192 | util.Logger.Info(usage)
193 | os.Exit(0)
194 | }
195 | flag.Parse()
196 | args := flag.Args()
197 | if len(args) != 2 {
198 | flag.Usage()
199 | }
200 | KafkaBrokers = args[0]
201 | KafkaTopic = args[1]
202 | util.Logger.Info("CLI options",
203 | zap.String("KafkaBrokers", KafkaBrokers),
204 | zap.String("KafkaTopic", KafkaTopic),
205 | zap.Int("BusinessNum", BusinessNum),
206 | zap.Int("InstanceNum", InstanceNum))
207 |
208 | if err := agent.Listen(agent.Options{}); err != nil {
209 | util.Logger.Fatal("got error", zap.Error(err))
210 | }
211 |
212 | var prevLines, prevSize int64
213 | ctx, _ := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
214 | go generate()
215 |
216 | ticker := time.NewTicker(10 * time.Second)
217 | defer ticker.Stop()
218 | LOOP:
219 | for {
220 | select {
221 | case <-ctx.Done():
222 | util.Logger.Info("quit due to context being canceled")
223 | break LOOP
224 | case <-ticker.C:
225 | var speedLine, speedSize int64
226 | if gLines != 0 {
227 | speedLine = (gLines - prevLines) / int64(10)
228 | speedSize = (gSize - prevSize) / int64(10)
229 | }
230 | prevLines = gLines
231 | prevSize = gSize
232 | util.Logger.Info("status", zap.Int64("lines", gLines), zap.Int64("bytes", gSize), zap.Int64("speed(lines/s)", speedLine), zap.Int64("speed(bytes/s)", speedSize))
233 | }
234 | }
235 | }
236 |
--------------------------------------------------------------------------------
/cmd/kafka_gen_metric/marshal_amd64.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "github.com/bytedance/sonic"
5 | )
6 |
7 | func JSONMarshal(obj interface{}) ([]byte, error) {
8 | return sonic.Marshal(obj)
9 | }
10 |
--------------------------------------------------------------------------------
/cmd/kafka_gen_metric/marshal_arm64.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "encoding/json"
5 | )
6 |
7 | func JSONMarshal(obj interface{}) ([]byte, error) {
8 | return json.Marshal(obj)
9 | }
10 |
--------------------------------------------------------------------------------
/cmd/kafka_gen_prom/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | /*
4 | https://github.com/ClickHouse/ClickHouse/issues/38878
5 | performance of inserting into a sparse wide table is bad
6 |
7 | -- Prometheus metric solution 1 - one wide table, each row is a datapoint and its series lables
8 | CREATE TABLE default.prom_extend ON CLUSTER abc (
9 | timestamp DateTime,
10 | value Float32,
11 | __name__ String,
12 | labels String
13 | ) ENGINE=ReplicatedMergeTree()
14 | PARTITION BY toYYYYMMDD(timestamp)
15 | ORDER BY (__name__, timestamp);
16 |
17 | CREATE TABLE default.dist_prom_extend ON CLUSTER abc AS prom_extend ENGINE = Distributed(abc, default, prom_extend);
18 |
19 | -- Prometheus metric solution 2 - separate tables for datapoints and series labels, which can be joined on series id
20 | CREATE TABLE default.prom_metric ON CLUSTER abc (
21 | __series_id__ Int64,
22 | timestamp DateTime CODEC(DoubleDelta, LZ4),
23 | value Float32 CODEC(ZSTD(15))
24 | ) ENGINE=ReplicatedReplacingMergeTree()
25 | PARTITION BY toYYYYMMDD(timestamp)
26 | ORDER BY (__series_id__, timestamp);
27 |
28 | CREATE TABLE default.dist_prom_metric ON CLUSTER abc AS prom_metric ENGINE = Distributed(abc, default, prom_metric);
29 |
30 | CREATE TABLE default.prom_metric_series ON CLUSTER abc (
31 | __series_id__ Int64,
32 | __mgmt_id__ Int64,
33 | labels String,
34 | __name__ String
35 | ) ENGINE=ReplicatedReplacingMergeTree()
36 | ORDER BY (__name__, __series_id__);
37 |
38 | CREATE TABLE default.dist_prom_metric_series ON CLUSTER abc AS prom_metric_series ENGINE = Distributed(abc, default, prom_metric_series);
39 |
40 | CREATE TABLE default.prom_metric_agg ON CLUSTER abc (
41 | __series_id__ Int64,
42 | timestamp DateTime CODEC(DoubleDelta, LZ4),
43 | max_value AggregateFunction(max, Float32),
44 | min_value AggregateFunction(min, Float32),
45 | avg_value AggregateFunction(avg, Float32)
46 | ) ENGINE=ReplicatedReplacingMergeTree()
47 | PARTITION BY toYYYYMMDD(timestamp)
48 | ORDER BY (__series_id__, timestamp);
49 |
50 | CREATE TABLE default.dist_prom_metric_agg ON CLUSTER abc AS prom_metric_agg ENGINE = Distributed(abc, default, prom_metric_agg);
51 |
52 | SELECT __series_id__,
53 | toStartOfDay(timestamp) AS timestamp,
54 | maxMerge(max_value) AS max_value,
55 | minMerge(min_value) AS min_value,
56 | avgMerge(avg_value) AS avg_value
57 | FROM default.dist_prom_metric_agg
58 | WHERE __series_id__ IN (-9223014754132113609, -9223015002162651005)
59 | GROUP BY __series_id__, timestamp
60 | ORDER BY __series_id__, timestamp;
61 |
62 | -- Activate aggregation for future datapoints by creating a materialized view
63 | CREATE MATERIALIZED VIEW default.prom_metric_mv ON CLUSTER abc
64 | TO prom_metric_agg
65 | AS SELECT __series_id__,
66 | toStartOfHour(timestamp) AS timestamp,
67 | maxState(value) AS max_value,
68 | minState(value) AS min_value,
69 | avgState(value) AS avg_value
70 | FROM prom_metric
71 | GROUP BY __series_id__, timestamp;
72 |
73 | -- Deactivate aggregation by dropping the materialized view. You can revise and recreate it later as needed.
74 | DROP TABLE default.prom_metric_mv ON CLUSTER abc SYNC;
75 |
76 | */
77 |
78 | import (
79 | "context"
80 | "flag"
81 | "fmt"
82 | "math/rand"
83 | "os"
84 | "os/signal"
85 | "sort"
86 | "strings"
87 | "sync/atomic"
88 | "syscall"
89 | "time"
90 |
91 | "github.com/bytedance/sonic"
92 | "github.com/cespare/xxhash/v2"
93 | "github.com/google/gops/agent"
94 | "github.com/housepower/clickhouse_sinker/util"
95 | "github.com/thanos-io/thanos/pkg/errors"
96 | "github.com/twmb/franz-go/pkg/kgo"
97 | "go.uber.org/zap"
98 | )
99 |
100 | // number of series: NumMetrics * (NumRunes^LenVal)^NumKeys
101 | const (
102 | Alpha = "abcdefghijklmnopqrstuvwxyz"
103 | NumMetrics = 1000
104 | NumKeys = 3
105 | NumRunes = 10
106 | LenVal = 1 // 1000 * (10^1)^3 = 10^6 series
107 | NumAllKeys = 1000
108 | )
109 |
110 | var (
111 | KafkaBrokers string
112 | KafkaTopic string
113 | gLines int64
114 | gSize int64
115 | metrics []PromMetric
116 | )
117 |
118 | type Labels map[string]string
119 |
120 | type Datapoint struct {
121 | Timestamp time.Time
122 | Value float32
123 | Value1 float64
124 | Value2 int64
125 | Value3 bool
126 | Name string `json:"__name__"`
127 | Labels Labels
128 | LabelKeys []string
129 | }
130 |
131 | // We need every label to be present at the top level.
132 | func (dp Datapoint) MarshalJSON() ([]byte, error) {
133 | var dig xxhash.Digest
134 | for _, labelKey := range dp.LabelKeys {
135 | _, _ = dig.WriteString("###")
136 | _, _ = dig.WriteString(labelKey)
137 | _, _ = dig.WriteString("###")
138 | _, _ = dig.WriteString(dp.Labels[labelKey])
139 | }
140 | mgmtID := int64(dig.Sum64())
141 | seriesID := mgmtID
142 | labels, err := sonic.MarshalString(dp.Labels)
143 | if err != nil {
144 | return nil, err
145 | }
146 | labels2 := labels[1 : len(labels)-1]
147 | msg := fmt.Sprintf(`{"timestamp":"%s", "value":%f, "value1":%g, "value2":%d, "value3":%t, "__name__":"%s", %s, "__series_id__":%d, "__mgmt_id__":%d}`,
148 | dp.Timestamp.Format(time.RFC3339), dp.Value, dp.Value1, dp.Value2, dp.Value3, dp.Name, labels2, seriesID, mgmtID)
149 | return []byte(msg), nil
150 | }
151 |
152 | type PromMetric struct {
153 | Name string
154 | LabelKeys []string
155 | LabelValues []string
156 | }
157 |
158 | func randValue() (val string) {
159 | b := make([]byte, LenVal)
160 | for i := 0; i < LenVal; i++ {
161 | b[i] = Alpha[rand.Intn(NumRunes)] // one of NumRunes possible runes, matching the series-count comment above
162 | }
163 | val = string(b)
164 | return
165 | }
166 |
167 | func randBool() bool {
168 | // the global rand source is automatically seeded since Go 1.20; creating and discarding a new source here was a no-op
169 | return rand.Intn(2) == 1
170 | }
171 |
172 | func initMetrics() {
173 | metrics = make([]PromMetric, NumMetrics)
174 | for i := 0; i < NumMetrics; i++ {
175 | m := PromMetric{
176 | Name: fmt.Sprintf("metric_%08d", i),
177 | LabelKeys: make([]string, NumKeys),
178 | LabelValues: make([]string, NumKeys),
179 | }
180 | for j := 0; j < NumKeys; j++ {
181 | key := fmt.Sprintf("key_%d", rand.Intn(NumAllKeys))
182 | m.LabelKeys[j] = key
183 | m.LabelValues[j] = randValue()
184 | }
185 | sort.Strings(m.LabelKeys)
186 | metrics[i] = m
187 | }
188 | }
189 |
190 | func generate() {
191 | initMetrics()
192 | toRound := time.Now().Add(time.Duration(-30*24) * time.Hour)
193 | // refers to time.Time.Truncate
194 | rounded := time.Date(toRound.Year(), toRound.Month(), toRound.Day(), 0, 0, 0, 0, toRound.Location())
195 |
196 | wp := util.NewWorkerPool(10, 10000)
197 | opts := []kgo.Opt{
198 | kgo.SeedBrokers(strings.Split(KafkaBrokers, ",")...),
199 | }
200 | var err error
201 | var cl *kgo.Client
202 | if cl, err = kgo.NewClient(opts...); err != nil {
203 | util.Logger.Fatal("kgo.NewClient failed", zap.Error(err))
204 | }
205 | defer cl.Close()
206 |
207 | ctx := context.Background()
208 | produceCb := func(rec *kgo.Record, err error) {
209 | if err != nil {
210 | util.Logger.Fatal("kgo.Client.Produce failed", zap.Error(err))
211 | }
212 | atomic.AddInt64(&gLines, int64(1))
213 | atomic.AddInt64(&gSize, int64(len(rec.Value)))
214 | }
215 |
216 | for day := 0; ; day++ {
217 | tsDay := rounded.Add(time.Duration(24*day) * time.Hour)
218 | for step := 0; step < 24*60*60; step++ {
219 | timestamp := tsDay.Add(time.Duration(step) * time.Second)
220 | for i := 0; i < NumMetrics; i++ {
221 | dp := Datapoint{
222 | Timestamp: timestamp,
223 | Value: rand.Float32(),
224 | Value1: rand.Float64(),
225 | Value2: rand.Int63(),
226 | Value3: randBool(),
227 | Name: metrics[i].Name,
228 | Labels: make(Labels),
229 | LabelKeys: metrics[i].LabelKeys,
230 | }
231 | for valueIndex, key := range metrics[i].LabelKeys {
232 | dp.Labels[key] = metrics[i].LabelValues[valueIndex]
233 | }
234 |
235 | _ = wp.Submit(func() {
236 | var b []byte
237 | if b, err = dp.MarshalJSON(); err != nil {
238 | err = errors.Wrapf(err, "")
239 | util.Logger.Fatal("got error", zap.Error(err))
240 | }
241 | cl.Produce(ctx, &kgo.Record{
242 | Topic: KafkaTopic,
243 | Key: []byte(dp.Name),
244 | Value: b,
245 | }, produceCb)
246 | })
247 | }
248 | }
249 | }
250 | }
251 |
252 | func main() {
253 | util.InitLogger([]string{"stdout"})
254 | flag.Usage = func() {
255 | usage := fmt.Sprintf(`Usage of %s
256 | %s kafka_brokers topic
257 | This util fills some fields with random content, serializes it, and sends it to kafka.
258 | kafka_brokers: for example, 192.168.110.8:9092,192.168.110.12:9092,192.168.110.16:9092
259 | topic: for example, prom_extend`, os.Args[0], os.Args[0])
260 | util.Logger.Info(usage)
261 | os.Exit(0)
262 | }
263 | flag.Parse()
264 | args := flag.Args()
265 | if len(args) != 2 {
266 | flag.Usage()
267 | }
268 | KafkaBrokers = args[0]
269 | KafkaTopic = args[1]
270 | util.Logger.Info("CLI options",
271 | zap.String("KafkaBrokers", KafkaBrokers),
272 | zap.String("KafkaTopic", KafkaTopic),
273 | zap.Int("NumMetrics", NumMetrics),
274 | zap.Int("NumKeys", NumKeys),
275 | zap.Int("NumRunes", NumRunes),
276 | zap.Int("LenVal", LenVal),
277 | zap.Int("NumAllKeys", NumAllKeys),
278 | )
279 |
280 | if err := agent.Listen(agent.Options{}); err != nil {
281 | util.Logger.Fatal("got error", zap.Error(err))
282 | }
283 |
284 | var prevLines, prevSize int64
285 | ctx, _ := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
286 | go generate()
287 |
288 | ticker := time.NewTicker(10 * time.Second)
289 | defer ticker.Stop()
290 | LOOP:
291 | for {
292 | select {
293 | case <-ctx.Done():
294 | util.Logger.Info("quit due to context being canceled")
295 | break LOOP
296 | case <-ticker.C:
297 | var speedLine, speedSize int64
298 | if gLines != 0 {
299 | speedLine = (gLines - prevLines) / int64(10)
300 | speedSize = (gSize - prevSize) / int64(10)
301 | }
302 | prevLines = gLines
303 | prevSize = gSize
304 | util.Logger.Info("status", zap.Int64("lines", gLines), zap.Int64("bytes", gSize), zap.Int64("speed(lines/s)", speedLine), zap.Int64("speed(bytes/s)", speedSize))
305 | }
306 | }
307 | }
308 |
--------------------------------------------------------------------------------
/cmd/nacos_publish_config/main.go:
--------------------------------------------------------------------------------
1 | /*Copyright [2019] housepower
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 |
16 | package main
17 |
18 | import (
19 | "flag"
20 | "fmt"
21 | "os"
22 | "reflect"
23 | "time"
24 |
25 | _ "github.com/ClickHouse/clickhouse-go/v2"
26 | "github.com/housepower/clickhouse_sinker/config"
27 | cm "github.com/housepower/clickhouse_sinker/config_manager"
28 | "github.com/housepower/clickhouse_sinker/util"
29 | "github.com/jinzhu/copier"
30 | "go.uber.org/zap"
31 | )
32 |
33 | var (
34 | nacosAddr = flag.String("nacos-addr", "127.0.0.1:8848",
35 | "a list of comma-separated nacos server addresses")
36 | nacosUsername = flag.String("nacos-username", "username",
37 | "nacos username")
38 | nacosPassword = flag.String("nacos-password", "password",
39 | "nacos password")
40 | nacosNamespaceID = flag.String("nacos-namespace-id", "",
41 | `nacos namespace ID. Neither DEFAULT_NAMESPACE_ID("public") nor namespace name work!`)
42 | nacosGroup = flag.String("nacos-group", "DEFAULT_GROUP",
43 | `nacos group name. Empty string doesn't work!`)
44 | nacosDataID = flag.String("nacos-dataid", "clickhouse_sinker.hjson",
45 | `nacos data id`)
46 |
47 | localCfgFile = flag.String("local-cfg-file", "/etc/clickhouse_sinker.hjson", "local config file")
48 | replicas = flag.Int("replicas", 1, "replicate each task to multiple ones with the same config except task name, consumer group and table name")
49 | maxOpenConns = flag.Int("max-open-conns", 0, "max open connections per shard")
50 |
51 | clickhouseUsername = flag.String("clickhouse-username", "", "clickhouse username")
52 | clickhousePassword = flag.String("clickhouse-password", "", "clickhouse password")
53 | kafkaUsername = flag.String("kafka-username", "", "kafka username")
54 | kafkaPassword = flag.String("kafka-password", "", "kafka password")
55 | )
56 |
57 | // An empty string is not a valid namespaceID.
58 | func getProperties() map[string]interface{} {
59 | properties := make(map[string]interface{}, 6)
60 | properties["serverAddrs"] = *nacosAddr
61 | properties["username"] = *nacosUsername
62 | properties["password"] = *nacosPassword
63 | properties["namespaceId"] = *nacosNamespaceID
64 | properties["group"] = *nacosGroup
65 | properties["dataId"] = *nacosDataID
66 | return properties
67 | }
68 |
69 | func PublishSinkerConfig() {
70 | var err error
71 | var cfg *config.Config
72 | if _, err = os.Stat(*localCfgFile); err == nil {
73 | if cfg, err = config.ParseLocalCfgFile(*localCfgFile); err != nil {
74 | util.Logger.Fatal("config.ParseLocalCfgFile failed", zap.Error(err))
75 | return
76 | }
77 | } else {
78 | util.Logger.Fatal("expect --local-cfg-file")
79 | return
80 | }
81 |
82 | if err = cfg.Normallize(false, "", util.Credentials{
83 | ClickhouseUsername: *clickhouseUsername,
84 | ClickhousePassword: *clickhousePassword,
85 | KafkaUsername: *kafkaUsername,
86 | KafkaPassword: *kafkaPassword,
87 | }); err != nil {
88 | util.Logger.Fatal("cfg.Normallize failed", zap.Error(err))
89 | return
90 | }
91 | tasks := cfg.Tasks
92 | for i := 1; i < *replicas; i++ {
93 | for j := 0; j < len(tasks); j++ {
94 | taskCfg := &config.TaskConfig{}
95 | if err = copier.Copy(taskCfg, tasks[j]); err != nil {
96 | util.Logger.Fatal("copier.Copy failed", zap.Error(err))
97 | }
98 | taskCfg.Name = fmt.Sprintf("%s_r%d", taskCfg.Name, i)
99 | taskCfg.ConsumerGroup = fmt.Sprintf("%s_r%d", taskCfg.ConsumerGroup, i)
100 | taskCfg.TableName = fmt.Sprintf("%s_r%d", taskCfg.TableName, i)
101 | cfg.Tasks = append(cfg.Tasks, taskCfg)
102 | }
103 | }
104 | if *maxOpenConns > 0 {
105 | cfg.Clickhouse.MaxOpenConns = *maxOpenConns
106 | }
107 |
108 | ncm := cm.NacosConfManager{}
109 | properties := getProperties()
110 | if err = ncm.Init(properties); err != nil {
111 | util.Logger.Fatal("ncm.Init failed", zap.Error(err))
112 | }
113 |
114 | if err = ncm.PublishConfig(cfg); err != nil {
115 | util.Logger.Fatal("ncm.PublishConfig failed", zap.Error(err))
116 | }
117 | util.Logger.Info("sleep a while")
118 | time.Sleep(10 * time.Second)
119 |
120 | var newCfg *config.Config
121 | if newCfg, err = ncm.GetConfig(); err != nil {
122 | util.Logger.Fatal("ncm.GetConfig failed", zap.Error(err))
123 | }
124 | if !reflect.DeepEqual(newCfg, cfg) {
125 | util.Logger.Fatal("BUG: got different config", zap.Reflect("cfg", cfg), zap.Reflect("newCfg", newCfg))
126 | }
127 | }
128 |
129 | func main() {
130 | util.InitLogger([]string{"stdout"})
131 | flag.Parse()
132 | PublishSinkerConfig()
133 | }
134 |
--------------------------------------------------------------------------------
/config_manager/lags.go:
--------------------------------------------------------------------------------
1 | package rcm
2 |
3 | import (
4 | "context"
5 | "reflect"
6 |
7 | "github.com/housepower/clickhouse_sinker/config"
8 | "github.com/housepower/clickhouse_sinker/input"
9 | "github.com/housepower/clickhouse_sinker/statistics"
10 | "github.com/housepower/clickhouse_sinker/util"
11 | "github.com/thanos-io/thanos/pkg/errors"
12 | "github.com/twmb/franz-go/pkg/kadm"
13 | "github.com/twmb/franz-go/pkg/kgo"
14 | "go.uber.org/zap"
15 | )
16 |
17 | var (
18 | theCl *kgo.Client
19 | theAdm *kadm.Client
20 | kafkaConfig *config.KafkaConfig
21 | )
22 |
23 | type StateLag struct {
24 | State string
25 | Lag int64
26 | }
27 |
28 | // GetTaskStateAndLags gets the state and lag of all tasks.
29 | func GetTaskStateAndLags(cfg *config.Config) (stateLags map[string]StateLag, err error) {
30 | kconf := cfg.Kafka
31 | if !reflect.DeepEqual(&kconf, kafkaConfig) {
32 | cleanupKafkaClient()
33 | if err = newClient(cfg.Kafka); err != nil {
34 | return
35 | }
36 | kafkaConfig = &kconf
37 | }
38 |
39 | stateLags = make(map[string]StateLag, len(cfg.Tasks))
40 | for _, taskCfg := range cfg.Tasks {
41 | var state string
42 | var totalLags int64
43 | if state, totalLags, err = getStateAndLag(theAdm, taskCfg.Topic, taskCfg.ConsumerGroup); err != nil {
44 | // skip this task for now, wait next assign cycle
45 | util.Logger.Error("retrieve lag failed", zap.String("task", taskCfg.Name), zap.Error(err))
46 | statistics.ConsumeLags.WithLabelValues(taskCfg.ConsumerGroup, taskCfg.Topic, taskCfg.Name).Set(float64(-1))
47 | continue
48 | }
49 | stateLags[taskCfg.Name] = StateLag{State: state, Lag: totalLags}
50 | statistics.ConsumeLags.WithLabelValues(taskCfg.ConsumerGroup, taskCfg.Topic, taskCfg.Name).Set(float64(totalLags))
51 | }
52 | return
53 | }
54 |
55 | func cleanupKafkaClient() {
56 | if theCl != nil {
57 | theCl.Close()
58 | theCl = nil
59 | }
60 | if theAdm != nil {
61 | theAdm.Close()
62 | theAdm = nil
63 | }
64 | }
65 |
66 | func newClient(cfg config.KafkaConfig) (err error) {
67 | var opts []kgo.Opt
68 | if opts, err = input.GetFranzConfig(&cfg); err != nil {
69 | return
70 | }
71 | // franz.config.go 379 - invalid autocommit options specified when a group was not specified
72 | if theCl, err = kgo.NewClient(opts...); err != nil {
73 | err = errors.Wrapf(err, "")
74 | return
75 | }
76 | theAdm = kadm.NewClient(theCl)
77 | return
78 | }
79 |
80 | // getStateAndLag is inspired by https://github.com/cloudhut/kminion/blob/1ffd02ba94a5edc26d4f11e57191ed3479d8a111/prometheus/collect_consumer_group_lags.go
81 | func getStateAndLag(adm *kadm.Client, topic, group string) (state string, totalLags int64, err error) {
82 | ctx := context.Background()
83 | var ok bool
84 | var descGroups kadm.DescribedGroups
85 | var descGroup kadm.DescribedGroup
86 | if descGroups, err = adm.DescribeGroups(ctx, group); err != nil {
87 | err = errors.Wrapf(err, "")
88 | return
89 | }
90 | if descGroup, ok = descGroups[group]; ok {
91 | state = descGroup.State
92 | } else {
93 | state = "NA"
94 | }
95 | var commit kadm.OffsetResponses
96 | if commit, err = adm.FetchOffsets(ctx, group); err != nil {
97 | err = errors.Wrapf(err, "")
98 | return
99 | }
100 | var offsets kadm.ListedOffsets
101 | if offsets, err = adm.ListEndOffsets(ctx, topic); err != nil {
102 | err = errors.Wrapf(err, "")
103 | return
104 | }
105 | grpLag := kadm.CalculateGroupLag(descGroup, commit, offsets)
106 | if topLag, ok := grpLag[topic]; ok {
107 | for _, grpMemberLag := range topLag {
108 | if grpMemberLag.Lag >= 0 {
109 | totalLags += grpMemberLag.Lag
110 | }
111 | }
112 | }
113 | return
114 | }
115 |
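A hypothetical usage sketch of the package above (not part of the repo): poll `GetTaskStateAndLags` periodically and log the per-task state and lag. The `pollLags` helper and its `pollInterval` parameter are illustrative assumptions.

```go
package main

import (
	"time"

	"github.com/housepower/clickhouse_sinker/config"
	rcm "github.com/housepower/clickhouse_sinker/config_manager"
	"github.com/housepower/clickhouse_sinker/util"
	"go.uber.org/zap"
)

// pollLags logs the state and total lag of every configured task on a fixed interval.
func pollLags(cfg *config.Config, pollInterval time.Duration) {
	ticker := time.NewTicker(pollInterval)
	defer ticker.Stop()
	for range ticker.C {
		stateLags, err := rcm.GetTaskStateAndLags(cfg)
		if err != nil {
			util.Logger.Error("GetTaskStateAndLags failed", zap.Error(err))
			continue
		}
		for task, sl := range stateLags {
			util.Logger.Info("task lag",
				zap.String("task", task),
				zap.String("state", sl.State),
				zap.Int64("lag", sl.Lag))
		}
	}
}

func main() {
	util.InitLogger([]string{"stdout"})
	// In practice cfg would be loaded from a local file or Nacos first.
	var cfg config.Config
	pollLags(&cfg, time.Minute)
}
```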
--------------------------------------------------------------------------------
/config_manager/rcm.go:
--------------------------------------------------------------------------------
1 | package rcm
2 |
3 | import (
4 | "github.com/housepower/clickhouse_sinker/config"
5 | )
6 |
7 | // RemoteConfManager can be implemented by many backends: Nacos, Consul, etcd, ZooKeeper...
8 | type RemoteConfManager interface {
9 | Init(properties map[string]interface{}) error
10 | GetConfig() (conf *config.Config, err error)
11 | 	// PublishConfig publishes the config.
12 | PublishConfig(conf *config.Config) (err error)
13 | Register(ip string, port int) (err error)
14 | Deregister(ip string, port int) (err error)
15 |
16 | // Assignment loop
17 | Run()
18 | Stop()
19 | }
20 |
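A minimal, hypothetical sketch of a `RemoteConfManager` backed by a local JSON file, to illustrate the interface above; the repo's real implementation is Nacos-based (nacos.go). The `FileCM` type and its `path` property are assumptions for illustration only.

```go
package rcm

import (
	"encoding/json"
	"os"

	"github.com/housepower/clickhouse_sinker/config"
)

// FileCM stores the whole config as a JSON file on the local filesystem.
type FileCM struct {
	path string
}

// Compile-time check that FileCM satisfies the interface.
var _ RemoteConfManager = (*FileCM)(nil)

func (f *FileCM) Init(properties map[string]interface{}) error {
	f.path, _ = properties["path"].(string)
	return nil
}

func (f *FileCM) GetConfig() (*config.Config, error) {
	b, err := os.ReadFile(f.path)
	if err != nil {
		return nil, err
	}
	var cfg config.Config
	if err = json.Unmarshal(b, &cfg); err != nil {
		return nil, err
	}
	return &cfg, nil
}

func (f *FileCM) PublishConfig(conf *config.Config) error {
	b, err := json.MarshalIndent(conf, "", "  ")
	if err != nil {
		return err
	}
	return os.WriteFile(f.path, b, 0o644)
}

// A local file has no service registry or assignment loop, so the remaining
// methods are no-ops in this sketch.
func (f *FileCM) Register(ip string, port int) error   { return nil }
func (f *FileCM) Deregister(ip string, port int) error { return nil }
func (f *FileCM) Run()                                 {}
func (f *FileCM) Stop()                                {}
```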
--------------------------------------------------------------------------------
/dev/deploy_gh_pages.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | cd docs/.vuepress/dist
4 |
5 | git config --global user.name 'GitHub Workflow'
6 | git config --global user.email 'dummy@dummy.dummy'
7 |
8 | git init
9 | git add -A
10 | git commit -m 'Deploy GitHub Pages'
11 | git push -f https://sundy-li:${TOKEN_GITHUB}@github.com/housepower/clickhouse_sinker.git master:gh-pages
12 |
--------------------------------------------------------------------------------
/dev/license.header:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed under the Apache License, Version 2.0 (the "License");
3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at
5 | *
6 | * http://www.apache.org/licenses/LICENSE-2.0
7 | *
8 | * Unless required by applicable law or agreed to in writing, software
9 | * distributed under the License is distributed on an "AS IS" BASIS,
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | * See the License for the specific language governing permissions and
12 | * limitations under the License.
--------------------------------------------------------------------------------
/discovery/discovery.go:
--------------------------------------------------------------------------------
1 | package discovery
2 |
3 | import (
4 | "fmt"
5 | "sort"
6 | "time"
7 |
8 | "github.com/housepower/clickhouse_sinker/config"
9 | cm "github.com/housepower/clickhouse_sinker/config_manager"
10 | "github.com/housepower/clickhouse_sinker/pool"
11 | "github.com/housepower/clickhouse_sinker/util"
12 | )
13 |
14 | var (
15 | getClusterSQL string = `SELECT shard_num, replica_num, host_name FROM system.clusters WHERE cluster = '%s' ORDER BY shard_num, replica_num`
16 | )
17 |
18 | type Discovery struct {
19 | enabled bool
20 | config *config.Config
21 | conn *pool.Conn
22 | ncm cm.RemoteConfManager
23 | rcm bool
24 | }
25 |
26 | type Replicas []string
27 | type Shards []Replicas
28 |
29 | func (r Shards) Less(i, j int) bool {
30 | if len(r[i]) == 0 || len(r[j]) == 0 {
31 | return false
32 | }
33 | return r[i][0] < r[j][0]
34 | }
35 | func (r Shards) Swap(i, j int) {
36 | r[i], r[j] = r[j], r[i]
37 | }
38 | func (r Shards) Len() int {
39 | return len(r)
40 | }
41 |
42 | func NewDiscovery(cfg *config.Config, ncm cm.RemoteConfManager, rcm bool) *Discovery {
43 | return &Discovery{
44 | config: cfg,
45 | ncm: ncm,
46 | rcm: rcm,
47 | }
48 | }
49 |
50 | func (d *Discovery) SetConfig(conf config.Config) {
51 | d.config = &conf
52 | d.enabled = conf.Discovery.Enabled
53 | }
54 |
55 | func (d *Discovery) IsEnabled() bool {
56 | return d.enabled && d.rcm
57 | }
58 |
59 | func (d *Discovery) GetCKConn() error {
60 | // err := pool.InitClusterConn(&d.config.Clickhouse)
61 | // if err != nil {
62 | // return err
63 | // }
64 | conn, _, err := pool.GetShardConn(0).NextGoodReplica(d.config.Clickhouse.Ctx, 0)
65 | if err != nil {
66 | return err
67 | }
68 | d.conn = conn
69 | return nil
70 | }
71 |
72 | func (d *Discovery) Dispatcher() error {
73 | util.Logger.Debug("discovery: start")
74 | query := fmt.Sprintf(getClusterSQL, d.config.Clickhouse.Cluster)
75 | util.Logger.Debug("discovery: query: " + query)
76 | rows, err := d.conn.Query(query)
77 | if err != nil {
78 | return err
79 | }
80 | defer rows.Close()
81 | var shards Shards
82 | var replicas Replicas
83 | var lastShardNum uint32
84 | lastShardNum = 1
85 | for rows.Next() {
86 | var shardNum, replicaNum uint32
87 | var hostName string
88 | err = rows.Scan(&shardNum, &replicaNum, &hostName)
89 | if err != nil {
90 | return err
91 | }
92 | util.Logger.Debug(fmt.Sprintf("discovery: shardNum: %d, replicaNum: %d, hostName: %s", shardNum, replicaNum, hostName))
93 | if lastShardNum != shardNum {
94 | lastShardNum = shardNum
95 | shards = append(shards, replicas)
96 | replicas = make([]string, 0)
97 | }
98 | replicas = append(replicas, hostName)
99 | util.Logger.Debug(fmt.Sprintf("discovery: shards: %#v", shards))
100 | util.Logger.Debug(fmt.Sprintf("discovery: replicas: %#v", replicas))
101 | }
102 | if len(replicas) > 0 {
103 | shards = append(shards, replicas)
104 | }
105 | if len(shards) == 0 {
106 | return nil
107 | }
108 | util.Logger.Debug(fmt.Sprintf("discovery: shards: %#v", shards))
109 | if diffShards(shards, hosts2shards(d.config.Clickhouse.Hosts)) {
110 | util.Logger.Info(fmt.Sprintf("discovery: shards changed, old: %v, new: %v", d.config.Clickhouse.Hosts, shards))
111 | d.config.Clickhouse.Hosts = shards2hosts(shards)
112 | d.Publish()
113 | } else {
114 | util.Logger.Info("discovery: shards not changed")
115 | }
116 | return nil
117 | }
118 |
119 | func (d *Discovery) Publish() {
120 | d.config.Discovery.UpdatedBy = "discovery-changed"
121 | d.config.Discovery.UpdatedAt = time.Now()
122 | d.ncm.PublishConfig(d.config)
123 | }
124 |
125 | func hosts2shards(hosts [][]string) Shards {
126 | var shards Shards
127 | for _, host := range hosts {
128 | shards = append(shards, host)
129 | }
130 | return shards
131 | }
132 |
133 | func shards2hosts(shards Shards) [][]string {
134 | var hosts [][]string
135 | for _, shard := range shards {
136 | hosts = append(hosts, shard)
137 | }
138 | return hosts
139 | }
140 |
141 | func diffShards(old, new Shards) bool {
142 | if len(old) != len(new) {
143 | return true
144 | }
145 | // sort.Sort(old)
146 | // sort.Sort(new)
147 | for i := range old {
148 | if diffReplicas(old[i], new[i]) {
149 | return true
150 | }
151 | }
152 | return false
153 | }
154 |
155 | func diffReplicas(old, new []string) bool {
156 | if len(old) != len(new) {
157 | return true
158 | }
159 | sort.Sort(sort.StringSlice(old))
160 | sort.Sort(sort.StringSlice(new))
161 | for i := range old {
162 | if old[i] != new[i] {
163 | return true
164 | }
165 | }
166 | return false
167 | }
168 |
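A hypothetical usage sketch (not part of the repo) of wiring `Discovery` into a periodic loop so that topology changes in `system.clusters` get republished. It assumes the ClickHouse connection pool has already been initialized elsewhere; the one-minute interval is illustrative.

```go
package discovery

import (
	"time"

	"github.com/housepower/clickhouse_sinker/config"
	cm "github.com/housepower/clickhouse_sinker/config_manager"
	"github.com/housepower/clickhouse_sinker/util"
	"go.uber.org/zap"
)

// RunLoop periodically queries system.clusters and publishes the config when
// the shard/replica layout differs from cfg.Clickhouse.Hosts.
func RunLoop(cfg *config.Config, ncm cm.RemoteConfManager) {
	d := NewDiscovery(cfg, ncm, true)
	d.SetConfig(*cfg)
	for range time.Tick(time.Minute) {
		if !d.IsEnabled() {
			continue
		}
		if err := d.GetCKConn(); err != nil {
			util.Logger.Error("discovery: GetCKConn failed", zap.Error(err))
			continue
		}
		if err := d.Dispatcher(); err != nil {
			util.Logger.Error("discovery: Dispatcher failed", zap.Error(err))
		}
	}
}
```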
--------------------------------------------------------------------------------
/discovery/discovery_test.go:
--------------------------------------------------------------------------------
1 | package discovery
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/stretchr/testify/assert"
7 | )
8 |
9 | func TestConvert(t *testing.T) {
10 | var hosts = [][]string{
11 | {"127.0.0.1:8080", "127.0.0.1:8081"},
12 | {"127.0.0.1:8082", "127.0.0.1:8083"},
13 | }
14 |
15 | shards := hosts2shards(hosts)
16 | target := Shards{
17 | Replicas{"127.0.0.1:8080", "127.0.0.1:8081"},
18 | Replicas{"127.0.0.1:8082", "127.0.0.1:8083"},
19 | }
20 | assert.Equal(t, target, shards)
21 | }
22 |
23 | func TestDiffShards(t *testing.T) {
24 | var old1 = Shards{
25 | Replicas{"127.0.0.1:8080", "127.0.0.1:8081"},
26 | Replicas{"127.0.0.1:8082", "127.0.0.1:8083"},
27 | }
28 | var new1 = Shards{
29 | Replicas{"127.0.0.1:8080", "127.0.0.1:8081"},
30 | Replicas{"127.0.0.1:8082", "127.0.0.1:8083"},
31 | Replicas{"127.0.0.1:8084", "127.0.0.1:8085"},
32 | }
33 |
34 | assert.Equal(t, true, diffShards(old1, new1))
35 |
36 | var new2 = Shards{
37 | Replicas{"127.0.0.1:8082", "127.0.0.1:8083"},
38 | Replicas{"127.0.0.1:8081", "127.0.0.1:8080"},
39 | }
40 | assert.Equal(t, true, diffShards(old1, new2))
41 |
42 | var new3 = Shards{
43 | Replicas{"127.0.0.1:8082", "127.0.0.1:8083"},
44 | Replicas{"127.0.0.2:8081", "127.0.0.1:8080"},
45 | }
46 | assert.Equal(t, true, diffShards(old1, new3))
47 |
48 | var new4 = Shards{
49 | Replicas{"127.0.0.1:8081", "127.0.0.1:8080"},
50 | Replicas{"127.0.0.1:8083", "127.0.0.1:8082"},
51 | }
52 | assert.Equal(t, false, diffShards(old1, new4))
53 | }
54 |
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | ---
2 | version: '3'
3 |
4 | services:
5 | zookeeper:
6 | image: zookeeper:3.7.0
7 | restart: always
8 | hostname: zookeeper
9 | ports:
10 | - "52181:2181"
11 | environment:
12 | ALLOW_ANONYMOUS_LOGIN: 1
13 | ZOO_4LW_COMMANDS_WHITELIST: "*"
14 | security_opt:
15 | - label:disable
16 | kafka:
17 | image: confluentinc/cp-kafka:7.5.3
18 | container_name: kafka
19 | restart: always
20 | hostname: kafka
21 | ports:
22 | - "9092:9092"
23 | - "9093:9093"
24 | environment:
25 | KAFKA_ADVERTISED_LISTENERS: "LISTENER_DOCKER_INTERNAL://127.0.0.1:9092,LISTENER_DOCKER_EXTERNAL://127.0.0.1:9093"
26 | KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: LISTENER_DOCKER_INTERNAL:PLAINTEXT,LISTENER_DOCKER_EXTERNAL:PLAINTEXT
27 | KAFKA_INTER_BROKER_LISTENER_NAME: LISTENER_DOCKER_INTERNAL
28 | KAFKA_ZOOKEEPER_CONNECT: "zookeeper:2181"
29 | KAFKA_BROKER_ID: 1
30 | KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO"
31 | KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
32 | depends_on:
33 | - zookeeper
34 | security_opt:
35 | - label:disable
36 | clickhouse:
37 | image: clickhouse/clickhouse-server:23.8
38 | restart: always
39 | ports:
40 | - "58123:8123"
41 | - "59000:9000"
42 | ulimits:
43 | nofile:
44 | soft: 262144
45 | hard: 262144
46 | volumes:
47 | - "./docker-entrypoint-initdb.d:/docker-entrypoint-initdb.d"
48 | - "./docker/metrika.xml:/etc/clickhouse-server/config.d/metrika.xml"
49 | security_opt:
50 | - label:disable
51 | nacos:
52 | image: nacos/nacos-server:1.4.1
53 | container_name: nacos-standalone
54 | environment:
55 | - PREFER_HOST_MODE=hostname
56 | - MODE=standalone
57 | ports:
58 | - "58848:8848"
59 |
--------------------------------------------------------------------------------
/docker/metrika.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | 01
4 | clickhouse01
5 |
6 |
7 | 2181
8 | 1
9 | /var/lib/clickhouse/coordination/log
10 | /var/lib/clickhouse/coordination/snapshots
11 |
12 |
13 | 10000
14 | 30000
15 | trace
16 |
17 |
18 |
19 |
20 | 1
21 | localhost
22 | 9444
23 |
24 |
25 |
26 |
27 |
28 | localhost
29 | 2181
30 |
31 |
32 | ::
33 |
34 |
35 | foo
36 |
37 | false
38 |
39 | localhost
40 | 9000
41 |
42 |
43 |
44 |
45 |
--------------------------------------------------------------------------------
/docker/test_auto_schema.hjson:
--------------------------------------------------------------------------------
1 | {
2 | clickhouse: {
3 | hosts: [
4 | [
5 | 127.0.0.1
6 | ]
7 | ]
8 | port: 59000
9 | db: default
10 | username: ""
11 | password: ""
12 | retryTimes: 0
13 | }
14 | kafka: {
15 | brokers: 127.0.0.1:9092
16 | }
17 | task: {
18 | name: test_auto_schema
19 | topic: topic1
20 | consumerGroup: test_auto_schema
21 | earliest: true
22 | parser: json
23 | autoSchema: true
24 | tableName: test_auto_schema
25 | excludeColumns: []
26 | bufferSize: 50000
27 | }
28 | logLevel: info
29 | }
--------------------------------------------------------------------------------
/docker/test_dynamic_schema.hjson:
--------------------------------------------------------------------------------
1 | {
2 | clickhouse: {
3 | cluster: ""
4 | hosts: [
5 | [
6 | 127.0.0.1
7 | ]
8 | ]
9 | port: 59000
10 | db: default
11 | username: ""
12 | password: ""
13 | retryTimes: 0
14 | }
15 | kafka: {
16 | brokers: 127.0.0.1:9093
17 | version: 2.6.0
18 | }
19 | task: {
20 | name: test_dynamic_schema
21 | topic: topic1
22 | consumerGroup: test_dynamic_schema
23 | earliest: true
24 | parser: json
25 | autoSchema: true
26 | tableName: test_dynamic_schema
27 | excludeColumns: [
28 | day
29 | ]
30 | dynamicSchema: {
31 | enable: true
32 | }
33 | bufferSize: 50000
34 | }
35 | logLevel: info
36 | }
--------------------------------------------------------------------------------
/docker/test_fixed_schema.hjson:
--------------------------------------------------------------------------------
1 | {
2 | clickhouse: {
3 | hosts: [
4 | [
5 | 127.0.0.1
6 | ]
7 | ]
8 | port: 59000
9 | db: default
10 | username: ""
11 | password: ""
12 | retryTimes: 0
13 | }
14 | kafka: {
15 | brokers: 127.0.0.1:9093
16 | version: 2.5.0
17 | }
18 | task: {
19 | name: test_fixed_schema
20 | topic: topic1
21 | consumerGroup: test_fixed_schema
22 | earliest: true
23 | parser: json
24 | tableName: test_fixed_schema
25 | dims: [
26 | {
27 | name: time
28 | type: DateTime
29 | }
30 | {
31 | name: name
32 | type: String
33 | }
34 | {
35 | name: value
36 | type: Float32
37 | }
38 | ]
39 | bufferSize: 50000
40 | }
41 | logLevel: info
42 | }
--------------------------------------------------------------------------------
/docker/test_prom_metric.hjson:
--------------------------------------------------------------------------------
1 | {
2 | clickhouse: {
3 | cluster: abc
4 | hosts: [
5 | [
6 | 127.0.0.1
7 | ]
8 | ]
9 | port: 59000
10 | db: gauge
11 | username: ""
12 | password: ""
13 | retryTimes: 0
14 | }
15 | kafka: {
16 | brokers: 127.0.0.1:9092
17 | }
18 | task: {
19 | name: test_prom_metric
20 | topic: test_metric_topic
21 | consumerGroup: test_prom_metric
22 | earliest: true
23 | parser: json
24 | autoSchema: true
25 | tableName: default.test_prom_metric
26 | seriesTableName: test_prom_series
27 | excludeColumns: []
28 | bufferSize: 50000
29 | PrometheusSchema: true
30 | DynamicSchema: {
31 | Enable: true
32 | whiteList: ^(key_[0-9]?|value[1-4]?|__mgmt_id__|__name__|timestamp|__series_id__)$
33 | blackList: ^(key_3|value4)$
34 | }
35 | }
36 | logLevel: info
37 | }
--------------------------------------------------------------------------------
/docs/.vuepress/config.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 | base: "/clickhouse_sinker/",
3 | title: "clickhouse_sinker",
4 | evergreen: true,
5 | plugins: ["mermaidjs"],
6 | locales: {
7 | "/": {
8 | lang: "en-US",
9 | title: "clickhouse_sinker",
10 | description: "clickhouse_sinker a tool to sink the data into ClickHouse",
11 | },
12 | "/zh/": {
13 | lang: "zh-CN",
14 | title: "clickhouse_sinker",
15 | description: "clickhouse_sinker 一个将数据摄入到ClickHouse的工具",
16 | },
17 | },
18 | themeConfig: {
19 | locales: {
20 | "/": {
21 | selectText: "Languages",
22 | label: "English",
23 | ariaLabel: "Languages",
24 | editLinkText: "Edit this page on GitHub",
25 | serviceWorker: {
26 | updatePopup: {
27 | message: "New content is available.",
28 | buttonText: "Refresh",
29 | },
30 | },
31 | algolia: {},
32 | nav: [
33 | { text: "Get Started", link: "/guide/install" },
34 | { text: "Introduction", link: "/dev/introduction" },
35 | { text: "Configuration", link: "/configuration/flag" },
36 | {
37 | text: "GitHub",
38 | link: "https://github.com/housepower/clickhouse_sinker",
39 | },
40 | ],
41 | sidebar: {
42 | "/guide/": [
43 | {
44 | title: "Install and Run",
45 | children: [
46 | ["install", "Install"],
47 | ["run", "Run"],
48 | ],
49 | },
50 | ],
51 |
52 | "/configuration/": [
53 | {
54 | title: "Configuration",
55 | children: [
56 | ["flag", "Flag"],
57 | ["config", "Config"],
58 | ]
59 | }
60 | ],
61 |
62 | "/dev/": [
63 | {
64 | title: "Development",
65 | children: [
66 | ["introduction", "Introduction"],
67 | ["design", "Design"],
68 | ]
69 | }
70 | ],
71 | },
72 | }
73 | },
74 | },
75 | };
76 |
--------------------------------------------------------------------------------
/docs/.vuepress/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/housepower/clickhouse_sinker/2541b53d8e7a3a472c0c2c9394af2574f7c943dd/docs/.vuepress/public/favicon.ico
--------------------------------------------------------------------------------
/docs/.vuepress/public/logo_320px.svg:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | home: true
3 | heroImage: /logo_320px.svg
4 | actionText: Get Started →
5 | actionLink: /guide/install
6 | features:
7 | - title: clickhouse_sinker
8 | details: A tool to sink the data into ClickHouse
9 | footer: Apache License 2.0
10 | ---
11 |
--------------------------------------------------------------------------------
/docs/configuration/config.md:
--------------------------------------------------------------------------------
1 | # Config Items
2 |
3 | > Here we use JSON with comments for documentation; a config file in hjson format is also supported
4 |
5 | ```json
6 | {
7 | // ClickHouse config
8 | "clickhouse": {
9 |     // cluster the ClickHouse node belongs to
10 | "cluster": "test",
11 | // hosts for connection, it's Array(Array(String))
12 |     // we can put hosts belonging to the same shard into the inner array
13 |     // it helps data deduplication for ReplicatedMergeTree when a driver error occurs
14 | "hosts": [
15 | [
16 | "192.168.101.106",
17 | "192.168.101.108"
18 | ],
19 | [
20 | "192.168.102.114",
21 | "192.168.101.110"
22 | ],
23 | [
24 | "192.168.102.115"
25 | ]
26 | ],
27 | "port": 9000,
28 | "username": "default",
29 | "password": "",
30 | // database name
31 | "db": "default",
32 | // Whether enable TLS encryption with clickhouse-server
33 | "secure": false,
34 | // Whether skip verify clickhouse-server cert if secure=true.
35 | "insecureSkipVerify": false,
36 |     // retryTimes when an error occurs while inserting data
37 | "retryTimes": 0,
38 | // max open connections with each clickhouse node. default to 1.
39 | "maxOpenConns": 1,
40 |     // native or http; if both secure and http are configured, https is used. Defaults to native.
41 | "protocol": "native"
42 | },
43 |
44 | // Kafka config
45 | "kafka": {
46 | "brokers": "127.0.0.1:9093",
47 |
48 | "properties":{
49 | // This corresponds to Kafka's heartbeat.interval.ms.
50 | "heartbeat.interval.ms": 3000,
51 | // This option corresponds to Kafka's session.timeout.ms setting and must be within the broker's group.min.session.timeout.ms and group.max.session.timeout.ms.
52 | "session.timeout.ms": 120000,
53 | // This corresponds to Kafka's rebalance.timeout.ms.
54 | "rebalance.timeout.ms": 120000,
55 | // This option is roughly equivalent to request.timeout.ms, but grants additional time to requests that have timeout fields.
56 | "request.timeout.ms": 60000
57 | }
58 |
59 |     // Java-client-style security authentication
60 | "security":{
61 | "security.protocol": "SASL_PLAINTEXT",
62 | "sasl.kerberos.service.name": "kafka",
63 | "sasl.mechanism":"GSSAPI",
64 | "sasl.jaas.config":"com.sun.security.auth.module.Krb5LoginModule required useKeyTab=true storeKey=true debug=true keyTab=\"/etc/security/mmmtest.keytab\" principal=\"mmm@ALANWANG.COM\";"
65 | },
66 |     // whether to reset the domain realm. If this option is true, the domain realm will be replaced by "hadoop.{toLower(GSSAPI.Realm)}:{port}". This is needed when clickhouse_sinker connects to HUAWEI MRS kerberos kafka.
67 | "resetSaslRealm": false,
68 |
69 | // SSL
70 | "tls": {
71 | "enable": false,
72 |       // Required. The CA certificate with which the Kafka brokers' certs are signed.
73 | "caCertFiles": "/etc/security/ca-cert",
74 | // Required if Kafka brokers require client authentication.
75 | "clientCertFile": "",
76 | // Required if and only if ClientCertFile is present.
77 | "clientKeyFile": ""
78 | },
79 |
80 | // SASL
81 | "sasl": {
82 | "enable": false,
83 | // Mechanism is the name of the enabled SASL mechanism.
84 | // Possible values: PLAIN, SCRAM-SHA-256, SCRAM-SHA-512, GSSAPI (defaults to PLAIN)
85 | "mechanism": "PLAIN",
86 | // Username is the authentication identity (authcid) to present for
87 | // SASL/PLAIN or SASL/SCRAM authentication
88 | "username": "",
89 | // Password for SASL/PLAIN or SASL/SCRAM authentication
90 | "password": "",
91 | "gssapi": {
92 | // authtype - 1. KRB5_USER_AUTH, 2. KRB5_KEYTAB_AUTH
93 | "authtype": 0,
94 | "keytabpath": "",
95 | "kerberosconfigpath": "",
96 | "servicename": "",
97 | "username": "",
98 | "password": "",
99 | "realm": "",
100 | "disablepafxfast": false
101 | }
102 | },
103 | },
104 |
105 | "task": {
106 | "name": "test_dynamic_schema",
107 | // kafka topic
108 | "topic": "topic",
109 | // kafka consume from earliest or latest
110 | "earliest": true,
111 | // kafka consumer group
112 | "consumerGroup": "group",
113 |
114 | // message parser
115 | "parser": "json",
116 |
117 | // clickhouse table name
118 | // override the clickhouse.db with "db.tableName" format, eg "default.tbl1"
119 | "tableName": "prom_metric",
120 |
121 | // name of the timeseries table, by default it is tableName with a "_series" suffix
122 | "seriesTableName": "prom_metric_myseries",
123 |
124 | // columns of the table
125 | "dims": [
126 | {
127 | // column name
128 | "name": "timestamp",
129 | // column type
130 | "type": "DateTime"
131 | },
132 | {
133 | "name": "name",
134 | "type": "String"
135 | },
136 | {
137 | "name": "value",
138 | "type": "Float32",
139 | // json field name. This must be specified if it doesn't match with the column name.
140 | "sourcename": "val"
141 | }
142 | ],
143 |
144 | // if it's specified, clickhouse_sinker will detect table schema instead of using the fixed schema given by "dims".
145 | "autoSchema" : true,
146 | // these columns will be excluded from the detected table schema. This takes effect only if "autoSchema" is true.
147 | "excludeColumns": [],
148 |
149 |     // (experimental feature) detect new fields and their type, and add columns to the ClickHouse table accordingly. This feature requires the parser to be "fastjson" or "gjson". New fields' type will be one of: Int64, Float64, String.
150 | // A column is added for new key K if all following conditions are true:
151 | // - K isn't in ExcludeColumns
152 | // - number of existing columns doesn't reach MaxDims-1
153 |     // - WhiteList is empty, or K matches WhiteList
154 |     // - BlackList is empty, or K doesn't match BlackList (a sketch of this predicate follows the config example below)
155 | "dynamicSchema": {
156 | // whether enable this feature, default to false
157 | "enable": true,
158 |       // the upper limit on the number of dynamic columns; <=0 means math.MaxInt16. This protects against dirty-data attacks.
159 | "maxDims": 1024,
160 | // the regexp of white list. syntax reference: https://github.com/google/re2/wiki/Syntax
161 | "whiteList": "^[0-9A-Za-z_]+$",
162 | // the regexp of black list
163 | "blackList": "@"
164 | },
165 |
166 | // additional fields to be appended to each input message, should be a valid json string
167 | // e.g. fields: "{\"Enable\":true,\"MaxDims\":0,\"Earliest\":false,\"Parser\":\"fastjson\"}"
168 | "fields": "",
169 |
170 |     // PrometheusSchema expects each message to be a Prometheus metric (timestamp, value, metric name and a list of labels).
171 | "prometheusSchema": true,
172 |     // the regexp of the labels black list; fields matching promLabelsBlackList are not considered part of the labels column in the series table
173 |     // Requires PrometheusSchema to be true.
174 | "promLabelsBlackList": "",
175 |
176 |     // shardingKey is the column name against which sharding is performed
177 | "shardingKey": "",
178 |     // shardingStripe takes effect if the sharding key is numerical
179 | "shardingStripe": 0,
180 |
181 | // interval of flushing the batch. Default to 5, max to 600.
182 | "flushInterval": 5,
183 |     // Approximate batch size to insert into clickhouse per shard; also controls the kafka max.partition.fetch.bytes.
184 |     // Sinker will round it upward to the nearest 2^n. Default to 262114, max to 1048576.
185 | "bufferSize": 262114,
186 |
187 | // In the absence of time zone information, interprets the time as in the given location. Default to "Local" (aka /etc/localtime of the machine on which sinker runs)
188 | "timeZone": "",
189 |     // Time unit when interpreting a number as time. Default to 1.0.
190 |     // Java's timestamp is milliseconds since epoch. Change timeUnit to 0.001 in that case.
191 | "timeUnit": 1.0
192 | },
193 |
194 | // log level, possible value: "debug", "info", "warn", "error", "dpanic", "panic", "fatal". Default to "info".
195 | "logLevel": "debug",
196 | // The Series table may contain hundreds of columns, and writing this table every time a datapoint is persisted can result in significant
197 |   // performance overhead. This should be unnecessary since the labels from the same timeseries usually do not change (mid could be an exception).
198 | // Therefore, it would be reasonable to keep the map between "sid" and "mid" in cache to avoid frequent write operations. To optimize the memory
199 | // utilization, only active series from the last "activeSeriesRange" seconds will be cached, and the map in the cache will be updated every
200 | // "reloadSeriesMapInterval" seconds. By default, series from the last 24 hours will be cached, and the cache will be updated every hour.
201 | "reloadSeriesMapInterval": 3600,
202 | "activeSeriesRange": 86400,
203 | "logTrace": false,
204 | // It is recommended that recordPoolSize be 3 or 4 times the bufferSize, for the backpressure mechanism, to avoid using too much memory.
205 | "recordPoolSize": 1048576
206 | }
207 | ```
208 |
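The dynamicSchema white/black list rules above boil down to a single predicate per new key. Below is a minimal sketch of that predicate in Go; the function name `shouldAddColumn` and its parameters are illustrative assumptions, not the sinker's actual internals.

```go
package main

import (
	"fmt"
	"math"
	"regexp"
)

// shouldAddColumn sketches the rules above: a column is added for a new key K
// only if K is not excluded, the column count stays below MaxDims-1, K matches
// the white list (when set) and does not match the black list (when set).
func shouldAddColumn(k string, excluded map[string]bool, numColumns, maxDims int, white, black *regexp.Regexp) bool {
	if maxDims <= 0 {
		maxDims = math.MaxInt16
	}
	if excluded[k] {
		return false
	}
	if numColumns >= maxDims-1 {
		return false
	}
	if white != nil && !white.MatchString(k) {
		return false
	}
	if black != nil && black.MatchString(k) {
		return false
	}
	return true
}

func main() {
	white := regexp.MustCompile(`^[0-9A-Za-z_]+$`)
	black := regexp.MustCompile(`@`)
	fmt.Println(shouldAddColumn("newkey01", nil, 10, 1024, white, black)) // accepted
	fmt.Println(shouldAddColumn("bad@key", nil, 10, 1024, white, black))  // rejected by black list
}
```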
--------------------------------------------------------------------------------
/docs/configuration/flag.md:
--------------------------------------------------------------------------------
1 | # run args
2 |
3 | ```
4 | ./clickhouse_sinker -h
5 |
6 | Usage of ./clickhouse_sinker:
7 | -http-port int
8 | http listen port (default 2112)
9 | -local-cfg-file string
10 | local config file (default "/etc/clickhouse_sinker.hjson")
11 | -metric-push-gateway-addrs string
12 |     	a list of comma-separated prometheus push gateway addresses
13 | -nacos-addr string
14 | a list of comma-separated nacos server addresses (default "127.0.0.1:8848")
15 | -nacos-dataid string
16 | nacos dataid
17 | -nacos-group string
18 | nacos group name. Empty string doesn't work! (default "DEFAULT_GROUP")
19 | -nacos-namespace-id string
20 | nacos namespace ID. Neither DEFAULT_NAMESPACE_ID("public") nor namespace name work!
21 | -nacos-password string
22 | nacos password (default "nacos")
23 | -nacos-username string
24 | nacos username (default "nacos")
25 | -push-interval int
26 | push interval in seconds (default 10)
27 | -v show build version and quit
28 | ```
--------------------------------------------------------------------------------
/docs/dev/design.md:
--------------------------------------------------------------------------------
1 | # Architecture
2 |
3 | ## Sharding
4 |
5 | clickhouse_sinker guarantees:
6 |
7 | - at-least-once
8 | - Duplicated messages (per topic-partition-offset) are routed to the same ClickHouse shard.
9 |
10 | So if you set up ClickHouse properly (ReplacingMergeTree ORDER BY (__kafka_topic, __kafka_partition, __kafka_offset)), you can get exactly-once semantics.
11 |
12 | It's hard for clickhouse_sinker to guarantee exactly-once semantics without ReplacingMergeTree: Kafka consumer group load-balancing produces duplicated messages if one consumer crashes suddenly.
13 |
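A minimal sketch of that dedup setup, assuming a local clickhouse-server on 127.0.0.1:9000 and an illustrative column set; only the ReplacingMergeTree engine and the ORDER BY over the Kafka metadata columns come from the text above.

```go
package main

import (
	"github.com/ClickHouse/clickhouse-go/v2"
)

func main() {
	// OpenDB returns a *sql.DB backed by the ClickHouse native protocol.
	db := clickhouse.OpenDB(&clickhouse.Options{Addr: []string{"127.0.0.1:9000"}})
	defer db.Close()

	// Re-delivered messages share the same (topic, partition, offset), so they
	// collapse into a single row after ReplacingMergeTree merges.
	ddl := `CREATE TABLE IF NOT EXISTS metrics_dedup
(
    time DateTime,
    name String,
    value Float64,
    __kafka_topic String,
    __kafka_partition Int32,
    __kafka_offset Int64
)
ENGINE = ReplacingMergeTree
ORDER BY (__kafka_topic, __kafka_partition, __kafka_offset)`
	if _, err := db.Exec(ddl); err != nil {
		panic(err)
	}
}
```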
14 | ## Workflow
15 |
16 | Internally, clickhouse_sinker groups tasks with an identical "consumerGroup" property together to reduce the number of Kafka clients, so that the Kafka server can handle more requests concurrently. As a consequence, the Kafka offset is committed only after all messages of a fetch have been completely written to ClickHouse.
17 |
18 | The flow is like this:
19 |
20 | - Group tasks with an identical "consumerGroup" property together and fetch messages for the whole group with a single goroutine.
21 | - Route fetched messages to the individual tasks for further parsing. By default, the mapping between messages and tasks is controlled by the "topic" and "tableName" properties. But for messages with the Kafka header "__table_name" specified, the mapping between "__table_name" and "tableName" overrides the default behavior.
22 | - Parse messages and calculate the dest shard (see the sketch after this list):
23 |   - For tasks with the "shardingKey" property specified, if the sharding key is numerical (integer, float, time, etc.), the dest shard is determined by `(shardingKey/shardingStripe)%clickhouse_shards`; if not, it is determined by `xxHash64(shardingKey)%clickhouse_shards`.
24 |   - Otherwise, the dest shard for each message is determined by `(kafka_offset/roundup(buffer_size))%clickhouse_shards`.
25 | - Generate batches for all shard slots in the same group when the total cached message count in the group reaches the `sum(batchSize)*80%` boundary or the flush timer fires.
26 | - Write batches to ClickHouse in a global goroutine pool (pool size is a fixed number based on the number of tasks and ClickHouse shards).
27 | - Commit offsets back to Kafka.
28 |
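A minimal sketch, in Go, of the dest-shard rules listed above. xxHash64 comes from `github.com/cespare/xxhash/v2`, which the project already depends on; the helper names and example values are illustrative.

```go
package main

import (
	"fmt"

	"github.com/cespare/xxhash/v2"
)

// shardOfNumericKey applies (shardingKey/shardingStripe) % clickhouse_shards.
// stripe must be > 0.
func shardOfNumericKey(key, stripe uint64, shards int) int {
	return int((key / stripe) % uint64(shards))
}

// shardOfStringKey applies xxHash64(shardingKey) % clickhouse_shards.
func shardOfStringKey(key string, shards int) int {
	return int(xxhash.Sum64String(key) % uint64(shards))
}

// shardOfOffset applies (kafka_offset/roundup(buffer_size)) % clickhouse_shards
// when no sharding key is configured; roundup is the next power of two.
func shardOfOffset(offset int64, bufferSize, shards int) int {
	roundup := 1
	for roundup < bufferSize {
		roundup <<= 1
	}
	return int((offset / int64(roundup)) % int64(shards))
}

func main() {
	fmt.Println(shardOfNumericKey(1700000000, 3600, 3)) // numeric key, e.g. a timestamp
	fmt.Println(shardOfStringKey("host-42", 3))         // string key
	fmt.Println(shardOfOffset(123456, 50000, 3))        // no sharding key configured
}
```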
29 |
30 | ## Task scheduling
31 |
32 | The clickhouse-server configuration item `max_concurrent_queries` (default 100) is the maximum number of simultaneously processed queries related to MergeTree tables. If the number of concurrent INSERTs is close to `max_concurrent_queries`, user queries (`SELECT`) could fail due to the limit.
33 |
34 | If the clickhouse-server is big, ingesting data into >=100 MergeTree tables via clickhouse_sinker brings pressure to the ClickHouse cluster. On the other hand, a large number of clickhouse_sinker instances requires lots of CPU/MEM resources.
35 |
36 | The solution is that clickhouse_sinker instances coordinate with each other to assign tasks among themselves.
37 |
38 | The task scheduling procedure:
39 |
40 | - Some platform (Kubernetes, Yarn, etc.) starts several clickhouse_sinker instances and may start/stop instances dynamically. Every clickhouse_sinker instance registers with Nacos as a single service (CLI option `--nacos-service-name`).
41 | - Someone publishes (add/delete/modify) a list of tasks (with empty assignment) to Nacos.
42 | - The first clickhouse_sinker instance (per instance's ip+port), named the scheduler, is responsible for generating and publishing task assignments regularly. The task list and assignment make up the whole config. A task list change, service change, or task lag change triggers another assignment. The scheduler ensures every clickhouse_sinker instance's total lag is balanced (a balancing sketch follows this list).
43 | - Each clickhouse_sinker reloads the config regularly. This may start/stop tasks. clickhouse_sinker stops tasks gracefully so that there's no message loss/duplication during task transferring.
44 |
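A hypothetical greedy sketch of lag-balanced assignment; the real scheduler lives in config_manager and may use a different strategy. The `taskLag` type and the assignment map are illustrative names.

```go
package main

import (
	"fmt"
	"sort"
)

type taskLag struct {
	Task string
	Lag  int64
}

// assignTasks gives each task, largest lag first, to the instance whose total
// assigned lag is currently the smallest.
func assignTasks(tasks []taskLag, instances []string) map[string][]string {
	sort.Slice(tasks, func(i, j int) bool { return tasks[i].Lag > tasks[j].Lag })
	totals := make(map[string]int64, len(instances))
	assignment := make(map[string][]string, len(instances))
	for _, t := range tasks {
		best := instances[0]
		for _, inst := range instances[1:] {
			if totals[inst] < totals[best] {
				best = inst
			}
		}
		totals[best] += t.Lag
		assignment[best] = append(assignment[best], t.Task)
	}
	return assignment
}

func main() {
	tasks := []taskLag{{"t1", 900}, {"t2", 500}, {"t3", 400}, {"t4", 100}}
	fmt.Println(assignTasks(tasks, []string{"sinker-a", "sinker-b"}))
}
```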
--------------------------------------------------------------------------------
/docs/guide/install.md:
--------------------------------------------------------------------------------
1 | # Install
2 |
3 | ## By binary files (recommended)
4 |
5 | Download the binary files from [release](https://github.com/housepower/clickhouse_sinker/releases), choose the executable binary file according to your env.
6 |
7 | ## By container image
8 |
9 | `docker pull quay.io/housepower/clickhouse_sinker`
10 |
11 | ## By source
12 |
13 | - Install Golang
14 |
15 | - Go Get
16 |
17 | ```
18 | go get -u github.com/housepower/clickhouse_sinker/...
19 | ```
20 |
21 | - Build && Run
22 |
23 | ```
24 | make build
25 | ```
26 |
--------------------------------------------------------------------------------
/docs/guide/run.md:
--------------------------------------------------------------------------------
1 | # Run
2 |
3 | ## Requirements
4 |
5 | Note: Ensure `clickhouse-server` and `kafka` work before running clickhouse_sinker.
6 |
7 | ## Configs
8 |
9 | There are two ways to get config: a local single config, or Nacos.
10 |
11 | - For local file:
12 |
13 | `clickhouse_sinker --local-cfg-file docker/test_auto_schema.hjson`
14 |
15 | - For Nacos:
16 |
17 | `clickhouse_sinker --nacos-addr 127.0.0.1:8848 --nacos-username nacos --nacos-password nacos --nacos-dataid test_auto_schema`
18 |
19 | Read more detailed descriptions of the config [here](../configuration/config.html)
20 |
21 | ## Example
22 |
23 | Let's walk through a piece of the system-test script.
24 |
25 | * Prepare
26 |
27 | - let's checkout `clickhouse_sinker`
28 |
29 | ```bash
30 | git clone https://github.com/housepower/clickhouse_sinker.git
31 | cd clickhouse_sinker
32 | ```
33 |
34 | - let's start standalone clickhouse-server and kafka in container:
35 |
36 | ```bash
37 | docker compose up -d
38 | ```
39 | * Connect to the ClickHouse server
40 |
41 | ```bash
42 | docker exec -it clickhouse_sinker-clickhouse-1 clickhouse client
43 | ```
44 |
45 | * Create a simple table in Clickhouse
46 |
47 | > It's not clickhouse_sinker's duty to create tables automatically, so we create them manually.
48 |
49 | ```sql
50 | CREATE TABLE IF NOT EXISTS test_auto_schema
51 | (
52 | `day` Date DEFAULT toDate(time),
53 | `time` DateTime,
54 | `name` String,
55 | `value` Float64
56 | )
57 | ENGINE = MergeTree
58 | PARTITION BY day
59 | ORDER BY (time, name);
60 | ```
61 |
62 | * Create a topic in kafka
63 |
64 | > I use [kaf](https://github.com/birdayz/kaf) tool to create topics.
65 |
66 | ```bash
67 | kaf topic create topic1 -p 1 -r 1
68 | ```
69 |
70 | Output:
71 | ```bash
72 | ✅ Created topic!
73 | Topic Name: topic1
74 | Partitions: 1
75 | Replication Factor: 1
76 | Cleanup Policy: delete
77 | ```
78 |
79 |
80 | * Run clickhouse_sinker
81 |
82 | ```bash
83 | ./bin/clickhouse_sinker --local-cfg-file docker/test_auto_schema.hjson
84 | ```
85 |
86 |
87 | * Send messages to the topic
88 |
89 | ```bash
90 | echo '{"time" : "2020-12-18T03:38:39.000Z", "name" : "name1", "value" : 1}' | kaf -b '127.0.0.1:9092' produce topic1
91 | echo '{"time" : "2020-12-18T03:38:39.000Z", "name" : "name2", "value" : 2}' | kaf -b '127.0.0.1:9092' produce topic1
92 | echo '{"time" : "2020-12-18T03:38:39.000Z", "name" : "name3", "value" : 3}' | kaf -b '127.0.0.1:9092' produce topic1
93 | ```
94 |
95 | * Check the data in clickhouse
96 |
97 | ```sql
98 | SELECT count() FROM test_auto_schema;
99 | ```
100 | Output:
101 | ```bash
102 | 3 rows in set. Elapsed: 0.016 sec.
103 | ```
104 |
105 | ## Run as a daemon
106 |
107 | On systemd-managed Linux distributions such as RHEL, Debian and their variants, clickhouse_sinker can run as a system service to get auto-restart, coredump management, etc.
108 |
109 | ### Create `/etc/systemd/system/sinker_metric.service`
110 |
111 | ```
112 | [Unit]
113 | Description=ck-sink-metric
114 | Requires=network-online.target
115 | After=network-online.target
116 |
117 | [Service]
118 | Type=simple
119 | User=eoi
120 | LimitCORE=infinity
121 | Environment="GOTRACEBACK=crash"
122 | ExecStart=/data02/app/sinker/sinker/clickhouse_sinker --local-cfg-file=/data02/app/sinker/sinker/ck-sink-metric.json --log-paths=/data02/app/sinker/sinker/logs/sinker_metric.log
123 | Restart=on-failure
124 | RestartSec=3s
125 | StartLimitInterval=0
126 |
127 | [Install]
128 | WantedBy=multi-user.target
129 | ```
130 |
131 | Note:
132 |
133 | - Change paths in `ExecStart` as necessary.
134 | - `User=eoi` means to run service as non-root for security reason.
135 | - `LimitCORE=infinity` for service is equivalent to `ulimit -c unlimited` for non-service.
136 | - The env `GOTRACEBACK=crash` is required for Go applications to dump core. Refer to `https://pkg.go.dev/runtime`.
137 |
138 | ### Modify `/etc/sysctl.conf`
139 |
140 | ```kernel.core_pattern = |/usr/lib/systemd/systemd-coredump %p %u %g %s %t```
141 |
142 | Run `sysctl -p`.
143 |
144 | ### Modify `/etc/systemd/coredump.conf`
145 |
146 | ```
147 | [Coredump]
148 | ProcessSizeMax=50G
149 | ExternalSizeMax=50G
150 | ```
151 |
152 | ### Manage `clickhouse-sinker` service
153 |
154 | - To start, `systemctl start sinker_metric`
155 | - To stop, `systemctl stop sinker_metric`
156 | - To view status, `systemctl status sinker_metric`
157 |
158 | ### Manage coredumps with `coredumpctl`
159 |
160 | Coredumps are stored under `/var/lib/systemd/coredump`.
161 | Refer to core(5), systemd.exec(5), systemd-coredump(8), coredump.conf(5).
162 |
--------------------------------------------------------------------------------
/go.metrictest.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | echo "create database"
4 | curl "localhost:58123" -d "CREATE DATABASE IF NOT EXISTS gauge ON CLUSTER abc"
5 |
6 | echo "create metric test tables"
7 | curl "localhost:58123" -d "DROP TABLE IF EXISTS test_prom_metric ON CLUSTER abc SYNC"
8 | curl "localhost:58123" -d "CREATE TABLE test_prom_metric ON CLUSTER abc
9 | (
10 | __series_id__ Int64,
11 | timestamp DateTime CODEC(DoubleDelta, LZ4),
12 | value Float32 CODEC(ZSTD(15))
13 | ) ENGINE=ReplicatedReplacingMergeTree()
14 | PARTITION BY toYYYYMMDD(timestamp)
15 | ORDER BY (__series_id__, timestamp);"
16 |
17 | curl "localhost:58123" -d "DROP TABLE IF EXISTS dist_test_prom_metric ON CLUSTER abc SYNC"
18 | curl "localhost:58123" -d "CREATE TABLE dist_test_prom_metric ON CLUSTER abc AS test_prom_metric ENGINE = Distributed(abc, default, test_prom_metric);"
19 |
20 | curl "localhost:58123" -d "DROP TABLE IF EXISTS test_prom_series ON CLUSTER abc SYNC"
21 | curl "localhost:58123" -d "CREATE TABLE test_prom_series ON CLUSTER abc
22 | (
23 | __series_id__ Int64,
24 | __mgmt_id__ Int64,
25 | labels String,
26 | __name__ String
27 | ) ENGINE=ReplicatedReplacingMergeTree()
28 | ORDER BY (__name__, __series_id__);"
29 |
30 | curl "localhost:58123" -d "DROP TABLE IF EXISTS dist_test_prom_series ON CLUSTER abc SYNC"
31 | curl "localhost:58123" -d "CREATE TABLE dist_test_prom_series ON CLUSTER abc AS test_prom_series ENGINE = Distributed(abc, default, test_prom_series);"
32 |
33 | echo "send messages to kafka"
34 | echo "cat /tmp/test_prom_metric.data | kafka-console-producer --topic test_metric_topic --broker-list localhost:9092" > send.sh
35 | # data generated by "./kafka_gen_prom 192.168.110.10:19092 TestMetric"
36 | sudo docker cp ./docker/test_prom_metric.data kafka:/tmp/
37 | sudo docker cp send.sh kafka:/tmp/
38 | sudo docker exec kafka kafka-topics --bootstrap-server localhost:9093 --topic test_metric_topic --delete
39 | sudo docker exec kafka sh /tmp/send.sh
40 |
41 | echo "start clickhouse_sinker to consume"
42 | timeout 30 ./bin/clickhouse_sinker --local-cfg-file docker/test_prom_metric.hjson
43 |
44 | schema=`curl "localhost:58123" -d 'DESC test_prom_metric' 2>/dev/null | sort | tr -d '\t' | tr -d ' '| tr '\n' ','`
45 | echo "Got test_prom_metric schema => $schema"
46 | [ $schema = "__series_id__Int64,timestampDateTimeDoubleDelta,LZ4,value1Nullable(Float64),value2Nullable(Int64),value3Nullable(Bool),valueFloat32ZSTD(15)," ] || exit 1
47 |
48 |
49 | schema=`curl "localhost:58123" -d 'DESC test_prom_series' 2>/dev/null | sort | tr -d '\t' | tr -d ' '| tr '\n' ','`
50 | echo "Got test_prom_series schema => $schema"
51 | [ $schema = "key_0Nullable(String),key_1Nullable(String),key_2Nullable(String),key_4Nullable(String),key_5Nullable(String),key_6Nullable(String),key_7Nullable(String),key_8Nullable(String),key_9Nullable(String),labelsString,__mgmt_id__Int64,__name__String,__series_id__Int64," ] || exit 1
52 |
53 | echo "check result 1"
54 | count=`curl "localhost:58123" -d 'select count() from dist_test_prom_metric'`
55 | echo "Got test_prom_metric count => $count"
56 | [ $count -le 10000 ] || exit 1
57 |
58 | count=`curl "localhost:58123" -d 'select count() from dist_test_prom_series'`
59 | echo "Got test_prom_series count => $count"
60 | [ $count -eq 1000 ] || exit 1
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/housepower/clickhouse_sinker
2 |
3 | go 1.21
4 |
5 | toolchain go1.21.4
6 |
7 | require (
8 | github.com/ClickHouse/clickhouse-go/v2 v2.21.0
9 | github.com/RoaringBitmap/roaring v1.7.0
10 | github.com/YenchangChan/franz-go/pkg/sasl/kerberos v0.0.0-20231127011105-840a25342a2e
11 | github.com/avast/retry-go/v4 v4.5.1
12 | github.com/bytedance/sonic v1.10.2
13 | github.com/cespare/xxhash/v2 v2.2.0
14 | github.com/google/gops v0.3.28
15 | github.com/google/uuid v1.6.0
16 | github.com/hjson/hjson-go/v4 v4.4.0
17 | github.com/jcmturner/gokrb5/v8 v8.4.4
18 | github.com/jinzhu/copier v0.4.0
19 | github.com/matoous/go-nanoid/v2 v2.0.0
20 | github.com/nacos-group/nacos-sdk-go v1.1.4
21 | github.com/pkg/errors v0.9.1
22 | github.com/prometheus/client_golang v1.18.0
23 | github.com/prometheus/common v0.45.0
24 | github.com/shopspring/decimal v1.3.1
25 | github.com/stretchr/testify v1.9.0
26 | github.com/thanos-io/thanos v0.33.0
27 | github.com/tidwall/gjson v1.17.0
28 | github.com/troian/healthcheck v0.1.4-0.20200127040058-c373fb6a0dc1
29 | github.com/twmb/franz-go v1.17.1
30 | github.com/twmb/franz-go/pkg/kadm v1.10.0
31 | github.com/twmb/franz-go/plugin/kzap v1.1.2
32 | github.com/valyala/fastjson v1.6.4
33 | go.uber.org/zap v1.27.0
34 | golang.org/x/exp v0.0.0-20231226003508-02704c960a9b
35 | golang.org/x/time v0.5.0
36 | gopkg.in/natefinch/lumberjack.v2 v2.2.1
37 | )
38 |
39 | require (
40 | github.com/ClickHouse/ch-go v0.61.3 // indirect
41 | github.com/aliyun/alibaba-cloud-sdk-go v1.62.648 // indirect
42 | github.com/andybalholm/brotli v1.1.0 // indirect
43 | github.com/beorn7/perks v1.0.1 // indirect
44 | github.com/bits-and-blooms/bitset v1.13.0 // indirect
45 | github.com/buger/jsonparser v1.1.1 // indirect
46 | github.com/chenzhuoyu/base64x v0.0.0-20230717121745-296ad89f973d // indirect
47 | github.com/chenzhuoyu/iasm v0.9.1 // indirect
48 | github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
49 | github.com/go-errors/errors v1.5.1 // indirect
50 | github.com/go-faster/city v1.0.1 // indirect
51 | github.com/go-faster/errors v0.7.1 // indirect
52 | github.com/hashicorp/go-uuid v1.0.3 // indirect
53 | github.com/jcmturner/aescts/v2 v2.0.0 // indirect
54 | github.com/jcmturner/dnsutils/v2 v2.0.0 // indirect
55 | github.com/jcmturner/gofork v1.7.6 // indirect
56 | github.com/jcmturner/rpc/v2 v2.0.3 // indirect
57 | github.com/jmespath/go-jmespath v0.4.0 // indirect
58 | github.com/json-iterator/go v1.1.12 // indirect
59 | github.com/klauspost/compress v1.17.8 // indirect
60 | github.com/klauspost/cpuid/v2 v2.2.6 // indirect
61 | github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 // indirect
62 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
63 | github.com/modern-go/reflect2 v1.0.2 // indirect
64 | github.com/mschoch/smat v0.2.0 // indirect
65 | github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b // indirect
66 | github.com/paulmach/orb v0.11.1 // indirect
67 | github.com/pierrec/lz4/v4 v4.1.21 // indirect
68 | github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
69 | github.com/prometheus/client_model v0.5.0 // indirect
70 | github.com/prometheus/procfs v0.12.0 // indirect
71 | github.com/segmentio/asm v1.2.0 // indirect
72 | github.com/streadway/amqp v1.1.0 // indirect
73 | github.com/tidwall/match v1.1.1 // indirect
74 | github.com/tidwall/pretty v1.2.1 // indirect
75 | github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
76 | github.com/twmb/franz-go/pkg/kmsg v1.8.0 // indirect
77 | go.opentelemetry.io/otel v1.24.0 // indirect
78 | go.opentelemetry.io/otel/trace v1.24.0 // indirect
79 | go.uber.org/multierr v1.11.0 // indirect
80 | golang.org/x/arch v0.6.0 // indirect
81 | golang.org/x/crypto v0.31.0 // indirect
82 | golang.org/x/net v0.23.0 // indirect
83 | golang.org/x/sync v0.6.0 // indirect
84 | golang.org/x/sys v0.28.0 // indirect
85 | google.golang.org/protobuf v1.33.0 // indirect
86 | gopkg.in/ini.v1 v1.67.0 // indirect
87 | gopkg.in/yaml.v3 v3.0.1 // indirect
88 | )
89 |
--------------------------------------------------------------------------------
/go.test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | echo "create tables"
4 | curl "localhost:58123" -d 'DROP TABLE IF EXISTS test_fixed_schema'
5 | curl "localhost:58123" -d 'CREATE TABLE test_fixed_schema
6 | (
7 | time DateTime,
8 | name String,
9 | value Float32,
10 | price Decimal32(3) DEFAULT(9.9)
11 | )
12 | ENGINE = MergeTree
13 | PARTITION BY toYYYYMMDD(time)
14 | ORDER BY (time, name)'
15 |
16 | curl "localhost:58123" -d 'DROP TABLE IF EXISTS test_auto_schema'
17 | curl "localhost:58123" -d 'CREATE TABLE test_auto_schema AS test_fixed_schema'
18 |
19 | curl "localhost:58123" -d 'DROP TABLE IF EXISTS test_dynamic_schema'
20 | curl "localhost:58123" -d 'CREATE TABLE test_dynamic_schema AS test_fixed_schema'
21 |
22 | counts=`curl "localhost:58123" -d 'SELECT count() FROM test_fixed_schema UNION ALL SELECT count() FROM test_auto_schema UNION ALL SELECT count() FROM test_dynamic_schema' 2>/dev/null | tr '\n' ','`
23 | echo "Got initial row counts => $counts"
24 | [ $counts = "0,0,0," ] || exit 1
25 |
26 | now=`date --rfc-3339=ns`
27 | for i in `seq 1 10000`;do
28 | price=`echo "scale = 3; $i / 1000" | bc -q`
29 | echo "{\"time\" : \"${now}\", \"name\" : \"name$i\", \"value\" : $i, \"price\" : $price }"
30 | done > a.json
31 | for i in `seq 10001 30000`;do
32 | echo "{\"time\" : \"${now}\", \"name\" : \"name$i\", \"value\" : $i, \"newkey00\" : false, \"newkey01\" : $i }"
33 | done >> a.json
34 | for i in `seq 30001 50000`;do
35 | echo "{\"time\" : \"${now}\", \"name\" : \"name$i\", \"value\" : $i, \"newkey02\" : $i.123, \"newkey03\" : \"name$i\", \"newkey04\" : \"${now}\", \"newkey05\" : {\"k1\": 1, \"k2\": 2} }"
36 | done >> a.json
37 | for i in `seq 50001 70000`;do
38 | echo "{\"time\" : \"${now}\", \"name\" : \"name$i\", \"value\" : $i, \"newkey06\" : [$i], \"newkey07\" : [$i.123], \"newkey08\" : [\"name$i\"], \"newkey09\" : [\"${now}\"], \"newkey10\" : [{\"k1\": 1, \"k2\": 2}, {\"k3\": 3, \"k4\": 4}] }"
39 | done >> a.json
40 | for i in `seq 70001 100000`;do
41 | echo "{\"time\" : \"${now}\", \"name\" : \"name$i\", \"value\" : $i }"
42 | done >> a.json
43 | echo "generated a.json"
44 | echo "send messages to kafka"
45 | echo "cat /tmp/a.json | kafka-console-producer --topic topic1 --broker-list localhost:9092" > send.sh
46 | sudo docker cp a.json kafka:/tmp/
47 | sudo docker cp send.sh kafka:/tmp/
48 | sudo docker exec kafka kafka-topics --bootstrap-server localhost:9093 --topic topic1 --delete
49 | sudo docker exec kafka sh /tmp/send.sh
50 |
51 | echo "start clickhouse_sinker to consume"
52 | timeout 30 ./bin/clickhouse_sinker --local-cfg-file docker/test_fixed_schema.hjson
53 | timeout 30 ./bin/clickhouse_sinker --local-cfg-file docker/test_auto_schema.hjson
54 | timeout 60 ./bin/clickhouse_sinker --local-cfg-file docker/test_dynamic_schema.hjson
55 |
56 | echo "check result 1"
57 | count=`curl "localhost:58123" -d 'select count() from test_fixed_schema'`
58 | echo "Got test_fixed_schema count => $count"
59 | [ $count -eq 100000 ] || exit 1
60 |
61 | count=`curl "localhost:58123" -d 'select count() from test_auto_schema'`
62 | echo "Got test_auto_schema count => $count"
63 | [ $count -eq 100000 ] || exit 1
64 |
65 | schema=`curl "localhost:58123" -d 'DESC test_dynamic_schema' 2>/dev/null | grep newkey | sort | tr -d '\t' | tr '\n' ','`
66 | echo "Got test_dynamic_schema schema => $schema"
67 | [ $schema = "newkey00Nullable(Bool),newkey01Nullable(Int64),newkey02Nullable(Float64),newkey03Nullable(String),newkey04Nullable(DateTime64(3))," ] || exit 1
68 | count=`curl "localhost:58123" -d 'SELECT count() FROM test_dynamic_schema'`
69 | echo "Got test_dynamic_schema count => $count"
70 | [ $count -eq 100000 ] || exit 1
71 |
72 | echo "truncate tables"
73 | curl "localhost:58123" -d 'TRUNCATE TABLE test_fixed_schema'
74 | curl "localhost:58123" -d 'TRUNCATE TABLE test_auto_schema'
75 | curl "localhost:58123" -d 'TRUNCATE TABLE test_dynamic_schema'
76 |
77 | echo "publish clickhouse_sinker config"
78 | ./bin/nacos_publish_config --nacos-addr 127.0.0.1:58848 --nacos-username nacos --nacos-password nacos --nacos-dataid test_fixed_schema --local-cfg-file docker/test_fixed_schema.hjson
79 | ./bin/nacos_publish_config --nacos-addr 127.0.0.1:58848 --nacos-username nacos --nacos-password nacos --nacos-dataid test_auto_schema --local-cfg-file docker/test_auto_schema.hjson
80 | ./bin/nacos_publish_config --nacos-addr 127.0.0.1:58848 --nacos-username nacos --nacos-password nacos --nacos-dataid test_dynamic_schema --local-cfg-file docker/test_dynamic_schema.hjson
81 |
82 | echo "start clickhouse_sinker to consume"
83 | sudo docker exec kafka kafka-consumer-groups --bootstrap-server localhost:9093 --execute --reset-offsets --group test_fixed_schema --all-topics --to-earliest
84 | timeout 30 ./bin/clickhouse_sinker --nacos-addr 127.0.0.1:58848 --nacos-username nacos --nacos-password nacos --nacos-dataid test_fixed_schema
85 |
86 | sudo docker exec kafka kafka-consumer-groups --bootstrap-server localhost:9093 --execute --reset-offsets --group test_auto_schema --all-topics --to-earliest
87 | timeout 30 ./bin/clickhouse_sinker --nacos-addr 127.0.0.1:58848 --nacos-username nacos --nacos-password nacos --nacos-dataid test_auto_schema
88 |
89 | sudo docker exec kafka kafka-consumer-groups --bootstrap-server localhost:9093 --execute --reset-offsets --group test_dynamic_schema --all-topics --to-earliest
90 | timeout 30 ./bin/clickhouse_sinker --nacos-addr 127.0.0.1:58848 --nacos-username nacos --nacos-password nacos --nacos-dataid test_dynamic_schema
91 |
92 | echo "check result 2"
93 | count=`curl "localhost:58123" -d 'select count() from test_fixed_schema'`
94 | echo "Got test_fixed_schema count => $count"
95 | [ $count -eq 100000 ] || exit 1
96 |
97 | count=`curl "localhost:58123" -d 'select count() from test_auto_schema'`
98 | echo "Got test_auto_schema count => $count"
99 | [ $count -eq 100000 ] || exit 1
100 |
101 | count=`curl "localhost:58123" -d 'SELECT count() FROM test_dynamic_schema'`
102 | echo "Got test_dynamic_schema count => $count"
103 | [ $count -eq 100000 ] || exit 1
104 |
--------------------------------------------------------------------------------
/health/health.go:
--------------------------------------------------------------------------------
1 | package health
2 |
3 | import "github.com/troian/healthcheck"
4 |
5 | var Health = healthcheck.NewHandler()
6 |
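A hypothetical usage sketch of the shared handler above. It assumes the troian/healthcheck fork keeps the upstream heptiolabs/healthcheck API (`AddReadinessCheck`, `TCPDialCheck`); the address and port are illustrative.

```go
package main

import (
	"net/http"
	"time"

	"github.com/housepower/clickhouse_sinker/health"
	"github.com/troian/healthcheck"
)

func main() {
	// Readiness fails when the (illustrative) ClickHouse native port is unreachable.
	health.Health.AddReadinessCheck("clickhouse",
		healthcheck.TCPDialCheck("127.0.0.1:9000", 3*time.Second))
	// The handler serves the /live and /ready endpoints.
	_ = http.ListenAndServe(":2112", health.Health)
}
```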
--------------------------------------------------------------------------------
/input/kafka_franz.go:
--------------------------------------------------------------------------------
1 | /*Copyright [2019] housepower
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 |
16 | package input
17 |
18 | import (
19 | "context"
20 | "crypto/tls"
21 | "fmt"
22 | "strings"
23 | "sync"
24 | "time"
25 |
26 | "github.com/YenchangChan/franz-go/pkg/sasl/kerberos"
27 | krb5client "github.com/jcmturner/gokrb5/v8/client"
28 | krb5config "github.com/jcmturner/gokrb5/v8/config"
29 | "github.com/jcmturner/gokrb5/v8/keytab"
30 | "github.com/thanos-io/thanos/pkg/errors"
31 | "github.com/twmb/franz-go/pkg/kgo"
32 | "github.com/twmb/franz-go/pkg/sasl"
33 | "github.com/twmb/franz-go/pkg/sasl/plain"
34 | "github.com/twmb/franz-go/pkg/sasl/scram"
35 | "github.com/twmb/franz-go/plugin/kzap"
36 | "go.uber.org/zap"
37 |
38 | "github.com/housepower/clickhouse_sinker/config"
39 | "github.com/housepower/clickhouse_sinker/model"
40 | "github.com/housepower/clickhouse_sinker/util"
41 | )
42 |
43 | const (
44 | Krb5KeytabAuth = 2
45 | CommitRetries = 6
46 | RetryBackoff = 5 * time.Second
47 | processTimeOut = 10
48 | )
49 |
50 | type Fetches struct {
51 | Fetch *kgo.Fetches
52 | TraceId string
53 | }
54 |
55 | // KafkaFranz implements input.Inputer
56 | // refers to examples/group_consuming/main.go
57 | type KafkaFranz struct {
58 | cfg *config.Config
59 | grpConfig *config.GroupConfig
60 | consumerId string
61 | cl *kgo.Client
62 | ctx context.Context
63 | cancel context.CancelFunc
64 | wgRun sync.WaitGroup
65 | fetch chan Fetches
66 | cleanupFn func()
67 | }
68 |
69 | // NewKafkaFranz get instance of kafka reader
70 | func NewKafkaFranz() *KafkaFranz {
71 | return &KafkaFranz{}
72 | }
73 |
74 | // Init Initialise the kafka instance with configuration
75 | func (k *KafkaFranz) Init(cfg *config.Config, gCfg *config.GroupConfig, f chan Fetches, cleanupFn func()) (err error) {
76 | k.cfg = cfg
77 | k.grpConfig = gCfg
78 | k.ctx, k.cancel = context.WithCancel(context.Background())
79 | k.fetch = f
80 | k.cleanupFn = cleanupFn
81 | kfkCfg := &cfg.Kafka
82 | var opts []kgo.Opt
83 | if opts, err = GetFranzConfig(kfkCfg); err != nil {
84 | return
85 | }
86 | opts = append(opts,
87 | kgo.ConsumeTopics(k.grpConfig.Topics...),
88 | kgo.ConsumerGroup(k.grpConfig.Name),
89 | kgo.DisableAutoCommit(),
90 | )
91 |
92 | maxPartBytes := int32(1 << (util.GetShift(100*k.grpConfig.BufferSize) - 1))
93 |
94 | opts = append(opts,
95 | kgo.FetchMaxBytes(maxPartBytes),
96 | kgo.FetchMaxPartitionBytes(maxPartBytes),
97 | kgo.OnPartitionsRevoked(k.onPartitionRevoked),
98 | kgo.OnPartitionsAssigned(k.onPartitionAssigned),
99 | kgo.RebalanceTimeout(time.Millisecond*time.Duration(cfg.Kafka.Properties.RebalanceTimeout)),
100 | kgo.SessionTimeout(time.Millisecond*time.Duration(cfg.Kafka.Properties.SessionTimeout)),
101 | kgo.HeartbeatInterval(time.Millisecond*time.Duration(cfg.Kafka.Properties.HeartbeatInterval)),
102 | kgo.RequestTimeoutOverhead(time.Millisecond*time.Duration(cfg.Kafka.Properties.RequestTimeoutOverhead)),
103 | )
104 | if !k.grpConfig.Earliest {
105 | opts = append(opts, kgo.ConsumeResetOffset(kgo.NewOffset().AtEnd()))
106 | }
107 |
108 | if k.cl, err = kgo.NewClient(opts...); err != nil {
109 | err = errors.Wrapf(err, "")
110 | return
111 | }
112 | return nil
113 | }
114 |
115 | func GetFranzConfig(kfkCfg *config.KafkaConfig) (opts []kgo.Opt, err error) {
116 | opts = []kgo.Opt{
117 | kgo.SeedBrokers(strings.Split(kfkCfg.Brokers, ",")...),
118 | // kgo.BrokerMaxReadBytes(), // 100 MB
119 | kgo.MaxConcurrentFetches(2),
120 | kgo.WithLogger(kzap.New(util.Logger)),
121 | }
122 | if kfkCfg.TLS.Enable {
123 | var tlsCfg *tls.Config
124 | if tlsCfg, err = util.NewTLSConfig(kfkCfg.TLS.CaCertFiles, kfkCfg.TLS.ClientCertFile, kfkCfg.TLS.ClientKeyFile, kfkCfg.TLS.EndpIdentAlgo == ""); err != nil {
125 | return
126 | }
127 | opts = append(opts, kgo.DialTLSConfig(tlsCfg))
128 | }
129 | if kfkCfg.Sasl.Enable {
130 | var mch sasl.Mechanism
131 | switch kfkCfg.Sasl.Mechanism {
132 | case "PLAIN":
133 | auth := plain.Auth{
134 | User: kfkCfg.Sasl.Username,
135 | Pass: kfkCfg.Sasl.Password,
136 | }
137 | mch = auth.AsMechanism()
138 | case "SCRAM-SHA-256", "SCRAM-SHA-512":
139 | auth := scram.Auth{
140 | User: kfkCfg.Sasl.Username,
141 | Pass: kfkCfg.Sasl.Password,
142 | }
143 | switch kfkCfg.Sasl.Mechanism {
144 | case "SCRAM-SHA-256":
145 | mch = auth.AsSha256Mechanism()
146 | case "SCRAM-SHA-512":
147 | mch = auth.AsSha512Mechanism()
148 | default:
149 | }
150 | case "GSSAPI":
151 | gssapiCfg := kfkCfg.Sasl.GSSAPI
152 | auth := kerberos.Auth{Service: gssapiCfg.ServiceName}
153 | // refers to https://github.com/Shopify/sarama/blob/main/kerberos_client.go
154 | var krbCfg *krb5config.Config
155 | var kt *keytab.Keytab
156 | if krbCfg, err = krb5config.Load(gssapiCfg.KerberosConfigPath); err != nil {
157 | err = errors.Wrapf(err, "")
158 | return
159 | }
160 | if gssapiCfg.AuthType == Krb5KeytabAuth {
161 | if kt, err = keytab.Load(gssapiCfg.KeyTabPath); err != nil {
162 | err = errors.Wrapf(err, "")
163 | return
164 | }
165 | auth.Client = krb5client.NewWithKeytab(gssapiCfg.Username, gssapiCfg.Realm, kt, krbCfg, krb5client.DisablePAFXFAST(gssapiCfg.DisablePAFXFAST))
166 | } else {
167 | auth.Client = krb5client.NewWithPassword(gssapiCfg.Username,
168 | gssapiCfg.Realm, gssapiCfg.Password, krbCfg, krb5client.DisablePAFXFAST(gssapiCfg.DisablePAFXFAST))
169 | }
170 | mch = auth.AsMechanismWithClose()
171 | }
172 | if mch != nil {
173 | opts = append(opts, kgo.SASL(mch))
174 | }
175 | }
176 | return
177 | }
178 |
179 | // Run is the Kafka consumer's main poll loop
180 | func (k *KafkaFranz) Run() {
181 | k.wgRun.Add(1)
182 | defer k.wgRun.Done()
183 | LOOP:
184 | for {
185 | if !util.Rs.Allow() {
186 | select {
187 | case <-k.ctx.Done():
188 | break LOOP
189 | default:
190 | }
191 | continue
192 | }
193 | traceId := util.GenTraceId()
194 | util.LogTrace(traceId, util.TraceKindFetchStart, zap.String("consumer group", k.grpConfig.Name), zap.Int("buffersize", k.grpConfig.BufferSize))
195 | fetches := k.cl.PollRecords(k.ctx, k.grpConfig.BufferSize)
196 | err := fetches.Err()
197 | if fetches == nil || fetches.IsClientClosed() || errors.Is(err, context.Canceled) {
198 | break
199 | }
200 | if err != nil {
201 | err = errors.Wrapf(err, "")
202 | util.Logger.Info("kgo.Client.PollRecords() returned an error", zap.Error(err))
203 | }
204 | OnConsumerPoll(k.consumerId)
205 | fetchRecords := fetches.NumRecords()
206 | util.Rs.Inc(int64(fetchRecords))
207 | util.LogTrace(traceId, util.TraceKindFetchEnd, zap.String("consumer group", k.grpConfig.Name), zap.Int64("records", int64(fetchRecords)))
208 | // Abort the process if the fetched records are not handed off to the processing pipeline within the timeout below.
209 | timeout := processTimeOut * time.Minute
210 | if processTimeOut < time.Duration(k.cfg.Kafka.Properties.RebalanceTimeout)*time.Millisecond {
211 | timeout = time.Duration(k.cfg.Kafka.Properties.RebalanceTimeout) * time.Millisecond
212 | }
213 | t := time.NewTimer(timeout)
214 | select {
215 | case k.fetch <- Fetches{
216 | TraceId: traceId,
217 | Fetch: &fetches,
218 | }:
219 | t.Stop()
220 | case <-k.ctx.Done():
221 | t.Stop()
222 | break LOOP
223 | case <-t.C:
224 | util.Logger.Fatal(fmt.Sprintf("Sinker abort because group %s was not processing in last %d minutes", k.grpConfig.Name, timeout/time.Minute))
225 | }
226 | }
227 | k.cl.Close() // will trigger k.onPartitionRevoked
228 | util.Logger.Info("KafkaFranz.Run quit because the context has been canceled", zap.String("consumer group", k.grpConfig.Name))
229 | }
230 |
231 | func (k *KafkaFranz) CommitMessages(msg *model.InputMessage) error {
232 | // "LeaderEpoch: -1" will disable leader epoch validation
233 | var err error
234 | for i := 0; i < CommitRetries; i++ {
235 | err = k.cl.CommitRecords(context.Background(), &kgo.Record{Topic: msg.Topic, Partition: int32(msg.Partition), Offset: msg.Offset, LeaderEpoch: -1})
236 | if err == nil {
237 | break
238 | }
239 | err = errors.Wrapf(err, "")
240 | if i < CommitRetries-1 && !errors.Is(err, context.Canceled) {
241 | util.Logger.Error("cl.CommitRecords failed, will retry later", zap.String("consumer group", k.grpConfig.Name), zap.Int("try", i), zap.Error(err))
242 | time.Sleep(RetryBackoff)
243 | }
244 | }
245 | return err
246 | }
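// Illustrative sketch of a commit call (hypothetical message values; not part of the original source):
//
//	msg := &model.InputMessage{Topic: "events", Partition: 3, Offset: 12345}
//	if err := k.CommitMessages(msg); err != nil {
//		util.Logger.Error("commit failed after retries", zap.Error(err))
//	}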
247 |
248 | // Stop stops the Kafka consumer and closes all connections
249 | func (k *KafkaFranz) Stop() {
250 | k.cancel()
251 |
252 | // prevent the block of k.Run
253 | quit := make(chan struct{})
254 | go func() {
255 | select {
256 | case <-k.fetch:
257 | case <-quit:
258 | }
259 | }()
260 |
261 | k.wgRun.Wait()
262 | select {
263 | case quit <- struct{}{}:
264 | default:
265 | }
266 | }
267 |
268 | // Description returns a human-readable description of this Kafka consumer (its consumer group name)
269 | func (k *KafkaFranz) Description() string {
270 | return fmt.Sprint("kafka consumer group ", k.grpConfig.Name)
271 | }
272 |
273 | func (k *KafkaFranz) onPartitionRevoked(_ context.Context, _ *kgo.Client, _ map[string][]int32) {
274 | begin := time.Now()
275 | k.cleanupFn()
276 | util.Logger.Info("consumer group cleanup",
277 | zap.String("consumer group", k.grpConfig.Name),
278 | zap.Duration("cost", time.Since(begin)))
279 | }
280 |
281 | func (k *KafkaFranz) onPartitionAssigned(_ context.Context, _ *kgo.Client, _ map[string][]int32) {
282 | memberId, _ := k.cl.GroupMetadata()
283 | k.consumerId = memberId
284 | NewConsumerPoller(k.consumerId, k.grpConfig.Name, k.cl)
285 | }
286 |
--------------------------------------------------------------------------------
/input/poller.go:
--------------------------------------------------------------------------------
1 | package input
2 |
3 | import (
4 | "sync"
5 | "time"
6 |
7 | "github.com/housepower/clickhouse_sinker/util"
8 | "github.com/twmb/franz-go/pkg/kgo"
9 | "go.uber.org/zap"
10 | )
11 |
12 | var consumerPoller sync.Map
13 |
14 | type Poller struct {
15 | consumerName string
16 | client *kgo.Client
17 | active time.Time
18 | }
19 |
20 | func NewConsumerPoller(consumerId, consumerName string, client *kgo.Client) {
21 | util.Logger.Info("new consumer poller", zap.String("consumerId", consumerId))
22 | consumerPoller.Store(consumerId, Poller{
23 | consumerName: consumerName,
24 | client: client,
25 | active: time.Now()})
26 | }
27 |
28 | func OnConsumerPoll(consumerId string) {
29 | if v, ok := consumerPoller.Load(consumerId); ok {
30 | poller := v.(Poller)
31 | poller.active = time.Now()
32 | util.Logger.Debug("consumer poller active", zap.String("consumerId", consumerId), zap.String("consumerName", poller.consumerName), zap.Time("active", poller.active))
33 | consumerPoller.Store(consumerId, poller)
34 | } else {
35 | util.Logger.Warn("consumer poller not found", zap.String("consumerId", consumerId))
36 | }
37 | }
38 |
39 | func Leave(consumerId string) {
40 | consumerPoller.Delete(consumerId)
41 | }
42 |
43 | func Walk(maxPollInterval int) string {
44 | var consumerName string
45 | util.Logger.Debug("consumer poller walk started")
46 | consumerPoller.Range(func(k, v any) bool {
47 | poller := v.(Poller)
48 | util.Logger.Debug("consumer poller walked", zap.String("consumerId", k.(string)),
49 | zap.String("consumerName", poller.consumerName),
50 | zap.Time("active", poller.active))
51 | if time.Since(poller.active) > time.Duration(maxPollInterval)*time.Millisecond {
52 | util.Logger.Warn("consumer group expired", zap.String("consumerId", k.(string)), zap.String("consumerName", poller.consumerName))
53 | consumerName = poller.consumerName
54 | //poller.client.LeaveGroup()
55 | Leave(k.(string))
56 | }
57 | return true
58 | })
59 | return consumerName
60 | }
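// Illustrative sketch of how Walk might be driven by a caller (hypothetical ticker loop; not part of the original source):
//
//	ticker := time.NewTicker(time.Minute)
//	defer ticker.Stop()
//	for range ticker.C {
//		if name := Walk(maxPollInterval); name != "" {
//			// a consumer of group "name" has not polled within maxPollInterval milliseconds
//		}
//	}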
61 |
--------------------------------------------------------------------------------
/model/message.go:
--------------------------------------------------------------------------------
1 | package model
2 |
3 | import (
4 | "sync"
5 | "time"
6 | )
7 |
8 | // InputMessage abstracts a Kafka message together with its metadata.
9 | // An interface is deliberately avoided because of virtual-call overhead. See https://syslog.ravelin.com/go-interfaces-but-at-what-cost-961e0f58a07b?gi=58f6761d1d70
10 | type InputMessage struct {
11 | Topic string
12 | Partition int
13 | Key []byte
14 | Value []byte
15 | Offset int64
16 | Timestamp *time.Time
17 | }
18 |
19 | type Row []interface{}
20 | type Rows []*Row
21 |
22 | type MsgRow struct {
23 | Msg *InputMessage
24 | Row *Row
25 | Shard int
26 | }
27 |
28 | type Batch struct {
29 | Rows *Rows
30 | BatchIdx int64
31 | GroupId string
32 | RealSize int
33 |
34 | Wg *sync.WaitGroup
35 | }
36 |
37 | func (b *Batch) Size() int {
38 | return len(*b.Rows)
39 | }
40 |
41 | type BatchRange struct {
42 | Begin int64
43 | End int64
44 | }
45 |
46 | type RecordMap = map[string]map[int32]*BatchRange
47 |
--------------------------------------------------------------------------------
/model/metric.go:
--------------------------------------------------------------------------------
1 | /*Copyright [2019] housepower
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 |
16 | package model
17 |
18 | import (
19 | "regexp"
20 | "sync"
21 | "time"
22 | )
23 |
24 | // Metric interface for metric collection
25 | type Metric interface {
26 | GetBool(key string, nullable bool) (val interface{})
27 | GetInt8(key string, nullable bool) (val interface{})
28 | GetInt16(key string, nullable bool) (val interface{})
29 | GetInt32(key string, nullable bool) (val interface{})
30 | GetInt64(key string, nullable bool) (val interface{})
31 | GetUint8(key string, nullable bool) (val interface{})
32 | GetUint16(key string, nullable bool) (val interface{})
33 | GetUint32(key string, nullable bool) (val interface{})
34 | GetUint64(key string, nullable bool) (val interface{})
35 | GetFloat32(key string, nullable bool) (val interface{})
36 | GetFloat64(key string, nullable bool) (val interface{})
37 | GetDecimal(key string, nullable bool) (val interface{})
38 | GetDateTime(key string, nullable bool) (val interface{})
39 | GetString(key string, nullable bool) (val interface{})
40 | GetObject(key string, nullable bool) (val interface{})
41 | GetMap(key string, typeinfo *TypeInfo) (val interface{})
42 | GetArray(key string, t int) (val interface{})
43 | GetIPv4(key string, nullable bool) (val interface{})
44 | GetIPv6(key string, nullable bool) (val interface{})
45 | GetNewKeys(knownKeys, newKeys, warnKeys *sync.Map, white, black *regexp.Regexp, partition int, offset int64) bool
46 | }
47 |
48 | // DimMetrics groups dimension columns and field columns
49 | type DimMetrics struct {
50 | Dims []*ColumnWithType
51 | Fields []*ColumnWithType
52 | }
53 |
54 | // ColumnWithType describes a ClickHouse column together with its type information
55 | type ColumnWithType struct {
56 | Name string
57 | Type *TypeInfo
58 | SourceName string
59 | NotNullable bool
60 | }
61 |
62 | // OrderedMap is a map that preserves key insertion order, used for ingesting a ClickHouse Map-type value
63 | type OrderedMap struct {
64 | keys []interface{}
65 | values map[interface{}]interface{}
66 | }
67 |
68 | func (om *OrderedMap) Get(key interface{}) (interface{}, bool) {
69 | if value, present := om.values[key]; present {
70 | return value, present
71 | }
72 | return nil, false
73 | }
74 |
75 | func (om *OrderedMap) Put(key interface{}, value interface{}) {
76 | if _, present := om.values[key]; present {
77 | om.values[key] = value
78 | return
79 | }
80 | om.keys = append(om.keys, key)
81 | om.values[key] = value
82 | }
83 |
84 | func (om *OrderedMap) Keys() <-chan interface{} {
85 | ch := make(chan interface{})
86 | go func() {
87 | defer close(ch)
88 | for _, key := range om.keys {
89 | ch <- key
90 | }
91 | }()
92 | return ch
93 | }
94 |
95 | func (om *OrderedMap) GetValues() map[interface{}]interface{} {
96 | return om.values
97 | }
98 |
99 | func NewOrderedMap() *OrderedMap {
100 | om := OrderedMap{}
101 | om.keys = []interface{}{}
102 | om.values = map[interface{}]interface{}{}
103 | return &om
104 | }
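// Illustrative usage sketch (not part of the original source); keys are yielded in insertion order:
//
//	om := NewOrderedMap()
//	om.Put("region", "eu")
//	om.Put("zone", "a")
//	for key := range om.Keys() {
//		v, _ := om.Get(key)
//		_ = v
//	}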
105 |
106 | type SeriesQuota struct {
107 | sync.RWMutex `json:"-"`
108 | NextResetQuota time.Time
109 | BmSeries map[int64]int64
110 | WrSeries int
111 | Birth time.Time
112 | }
113 |
--------------------------------------------------------------------------------
/model/value.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright [2019] housepower
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 | package model
17 |
18 | import (
19 | "fmt"
20 | "regexp"
21 | "strings"
22 |
23 | "github.com/housepower/clickhouse_sinker/util"
24 | )
25 |
26 | const (
27 | Unknown = iota
28 | Bool
29 | Int8
30 | Int16
31 | Int32
32 | Int64
33 | UInt8
34 | UInt16
35 | UInt32
36 | UInt64
37 | Float32
38 | Float64
39 | Decimal
40 | DateTime
41 | String
42 | Object
43 | Map
44 | IPv4
45 | IPv6
46 | )
47 |
48 | type TypeInfo struct {
49 | Type int
50 | Nullable bool
51 | Array bool
52 | MapKey *TypeInfo
53 | MapValue *TypeInfo
54 | }
55 |
56 | var (
57 | typeInfo map[string]*TypeInfo
58 | lowCardinalityRegexp = regexp.MustCompile(`^LowCardinality\((.+)\)`)
59 | )
60 |
61 | // GetTypeName returns the column type in ClickHouse
62 | func GetTypeName(typ int) (name string) {
63 | switch typ {
64 | case Bool:
65 | name = "Bool"
66 | case Int8:
67 | name = "Int8"
68 | case Int16:
69 | name = "Int16"
70 | case Int32:
71 | name = "Int32"
72 | case Int64:
73 | name = "Int64"
74 | case UInt8:
75 | name = "UInt8"
76 | case UInt16:
77 | name = "UInt16"
78 | case UInt32:
79 | name = "UInt32"
80 | case UInt64:
81 | name = "UInt64"
82 | case Float32:
83 | name = "Float32"
84 | case Float64:
85 | name = "Float64"
86 | case Decimal:
87 | name = "Decimal"
88 | case DateTime:
89 | name = "DateTime"
90 | case String:
91 | name = "String"
92 | case Object:
93 | name = "Object('json')"
94 | case Map:
95 | name = "Map"
96 | case IPv4:
97 | name = "IPv4"
98 | case IPv6:
99 | name = "IPv6"
100 | default:
101 | name = "Unknown"
102 | }
103 | return
104 | }
105 |
106 | func GetValueByType(metric Metric, cwt *ColumnWithType) (val interface{}) {
107 | name := cwt.SourceName
108 | if cwt.Type.Array {
109 | val = metric.GetArray(name, cwt.Type.Type)
110 | } else {
111 | switch cwt.Type.Type {
112 | case Bool:
113 | val = metric.GetBool(name, cwt.Type.Nullable)
114 | case Int8:
115 | val = metric.GetInt8(name, cwt.Type.Nullable)
116 | case Int16:
117 | val = metric.GetInt16(name, cwt.Type.Nullable)
118 | case Int32:
119 | val = metric.GetInt32(name, cwt.Type.Nullable)
120 | case Int64:
121 | val = metric.GetInt64(name, cwt.Type.Nullable)
122 | case UInt8:
123 | val = metric.GetUint8(name, cwt.Type.Nullable)
124 | case UInt16:
125 | val = metric.GetUint16(name, cwt.Type.Nullable)
126 | case UInt32:
127 | val = metric.GetUint32(name, cwt.Type.Nullable)
128 | case UInt64:
129 | val = metric.GetUint64(name, cwt.Type.Nullable)
130 | case Float32:
131 | val = metric.GetFloat32(name, cwt.Type.Nullable)
132 | case Float64:
133 | val = metric.GetFloat64(name, cwt.Type.Nullable)
134 | case Decimal:
135 | val = metric.GetDecimal(name, cwt.Type.Nullable)
136 | case DateTime:
137 | val = metric.GetDateTime(name, cwt.Type.Nullable)
138 | case String:
139 | val = metric.GetString(name, cwt.Type.Nullable)
140 | case Map:
141 | val = metric.GetMap(name, cwt.Type)
142 | case Object:
143 | val = metric.GetObject(name, cwt.Type.Nullable)
144 | case IPv4:
145 | val = metric.GetIPv4(name, cwt.Type.Nullable)
146 | case IPv6:
147 | val = metric.GetIPv6(name, cwt.Type.Nullable)
148 | default:
149 | util.Logger.Fatal("LOGIC ERROR: reached switch default condition")
150 | }
151 | }
152 | return
153 | }
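// Illustrative sketch (hypothetical column definition; not part of the original source):
//
//	cwt := &ColumnWithType{Name: "code", Type: WhichType("Nullable(Int32)"), SourceName: "code"}
//	v := GetValueByType(metric, cwt) // dispatches to metric.GetInt32("code", true)
//	_ = v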
154 |
155 | func WhichType(typ string) (ti *TypeInfo) {
156 | typ = lowCardinalityRegexp.ReplaceAllString(typ, "$1")
157 |
158 | ti, ok := typeInfo[typ]
159 | if ok {
160 | return ti
161 | }
162 | origTyp := typ
163 | nullable := strings.HasPrefix(typ, "Nullable(")
164 | array := strings.HasPrefix(typ, "Array(")
165 | var dataType int
166 | if nullable {
167 | typ = typ[len("Nullable(") : len(typ)-1]
168 | } else if array {
169 | typ = typ[len("Array(") : len(typ)-1]
170 | }
171 | if strings.HasPrefix(typ, "DateTime64") {
172 | dataType = DateTime
173 | } else if strings.HasPrefix(typ, "Decimal") {
174 | dataType = Decimal
175 | } else if strings.HasPrefix(typ, "FixedString") {
176 | dataType = String
177 | } else if strings.HasPrefix(typ, "Enum8(") {
178 | dataType = String
179 | } else if strings.HasPrefix(typ, "Enum16(") {
180 | dataType = String
181 | } else if strings.HasPrefix(typ, "Map") {
182 | dataType = Map
183 | idx := strings.Index(typ, ", ")
184 | ti = &TypeInfo{
185 | Type: dataType,
186 | Nullable: nullable,
187 | Array: array,
188 | MapKey: WhichType(typ[len("Map("):idx]),
189 | MapValue: WhichType(typ[idx+2 : len(typ)-1]),
190 | }
191 | typeInfo[origTyp] = ti
192 | return ti
193 | } else {
194 | util.Logger.Fatal(fmt.Sprintf("ClickHouse column type %v is not inside supported ones(case-sensitive): %v", origTyp, typeInfo))
195 | }
196 | ti = &TypeInfo{Type: dataType, Nullable: nullable, Array: array}
197 | typeInfo[origTyp] = ti
198 | return ti
199 | }
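// Illustrative sketch of type resolution (hypothetical inputs; not part of the original source):
//
//	ti := WhichType("Nullable(Int64)")       // Type: Int64, Nullable: true
//	ti = WhichType("LowCardinality(String)") // the LowCardinality wrapper is stripped first
//	ti = WhichType("Map(String, UInt64)")    // MapKey and MapValue are resolved recursively
//	_ = ti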
200 |
201 | func init() {
202 | typeInfo = make(map[string]*TypeInfo)
203 | for _, t := range []int{Bool, Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64, DateTime, String, Object, IPv4, IPv6} {
204 | tn := GetTypeName(t)
205 | typeInfo[tn] = &TypeInfo{Type: t}
206 | nullTn := fmt.Sprintf("Nullable(%s)", tn)
207 | typeInfo[nullTn] = &TypeInfo{Type: t, Nullable: true}
208 | arrTn := fmt.Sprintf("Array(%s)", tn)
209 | typeInfo[arrTn] = &TypeInfo{Type: t, Array: true}
210 | }
211 | typeInfo["UUID"] = &TypeInfo{Type: String}
212 | typeInfo["Nullable(UUID)"] = &TypeInfo{Type: String, Nullable: true}
213 | typeInfo["Array(UUID)"] = &TypeInfo{Type: String, Array: true}
214 | typeInfo["Date"] = &TypeInfo{Type: DateTime}
215 | typeInfo["Nullable(Date)"] = &TypeInfo{Type: DateTime, Nullable: true}
216 | typeInfo["Array(Date)"] = &TypeInfo{Type: DateTime, Array: true}
217 | }
218 |
--------------------------------------------------------------------------------
/output/clickhouse_util.go:
--------------------------------------------------------------------------------
1 | package output
2 |
3 | import (
4 | "fmt"
5 | "strings"
6 |
7 | "github.com/housepower/clickhouse_sinker/model"
8 | "github.com/housepower/clickhouse_sinker/pool"
9 | "github.com/housepower/clickhouse_sinker/util"
10 | "github.com/thanos-io/thanos/pkg/errors"
11 | )
12 |
13 | func writeRows(prepareSQL string, rows model.Rows, idxBegin, idxEnd int, conn *pool.Conn) (numBad int, err error) {
14 | return conn.Write(prepareSQL, rows, idxBegin, idxEnd)
15 | }
16 |
17 | func getDims(database, table string, excludedColumns []string, parser string, conn *pool.Conn) (dims []*model.ColumnWithType, err error) {
18 | var rs *pool.Rows
19 | notNullable := make(map[string]bool)
20 | if rs, err = conn.Query(fmt.Sprintf(referedSQLTemplate, database, table)); err != nil {
21 | err = errors.Wrapf(err, "")
22 | return
23 | }
24 | var default_expression, referenced_col_type, col_name, ori_type string
25 | for rs.Next() {
26 | if err = rs.Scan(&default_expression, &referenced_col_type, &col_name, &ori_type); err != nil {
27 | err = errors.Wrapf(err, "")
28 | return
29 | }
30 | if strings.HasPrefix(referenced_col_type, "Nullable(") && !strings.HasSuffix(ori_type, "Nullable(") {
31 | notNullable[default_expression] = true
32 | }
33 | }
34 |
35 | rs.Close()
36 | if rs, err = conn.Query(fmt.Sprintf(selectSQLTemplate, database, table)); err != nil {
37 | err = errors.Wrapf(err, "")
38 | return
39 | }
40 | defer rs.Close()
41 |
42 | dims = make([]*model.ColumnWithType, 0, 10)
43 | var name, typ, defaultKind string
44 | for rs.Next() {
45 | if err = rs.Scan(&name, &typ, &defaultKind); err != nil {
46 | err = errors.Wrapf(err, "")
47 | return
48 | }
49 | if !util.StringContains(excludedColumns, name) && defaultKind != "MATERIALIZED" {
50 | nnull, ok := notNullable[name]
51 | if !ok {
52 | nnull = false
53 | }
54 | dims = append(dims, &model.ColumnWithType{
55 | Name: name,
56 | Type: model.WhichType(typ),
57 | SourceName: util.GetSourceName(parser, name),
58 | NotNullable: nnull,
59 | })
60 | }
61 | }
62 | if len(dims) == 0 {
63 | err = errors.Wrapf(ErrTblNotExist, "%s.%s", database, table)
64 | return
65 | }
66 | return
67 | }
68 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "clickhouse_sinker",
3 | "version": "1.0.0",
4 | "dependencies": {},
5 | "devDependencies": {
6 | "vuepress": "^1.7.1",
7 | "vuepress-plugin-mermaidjs": "^1.8.1"
8 | },
9 | "scripts": {
10 | "docs:dev": "vuepress dev docs",
11 | "docs:build": "vuepress build docs"
12 | },
13 | "repository": {
14 | "type": "git",
15 | "url": "git+https://github.com/housepower/clickhouse_sinker.git"
16 | },
17 | "license": "Apache-2.0",
18 | "homepage": "https://github.com/housepower/clickhouse_sinker#readme"
19 | }
20 |
--------------------------------------------------------------------------------
/parser/csv.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright [2019] housepower
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 | package parser
17 |
18 | import (
19 | "bytes"
20 | "encoding/csv"
21 | "fmt"
22 | "math"
23 | "net"
24 | "regexp"
25 | "strconv"
26 | "sync"
27 | "time"
28 |
29 | "github.com/housepower/clickhouse_sinker/model"
30 | "github.com/housepower/clickhouse_sinker/util"
31 | "github.com/shopspring/decimal"
32 | "github.com/thanos-io/thanos/pkg/errors"
33 | "github.com/tidwall/gjson"
34 | "github.com/valyala/fastjson/fastfloat"
35 | "golang.org/x/exp/constraints"
36 | )
37 |
38 | var _ Parser = (*CsvParser)(nil)
39 |
40 | // CsvParser implementation to parse input from a CSV format per RFC 4180
41 | type CsvParser struct {
42 | pp *Pool
43 | }
44 |
45 | // Parse extracts a list of comma-separated values from the data
46 | func (p *CsvParser) Parse(bs []byte) (metric model.Metric, err error) {
47 | r := csv.NewReader(bytes.NewReader(bs))
48 | r.FieldsPerRecord = len(p.pp.csvFormat)
49 | if len(p.pp.delimiter) > 0 {
50 | r.Comma = rune(p.pp.delimiter[0])
51 | }
52 | var value []string
53 | if value, err = r.Read(); err != nil {
54 | err = errors.Wrapf(err, "")
55 | return
56 | }
57 | if len(value) != len(p.pp.csvFormat) {
58 | err = errors.Newf("csv value doesn't match the format")
59 | return
60 | }
61 | metric = &CsvMetric{p.pp, value}
62 | return
63 | }
64 |
65 | // CsvMetric wraps a single parsed CSV record
66 | type CsvMetric struct {
67 | pp *Pool
68 | values []string
69 | }
70 |
71 | // GetString gets the value as a string
72 | func (c *CsvMetric) GetString(key string, nullable bool) (val interface{}) {
73 | var idx int
74 | var ok bool
75 | if idx, ok = c.pp.csvFormat[key]; !ok || c.values[idx] == "null" {
76 | if nullable {
77 | return
78 | }
79 | val = ""
80 | return
81 | }
82 | val = c.values[idx]
83 | return
84 | }
85 |
86 | // GetDecimal returns the value as decimal
87 | func (c *CsvMetric) GetDecimal(key string, nullable bool) (val interface{}) {
88 | var idx int
89 | var ok bool
90 | if idx, ok = c.pp.csvFormat[key]; !ok || c.values[idx] == "null" {
91 | if nullable {
92 | return
93 | }
94 | val = decimal.NewFromInt(0)
95 | return
96 | }
97 | var err error
98 | if val, err = decimal.NewFromString(c.values[idx]); err != nil {
99 | val = decimal.NewFromInt(0)
100 | }
101 | return
102 | }
103 |
104 | func (c *CsvMetric) GetBool(key string, nullable bool) (val interface{}) {
105 | var idx int
106 | var ok bool
107 | if idx, ok = c.pp.csvFormat[key]; !ok || c.values[idx] == "" || c.values[idx] == "null" {
108 | if nullable {
109 | return
110 | }
111 | val = false
112 | return
113 | }
114 | val = (c.values[idx] == "true")
115 | return
116 | }
117 |
118 | func (c *CsvMetric) GetInt8(key string, nullable bool) (val interface{}) {
119 | return CsvGetInt[int8](c, key, nullable, math.MinInt8, math.MaxInt8)
120 | }
121 |
122 | func (c *CsvMetric) GetInt16(key string, nullable bool) (val interface{}) {
123 | return CsvGetInt[int16](c, key, nullable, math.MinInt16, math.MaxInt16)
124 | }
125 |
126 | func (c *CsvMetric) GetInt32(key string, nullable bool) (val interface{}) {
127 | return CsvGetInt[int32](c, key, nullable, math.MinInt32, math.MaxInt32)
128 | }
129 |
130 | func (c *CsvMetric) GetInt64(key string, nullable bool) (val interface{}) {
131 | return CsvGetInt[int64](c, key, nullable, math.MinInt64, math.MaxInt64)
132 | }
133 |
134 | func (c *CsvMetric) GetUint8(key string, nullable bool) (val interface{}) {
135 | return CsvGetUint[uint8](c, key, nullable, math.MaxUint8)
136 | }
137 |
138 | func (c *CsvMetric) GetUint16(key string, nullable bool) (val interface{}) {
139 | return CsvGetUint[uint16](c, key, nullable, math.MaxUint16)
140 | }
141 |
142 | func (c *CsvMetric) GetUint32(key string, nullable bool) (val interface{}) {
143 | return CsvGetUint[uint32](c, key, nullable, math.MaxUint32)
144 | }
145 |
146 | func (c *CsvMetric) GetUint64(key string, nullable bool) (val interface{}) {
147 | return CsvGetUint[uint64](c, key, nullable, math.MaxUint64)
148 | }
149 |
150 | func (c *CsvMetric) GetFloat32(key string, nullable bool) (val interface{}) {
151 | return CsvGetFloat[float32](c, key, nullable, math.MaxFloat32)
152 | }
153 |
154 | func (c *CsvMetric) GetFloat64(key string, nullable bool) (val interface{}) {
155 | return CsvGetFloat[float64](c, key, nullable, math.MaxFloat64)
156 | }
157 |
158 | func (c *CsvMetric) GetIPv4(key string, nullable bool) (val interface{}) {
159 | return c.GetUint32(key, nullable)
160 | }
161 |
162 | func (c *CsvMetric) GetIPv6(key string, nullable bool) (val interface{}) {
163 | s := c.GetString(key, nullable).(string)
164 | if net.ParseIP(s) != nil {
165 | val = s
166 | } else {
167 | val = net.IPv6zero.String()
168 | }
169 | return val
170 | }
171 |
172 | func CsvGetInt[T constraints.Signed](c *CsvMetric, key string, nullable bool, min, max int64) (val interface{}) {
173 | var idx int
174 | var ok bool
175 | if idx, ok = c.pp.csvFormat[key]; !ok || c.values[idx] == "null" {
176 | if nullable {
177 | return
178 | }
179 | val = T(0)
180 | return
181 | }
182 | if s := c.values[idx]; s == "true" {
183 | val = T(1)
184 | } else {
185 | val2 := fastfloat.ParseInt64BestEffort(s)
186 | if val2 < min {
187 | val = T(min)
188 | } else if val2 > max {
189 | val = T(max)
190 | } else {
191 | val = T(val2)
192 | }
193 | }
194 | return
195 | }
196 |
197 | func CsvGetUint[T constraints.Unsigned](c *CsvMetric, key string, nullable bool, max uint64) (val interface{}) {
198 | var idx int
199 | var ok bool
200 | if idx, ok = c.pp.csvFormat[key]; !ok || c.values[idx] == "null" {
201 | if nullable {
202 | return
203 | }
204 | val = T(0)
205 | return
206 | }
207 | if s := c.values[idx]; s == "true" {
208 | val = T(1)
209 | } else {
210 | val2 := fastfloat.ParseUint64BestEffort(s)
211 | if val2 > max {
212 | val = T(max)
213 | } else {
214 | val = T(val2)
215 | }
216 | }
217 | return
218 | }
219 |
220 | // CsvGetFloat returns the value as a float
221 | func CsvGetFloat[T constraints.Float](c *CsvMetric, key string, nullable bool, max float64) (val interface{}) {
222 | var idx int
223 | var ok bool
224 | if idx, ok = c.pp.csvFormat[key]; !ok || c.values[idx] == "null" {
225 | if nullable {
226 | return
227 | }
228 | val = T(0.0)
229 | return
230 | }
231 | val2 := fastfloat.ParseBestEffort(c.values[idx])
232 | if val2 > max {
233 | val = T(max)
234 | } else {
235 | val = T(val2)
236 | }
237 | return
238 | }
239 |
240 | func (c *CsvMetric) GetDateTime(key string, nullable bool) (val interface{}) {
241 | var idx int
242 | var ok bool
243 | if idx, ok = c.pp.csvFormat[key]; !ok || c.values[idx] == "null" {
244 | if nullable {
245 | return
246 | }
247 | val = Epoch
248 | return
249 | }
250 | s := c.values[idx]
251 | if dd, err := strconv.ParseFloat(s, 64); err != nil {
252 | var err error
253 | if val, err = c.pp.ParseDateTime(key, s); err != nil {
254 | val = Epoch
255 | }
256 | } else {
257 | val = UnixFloat(dd, c.pp.timeUnit)
258 | }
259 | return
260 | }
261 |
262 | // GetArray parses a JSON-encoded array stored in the CSV field
263 | func (c *CsvMetric) GetArray(key string, typ int) (val interface{}) {
264 | s := c.GetString(key, false)
265 | str, _ := s.(string)
266 | var array []gjson.Result
267 | r := gjson.Parse(str)
268 | if r.IsArray() {
269 | array = r.Array()
270 | }
271 | switch typ {
272 | case model.Bool:
273 | results := make([]bool, 0, len(array))
274 | for _, e := range array {
275 | v := (e.Exists() && e.Type == gjson.True)
276 | results = append(results, v)
277 | }
278 | val = results
279 | case model.Int8:
280 | val = GjsonIntArray[int8](array, math.MinInt8, math.MaxInt8)
281 | case model.Int16:
282 | val = GjsonIntArray[int16](array, math.MinInt16, math.MaxInt16)
283 | case model.Int32:
284 | val = GjsonIntArray[int32](array, math.MinInt32, math.MaxInt32)
285 | case model.Int64:
286 | val = GjsonIntArray[int64](array, math.MinInt64, math.MaxInt64)
287 | case model.UInt8:
288 | val = GjsonUintArray[uint8](array, math.MaxUint8)
289 | case model.UInt16:
290 | val = GjsonUintArray[uint16](array, math.MaxUint16)
291 | case model.UInt32:
292 | val = GjsonUintArray[uint32](array, math.MaxUint32)
293 | case model.UInt64:
294 | val = GjsonUintArray[uint64](array, math.MaxUint64)
295 | case model.Float32:
296 | val = GjsonFloatArray[float32](array, math.MaxFloat32)
297 | case model.Float64:
298 | val = GjsonFloatArray[float64](array, math.MaxFloat64)
299 | case model.Decimal:
300 | results := make([]decimal.Decimal, 0, len(array))
301 | var f float64
302 | for _, e := range array {
303 | switch e.Type {
304 | case gjson.Number:
305 | f = e.Num
306 | default:
307 | f = float64(0.0)
308 | }
309 | results = append(results, decimal.NewFromFloat(f))
310 | }
311 | val = results
312 | case model.String:
313 | results := make([]string, 0, len(array))
314 | var s string
315 | for _, e := range array {
316 | switch e.Type {
317 | case gjson.Null:
318 | s = ""
319 | case gjson.String:
320 | s = e.Str
321 | default:
322 | s = e.Raw
323 | }
324 | results = append(results, s)
325 | }
326 | val = results
327 | case model.DateTime:
328 | results := make([]time.Time, 0, len(array))
329 | var t time.Time
330 | for _, e := range array {
331 | switch e.Type {
332 | case gjson.Number:
333 | t = UnixFloat(e.Num, c.pp.timeUnit)
334 | case gjson.String:
335 | var err error
336 | if t, err = c.pp.ParseDateTime(key, e.Str); err != nil {
337 | t = Epoch
338 | }
339 | default:
340 | t = Epoch
341 | }
342 | results = append(results, t)
343 | }
344 | val = results
345 | default:
346 | util.Logger.Fatal(fmt.Sprintf("LOGIC ERROR: unsupported array type %v", typ))
347 | }
348 | return
349 | }
350 |
351 | func (c *CsvMetric) GetObject(key string, nullable bool) (val interface{}) {
352 | return
353 | }
354 |
355 | func (c *CsvMetric) GetMap(key string, typeinfo *model.TypeInfo) (val interface{}) {
356 | return
357 | }
358 |
359 | func (c *CsvMetric) GetNewKeys(knownKeys, newKeys, warnKeys *sync.Map, white, black *regexp.Regexp, partition int, offset int64) bool {
360 | return false
361 | }
362 |
--------------------------------------------------------------------------------
/parser/parser.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright [2019] housepower
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 | package parser
17 |
18 | import (
19 | "math"
20 | "math/big"
21 | "sync"
22 | "time"
23 |
24 | "github.com/housepower/clickhouse_sinker/model"
25 | "github.com/housepower/clickhouse_sinker/util"
26 | "github.com/thanos-io/thanos/pkg/errors"
27 | "github.com/valyala/fastjson"
28 | )
29 |
30 | var (
31 | Layouts = []string{
32 | //DateTime, RFC3339
33 | "2006-01-02T15:04:05Z07:00", //time.RFC3339, `date --iso-8601=s` on Ubuntu 20.04
34 | "2006-01-02T15:04:05Z0700", //`date --iso-8601=s` on CentOS 7.6
35 | "2006-01-02T15:04:05",
36 | //DateTime, ISO8601
37 | "2006-01-02 15:04:05Z07:00", //`date --rfc-3339=s` output format
38 | "2006-01-02 15:04:05Z0700",
39 | "2006-01-02 15:04:05",
40 | //DateTime, other layouts supported by golang
41 | "Mon Jan _2 15:04:05 2006", //time.ANSIC
42 | "Mon Jan _2 15:04:05 MST 2006", //time.UnixDate
43 | "Mon Jan 02 15:04:05 -0700 2006", //time.RubyDate
44 | "02 Jan 06 15:04 MST", //time.RFC822
45 | "02 Jan 06 15:04 -0700", //time.RFC822Z
46 | "Monday, 02-Jan-06 15:04:05 MST", //time.RFC850
47 | "Mon, 02 Jan 2006 15:04:05 MST", //time.RFC1123
48 | "Mon, 02 Jan 2006 15:04:05 -0700", //time.RFC1123Z
49 | //DateTime, linux utils
50 | "Mon Jan 02 15:04:05 MST 2006", // `date` on CentOS 7.6 default output format
51 | "Mon 02 Jan 2006 03:04:05 PM MST", // `date` on Ubuntu 20.4 default output format
52 | //DateTime, home-brewed
53 | "Jan 02, 2006 15:04:05Z07:00",
54 | "Jan 02, 2006 15:04:05Z0700",
55 | "Jan 02, 2006 15:04:05",
56 | "02/Jan/2006 15:04:05 Z07:00",
57 | "02/Jan/2006 15:04:05 Z0700",
58 | "02/Jan/2006 15:04:05",
59 | //Date
60 | "2006-01-02",
61 | "02/01/2006",
62 | "02/Jan/2006",
63 | "Jan 02, 2006",
64 | "Mon Jan 02, 2006",
65 | }
66 | Epoch = time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC)
67 | ErrParseDateTime = errors.Newf("value doesn't contain DateTime")
68 | )
69 |
70 | // Parser is the interface every message parser implements
71 | type Parser interface {
72 | Parse(bs []byte) (metric model.Metric, err error)
73 | }
74 |
75 | // Pool may be used for pooling Parsers for similarly typed JSONs.
76 | type Pool struct {
77 | name string
78 | csvFormat map[string]int
79 | delimiter string
80 | timeZone *time.Location
81 | timeUnit float64
82 | knownLayouts sync.Map
83 | pool sync.Pool
84 | once sync.Once // only need to detect new keys from fields once
85 | fields string
86 | }
87 |
88 | // NewParserPool creates a parser pool
89 | func NewParserPool(name string, csvFormat []string, delimiter string, timezone string, timeunit float64, fields string) (pp *Pool, err error) {
90 | var tz *time.Location
91 | if timezone == "" {
92 | tz = time.Local
93 | } else if tz, err = time.LoadLocation(timezone); err != nil {
94 | err = errors.Wrapf(err, "")
95 | return
96 | }
97 | pp = &Pool{
98 | name: name,
99 | delimiter: delimiter,
100 | timeZone: tz,
101 | timeUnit: timeunit,
102 | fields: fields,
103 | }
104 | if csvFormat != nil {
105 | pp.csvFormat = make(map[string]int, len(csvFormat))
106 | for i, title := range csvFormat {
107 | pp.csvFormat[title] = i
108 | }
109 | }
110 | return
111 | }
112 |
113 | // Get returns a Parser from pp.
114 | //
115 | // The Parser must be Put to pp after use.
116 | func (pp *Pool) Get() (Parser, error) {
117 | v := pp.pool.Get()
118 | if v == nil {
119 | switch pp.name {
120 | case "gjson":
121 | return &GjsonParser{pp: pp}, nil
122 | case "csv":
123 | if pp.fields != "" {
124 | util.Logger.Warn("extra fields for csv parser is not supported, fields ignored")
125 | }
126 | return &CsvParser{pp: pp}, nil
127 | case "fastjson":
128 | fallthrough
129 | default:
130 | var obj *fastjson.Object
131 | if pp.fields != "" {
132 | value, err := fastjson.Parse(pp.fields)
133 | if err != nil {
134 | err = errors.Wrapf(err, "failed to parse fields as a valid json object")
135 | return nil, err
136 | }
137 | obj, err = value.Object()
138 | if err != nil {
139 | err = errors.Wrapf(err, "failed to retrieve fields member")
140 | return nil, err
141 | }
142 | }
143 | return &FastjsonParser{pp: pp, fields: obj}, nil
144 | }
145 | }
146 | return v.(Parser), nil
147 | }
148 |
149 | // Put returns p to pp.
150 | //
151 | // p and objects recursively returned from p cannot be used after p
152 | // is put into pp.
153 | func (pp *Pool) Put(p Parser) {
154 | pp.pool.Put(p)
155 | }
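// Illustrative usage sketch of the parser pool (assumes a fastjson payload with hypothetical fields; not part of the original source):
//
//	pp, err := NewParserPool("fastjson", nil, "", "UTC", 1.0, "")
//	if err != nil {
//		return err
//	}
//	p, err := pp.Get()
//	if err != nil {
//		return err
//	}
//	metric, err := p.Parse([]byte(`{"ts":"2021-01-02 15:04:05","value":42}`))
//	_ = metric
//	_ = err
//	pp.Put(p) // p must not be used after Put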
156 |
157 | // ParseDateTime assumes that all values of a given kafka-message field share the same layout, and that layouts of different fields are unrelated.
158 | // The layout is detected on the first successful parse of a field and then reused for that field from then on.
159 | // The returned time is in UTC.
160 | func (pp *Pool) ParseDateTime(key string, val string) (t time.Time, err error) {
161 | var layout string
162 | var lay interface{}
163 | var ok bool
164 | var t2 time.Time
165 | if val == "" {
166 | err = ErrParseDateTime
167 | return
168 | }
169 | if lay, ok = pp.knownLayouts.Load(key); !ok {
170 | t2, layout = parseInLocation(val, pp.timeZone)
171 | if layout == "" {
172 | err = ErrParseDateTime
173 | return
174 | }
175 | t = t2
176 | pp.knownLayouts.Store(key, layout)
177 | return
178 | }
179 | if layout, ok = lay.(string); !ok {
180 | err = ErrParseDateTime
181 | return
182 | }
183 | if t2, err = time.ParseInLocation(layout, val, pp.timeZone); err != nil {
184 | err = ErrParseDateTime
185 | return
186 | }
187 | t = t2.UTC()
188 | return
189 | }
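// Illustrative sketch of the per-field layout caching (hypothetical values; not part of the original source):
//
//	t1, _ := pp.ParseDateTime("ts", "2021-01-02 15:04:05") // first call detects and caches the layout for key "ts"
//	t2, _ := pp.ParseDateTime("ts", "2021-03-04 05:06:07") // later calls reuse the cached layout
//	_, err := pp.ParseDateTime("ts", "not a date")         // fails with ErrParseDateTime once the layout is fixed
//	_ = t1
//	_ = t2
//	_ = err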
190 |
191 | func parseInLocation(val string, loc *time.Location) (t time.Time, layout string) {
192 | var err error
193 | var lay string
194 | for _, lay = range Layouts {
195 | if t, err = time.ParseInLocation(lay, val, loc); err == nil {
196 | t = t.UTC()
197 | layout = lay
198 | return
199 | }
200 | }
201 | return
202 | }
203 |
204 | func UnixFloat(sec, unit float64) (t time.Time) {
205 | sec, _ = new(big.Float).Mul(big.NewFloat(sec), big.NewFloat(unit)).Float64()
206 | //2^32 seconds since epoch: 2106-02-07T06:28:16Z
207 | if sec < 0 || sec >= 4294967296.0 {
208 | return Epoch
209 | }
210 | i, f := math.Modf(sec)
211 | return time.Unix(int64(i), int64(f*1e9)).UTC()
212 | }
213 |
--------------------------------------------------------------------------------
/pool/ck_cli.go:
--------------------------------------------------------------------------------
1 | package pool
2 |
3 | import (
4 | "context"
5 | "database/sql"
6 | "fmt"
7 |
8 | "github.com/ClickHouse/clickhouse-go/v2"
9 | "github.com/ClickHouse/clickhouse-go/v2/lib/driver"
10 | "github.com/RoaringBitmap/roaring"
11 | "github.com/housepower/clickhouse_sinker/model"
12 | "github.com/housepower/clickhouse_sinker/util"
13 | "github.com/thanos-io/thanos/pkg/errors"
14 | "go.uber.org/zap"
15 | )
16 |
17 | type Row struct {
18 | proto clickhouse.Protocol
19 | r1 *sql.Row
20 | r2 driver.Row
21 | }
22 |
23 | func (r *Row) Scan(dest ...any) error {
24 | if r.proto == clickhouse.HTTP {
25 | return r.r1.Scan(dest...)
26 | } else {
27 | return r.r2.Scan(dest...)
28 | }
29 | }
30 |
31 | type Rows struct {
32 | protocol clickhouse.Protocol
33 | rs1 *sql.Rows
34 | rs2 driver.Rows
35 | }
36 |
37 | func (r *Rows) Close() error {
38 | if r.protocol == clickhouse.HTTP {
39 | return r.rs1.Close()
40 | } else {
41 | return r.rs2.Close()
42 | }
43 | }
44 |
45 | func (r *Rows) Columns() ([]string, error) {
46 | if r.protocol == clickhouse.HTTP {
47 | return r.rs1.Columns()
48 | } else {
49 | return r.rs2.Columns(), nil
50 | }
51 | }
52 |
53 | func (r *Rows) Next() bool {
54 | if r.protocol == clickhouse.HTTP {
55 | return r.rs1.Next()
56 | } else {
57 | return r.rs2.Next()
58 | }
59 | }
60 |
61 | func (r *Rows) Scan(dest ...any) error {
62 | if r.protocol == clickhouse.HTTP {
63 | return r.rs1.Scan(dest...)
64 | } else {
65 | return r.rs2.Scan(dest...)
66 | }
67 | }
68 |
69 | type Conn struct {
70 | protocol clickhouse.Protocol
71 | c driver.Conn
72 | db *sql.DB
73 | ctx context.Context
74 | }
75 |
76 | func (c *Conn) Query(query string, args ...any) (*Rows, error) {
77 | var rs Rows
78 | rs.protocol = c.protocol
79 | if c.protocol == clickhouse.HTTP {
80 | rows, err := c.db.Query(query, args...)
81 | if err != nil {
82 | return &rs, err
83 | } else {
84 | rs.rs1 = rows
85 | }
86 | } else {
87 | rows, err := c.c.Query(c.ctx, query, args...)
88 | if err != nil {
89 | return &rs, err
90 | } else {
91 | rs.rs2 = rows
92 | }
93 | }
94 | return &rs, nil
95 | }
96 |
97 | func (c *Conn) QueryRow(query string, args ...any) *Row {
98 | var row Row
99 | row.proto = c.protocol
100 | if c.protocol == clickhouse.HTTP {
101 | row.r1 = c.db.QueryRow(query, args...)
102 | } else {
103 | row.r2 = c.c.QueryRow(c.ctx, query, args...)
104 | }
105 | return &row
106 | }
107 |
108 | func (c *Conn) Exec(query string, args ...any) error {
109 | if c.protocol == clickhouse.HTTP {
110 | _, err := c.db.Exec(query, args...)
111 | return err
112 | } else {
113 | return c.c.Exec(c.ctx, query, args...)
114 | }
115 | }
116 |
117 | func (c *Conn) Ping() error {
118 | if c.protocol == clickhouse.HTTP {
119 | return c.db.Ping()
120 | } else {
121 | return c.c.Ping(c.ctx)
122 | }
123 | }
124 |
125 | func (c *Conn) write_v1(prepareSQL string, rows model.Rows, idxBegin, idxEnd int) (numBad int, err error) {
126 | var errExec error
127 |
128 | var stmt *sql.Stmt
129 | var tx *sql.Tx
130 | tx, err = c.db.Begin()
131 | if err != nil {
132 | err = errors.Wrapf(err, "pool.Conn.Begin")
133 | return
134 | }
135 |
136 | if stmt, err = tx.Prepare(prepareSQL); err != nil {
137 | err = errors.Wrapf(err, "tx.Prepare %s", prepareSQL)
138 | return
139 | }
140 | defer stmt.Close()
141 |
142 | var bmBad *roaring.Bitmap
143 | for i, row := range rows {
144 | if _, err = stmt.Exec((*row)[idxBegin:idxEnd]...); err != nil {
145 | if bmBad == nil {
146 | errExec = errors.Wrapf(err, "driver.Batch.Append")
147 | bmBad = roaring.NewBitmap()
148 | }
149 | bmBad.AddInt(i)
150 | }
151 |
152 | }
153 | if errExec != nil {
154 | _ = tx.Rollback()
155 | numBad = int(bmBad.GetCardinality())
156 | util.Logger.Warn(fmt.Sprintf("writeRows skipped %d rows of %d due to invalid content", numBad, len(rows)), zap.Error(errExec))
157 | // write rows again, skip bad ones
158 | if stmt, err = tx.Prepare(prepareSQL); err != nil {
159 | err = errors.Wrapf(err, "tx.Prepare %s", prepareSQL)
160 | return
161 | }
162 | for i, row := range rows {
163 | if !bmBad.ContainsInt(i) {
164 | if _, err = stmt.Exec((*row)[idxBegin:idxEnd]...); err != nil {
165 | break
166 | }
167 | }
168 | }
169 | if err = tx.Commit(); err != nil {
170 | err = errors.Wrapf(err, "tx.Commit")
171 | _ = tx.Rollback()
172 | return
173 | }
174 | return
175 | }
176 | if err = tx.Commit(); err != nil {
177 | err = errors.Wrapf(err, "tx.Commit")
178 | _ = tx.Rollback()
179 | return
180 | }
181 | return
182 | }
183 |
184 | func (c *Conn) write_v2(prepareSQL string, rows model.Rows, idxBegin, idxEnd int) (numBad int, err error) {
185 | var errExec error
186 | var batch driver.Batch
187 | if batch, err = c.c.PrepareBatch(c.ctx, prepareSQL); err != nil {
188 | err = errors.Wrapf(err, "pool.Conn.PrepareBatch %s", prepareSQL)
189 | return
190 | }
191 | var bmBad *roaring.Bitmap
192 | for i, row := range rows {
193 | if err = batch.Append((*row)[idxBegin:idxEnd]...); err != nil {
194 | if bmBad == nil {
195 | errExec = errors.Wrapf(err, "driver.Batch.Append")
196 | bmBad = roaring.NewBitmap()
197 | }
198 | bmBad.AddInt(i)
199 | }
200 | }
201 | if errExec != nil {
202 | _ = batch.Abort()
203 | numBad = int(bmBad.GetCardinality())
204 | util.Logger.Warn(fmt.Sprintf("writeRows skipped %d rows of %d due to invalid content", numBad, len(rows)), zap.Error(errExec))
205 | // write rows again, skip bad ones
206 | if batch, err = c.c.PrepareBatch(c.ctx, prepareSQL); err != nil {
207 | err = errors.Wrapf(err, "pool.Conn.PrepareBatch %s", prepareSQL)
208 | return
209 | }
210 | for i, row := range rows {
211 | if !bmBad.ContainsInt(i) {
212 | if err = batch.Append((*row)[idxBegin:idxEnd]...); err != nil {
213 | break
214 | }
215 | }
216 | }
217 | if err = batch.Send(); err != nil {
218 | err = errors.Wrapf(err, "driver.Batch.Send")
219 | _ = batch.Abort()
220 | return
221 | }
222 | return
223 | }
224 | if err = batch.Send(); err != nil {
225 | err = errors.Wrapf(err, "driver.Batch.Send")
226 | _ = batch.Abort()
227 | return
228 | }
229 | return
230 | }
231 |
232 | func (c *Conn) Write(prepareSQL string, rows model.Rows, idxBegin, idxEnd int) (numBad int, err error) {
233 | util.Logger.Debug("start write to ck", zap.Int("begin", idxBegin), zap.Int("end", idxEnd))
234 | if c.protocol == clickhouse.HTTP {
235 | numBad, err = c.write_v1(prepareSQL, rows, idxBegin, idxEnd)
236 | } else {
237 | numBad, err = c.write_v2(prepareSQL, rows, idxBegin, idxEnd)
238 | }
239 | util.Logger.Debug("loop write completed", zap.Int("numbad", numBad))
240 | return numBad, err
241 | }
242 |
243 | func (c *Conn) AsyncInsert(query string, wait bool) error {
244 | if c.protocol == clickhouse.HTTP {
245 | return fmt.Errorf("AsyncInsert is not supported over the HTTP protocol")
246 | } else {
247 | return c.c.AsyncInsert(c.ctx, query, wait)
248 | }
249 | }
250 |
251 | func (c *Conn) Close() error {
252 | if c.protocol == clickhouse.HTTP {
253 | return c.db.Close()
254 | } else {
255 | return c.c.Close()
256 | }
257 | }
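// Illustrative sketch of protocol-agnostic querying (assumes an established Conn; the query is hypothetical and not part of the original source):
//
//	rows, err := conn.Query("SELECT name, type FROM system.columns WHERE table = 'metrics'")
//	if err != nil {
//		return err
//	}
//	defer rows.Close()
//	var name, typ string
//	for rows.Next() {
//		_ = rows.Scan(&name, &typ)
//	}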
258 |
--------------------------------------------------------------------------------
/pool/conn.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright [2019] housepower
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | package pool
18 |
19 | // Clickhouse connection pool
20 |
21 | import (
22 | "context"
23 | "crypto/tls"
24 | "fmt"
25 | "math/rand"
26 | "sync"
27 | "time"
28 |
29 | "github.com/ClickHouse/clickhouse-go/v2"
30 | "github.com/housepower/clickhouse_sinker/config"
31 | "github.com/housepower/clickhouse_sinker/util"
32 | "github.com/thanos-io/thanos/pkg/errors"
33 | "go.uber.org/zap"
34 | )
35 |
36 | var (
37 | lock sync.Mutex
38 | clusterConn []*ShardConn
39 | )
40 |
41 | // ShardConn is a data structure for storing the ClickHouse connection to one shard
42 | type ShardConn struct {
43 | lock sync.Mutex
44 | conn *Conn
45 | dbVer int
46 | opts clickhouse.Options
47 | replicas []string //ip:port list of replicas
48 | nextRep int //index of next replica
49 | writingPool *util.WorkerPool //pool shared by all tasks writing to ClickHouse, balancing CPU and network
50 | protocol clickhouse.Protocol
51 | chCfg *config.ClickHouseConfig
52 | }
53 |
54 | func (sc *ShardConn) SubmitTask(fn func()) (err error) {
55 | return sc.writingPool.Submit(fn)
56 | }
57 |
58 | // GetReplica returns the replica to which db connects
59 | func (sc *ShardConn) GetReplica() (replica string) {
60 | sc.lock.Lock()
61 | defer sc.lock.Unlock()
62 | if sc.conn != nil {
63 | curRep := (len(sc.replicas) + sc.nextRep - 1) % len(sc.replicas)
64 | replica = sc.replicas[curRep]
65 | }
66 | return
67 | }
68 |
69 | // Close closes the current replica connection
70 | func (sc *ShardConn) Close() {
71 | sc.lock.Lock()
72 | defer sc.lock.Unlock()
73 | if sc.conn != nil {
74 | sc.conn.Close()
75 | sc.conn = nil
76 | }
77 | if sc.writingPool != nil {
78 | sc.writingPool.StopWait()
79 | }
80 | }
81 |
82 | // NextGoodReplica connects to the next good replica
83 | func (sc *ShardConn) NextGoodReplica(ctx context.Context, failedVer int) (db *Conn, dbVer int, err error) {
84 | sc.lock.Lock()
85 | defer sc.lock.Unlock()
86 | if sc.conn != nil {
87 | if sc.dbVer > failedVer {
88 | // Another goroutine has already done connection.
89 | // Notice: why record a failure version instead of a timestamp?
90 | // Consider following scenario:
91 | // conn1 = NextGood(0); conn2 = NexGood(0); conn1.Exec failed at ts1;
92 | // conn3 = NextGood(ts1); conn2.Exec failed at ts2;
93 | // conn4 = NextGood(ts2) will close the good connection and break users.
94 | return sc.conn, sc.dbVer, nil
95 | }
96 | sc.conn.Close()
97 | sc.conn = nil
98 | }
99 | savedNextRep := sc.nextRep
100 | // try all replicas, including the current one
101 | conn := Conn{
102 | protocol: sc.protocol,
103 | ctx: ctx,
104 | }
105 | for i := 0; i < len(sc.replicas); i++ {
106 | replica := sc.replicas[sc.nextRep]
107 | sc.opts.Addr = []string{replica}
108 | sc.nextRep = (sc.nextRep + 1) % len(sc.replicas)
109 | if sc.protocol == clickhouse.HTTP {
110 | // See https://github.com/ClickHouse/clickhouse-go/issues/1150:
111 | // compression triggers an obscure error over the HTTP protocol,
112 | // so compression is disabled for HTTP connections
113 | conn.db = clickhouse.OpenDB(&sc.opts)
114 | conn.db.SetMaxOpenConns(sc.chCfg.MaxOpenConns)
115 | conn.db.SetMaxIdleConns(sc.chCfg.MaxOpenConns)
116 | conn.db.SetConnMaxLifetime(time.Minute * 10)
117 | } else {
118 | sc.opts.Compression = &clickhouse.Compression{
119 | Method: clickhouse.CompressionLZ4,
120 | }
121 | conn.c, err = clickhouse.Open(&sc.opts)
122 | }
123 | if err != nil {
124 | util.Logger.Warn("clickhouse.Open failed", zap.String("replica", replica), zap.Error(err))
125 | continue
126 | }
127 | sc.dbVer++
128 | util.Logger.Info("clickhouse.Open succeeded", zap.Int("dbVer", sc.dbVer), zap.String("replica", replica))
129 | sc.conn = &conn
130 | return sc.conn, sc.dbVer, nil
131 | }
132 | err = errors.Newf("no good replica among replicas %v since %d", sc.replicas, savedNextRep)
133 | return nil, sc.dbVer, err
134 | }
135 |
136 | // InitClusterConn creates one pool.Conn per shard; each connects to one replica inside the shard.
137 | // We need more control over replica failover than simply tolerating a single replica failure.
138 | func InitClusterConn(chCfg *config.ClickHouseConfig) (err error) {
139 | lock.Lock()
140 | defer lock.Unlock()
141 | freeClusterConn()
142 |
143 | proto := clickhouse.Native
144 | if chCfg.Protocol == clickhouse.HTTP.String() {
145 | proto = clickhouse.HTTP
146 | }
147 |
148 | for _, replicas := range chCfg.Hosts {
149 | numReplicas := len(replicas)
150 | replicaAddrs := make([]string, numReplicas)
151 | for i, ip := range replicas {
152 | // Changing hostnames to IPs breaks TLS connections in many cases
153 | if !chCfg.Secure {
154 | if ips2, err := util.GetIP4Byname(ip); err == nil {
155 | ip = ips2[0]
156 | }
157 | }
158 | replicaAddrs[i] = fmt.Sprintf("%s:%d", ip, chCfg.Port)
159 | }
160 | sc := &ShardConn{
161 | replicas: replicaAddrs,
162 | chCfg: chCfg,
163 | opts: clickhouse.Options{
164 | Auth: clickhouse.Auth{
165 | Database: chCfg.DB,
166 | Username: chCfg.Username,
167 | Password: chCfg.Password,
168 | },
169 | Protocol: proto,
170 | DialTimeout: time.Minute * 10,
171 | ReadTimeout: time.Second * time.Duration(chCfg.ReadTimeout),
172 | Settings: clickhouse.Settings{
173 | "max_execution_time": 0,
174 | },
175 | },
176 | writingPool: util.NewWorkerPool(chCfg.MaxOpenConns, 1),
177 | }
178 | if chCfg.Secure {
179 | tlsConfig := &tls.Config{}
180 | tlsConfig.InsecureSkipVerify = chCfg.InsecureSkipVerify
181 | sc.opts.TLS = tlsConfig
182 | }
183 | if proto == clickhouse.Native {
184 | sc.opts.MaxOpenConns = chCfg.MaxOpenConns
185 | sc.opts.MaxIdleConns = chCfg.MaxOpenConns
186 | sc.opts.ConnMaxLifetime = time.Minute * 10
187 | }
188 | sc.protocol = proto
189 | r := rand.New(rand.NewSource(time.Now().UnixNano()))
190 | idx := r.Intn(numReplicas)
191 | sc.nextRep = idx
192 |
193 | if _, _, err = sc.NextGoodReplica(chCfg.Ctx, idx); err != nil {
194 | return
195 | }
196 | clusterConn = append(clusterConn, sc)
197 | }
198 | return
199 | }
200 |
201 | func freeClusterConn() {
202 | for _, sc := range clusterConn {
203 | sc.Close()
204 | }
205 | clusterConn = []*ShardConn{}
206 | }
207 |
208 | func FreeClusterConn() {
209 | lock.Lock()
210 | defer lock.Unlock()
211 | freeClusterConn()
212 | }
213 |
214 | func NumShard() (cnt int) {
215 | lock.Lock()
216 | defer lock.Unlock()
217 | return len(clusterConn)
218 | }
219 |
220 | // GetShardConn selects a ClickHouse shard based on batchNum
221 | func GetShardConn(batchNum int64) (sc *ShardConn) {
222 | lock.Lock()
223 | defer lock.Unlock()
224 | sc = clusterConn[batchNum%int64(len(clusterConn))]
225 | return
226 | }
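// Illustrative sketch (assumes InitClusterConn has already succeeded; batchNum and ctx are hypothetical caller values; not part of the original source):
//
//	sc := GetShardConn(batchNum) // shards are picked round-robin by batch number
//	conn, dbVer, err := sc.NextGoodReplica(ctx, 0)
//	if err == nil {
//		_ = conn.Ping()
//		_ = dbVer
//	}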
227 |
228 | // CloseAll closes all connections and destroys the pool
229 | func CloseAll() {
230 | FreeClusterConn()
231 | }
232 |
--------------------------------------------------------------------------------
/statistics/statistics.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright [2019] housepower
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 | package statistics
17 |
18 | import (
19 | "context"
20 | "math/rand"
21 | "time"
22 |
23 | "github.com/housepower/clickhouse_sinker/util"
24 | "github.com/prometheus/client_golang/prometheus"
25 | "github.com/prometheus/client_golang/prometheus/collectors"
26 | "github.com/prometheus/client_golang/prometheus/push"
27 | "github.com/prometheus/common/expfmt"
28 | "github.com/thanos-io/thanos/pkg/errors"
29 | "go.uber.org/zap"
30 | )
31 |
32 | var (
33 | prefix = "clickhouse_sinker_"
34 |
35 | // ConsumeMsgsTotal = ParseMsgsErrorTotal + FlushMsgsTotal + FlushMsgsErrorTotal
36 | ConsumeMsgsTotal = prometheus.NewCounterVec(
37 | prometheus.CounterOpts{
38 | Name: prefix + "consume_msgs_total",
39 | Help: "total num of consumed msgs",
40 | },
41 | []string{"task"},
42 | )
43 | ParseMsgsErrorTotal = prometheus.NewCounterVec(
44 | prometheus.CounterOpts{
45 | Name: prefix + "parse_msgs_error_total",
46 | Help: "total num of msgs with parse failure",
47 | },
48 | []string{"task"},
49 | )
50 | FlushMsgsTotal = prometheus.NewCounterVec(
51 | prometheus.CounterOpts{
52 | Name: prefix + "flush_msgs_total",
53 | Help: "total num of flushed msgs",
54 | },
55 | []string{"task"},
56 | )
57 | FlushMsgsErrorTotal = prometheus.NewCounterVec(
58 | prometheus.CounterOpts{
59 | Name: prefix + "flush_msgs_error_total",
60 | Help: "total num of msgs failed to flush to ck",
61 | },
62 | []string{"task"},
63 | )
64 | ConsumeOffsets = prometheus.NewGaugeVec(
65 | prometheus.GaugeOpts{
66 | Name: prefix + "consume_offsets",
67 | Help: "last committed offset for each topic partition pair",
68 | },
69 | []string{"consumer", "topic", "partition"},
70 | )
71 | ConsumeLags = prometheus.NewGaugeVec(
72 | prometheus.GaugeOpts{
73 | Name: prefix + "consume_lags",
74 | Help: "message lags for each task, work with cluster of sinker",
75 | },
76 | []string{"consumer", "topic", "task"},
77 | )
78 | ShardMsgs = prometheus.NewGaugeVec(
79 | prometheus.GaugeOpts{
80 | Name: prefix + "shard_msgs",
81 | Help: "num of msgs in shard",
82 | },
83 | []string{"task"},
84 | )
85 | WritingPoolBacklog = prometheus.NewGaugeVec(
86 | prometheus.GaugeOpts{
87 | Name: prefix + "writing_pool_backlog",
88 | Help: "GlobalWritingPool backlog",
89 | },
90 | []string{"task"},
91 | )
92 | WritingDurations = prometheus.NewHistogramVec(
93 | prometheus.HistogramOpts{
94 | Name: prefix + "writing_durations",
95 | Help: "writing durations",
96 | Buckets: []float64{1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0},
97 | },
98 | []string{"task", "table"},
99 | )
100 | WriteSeriesAllowNew = prometheus.NewCounterVec(
101 | prometheus.CounterOpts{
102 | Name: prefix + "write_series_allow_new",
103 | Help: "num of allowed new series",
104 | },
105 | []string{"task"},
106 | )
107 | WriteSeriesAllowChanged = prometheus.NewCounterVec(
108 | prometheus.CounterOpts{
109 | Name: prefix + "write_series_allow_changed",
110 | Help: "num of allowed changed series",
111 | },
112 | []string{"task"},
113 | )
114 | WriteSeriesDropQuota = prometheus.NewCounterVec(
115 | prometheus.CounterOpts{
116 | Name: prefix + "write_series_drop_quota",
117 | Help: "num of disallowed write_series due to quota",
118 | },
119 | []string{"task"},
120 | )
121 | WriteSeriesDropUnchanged = prometheus.NewCounterVec(
122 | prometheus.CounterOpts{
123 | Name: prefix + "write_series_drop_unchanged",
124 | Help: "num of disallowed write_series due to unchanged",
125 | },
126 | []string{"task"},
127 | )
128 | // WriteSeriesSucceed = WriteSeriesAllowNew + WriteSeriesAllowChanged
129 | WriteSeriesSucceed = prometheus.NewCounterVec(
130 | prometheus.CounterOpts{
131 | Name: prefix + "write_series_succeed",
132 | Help: "num of series handled by writeSeries",
133 | },
134 | []string{"task"},
135 | )
136 | )
137 |
138 | func init() {
139 | prometheus.MustRegister(ConsumeMsgsTotal)
140 | prometheus.MustRegister(ParseMsgsErrorTotal)
141 | prometheus.MustRegister(FlushMsgsTotal)
142 | prometheus.MustRegister(FlushMsgsErrorTotal)
143 | prometheus.MustRegister(ConsumeOffsets)
144 | prometheus.MustRegister(ConsumeLags)
145 | prometheus.MustRegister(ShardMsgs)
146 | prometheus.MustRegister(WritingPoolBacklog)
147 | prometheus.MustRegister(WritingDurations)
148 | prometheus.MustRegister(WriteSeriesAllowNew)
149 | prometheus.MustRegister(WriteSeriesAllowChanged)
150 | prometheus.MustRegister(WriteSeriesDropQuota)
151 | prometheus.MustRegister(WriteSeriesDropUnchanged)
152 | prometheus.MustRegister(WriteSeriesSucceed)
153 | prometheus.MustRegister(collectors.NewBuildInfoCollector())
154 | }
155 |
156 | // Pusher is the service to push the metrics to pushgateway
157 | type Pusher struct {
158 | pgwAddrs []string
159 | pushInterval int
160 | pusher *push.Pusher
161 | inUseAddr int
162 | instance string
163 | ctx context.Context
164 | cancel context.CancelFunc
165 | stopped chan struct{}
166 | }
167 |
168 | func NewPusher(addrs []string, interval int, selfAddr string) *Pusher {
169 | return &Pusher{
170 | pgwAddrs: addrs,
171 | pushInterval: interval,
172 | inUseAddr: -1,
173 | instance: selfAddr,
174 | stopped: make(chan struct{}),
175 | }
176 | }
177 |
178 | var (
179 | errPgwEmpty = errors.Newf("invalid configuration for pusher")
180 | )
181 |
182 | func (p *Pusher) Init() error {
183 | if len(p.pgwAddrs) == 0 || p.pushInterval <= 0 {
184 | return errPgwEmpty
185 | }
186 | p.reconnect()
187 | p.ctx, p.cancel = context.WithCancel(context.Background())
188 | return nil
189 | }
190 |
191 | func (p *Pusher) Run() {
192 | ticker := time.NewTicker(time.Second * time.Duration(p.pushInterval))
193 | util.Logger.Info("start pushing metrics to the specified push gateway address ")
194 | defer ticker.Stop()
195 | FOR:
196 | for {
197 | select {
198 | case <-ticker.C:
199 | if err := p.pusher.Push(); err != nil {
200 | err = errors.Wrapf(err, "")
201 | util.Logger.Error("pushing metrics failed", zap.Error(err))
202 | p.reconnect()
203 | }
204 | case <-p.ctx.Done():
205 | util.Logger.Info("Pusher.Run quit due to context has been canceled")
206 | break FOR
207 | }
208 | }
209 | p.stopped <- struct{}{}
210 | }
211 |
212 | func (p *Pusher) Stop() {
213 | p.cancel()
214 | <-p.stopped
215 | // https://stackoverflow.com/questions/63540280/how-to-set-a-retention-time-for-pushgateway-for-metrics-to-expire
216 | // https://github.com/prometheus/pushgateway/issues/19
217 | if err := p.pusher.Delete(); err != nil {
218 | err = errors.Wrapf(err, "")
219 | util.Logger.Error("failed to delete metric group", zap.String("pushgateway", p.pgwAddrs[p.inUseAddr]),
220 | zap.String("job", "clickhouse_sinker"), zap.String("instance", p.instance), zap.Error(err))
221 | }
222 | util.Logger.Info("stopped metric pusher")
223 | }
224 |
225 | func (p *Pusher) reconnect() {
226 | var nextAddr int
227 | if p.inUseAddr == -1 {
228 | nextAddr = rand.Intn(len(p.pgwAddrs))
229 | } else {
230 | nextAddr = (p.inUseAddr + 1) % len(p.pgwAddrs)
231 | }
232 | p.pusher = push.New(p.pgwAddrs[nextAddr], "clickhouse_sinker").
233 | Collector(ConsumeMsgsTotal).
234 | Collector(ParseMsgsErrorTotal).
235 | Collector(FlushMsgsTotal).
236 | Collector(FlushMsgsErrorTotal).
237 | Collector(ConsumeOffsets).
238 | Collector(ConsumeLags).
239 | Collector(ShardMsgs).
240 | Collector(WritingPoolBacklog).
241 | Collector(WritingDurations).
242 | Collector(WriteSeriesAllowNew).
243 | Collector(WriteSeriesAllowChanged).
244 | Collector(WriteSeriesDropQuota).
245 | Collector(WriteSeriesDropUnchanged).
246 | Collector(WriteSeriesSucceed).
247 | Collector(collectors.NewGoCollector()).
248 | Collector(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{})).
249 | Grouping("instance", p.instance).Format(expfmt.FmtText)
250 | p.inUseAddr = nextAddr
251 | }
252 |
--------------------------------------------------------------------------------
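A minimal usage sketch of the Pusher above. The pushgateway addresses, push interval and instance address are hypothetical, and util.Logger is assumed to have been initialized elsewhere; the real wiring lives in cmd/clickhouse_sinker/main.go.

package main

import (
	"time"

	"github.com/housepower/clickhouse_sinker/statistics"
)

func main() {
	// Hypothetical pushgateway addresses, a 10-second push interval, and this process's address.
	p := statistics.NewPusher([]string{"pgw1:9091", "pgw2:9091"}, 10, "10.0.0.1:2112")
	if err := p.Init(); err != nil {
		panic(err) // returned when the address list is empty or the interval is not positive
	}
	go p.Run()                   // pushes the registered collectors periodically, rotating addresses on failure
	time.Sleep(30 * time.Second) // let a few pushes happen
	p.Stop()                     // deletes the metric group from the in-use pushgateway on shutdown
}
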
/task/consumer.go:
--------------------------------------------------------------------------------
1 | /*Copyright [2019] housepower
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 |
16 | package task
17 |
18 | import (
19 | "context"
20 | "math"
21 | "sync"
22 | "sync/atomic"
23 | "time"
24 |
25 | "github.com/housepower/clickhouse_sinker/config"
26 | "github.com/housepower/clickhouse_sinker/input"
27 | "github.com/housepower/clickhouse_sinker/model"
28 | "github.com/housepower/clickhouse_sinker/util"
29 | "go.uber.org/zap"
30 |
31 | _ "github.com/ClickHouse/clickhouse-go/v2"
32 | )
33 |
34 | type Commit struct {
35 | group string
36 | offsets model.RecordMap
37 | wg *sync.WaitGroup
38 | consumer *Consumer
39 | }
40 |
41 | type Consumer struct {
42 | sinker *Sinker
43 | inputer *input.KafkaFranz
44 | tasks sync.Map
45 | grpConfig *config.GroupConfig
46 | fetchesCh chan input.Fetches
47 | processWg sync.WaitGroup
48 | ctx context.Context
49 | cancel context.CancelFunc
50 | state atomic.Uint32
51 | errCommit bool
52 |
53 | numFlying int32
54 | mux sync.Mutex
55 | commitDone *sync.Cond
56 | }
57 |
58 | const (
59 | MaxCountInBuf = 1 << 27
60 | MaxParallelism = 10
61 | )
62 |
63 | func newConsumer(s *Sinker, gCfg *config.GroupConfig) *Consumer {
64 | c := &Consumer{
65 | sinker: s,
66 | numFlying: 0,
67 | errCommit: false,
68 | grpConfig: gCfg,
69 | fetchesCh: make(chan input.Fetches),
70 | }
71 | c.state.Store(util.StateStopped)
72 | c.commitDone = sync.NewCond(&c.mux)
73 | return c
74 | }
75 |
76 | func (c *Consumer) addTask(tsk *Service) {
77 | c.tasks.Store(tsk.taskCfg.Name, tsk)
78 | }
79 |
80 | func (c *Consumer) start() {
81 | if c.state.Load() == util.StateRunning {
82 | return
83 | }
84 | c.ctx, c.cancel = context.WithCancel(context.Background())
85 | c.inputer = input.NewKafkaFranz()
86 | c.state.Store(util.StateRunning)
87 | if err := c.inputer.Init(c.sinker.curCfg, c.grpConfig, c.fetchesCh, c.cleanupFn); err == nil {
88 | go c.inputer.Run()
89 | go c.processFetch()
90 | } else {
91 | util.Logger.Fatal("failed to init consumer", zap.String("consumer", c.grpConfig.Name), zap.Error(err))
92 | }
93 | }
94 |
95 | func (c *Consumer) stop() {
96 | if c.state.Load() == util.StateStopped {
97 | return
98 | }
99 | c.state.Store(util.StateStopped)
100 |
101 | // stop the processFetch routine, make sure no more input to the commit chan & writing pool
102 | c.cancel()
103 | c.processWg.Wait()
104 | c.inputer.Stop()
105 | }
106 |
107 | func (c *Consumer) restart() {
108 | c.stop()
109 | c.start()
110 | }
111 |
112 | func (c *Consumer) cleanupFn() {
113 | // ensure the completion of writing to ck
114 | var wg sync.WaitGroup
115 | c.tasks.Range(func(key, value any) bool {
116 | wg.Add(1)
117 | go func(t *Service) {
118 | // Drain ensures all received messages have been persisted to ClickHouse
119 | t.clickhouse.Drain()
120 | wg.Done()
121 | }(value.(*Service))
122 | return true
123 | })
124 | wg.Wait()
125 |
126 | // ensure the completion of offset submission
127 | c.mux.Lock()
128 | for c.numFlying != 0 {
129 | util.Logger.Debug("draining flying pending commits", zap.String("consumergroup", c.grpConfig.Name), zap.Int32("pending", c.numFlying))
130 | c.commitDone.Wait()
131 | }
132 | c.mux.Unlock()
133 | }
134 |
135 | func (c *Consumer) updateGroupConfig(g *config.GroupConfig) {
136 | if c.state.Load() == util.StateStopped {
137 | return
138 | }
139 | c.grpConfig = g
140 | // restart the processFetch routine because of potential BufferSize or FlushInterval change
141 | // make sure no more input to the commit chan & writing pool
142 | c.cancel()
143 | c.processWg.Wait()
144 | c.ctx, c.cancel = context.WithCancel(context.Background())
145 | go c.processFetch()
146 | }
147 |
148 | func (c *Consumer) processFetch() {
149 | c.processWg.Add(1)
150 | defer c.processWg.Done()
151 | recMap := make(model.RecordMap)
152 | var bufLength int64
153 |
154 | flushFn := func(traceId, with string) {
155 | if len(recMap) == 0 {
156 | return
157 | }
158 | if bufLength > 0 {
159 | util.LogTrace(traceId, util.TraceKindProcessEnd, zap.String("with", with), zap.Int64("bufLength", bufLength))
160 | }
161 | var wg sync.WaitGroup
162 | c.tasks.Range(func(key, value any) bool {
163 | // flush to shard, ck
164 | task := value.(*Service)
165 | task.sharder.Flush(c.ctx, &wg, recMap[task.taskCfg.Topic], traceId)
166 | return true
167 | })
168 | bufLength = 0
169 |
170 | c.mux.Lock()
171 | c.numFlying++
172 | c.mux.Unlock()
173 | c.sinker.commitsCh <- &Commit{group: c.grpConfig.Name, offsets: recMap, wg: &wg, consumer: c}
174 | recMap = make(model.RecordMap)
175 | }
176 |
177 | bufThreshold := c.grpConfig.BufferSize * len(c.sinker.curCfg.Clickhouse.Hosts) * 4 / 5
178 | if bufThreshold > MaxCountInBuf {
179 | bufThreshold = MaxCountInBuf
180 | }
181 |
182 | ticker := time.NewTicker(time.Duration(c.grpConfig.FlushInterval) * time.Second)
183 | defer ticker.Stop()
184 | traceId := "NO_RECORDS_FETCHED"
185 | wait := false
186 | for {
187 | select {
188 | case fetches := <-c.fetchesCh:
189 | if c.state.Load() == util.StateStopped {
190 | continue
191 | }
192 | fetch := fetches.Fetch.Records()
193 | if wait {
194 | util.LogTrace(fetches.TraceId,
195 | util.TraceKindProcessing,
196 | zap.String("message", "bufThreshold not reached, use old traceId"),
197 | zap.String("old_trace_id", traceId),
198 | zap.Int("records", len(fetch)),
199 | zap.Int("bufThreshold", bufThreshold),
200 | zap.Int64("totalLength", bufLength))
201 | } else {
202 | traceId = fetches.TraceId
203 | util.LogTrace(traceId, util.TraceKindProcessStart, zap.Int("records", len(fetch)))
204 | }
205 | items, done := int64(len(fetch)), int64(-1)
206 | var concurrency int
207 | if concurrency = int(items/1000) + 1; concurrency > MaxParallelism {
208 | concurrency = MaxParallelism
209 | }
210 |
211 | var wg sync.WaitGroup
212 | var err error
213 | wg.Add(concurrency)
214 | for i := 0; i < concurrency; i++ {
215 | go func() {
216 | for {
217 | index := atomic.AddInt64(&done, 1)
218 | if index >= items || c.state.Load() == util.StateStopped {
219 | wg.Done()
220 | break
221 | }
222 |
223 | rec := fetch[index]
224 | msg := &model.InputMessage{
225 | Topic: rec.Topic,
226 | Partition: int(rec.Partition),
227 | Key: rec.Key,
228 | Value: rec.Value,
229 | Offset: rec.Offset,
230 | Timestamp: &rec.Timestamp,
231 | }
232 | tablename := ""
233 | for _, it := range rec.Headers {
234 | if it.Key == "__table_name" {
235 | tablename = string(it.Value)
236 | break
237 | }
238 | }
239 |
240 | c.tasks.Range(func(key, value any) bool {
241 | tsk := value.(*Service)
242 | if (tablename != "" && tsk.clickhouse.TableName == tablename) || tsk.taskCfg.Topic == rec.Topic {
243 | //bufLength++
244 | atomic.AddInt64(&bufLength, 1)
245 | if e := tsk.Put(msg, traceId, flushFn); e != nil {
246 | atomic.StoreInt64(&done, items)
247 | err = e
248 | // decrement the in-flight record count for the failed record
249 | util.Rs.Dec(1)
250 | return false
251 | }
252 | }
253 | return true
254 | })
255 | }
256 | }()
257 | }
258 | wg.Wait()
259 |
260 | // record the offset range of this fetch per topic/partition
261 | // assume c.state was reset to stopped on error, so no further fetches get processed
262 | if err == nil {
263 | for _, f := range *fetches.Fetch {
264 | for i := range f.Topics {
265 | ft := &f.Topics[i]
266 | if recMap[ft.Topic] == nil {
267 | recMap[ft.Topic] = make(map[int32]*model.BatchRange)
268 | }
269 | for j := range ft.Partitions {
270 | fpr := ft.Partitions[j].Records
271 | if len(fpr) == 0 {
272 | continue
273 | }
274 | lastOff := fpr[len(fpr)-1].Offset
275 | firstOff := fpr[0].Offset
276 |
277 | or, ok := recMap[ft.Topic][ft.Partitions[j].Partition]
278 | if !ok {
279 | or = &model.BatchRange{Begin: math.MaxInt64, End: -1}
280 | recMap[ft.Topic][ft.Partitions[j].Partition] = or
281 | }
282 | if or.End < lastOff {
283 | or.End = lastOff
284 | }
285 | if or.Begin > firstOff {
286 | or.Begin = firstOff
287 | }
288 | }
289 | }
290 | }
291 | }
292 |
293 | if bufLength > int64(bufThreshold) {
294 | flushFn(traceId, "bufLength reached")
295 | ticker.Reset(time.Duration(c.grpConfig.FlushInterval) * time.Second)
296 | wait = false
297 | } else {
298 | wait = true
299 | }
300 | case <-ticker.C:
301 | flushFn(traceId, "ticker.C triggered")
302 | case <-c.ctx.Done():
303 | util.Logger.Info("stopped processing loop", zap.String("group", c.grpConfig.Name))
304 | return
305 | }
306 | }
307 | }
308 |
--------------------------------------------------------------------------------
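The offset bookkeeping in processFetch above keeps, per topic and partition, the smallest and largest offset seen since the last flush. A standalone sketch of that merge rule, reusing model.RecordMap and model.BatchRange exactly as the code does (the topic name and offsets are made up):

package main

import (
	"fmt"
	"math"

	"github.com/housepower/clickhouse_sinker/model"
)

// mergeOffset widens the recorded [Begin, End] range for one topic/partition.
func mergeOffset(m model.RecordMap, topic string, partition int32, first, last int64) {
	if m[topic] == nil {
		m[topic] = make(map[int32]*model.BatchRange)
	}
	or, ok := m[topic][partition]
	if !ok {
		or = &model.BatchRange{Begin: math.MaxInt64, End: -1}
		m[topic][partition] = or
	}
	if or.Begin > first {
		or.Begin = first
	}
	if or.End < last {
		or.End = last
	}
}

func main() {
	m := make(model.RecordMap)
	mergeOffset(m, "events", 0, 100, 120) // hypothetical first fetch
	mergeOffset(m, "events", 0, 121, 150) // hypothetical second fetch
	fmt.Println(m["events"][0].Begin, m["events"][0].End) // 100 150
}
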
/task/sharding.go:
--------------------------------------------------------------------------------
1 | package task
2 |
3 | import (
4 | "context"
5 | "fmt"
6 | "sync"
7 | "time"
8 |
9 | "github.com/cespare/xxhash/v2"
10 | "github.com/housepower/clickhouse_sinker/model"
11 | "github.com/housepower/clickhouse_sinker/pool"
12 | "github.com/housepower/clickhouse_sinker/statistics"
13 | "github.com/housepower/clickhouse_sinker/util"
14 | nanoid "github.com/matoous/go-nanoid/v2"
15 | "github.com/shopspring/decimal"
16 | "github.com/thanos-io/thanos/pkg/errors"
17 | "go.uber.org/zap"
18 | )
19 |
20 | type ShardingPolicy struct {
21 | shards int //number of clickhouse shards
22 | colSeq int //shardingKey column seq, 0 based
23 | stripe uint64 //=0 means hash, >0 means stripe size
24 | }
25 |
26 | func NewShardingPolicy(shardingKey string, shardingStripe uint64, dims []*model.ColumnWithType, shards int) (policy *ShardingPolicy, err error) {
27 | policy = &ShardingPolicy{stripe: shardingStripe, shards: shards}
28 | colSeq := -1
29 | for i, dim := range dims {
30 | if dim.Name == shardingKey {
31 | if dim.Type.Nullable || dim.Type.Array {
32 | err = errors.Newf("invalid shardingKey '%s', expect its type be numerical or string", shardingKey)
33 | return
34 | }
35 | colSeq = i
36 | switch dim.Type.Type {
37 | case model.Int8, model.Int16, model.Int32, model.Int64, model.UInt8, model.UInt16, model.UInt32, model.UInt64, model.Float32, model.Float64, model.Decimal, model.DateTime:
38 | //numerical
39 | if policy.stripe <= 0 {
40 | policy.stripe = uint64(1)
41 | }
42 | case model.String:
43 | //string
44 | policy.stripe = 0
45 | default:
46 | err = errors.Newf("invalid shardingKey '%s', expect its type be numerical or string", shardingKey)
47 | return
48 | }
49 | }
50 | }
51 | if colSeq < 0 {
52 | util.Logger.Info("shardingKey is __offset__, use offset as sharding key")
53 | if policy.stripe <= 0 {
54 | policy.stripe = uint64(1)
55 | }
56 | }
57 | policy.colSeq = colSeq
58 | return
59 | }
60 |
61 | func (policy *ShardingPolicy) Calc(row *model.Row, offset int64) (shard int, err error) {
62 | var val interface{}
63 | if policy.colSeq < 0 {
64 | val = offset
65 | } else {
66 | val = (*row)[policy.colSeq]
67 | }
68 | if policy.stripe > 0 {
69 | var valu64 uint64
70 | switch v := val.(type) {
71 | case int:
72 | valu64 = uint64(v)
73 | case int8:
74 | valu64 = uint64(v)
75 | case int16:
76 | valu64 = uint64(v)
77 | case int32:
78 | valu64 = uint64(v)
79 | case int64:
80 | valu64 = uint64(v)
81 | case uint:
82 | valu64 = uint64(v)
83 | case uint8:
84 | valu64 = uint64(v)
85 | case uint16:
86 | valu64 = uint64(v)
87 | case uint32:
88 | valu64 = uint64(v)
89 | case uint64:
90 | valu64 = v
91 | case float32:
92 | valu64 = uint64(v)
93 | case float64:
94 | valu64 = uint64(v)
95 | case decimal.Decimal:
96 | valu64 = uint64(v.IntPart())
97 | case time.Time:
98 | valu64 = uint64(v.Unix())
99 | default:
100 | err = errors.Newf("failed to convert %+v to integer", v)
101 | return
102 | }
103 | shard = int((valu64 / policy.stripe) % uint64(policy.shards))
104 | } else {
105 | var valu64 uint64
106 | switch v := val.(type) {
107 | case []byte:
108 | valu64 = xxhash.Sum64(v)
109 | case string:
110 | valu64 = xxhash.Sum64String(v)
111 | default:
112 | err = errors.Newf("failed to convert %+v to string", v)
113 | return
114 | }
115 | shard = int(valu64 % uint64(policy.shards))
116 | }
117 | return
118 | }
119 |
120 | type Sharder struct {
121 | service *Service
122 | policy *ShardingPolicy
123 | shards int
124 | mux sync.Mutex
125 | msgBuf []*model.Rows
126 | }
127 |
128 | func NewSharder(service *Service) (sh *Sharder, err error) {
129 | var policy *ShardingPolicy
130 | shards := pool.NumShard()
131 | taskCfg := service.taskCfg
132 | if policy, err = NewShardingPolicy(taskCfg.ShardingKey, taskCfg.ShardingStripe, service.clickhouse.Dims, shards); err != nil {
133 | return sh, errors.Wrapf(err, "error when creating sharding policy for task '%s'", service.taskCfg.Name)
134 | }
135 | sh = &Sharder{
136 | service: service,
137 | policy: policy,
138 | shards: shards,
139 | msgBuf: make([]*model.Rows, shards),
140 | }
141 | for i := 0; i < shards; i++ {
142 | rs := make(model.Rows, 0)
143 | sh.msgBuf[i] = &rs
144 | }
145 | return
146 | }
147 |
148 | func (sh *Sharder) Calc(row *model.Row, offset int64) (int, error) {
149 | return sh.policy.Calc(row, offset)
150 | }
151 |
152 | func (sh *Sharder) PutElement(msgRow *model.MsgRow) {
153 | sh.mux.Lock()
154 | defer sh.mux.Unlock()
155 | rows := sh.msgBuf[msgRow.Shard]
156 | *rows = append(*rows, msgRow.Row)
157 | statistics.ShardMsgs.WithLabelValues(sh.service.taskCfg.Name).Inc()
158 | }
159 |
160 | func (sh *Sharder) Flush(c context.Context, wg *sync.WaitGroup, rmap map[int32]*model.BatchRange, traceId string) {
161 | sh.mux.Lock()
162 | defer sh.mux.Unlock()
163 | select {
164 | case <-c.Done():
165 | util.Logger.Info("batch abandoned because of context canceled")
166 | return
167 | default:
168 | var msgCnt int
169 | util.Logger.Debug("flush records to ck")
170 | taskCfg := sh.service.taskCfg
171 | batchId, _ := nanoid.New()
172 | for i, rows := range sh.msgBuf {
173 | realSize := len(*rows)
174 | if realSize > 0 {
175 | msgCnt += realSize
176 | batch := &model.Batch{
177 | Rows: rows,
178 | BatchIdx: int64(i),
179 | GroupId: batchId,
180 | RealSize: realSize,
181 | Wg: wg,
182 | }
183 | batch.Wg.Add(1)
184 | sh.service.clickhouse.Send(batch, traceId)
185 | rs := make(model.Rows, 0, realSize)
186 | sh.msgBuf[i] = &rs
187 | }
188 | }
189 | if msgCnt > 0 {
190 | util.Logger.Info(fmt.Sprintf("created a batch group for task %v with %d shards, total messages %d", sh.service.taskCfg.Name, len(sh.msgBuf), msgCnt),
191 | zap.String("group", batchId),
192 | zap.Reflect("offsets", rmap))
193 | statistics.ShardMsgs.WithLabelValues(taskCfg.Name).Sub(float64(msgCnt))
194 | }
195 | }
196 | }
197 |
--------------------------------------------------------------------------------
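Calc above reduces to simple arithmetic: a numeric sharding key is divided by the stripe and taken modulo the shard count, while a string key is hashed with xxhash. A standalone illustration of those two branches, with made-up key values and shard count:

package main

import (
	"fmt"

	"github.com/cespare/xxhash/v2"
)

func main() {
	const shards, stripe = 4, 1000

	// Numeric sharding key with a stripe: values 0-999 land on shard 0, 1000-1999 on shard 1, ...
	userID := uint64(2345)
	fmt.Println(int((userID / stripe) % shards)) // 2

	// String sharding key: hash the value, then take it modulo the shard count.
	fmt.Println(int(xxhash.Sum64String("device-42") % shards))
}
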
/test/kafka.client.keystore.jks:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/housepower/clickhouse_sinker/2541b53d8e7a3a472c0c2c9394af2574f7c943dd/test/kafka.client.keystore.jks
--------------------------------------------------------------------------------
/test/kafka.client.truststore.jks:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/housepower/clickhouse_sinker/2541b53d8e7a3a472c0c2c9394af2574f7c943dd/test/kafka.client.truststore.jks
--------------------------------------------------------------------------------
/util/aes.go:
--------------------------------------------------------------------------------
1 | package util
2 |
3 | import (
4 | "crypto/aes"
5 | "encoding/hex"
6 | "strings"
7 | )
8 |
9 | var salt = "656f6974656b"
10 |
11 | // select hex(aes_encrypt("123456", unhex("656f6974656b"))); => E310E892E56801CED9ED98AA177F18E6
12 | func AesEncryptECB(origData string) string {
13 | if origData == "" {
14 | return origData
15 | }
16 | var encrypted []byte
17 | var o = []byte(origData)
18 | s, _ := hex.DecodeString(salt)
19 | cipher, _ := aes.NewCipher(generateKey(s))
20 | length := (len(o) + aes.BlockSize) / aes.BlockSize
21 | plain := make([]byte, length*aes.BlockSize)
22 | copy(plain, o)
23 | pad := byte(len(plain) - len(o))
24 | for i := len(o); i < len(plain); i++ {
25 | plain[i] = pad
26 | }
27 | encrypted = make([]byte, len(plain))
28 | for bs, be := 0, cipher.BlockSize(); bs <= len(o); bs, be = bs+cipher.BlockSize(), be+cipher.BlockSize() {
29 | cipher.Encrypt(encrypted[bs:be], plain[bs:be])
30 | }
31 | return strings.ToUpper(hex.EncodeToString(encrypted))
32 | }
33 |
34 | // select aes_decrypt(unhex("E310E892E56801CED9ED98AA177F18E6"), unhex("656f6974656b")); => 123456
35 | func AesDecryptECB(encrypted string) string {
36 | if encrypted == "" {
37 | return encrypted
38 | }
39 | var decrypted []byte
40 | h, _ := hex.DecodeString(encrypted)
41 | s, _ := hex.DecodeString(salt)
42 | cipher, _ := aes.NewCipher(generateKey(s))
43 | decrypted = make([]byte, len(h))
44 |
45 | for bs, be := 0, cipher.BlockSize(); bs < len(h); bs, be = bs+cipher.BlockSize(), be+cipher.BlockSize() {
46 | cipher.Decrypt(decrypted[bs:be], h[bs:be])
47 | }
48 |
49 | bEnd := searchByteSliceIndex(decrypted, 32)
50 | return string(decrypted[:bEnd])
51 | }
52 | func generateKey(key []byte) (genKey []byte) {
53 | genKey = make([]byte, 16)
54 | copy(genKey, key)
55 | for i := 16; i < len(key); {
56 | for j := 0; j < 16 && i < len(key); j, i = j+1, i+1 {
57 | genKey[j] ^= key[i]
58 | }
59 | }
60 | return genKey
61 | }
62 |
63 | func searchByteSliceIndex(bSrc []byte, b byte) int {
64 | for i := 0; i < len(bSrc); i++ {
65 | if bSrc[i] < b {
66 | return i
67 | }
68 | }
69 |
70 | return len(bSrc)
71 | }
72 |
--------------------------------------------------------------------------------
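A round-trip sketch of the two ECB helpers above. The expected hex output is the same "123456" example given in the comments of this file, which matches ClickHouse/MySQL aes_encrypt with the unhexed salt as key:

package main

import (
	"fmt"

	"github.com/housepower/clickhouse_sinker/util"
)

func main() {
	enc := util.AesEncryptECB("123456")
	fmt.Println(enc)                     // E310E892E56801CED9ED98AA177F18E6
	fmt.Println(util.AesDecryptECB(enc)) // 123456
}
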
/util/app.go:
--------------------------------------------------------------------------------
1 | /*Copyright [2019] housepower
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 |
16 | package util
17 |
18 | import (
19 | "fmt"
20 | "os"
21 | "os/signal"
22 | "syscall"
23 |
24 | "go.uber.org/zap"
25 | )
26 |
27 | func Run(appName string, initFunc, jobFunc, cleanupFunc func() error) {
28 | Logger.Info(appName + " initialization")
29 | if err := initFunc(); err != nil {
30 | Logger.Fatal(appName+" initialization failed", zap.Error(err))
31 | }
32 | Logger.Info(appName + " initialization completed")
33 | go func() {
34 | if err := jobFunc(); err != nil {
35 | Logger.Fatal(appName+" run failed", zap.Error(err))
36 | }
37 | }()
38 |
39 | sig := make(chan os.Signal, 1)
40 | done := make(chan struct{})
41 | signal.Notify(sig, os.Interrupt, syscall.SIGTERM)
42 | count := 0
43 | for {
44 | select {
45 | case s := <-sig:
46 | if count == 0 {
47 | Logger.Info(fmt.Sprintf("Received termination signal %s, start to clean", s))
48 | count++
49 | go func() {
50 | if err := cleanupFunc(); err != nil {
51 | Logger.Fatal(appName+" clean failed", zap.Error(err))
52 | }
53 | done <- struct{}{}
54 | }()
55 | } else {
56 | Logger.Info(fmt.Sprintf("This is the second termination signal %s. Immediately terminate.", s))
57 | return
58 | }
59 | case <-done:
60 | Logger.Info(appName + " clean completed, exit")
61 | return
62 | }
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
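A minimal sketch of how Run is meant to be driven. The init/job/cleanup closures are placeholders, and util.Logger is assumed to have been initialized beforehand; the real call site is cmd/clickhouse_sinker/main.go.

package main

import "github.com/housepower/clickhouse_sinker/util"

func main() {
	// Assumes util.Logger was initialized elsewhere; Run logs through it.
	util.Run("my_app",
		func() error { return nil }, // initFunc: load config, open connections, ...
		func() error { select {} },  // jobFunc: the long-running work; Run waits for SIGINT/SIGTERM
		func() error { return nil }, // cleanupFunc: invoked once on the first termination signal
	)
}
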
/util/common.go:
--------------------------------------------------------------------------------
1 | /*Copyright [2019] housepower
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 |
16 | package util
17 |
18 | import (
19 | "bytes"
20 | "crypto/tls"
21 | "crypto/x509"
22 | "fmt"
23 | "os"
24 | "os/exec"
25 | "path/filepath"
26 | "reflect"
27 | "strconv"
28 | "strings"
29 |
30 | "go.uber.org/zap"
31 | "go.uber.org/zap/zapcore"
32 | "gopkg.in/natefinch/lumberjack.v2"
33 |
34 | "github.com/google/uuid"
35 | "github.com/thanos-io/thanos/pkg/errors"
36 | )
37 |
38 | var (
39 | Logger *zap.Logger
40 | logAtomLevel zap.AtomicLevel
41 | logPaths []string
42 | logTrace bool
43 | )
44 |
45 | type CmdOptions struct {
46 | ShowVer bool
47 | LogLevel string // "debug", "info", "warn", "error", "dpanic", "panic", "fatal"
48 | LogPaths string // comma-separated paths. "stdout" means the console stdout
49 |
50 | // HTTPHost to bind to. If empty, outbound ip of machine
51 | // is automatically determined and used.
52 | HTTPHost string
53 | HTTPPort int // 0 means a randomly chosen port.
54 |
55 | PushGatewayAddrs string
56 | PushInterval int
57 | LocalCfgFile string
58 | NacosAddr string
59 | NacosNamespaceID string
60 | NacosGroup string
61 | NacosUsername string
62 | NacosPassword string
63 | NacosDataID string
64 | NacosServiceName string // participate in assignment management if not empty
65 | Encrypt string
66 |
67 | Credentials
68 | }
69 |
70 | type Credentials struct {
71 | ClickhouseUsername string
72 | ClickhousePassword string
73 | KafkaUsername string
74 | KafkaPassword string
75 | KafkaGSSAPIUsername string
76 | KafkaGSSAPIPassword string
77 | }
78 |
79 | // StringContains reports whether str is present in arr
80 | func StringContains(arr []string, str string) bool {
81 | for _, s := range arr {
82 | if s == str {
83 | return true
84 | }
85 | }
86 | return false
87 | }
88 |
89 | // GetSourceName returns the field name in message for the given ClickHouse column
90 | func GetSourceName(parser, name string) (sourcename string) {
91 | if parser == "gjson" {
92 | sourcename = strings.Replace(name, ".", "\\.", -1)
93 | } else {
94 | sourcename = name
95 | }
96 | return
97 | }
98 |
99 | // GetShift returns the smallest `shift` for which 1<<shift is no less than the given size
364 | if f1 > f2 {
365 | return 1
366 | } else if f1 < f2 {
367 | return -1
368 | }
369 | }
370 | return 0
371 | }
372 |
--------------------------------------------------------------------------------
/util/common_test.go:
--------------------------------------------------------------------------------
1 | package util
2 |
3 | import (
4 | "os"
5 | "testing"
6 |
7 | "github.com/stretchr/testify/assert"
8 | "github.com/stretchr/testify/require"
9 | )
10 |
11 | // keytool -genkeypair -alias certificatekey -dname "CN=hello world, OU=eoi, O=eoi, L=shanghai, ST=shanghai, C=CN" -keyalg RSA -validity 3650 -keystore kafka.client.keystore.jks
12 | // keytool -export -alias certificatekey -keystore kafka.client.keystore.jks -rfc -file selfsignedcert.cer
13 | // keytool -import -alias certificatekey -file selfsignedcert.cer -keystore kafka.client.truststore.jks
14 | func TestJksToPem(t *testing.T) {
15 | var err error
16 | var certPemPath, keyPemPath string
17 | jksPaths := []string{"../test/kafka.client.truststore.jks", "../test/kafka.client.keystore.jks"}
18 | jksPassword := "HelloWorld"
19 | for _, jksPath := range jksPaths {
20 | if _, err = os.Stat(jksPath); err != nil {
21 | require.Nil(t, err)
22 | }
23 | certPemPath, keyPemPath, err = JksToPem(jksPath, jksPassword, true)
24 | require.Nil(t, err, "err should be nothing")
25 | t.Logf("converted %s to %s, %s\n", jksPath, certPemPath, keyPemPath)
26 | }
27 | }
28 |
29 | func TestStringContains(t *testing.T) {
30 | tests := []struct {
31 | name string
32 | array []string
33 | result bool
34 | }{
35 | {
36 | name: "false",
37 | array: []string{""},
38 | result: false,
39 | },
40 | {
41 | name: "true",
42 | array: []string{"true", "hi"},
43 | result: true,
44 | },
45 | }
46 | for _, tt := range tests {
47 | t.Run(tt.name, func(t *testing.T) {
48 | result := StringContains(tt.array, tt.name)
49 | require.Equal(t, result, tt.result)
50 | })
51 | }
52 | }
53 |
54 | func TestGetSourceName(t *testing.T) {
55 | tests := []struct {
56 | parser, name, result string
57 | }{
58 | {
59 | parser: "gjson",
60 | name: "a.b.c",
61 | result: "a\\.b\\.c",
62 | },
63 | {
64 | parser: "csv",
65 | name: "a.b.c",
66 | result: "a.b.c",
67 | },
68 | }
69 | for _, tt := range tests {
70 | t.Run(tt.name, func(t *testing.T) {
71 | result := GetSourceName(tt.parser, tt.name)
72 | require.Equal(t, result, tt.result)
73 | })
74 | }
75 | }
76 |
77 | func TestMisc(t *testing.T) {
78 | require.Equal(t, uint(6), GetShift(64))
79 | require.Equal(t, uint(7), GetShift(65))
80 |
81 | tests := []struct {
82 | parser, name, result string
83 | }{
84 | {
85 | parser: "gjson",
86 | name: "a.b.c",
87 | result: "a\\.b\\.c",
88 | },
89 | {
90 | parser: "csv",
91 | name: "a.b.c",
92 | result: "a.b.c",
93 | },
94 | }
95 | for _, tt := range tests {
96 | t.Run(tt.name, func(t *testing.T) {
97 | result := GetSourceName(tt.parser, tt.name)
98 | require.Equal(t, result, tt.result)
99 | })
100 | }
101 | }
102 |
103 | func TestSetValue(t *testing.T) {
104 | a := 5
105 | TrySetValue(&a, 10)
106 | assert.Equal(t, a, 5)
107 | var b string
108 | TrySetValue(&b, "hello")
109 | assert.Equal(t, b, "hello")
110 | TrySetValue(&b, "world")
111 | assert.Equal(t, b, "hello")
112 |
113 | var c uint32
114 | TrySetValue(&c, uint32(100))
115 | assert.Equal(t, c, uint32(100))
116 | TrySetValue(&c, uint32(500))
117 | assert.Equal(t, c, uint32(100))
118 |
119 | var d float32
120 | TrySetValue(&d, float32(6.998))
121 | assert.Equal(t, d, float32(6.998))
122 | TrySetValue(&d, float32(3.14))
123 | assert.Equal(t, d, float32(6.998))
124 |
125 | var e int
126 | TrySetValue(&e, 25)
127 | assert.Equal(t, e, 25)
128 |
129 | // f := 0
130 | // TrySetValue(&f, 25)
131 | // assert.Equal(t, f, 0)
132 |
133 | }
134 |
--------------------------------------------------------------------------------
/util/gosypt.go:
--------------------------------------------------------------------------------
1 | package util
2 |
3 | import (
4 | "reflect"
5 | "strings"
6 |
7 | "github.com/pkg/errors"
8 | )
9 |
10 | const (
11 | GosyptPrefixDefault = "ENC("
12 | GosyptSuffxiDefault = ")"
13 | GosyptAlgorithm = "AESWITHHEXANDBASE64"
14 | )
15 |
16 | /* Gosypt (Golang simple encrypt) simulates jasypt-style ENC(...) property decryption */
17 | type Gosypt struct {
18 | prefix string
19 | suffix string
20 | algorithm string
21 | }
22 |
23 | var Gsypt = &Gosypt{
24 | prefix: GosyptPrefixDefault,
25 | suffix: GosyptSuffxiDefault,
26 | algorithm: GosyptAlgorithm,
27 | }
28 |
29 | func (gsypt *Gosypt) ensurePassword(password string) string {
30 | if !strings.HasPrefix(password, gsypt.prefix) || !strings.HasSuffix(password, gsypt.suffix) {
31 | return password
32 | }
33 | passwd := strings.TrimSuffix(strings.TrimPrefix(password, gsypt.prefix), gsypt.suffix)
34 | if gsypt.algorithm == GosyptAlgorithm {
35 | return AesDecryptECB(passwd)
36 | }
37 | return password
38 | }
39 |
40 | func (gsypt *Gosypt) SetAttribution(prefix, suffix, algorithm string) {
41 | gsypt.prefix = prefix
42 | gsypt.suffix = suffix
43 | gsypt.algorithm = algorithm
44 | }
45 |
46 | func (gsypt *Gosypt) Unmarshal(v interface{}) error {
47 | rt := reflect.TypeOf(v)
48 | rv := reflect.ValueOf(v)
49 |
50 | if rt.Kind() != reflect.Ptr {
51 | return errors.New("invalid args, expect ptr")
52 | }
53 |
54 | for rt.Kind() == reflect.Ptr {
55 | rt = rt.Elem()
56 | rv = rv.Elem()
57 | }
58 |
59 | if rt.Kind() == reflect.Struct {
60 | v, err := gsypt.structHandle(rt, rv)
61 | if err != nil {
62 | return err
63 | }
64 | rv.Set(v)
65 | } else if rt.Kind() == reflect.Slice || rt.Kind() == reflect.Array {
66 | v, err := gsypt.sliceHandle(rt, rv)
67 | if err != nil {
68 | return err
69 | }
70 | rv.Set(v)
71 | } else if rt.Kind() == reflect.Map {
72 | v, err := gsypt.mapHandle(rt, rv)
73 | if err != nil {
74 | return err
75 | }
76 | rv.Set(v)
77 | } else if rt.Kind() == reflect.Interface {
78 | v, err := gsypt.interfaceHandle(rt, rv)
79 | if err != nil {
80 | return err
81 | }
82 | rv.Set(v)
83 | } else if rt.Kind() == reflect.String {
84 | rv.Set(gsypt.stringHandle(rv))
85 | }
86 |
87 | return nil
88 | }
89 |
90 | func (gsypt *Gosypt) sliceHandle(rt reflect.Type, rv reflect.Value) (reflect.Value, error) {
91 | for j := 0; j < rv.Len(); j++ {
92 | if rt.Elem().Kind() == reflect.String {
93 | rv.Index(j).Set(gsypt.stringHandle(rv.Index(j)))
94 | } else {
95 | if err := gsypt.Unmarshal(rv.Index(j).Addr().Interface()); err != nil {
96 | return rv, err
97 | }
98 | }
99 | }
100 | return rv, nil
101 | }
102 |
103 | func (gsypt *Gosypt) mapHandle(rt reflect.Type, rv reflect.Value) (reflect.Value, error) {
104 | for _, k := range rv.MapKeys() {
105 | key := k.Convert(rv.Type().Key())
106 | if rt.Elem().Kind() == reflect.String {
107 | v := gsypt.ensurePassword(rv.MapIndex(key).String())
108 | rv.SetMapIndex(key, reflect.ValueOf(v))
109 | } else {
110 | v := rv.MapIndex(key).Interface()
111 | if err := gsypt.Unmarshal(&v); err != nil {
112 | return rv, err
113 | }
114 | rv.SetMapIndex(key, reflect.ValueOf(v))
115 | }
116 | }
117 | return rv, nil
118 | }
119 |
120 | func (gsypt *Gosypt) interfaceHandle(rt reflect.Type, rv reflect.Value) (reflect.Value, error) {
121 | //todo
122 | return rv, nil
123 | }
124 |
125 | func (gsypt *Gosypt) structHandle(rt reflect.Type, rv reflect.Value) (reflect.Value, error) {
126 | for i := 0; i < rt.NumField(); i++ {
127 | rtf := rt.Field(i)
128 | rvf := rv.Field(i)
129 |
130 | rtt := rtf.Type
131 | for rtt.Kind() == reflect.Ptr {
132 | rtt = rtt.Elem()
133 | }
134 |
135 | if rtt.Kind() == reflect.String {
136 | rv.Field(i).Set(gsypt.stringHandle(rvf))
137 | } else {
138 | if err := gsypt.Unmarshal(rvf.Addr().Interface()); err != nil {
139 | return rv, err
140 | }
141 | }
142 | }
143 | return rv, nil
144 | }
145 |
146 | func (gsypt *Gosypt) stringHandle(rv reflect.Value) reflect.Value {
147 | rv.SetString(gsypt.ensurePassword(rv.String()))
148 | return rv
149 | }
150 |
--------------------------------------------------------------------------------
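A sketch of how Gsypt.Unmarshal is used: any string field, at any nesting depth, whose value is wrapped in ENC(...) is replaced in place by its AES-decrypted plaintext. The DemoConfig struct is illustrative only; the ciphertext is the "123456" example from util/aes.go.

package main

import (
	"fmt"

	"github.com/housepower/clickhouse_sinker/util"
)

type DemoConfig struct {
	Username string
	Password string
}

func main() {
	cfg := DemoConfig{
		Username: "sinker",
		Password: "ENC(E310E892E56801CED9ED98AA177F18E6)", // jasypt-style wrapped ciphertext
	}
	if err := util.Gsypt.Unmarshal(&cfg); err != nil {
		panic(err)
	}
	fmt.Println(cfg.Password) // 123456
}
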
/util/net.go:
--------------------------------------------------------------------------------
1 | /*Copyright [2019] housepower
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 |
16 | package util
17 |
18 | import (
19 | "fmt"
20 | "net"
21 |
22 | "github.com/thanos-io/thanos/pkg/errors"
23 | )
24 |
25 | const (
26 | HttpPortBase = 10000
27 | MaxPort = 65535
28 | )
29 |
30 | func GetIP4Byname(host string) (ips []string, err error) {
31 | addrs, err := net.LookupIP(host)
32 | if err != nil {
33 | return
34 | }
35 | ips = make([]string, len(addrs))
36 | for i, addr := range addrs {
37 | if ipv4 := addr.To4(); ipv4 != nil {
38 | ips[i] = ipv4.String()
39 | }
40 | }
41 | return
42 | }
43 |
44 | // GetOutboundIP gets preferred outbound ip of this machine
45 | // https://stackoverflow.com/questions/23558425/how-do-i-get-the-local-ip-address-in-go.
46 | func GetOutboundIP() (ip net.IP, err error) {
47 | var conn net.Conn
48 | if conn, err = net.Dial("udp", "8.8.8.8:80"); err != nil {
49 | err = errors.Wrapf(err, "")
50 | return
51 | }
52 | defer conn.Close()
53 | localAddr, _ := conn.LocalAddr().(*net.UDPAddr)
54 | ip = localAddr.IP
55 | return
56 | }
57 |
58 | // GetSpareTCPPort finds a spare TCP port.
59 | func GetSpareTCPPort(portBegin int) int {
60 | for port := portBegin; port <= MaxPort; port++ {
61 | if err := testListenOnPort(port); err == nil {
62 | return port
63 | }
64 | }
65 | return 0
66 | }
67 |
68 | // https://stackoverflow.com/questions/50428176/how-to-get-ip-and-port-from-net-addr-when-it-could-be-a-net-udpaddr-or-net-tcpad
69 | func GetNetAddrPort(addr net.Addr) (port int) {
70 | switch addr := addr.(type) {
71 | case *net.UDPAddr:
72 | port = addr.Port
73 | case *net.TCPAddr:
74 | port = addr.Port
75 | }
76 | return
77 | }
78 |
79 | func testListenOnPort(port int) error {
80 | addr := fmt.Sprintf(":%d", port)
81 | ln, err := net.Listen("tcp", addr)
82 | if err != nil {
83 | return err
84 | }
85 | ln.Close() //nolint:errcheck
86 | return nil
87 | }
88 |
--------------------------------------------------------------------------------
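A small sketch exercising the helpers above; the printed address depends on the machine, and HttpPortBase is the constant defined in this file.

package main

import (
	"fmt"

	"github.com/housepower/clickhouse_sinker/util"
)

func main() {
	ip, err := util.GetOutboundIP() // dials 8.8.8.8:80 over UDP only to learn the local address
	if err != nil {
		panic(err)
	}
	port := util.GetSpareTCPPort(util.HttpPortBase) // first free TCP port at or above 10000
	fmt.Printf("listening candidate: %s:%d\n", ip, port)
}
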
/util/recordpoolsize.go:
--------------------------------------------------------------------------------
1 | package util
2 |
3 | import "sync/atomic"
4 |
5 | type RecordSize struct {
6 | poolSize int64
7 | realSize int64
8 | }
9 |
10 | func (rs *RecordSize) SetPoolSize(size int64) {
11 | rs.poolSize = size
12 | }
13 |
14 | func (rs *RecordSize) Inc(size int64) {
15 | atomic.AddInt64(&rs.realSize, size)
16 | }
17 |
18 | func (rs *RecordSize) Reset() {
19 | atomic.StoreInt64(&rs.realSize, 0)
20 | }
21 |
22 | func (rs *RecordSize) Dec(size int64) {
23 | atomic.AddInt64(&rs.realSize, size*(-1))
24 | }
25 |
26 | func (rs *RecordSize) Get() int64 {
27 | return atomic.LoadInt64(&rs.realSize)
28 | }
29 |
30 | func (rs *RecordSize) Allow() bool {
31 | realSize := atomic.LoadInt64(&rs.realSize)
32 | return realSize < rs.poolSize
33 | }
34 |
35 | var Rs RecordSize
36 |
--------------------------------------------------------------------------------
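RecordSize acts as a soft cap on in-flight records: Inc when records are fetched, Dec when they are flushed or dropped, and Allow gates further fetching. A tiny sketch with a made-up pool size:

package main

import (
	"fmt"

	"github.com/housepower/clickhouse_sinker/util"
)

func main() {
	util.Rs.SetPoolSize(1000) // hypothetical cap

	util.Rs.Inc(600)             // records pulled from Kafka
	fmt.Println(util.Rs.Allow()) // true: 600 < 1000

	util.Rs.Inc(600)
	fmt.Println(util.Rs.Allow()) // false: 1200 >= 1000

	util.Rs.Dec(800)                            // records flushed to ClickHouse
	fmt.Println(util.Rs.Allow(), util.Rs.Get()) // true 400
}
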
/util/workerpool.go:
--------------------------------------------------------------------------------
1 | package util
2 |
3 | import (
4 | "errors"
5 | "sync"
6 | "sync/atomic"
7 | )
8 |
9 | const (
10 | StateRunning uint32 = 0
11 | StateStopped uint32 = 1
12 | )
13 |
14 | // WorkerPool is a blocked worker pool inspired by https://github.com/gammazero/workerpool/
15 | type WorkerPool struct {
16 | inNums uint64
17 | outNums uint64
18 | curWorkers int
19 |
20 | maxWorkers int
21 | workChan chan func()
22 |
23 | taskDone *sync.Cond
24 | state uint32
25 | sync.Mutex
26 | }
27 |
28 | // NewWorkerPool creates and starts a pool of worker goroutines.
29 | func NewWorkerPool(maxWorkers int, queueSize int) *WorkerPool {
30 | if maxWorkers <= 0 {
31 | Logger.Fatal("WorkerNum must be greater than zero")
32 | }
33 | if queueSize <= 0 {
34 | Logger.Fatal("queueSize must be greater than zero")
35 | }
36 |
37 | w := &WorkerPool{
38 | maxWorkers: maxWorkers,
39 | workChan: make(chan func(), queueSize),
40 | }
41 |
42 | w.taskDone = sync.NewCond(w)
43 |
44 | w.start()
45 | return w
46 | }
47 |
48 | var (
49 | // ErrStopped when stopped
50 | ErrStopped = errors.New("WorkerPool already stopped")
51 | )
52 |
53 | func (w *WorkerPool) wokerFunc() {
54 | w.Lock()
55 | w.curWorkers++
56 | w.Unlock()
57 | LOOP:
58 | for fn := range w.workChan {
59 | fn()
60 | var needQuit bool
61 | w.Lock()
62 | w.outNums++
63 | if w.inNums == w.outNums {
64 | w.taskDone.Signal()
65 | }
66 | if w.curWorkers > w.maxWorkers {
67 | w.curWorkers--
68 | needQuit = true
69 | }
70 | w.Unlock()
71 | if needQuit {
72 | break LOOP
73 | }
74 | }
75 | }
76 |
77 | func (w *WorkerPool) start() {
78 | for i := 0; i < w.maxWorkers; i++ {
79 | go w.wokerFunc()
80 | }
81 | }
82 |
83 | // Resize ensures the worker count matches the expected one.
84 | func (w *WorkerPool) Resize(maxWorkers int) {
85 | w.Lock()
86 | defer w.Unlock()
87 | for i := 0; i < maxWorkers-w.maxWorkers; i++ {
88 | go w.wokerFunc()
89 | }
90 | w.maxWorkers = maxWorkers
91 | // if maxWorkers