├── Makefile
├── README.md
├── cli.go
├── cluster.go
├── config
│   ├── influxdb.conf
│   └── yaml.go
├── config_example
│   ├── conf
│   ├── db_sharding
│   ├── ser_status
│   ├── slot_conf
│   └── udp_server
├── http
│   ├── handle.go
│   ├── routes.go
│   └── service.go
├── introduction
├── meta
│   ├── cluster.go
│   ├── dbSharding.go
│   ├── influx.go
│   ├── slot.go
│   ├── udp.go
│   └── zk.go
├── midd
│   ├── operator.go
│   └── query.go
├── test
│   ├── 1.py
│   └── ben.go
└── utils
    ├── converters.go
    └── crc16.go

/Makefile:
--------------------------------------------------------------------------------
1 | # Go parameters
2 | GOCMD=go
3 | GOBUILD=$(GOCMD) build
4 | GOCLEAN=$(GOCMD) clean
5 | GOTEST=$(GOCMD) test
6 | GOGET=$(GOCMD) get
7 | BINARY_NAME=mybinary
8 | 
9 | cluster:
10 | go build cluster.go
11 | 
12 | cli:
13 | go build cli.go
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 自己模仿codis原理做的 influxdb 集群方案,网上都没搜到有类似想法的,但是这个方案可行
2 | An influxdb clustering scheme built by imitating the principle of codis. I have not found any similar idea on the Internet, but this scheme is absolutely feasible, and it pays tribute to codis.
3 | 
4 | 新特性:
5 | 1 同张图上显示 timeshift n hours的数据 和当前数据,可以同张图上 对比历史数据
6 | 对比https://github.com/thedrhax-dockerfiles/influxdb-timeshift-proxy
7 | 原生的influxdb不支持timeshift函数吧,我们这里可以实现
8 | 
9 | 2 支持udp server, influxdb原始udp server 是单个goroutine接收数据,改为多goroutine监听同一个端口 增加并发处理程度,udp-server配置来源zk,不需要重启proxy更改配置
10 | 
11 | New features:
12 | 
13 | 1 The timeshift n hours data and the current data are displayed on the same graph, so current values can be compared with historical data on a single chart. Compare https://github.com/thedrhax-dockerfiles/influxdb-timeshift-proxy . Native influxdb does not support a timeshift function; this proxy implements one.
14 | 
15 | UDP servers are also supported: the stock influxdb UDP server receives data in a single goroutine, while here multiple goroutines listen on the same port to increase write concurrency, and the udp-server configuration comes from zk, so it can be changed without restarting the proxy.
16 | 2 利用codis原理 sharding measurement to 不同的slot, 不同的slot属于不同 influxdb实例,从而达到打散数据的目的,
17 | 支持 influxdb 多个备份同时写(多备份节点)
18 | 
19 | Using the codis principle, measurements are sharded to different slots, and different slots belong to different influxdb instances, so that the data is spread out. Writing each point to multiple influxdb replicas at the same time (multiple backup nodes) is supported.
20 | 
21 | 3 redirect the query by the measurement in the query-sql and merge the results together
22 | 
23 | 4 支持用measurement单独sharding和 measurement + 指定tags sharding
24 | 
25 | Sharding by measurement alone and by measurement + specified tags are both supported.
26 | 
27 | 当前实现的 1 2 3 4 点已经完全满足influxdb 打散数据的作用,而且线上集群跑的很开心,最大的集群最大速率30~40W point/ per sec, 多个proxy实例前边放了http的LB,多个proxy后边都是一样配置的influxdb集群
28 | 
29 | Points 1 2 3 4 as implemented are already enough to spread influxdb data across instances, and the online clusters run happily; the largest cluster peaks at 300k~400k (30~40W) points per second. An http LB sits in front of multiple proxy instances, and behind the proxies is an identically configured influxdb cluster.
30 | 
31 | 查询是拿 query中的 measurement来 获取slot从而 redirect 请求的, 查询语法和 写入数据语法 和 原生的保持一致,所以已让influxdb+grafana 用户 平滑迁移到 集群方案来了
32 | 
33 | Queries take the measurement in the query to compute the slot and redirect the request. The query syntax and the write syntax are identical to native influxdb, so influxdb+grafana users can migrate to the cluster solution smoothly.
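To make the slot mechanism concrete, here is a minimal sketch (not the project's actual code) of how a measurement could be mapped to one of the 16384 slots. It assumes a plain CRC16 (XModem) % 16384 mapping, in the spirit of the 0-16383 ranges in config_example/slot_conf and the utils/crc16.go helper; the real CRC variant and the function names in meta/slot.go and utils/crc16.go may differ.

```go
package main

import "fmt"

// 16384 slots (0-16383), matching the ranges used in config_example/slot_conf.
const totalSlots = 16384

// crc16 is a textbook CRC16-CCITT (XModem) checksum, used here only for
// illustration; the repository's utils/crc16.go may implement a different variant.
func crc16(data []byte) uint16 {
	var crc uint16
	for _, b := range data {
		crc ^= uint16(b) << 8
		for i := 0; i < 8; i++ {
			if crc&0x8000 != 0 {
				crc = crc<<1 ^ 0x1021
			} else {
				crc <<= 1
			}
		}
	}
	return crc
}

// slotFor maps a sharding key (the measurement name, or measurement + tag values
// when db_sharding is configured) to a slot number.
func slotFor(key []byte) uint16 {
	return crc16(key) % totalSlots
}

func main() {
	for _, m := range []string{"cpu_idle", "cpu_used", "nginx_qps"} {
		// Every proxy computes the same slot for the same key, then writes the
		// point to every influxdb instance whose slot_conf range contains it.
		fmt.Printf("measurement %-10s -> slot %d\n", m, slotFor([]byte(m)))
	}
}
```

With a mapping like this, both the write path and the query path only need slot_conf and ser_status to decide which influxdb instances (including replicas) a measurement lives on.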
34 | 35 | 看到的喜欢的 有 兴趣 一起开发哇,还有其他一些特性在增加中 36 | 37 | -------------------------------------------------------------------------------- /cli.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | //"bytes" 5 | //"encoding/json" 6 | "flag" 7 | "fmt" 8 | //"net/http" 9 | "os" 10 | "github.com/infP/config" 11 | "github.com/infP/meta" 12 | "encoding/json" 13 | //"io/ioutil" 14 | //"os/signal" 15 | "io" 16 | "strings" 17 | log "github.com/Sirupsen/logrus" 18 | "bufio" 19 | ) 20 | 21 | 22 | var ( 23 | ata_path string 24 | conf = config.Conf{} 25 | _ = fmt.Sprintf("") 26 | ) 27 | 28 | func init() { 29 | 30 | log.SetFormatter(&log.TextFormatter{ 31 | TimestampFormat: "2006-01-02 15:04:05", 32 | ForceColors: true, 33 | QuoteEmptyFields: true, 34 | FullTimestamp: true, 35 | }) 36 | 37 | log.SetOutput(os.Stdout) 38 | log.SetLevel(log.DebugLevel) 39 | } 40 | 41 | 42 | func fetchContext(name string) []string { 43 | fi, err := os.Open(name) 44 | if err != nil { 45 | log.Info("open slot conf error:", err) 46 | return []string{} 47 | } 48 | 49 | defer fi.Close() 50 | 51 | ret := []string{} 52 | 53 | br := bufio.NewReader(fi) 54 | for { 55 | a, _, c := br.ReadLine() 56 | if c == io.EOF { 57 | break 58 | } 59 | ret = append(ret, strings.Trim(string(a),"\n")) 60 | } 61 | 62 | return ret 63 | } 64 | 65 | func main() { 66 | 67 | ata_path := flag.String("conf","","path") 68 | slot_path := flag.String("slot","","slot") 69 | 70 | udp_path := flag.String("udp","","udp") 71 | 72 | ser_status := flag.String("ser_status","","ser_status") 73 | migra_slot := flag.String("migra_slot","","migra_slot") 74 | 75 | db_sharding := flag.String("db_sharding","","db_sharding") 76 | 77 | //poc code 78 | trigger := flag.String("trigger","","trigger") 79 | 80 | flag.Parse() 81 | 82 | zk := meta.Zk{} 83 | 84 | conf := config.Conf{} 85 | 86 | if *ata_path != "" { 87 | conf.Get_conf(*ata_path) 88 | 89 | zk.Init( conf.Zk ) 90 | 91 | } 92 | 93 | if *slot_path != "" { 94 | ret := fetchContext(*slot_path) 95 | value,_ := json.Marshal(ret) 96 | zk.Set( conf.Zk_path, value ) 97 | } 98 | 99 | if *udp_path != "" { 100 | ret := fetchContext(*udp_path) 101 | value,_ := json.Marshal(ret) 102 | zk.Set( conf.Udp_path, value ) 103 | } 104 | 105 | if *ser_status != "" { 106 | ret := fetchContext(*ser_status) 107 | value,_ := json.Marshal(ret) 108 | 109 | zk.Set( conf.Ser_status, value ) 110 | } 111 | 112 | if *migra_slot != "" { 113 | ret := fetchContext(*migra_slot) 114 | value,_ := json.Marshal(ret) 115 | zk.Set( conf.Migra_slot, value ) 116 | } 117 | 118 | if *db_sharding != "" { 119 | ret := fetchContext(*db_sharding) 120 | value,_ := json.Marshal(ret) 121 | zk.Set( conf.Db_shard, value ) 122 | } 123 | 124 | if *trigger != "" { 125 | ret := fetchContext(*trigger) 126 | value,_ := json.Marshal(ret) 127 | zk.Set( conf.Trigger, value ) 128 | } 129 | 130 | if *ata_path != "" { 131 | zk.Close() 132 | } 133 | 134 | } 135 | 136 | 137 | 138 | -------------------------------------------------------------------------------- /cluster.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | //"bytes" 5 | //"encoding/json" 6 | "flag" 7 | "fmt" 8 | //"net/http" 9 | "os" 10 | "github.com/infP/config" 11 | "github.com/infP/http" 12 | //"errors" 13 | //"io/ioutil" 14 | "os/signal" 15 | "syscall" 16 | //"time" 17 | log "github.com/Sirupsen/logrus" 18 | _"net/http/pprof" 19 | ) 20 | 21 | 22 | /*** 23 | zk: 24 | 1 slot mapping 25 | 2 
servers list 26 | 3 db lists 27 | ***/ 28 | var ( 29 | ata_path string 30 | conf = config.Conf{} 31 | ) 32 | 33 | func init() { 34 | 35 | log.SetFormatter(&log.TextFormatter{ 36 | TimestampFormat: "2006-01-02 15:04:05", 37 | ForceColors: true, 38 | QuoteEmptyFields: true, 39 | FullTimestamp: true, 40 | }) 41 | 42 | log.SetOutput(os.Stdout) 43 | //log.SetLevel(log.DebugLevel) 44 | } 45 | 46 | func run(conf *config.Conf) { 47 | 48 | apiSrv, err := http.NewApiService(conf) 49 | if err != nil { 50 | log.Fatalln("Failed to start api service: %v ", err) 51 | } 52 | 53 | go apiSrv.Run() 54 | return 55 | } 56 | 57 | func main() { 58 | 59 | ata_path := flag.String("conf","/root/redis_conf/redis_conf","path") 60 | flag.Parse() 61 | 62 | conf := config.Conf{} 63 | conf.Get_conf(*ata_path) 64 | 65 | 66 | sigCh := make(chan os.Signal, 1) 67 | signal.Notify(sigCh, syscall.SIGHUP) 68 | 69 | run( &conf ) 70 | 71 | for { 72 | select { 73 | case <-sigCh: 74 | fmt.Fprintf(os.Stdout, "got a hup signal \n") 75 | } 76 | } 77 | } 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /config/influxdb.conf: -------------------------------------------------------------------------------- 1 | ### Welcome to the InfluxDB configuration file. 2 | 3 | # The values in this file override the default values used by the system if 4 | # a config option is not specified. The commented out lines are the configuration 5 | # field and the default value used. Uncommenting a line and changing the value 6 | # will change the value used at runtime when the process is restarted. 7 | 8 | # Once every 24 hours InfluxDB will report usage data to usage.influxdata.com 9 | # The data includes a random ID, os, arch, version, the number of series and other 10 | # usage data. No data from user databases is ever transmitted. 11 | # Change this option to true to disable reporting. 12 | reporting-disabled = false 13 | 14 | # Bind address to use for the RPC service for backup and restore. 15 | bind-address = "127.0.0.1:8089" 16 | 17 | ### 18 | ### [meta] 19 | ### 20 | ### Controls the parameters for the Raft consensus group that stores metadata 21 | ### about the InfluxDB cluster. 22 | ### 23 | 24 | [meta] 25 | # Where the metadata/raft database is stored 26 | dir = "/data/influxdb/meta" 27 | 28 | # Automatically create a default retention policy when creating a database. 29 | # retention-autocreate = true 30 | 31 | # If log messages are printed for the meta service 32 | # logging-enabled = true 33 | 34 | ### 35 | ### [data] 36 | ### 37 | ### Controls where the actual shard data for InfluxDB lives and how it is 38 | ### flushed from the WAL. "dir" may need to be changed to a suitable place 39 | ### for your system, but the WAL settings are an advanced configuration. The 40 | ### defaults should work for most systems. 41 | ### 42 | 43 | [data] 44 | # The directory where the TSM storage engine stores TSM files. 45 | dir = "/data/influxdb/data" 46 | 47 | # The directory where the TSM storage engine stores WAL files. 48 | wal-dir = "/data/influxdb/wal" 49 | 50 | # The amount of time that a write will wait before fsyncing. A duration 51 | # greater than 0 can be used to batch up multiple fsync calls. This is useful for slower 52 | # disks or when WAL write contention is seen. A value of 0s fsyncs every write to the WAL. 53 | # Values in the range of 0-100ms are recommended for non-SSD disks. 54 | # wal-fsync-delay = "0s" 55 | 56 | 57 | # The type of shard index to use for new shards. 
The default is an in-memory index that is 58 | # recreated at startup. A value of "tsi1" will use a disk based index that supports higher 59 | # cardinality datasets. 60 | index-version = "tsi1" 61 | 62 | # Trace logging provides more verbose output around the tsm engine. Turning 63 | # this on can provide more useful output for debugging tsm engine issues. 64 | # trace-logging-enabled = false 65 | 66 | # Whether queries should be logged before execution. Very useful for troubleshooting, but will 67 | # log any sensitive data contained within a query. 68 | # query-log-enabled = true 69 | 70 | # Settings for the TSM engine 71 | 72 | # CacheMaxMemorySize is the maximum size a shard's cache can 73 | # reach before it starts rejecting writes. 74 | cache-max-memory-size = 1048576000 75 | # CacheSnapshotMemorySize is the size at which the engine will 76 | # snapshot the cache and write it to a TSM file, freeing up memory 77 | cache-snapshot-memory-size = 26214400 78 | 79 | # CacheSnapshotWriteColdDuration is the length of time at 80 | # which the engine will snapshot the cache and write it to 81 | # a new TSM file if the shard hasn't received writes or deletes 82 | # cache-snapshot-write-cold-duration = "10m" 83 | 84 | # CompactFullWriteColdDuration is the duration at which the engine 85 | # will compact all TSM files in a shard if it hasn't received a 86 | # write or delete 87 | # compact-full-write-cold-duration = "4h" 88 | 89 | # The maximum number of concurrent full and level compactions that can run at one time. A 90 | # value of 0 results in runtime.GOMAXPROCS(0) used at runtime. This setting does not apply 91 | # to cache snapshotting. 92 | # max-concurrent-compactions = 0 93 | 94 | # The maximum series allowed per database before writes are dropped. This limit can prevent 95 | # high cardinality issues at the database level. This limit can be disabled by setting it to 96 | # 0. 97 | max-series-per-database = 0 98 | 99 | # The maximum number of tag values per tag that are allowed before writes are dropped. This limit 100 | # can prevent high cardinality tag values from being written to a measurement. This limit can be 101 | # disabled by setting it to 0. 102 | max-values-per-tag = 0 103 | 104 | ### 105 | ### [coordinator] 106 | ### 107 | ### Controls the clustering service configuration. 108 | ### 109 | 110 | [coordinator] 111 | # The default time a write request will wait until a "timeout" error is returned to the caller. 112 | write-timeout = "10s" 113 | 114 | # The maximum number of concurrent queries allowed to be executing at one time. If a query is 115 | # executed and exceeds this limit, an error is returned to the caller. This limit can be disabled 116 | # by setting it to 0. 117 | # max-concurrent-queries = 0 118 | 119 | # The maximum time a query will is allowed to execute before being killed by the system. This limit 120 | # can help prevent run away queries. Setting the value to 0 disables the limit. 121 | query-timeout = "30s" 122 | 123 | # The time threshold when a query will be logged as a slow query. This limit can be set to help 124 | # discover slow or resource intensive queries. Setting the value to 0 disables the slow query logging. 125 | log-queries-after = "15s" 126 | 127 | # The maximum number of points a SELECT can process. A value of 0 will make 128 | # the maximum point count unlimited. This will only be checked every 10 seconds so queries will not 129 | # be aborted immediately when hitting the limit. 
130 | # max-select-point = 0 131 | 132 | # The maximum number of series a SELECT can run. A value of 0 will make the maximum series 133 | # count unlimited. 134 | # max-select-series = 0 135 | 136 | # The maxium number of group by time bucket a SELECT can create. A value of zero will max the maximum 137 | # number of buckets unlimited. 138 | # max-select-buckets = 0 139 | 140 | ### 141 | ### [retention] 142 | ### 143 | ### Controls the enforcement of retention policies for evicting old data. 144 | ### 145 | 146 | [retention] 147 | # Determines whether retention policy enforcement enabled. 148 | # enabled = true 149 | 150 | # The interval of time when retention policy enforcement checks run. 151 | # check-interval = "30m" 152 | 153 | ### 154 | ### [shard-precreation] 155 | ### 156 | ### Controls the precreation of shards, so they are available before data arrives. 157 | ### Only shards that, after creation, will have both a start- and end-time in the 158 | ### future, will ever be created. Shards are never precreated that would be wholly 159 | ### or partially in the past. 160 | 161 | [shard-precreation] 162 | # Determines whether shard pre-creation service is enabled. 163 | # enabled = true 164 | 165 | # The interval of time when the check to pre-create new shards runs. 166 | # check-interval = "10m" 167 | 168 | # The default period ahead of the endtime of a shard group that its successor 169 | # group is created. 170 | # advance-period = "30m" 171 | 172 | ### 173 | ### Controls the system self-monitoring, statistics and diagnostics. 174 | ### 175 | ### The internal database for monitoring data is created automatically if 176 | ### if it does not already exist. The target retention within this database 177 | ### is called 'monitor' and is also created with a retention period of 7 days 178 | ### and a replication factor of 1, if it does not exist. In all cases the 179 | ### this retention policy is configured as the default for the database. 180 | 181 | [monitor] 182 | # Whether to record statistics internally. 183 | # store-enabled = true 184 | 185 | # The destination database for recorded statistics 186 | # store-database = "_internal" 187 | 188 | # The interval at which to record statistics 189 | store-interval = "30s" 190 | 191 | ### 192 | ### [http] 193 | ### 194 | ### Controls how the HTTP endpoints are configured. These are the primary 195 | ### mechanism for getting data into and out of InfluxDB. 196 | ### 197 | 198 | [http] 199 | # Determines whether HTTP endpoint is enabled. 200 | # enabled = true 201 | 202 | # The bind address used by the HTTP service. 203 | bind-address = ":8086" 204 | 205 | # Determines whether user authentication is enabled over HTTP/HTTPS. 206 | # auth-enabled = false 207 | 208 | # The default realm sent back when issuing a basic auth challenge. 209 | # realm = "InfluxDB" 210 | 211 | # Determines whether HTTP request logging is enabled. 212 | # log-enabled = true 213 | 214 | # Determines whether detailed write logging is enabled. 215 | # write-tracing = false 216 | 217 | # Determines whether the pprof endpoint is enabled. This endpoint is used for 218 | # troubleshooting and monitoring. 219 | # pprof-enabled = true 220 | 221 | # Determines whether HTTPS is enabled. 222 | # https-enabled = false 223 | 224 | # The SSL certificate to use when HTTPS is enabled. 225 | # https-certificate = "/etc/ssl/influxdb.pem" 226 | 227 | # Use a separate private key location. 
228 | # https-private-key = "" 229 | 230 | # The JWT auth shared secret to validate requests using JSON web tokens. 231 | # shared-secret = "" 232 | 233 | # The default chunk size for result sets that should be chunked. 234 | # max-row-limit = 0 235 | 236 | # The maximum number of HTTP connections that may be open at once. New connections that 237 | # would exceed this limit are dropped. Setting this value to 0 disables the limit. 238 | # max-connection-limit = 0 239 | 240 | # Enable http service over unix domain socket 241 | # unix-socket-enabled = false 242 | 243 | # The path of the unix domain socket. 244 | # bind-socket = "/var/run/influxdb.sock" 245 | 246 | ### 247 | ### [subscriber] 248 | ### 249 | ### Controls the subscriptions, which can be used to fork a copy of all data 250 | ### received by the InfluxDB host. 251 | ### 252 | 253 | [subscriber] 254 | # Determines whether the subscriber service is enabled. 255 | # enabled = true 256 | 257 | # The default timeout for HTTP writes to subscribers. 258 | # http-timeout = "30s" 259 | 260 | # Allows insecure HTTPS connections to subscribers. This is useful when testing with self- 261 | # signed certificates. 262 | # insecure-skip-verify = false 263 | 264 | # The path to the PEM encoded CA certs file. If the empty string, the default system certs will be used 265 | # ca-certs = "" 266 | 267 | # The number of writer goroutines processing the write channel. 268 | # write-concurrency = 40 269 | 270 | # The number of in-flight writes buffered in the write channel. 271 | # write-buffer-size = 1000 272 | 273 | 274 | ### 275 | ### [[graphite]] 276 | ### 277 | ### Controls one or many listeners for Graphite data. 278 | ### 279 | 280 | [[graphite]] 281 | # Determines whether the graphite endpoint is enabled. 282 | # enabled = false 283 | # database = "graphite" 284 | # retention-policy = "" 285 | # bind-address = ":2003" 286 | # protocol = "tcp" 287 | # consistency-level = "one" 288 | 289 | # These next lines control how batching works. You should have this enabled 290 | # otherwise you could get dropped metrics or poor performance. Batching 291 | # will buffer points in memory if you have many coming in. 292 | 293 | # Flush if this many points get buffered 294 | # batch-size = 5000 295 | 296 | # number of batches that may be pending in memory 297 | # batch-pending = 10 298 | 299 | # Flush at least this often even if we haven't hit buffer limit 300 | # batch-timeout = "1s" 301 | 302 | # UDP Read buffer size, 0 means OS default. UDP listener will fail if set above OS max. 303 | # udp-read-buffer = 0 304 | 305 | ### This string joins multiple matching 'measurement' values providing more control over the final measurement name. 306 | # separator = "." 307 | 308 | ### Default tags that will be added to all metrics. These can be overridden at the template level 309 | ### or by tags extracted from metric 310 | # tags = ["region=us-east", "zone=1c"] 311 | 312 | ### Each template line requires a template pattern. It can have an optional 313 | ### filter before the template and separated by spaces. It can also have optional extra 314 | ### tags following the template. Multiple tags should be separated by commas and no spaces 315 | ### similar to the line protocol format. There can be only one default template. 316 | # templates = [ 317 | # "*.app env.service.resource.measurement", 318 | # # Default template 319 | # "server.*", 320 | # ] 321 | 322 | ### 323 | ### [collectd] 324 | ### 325 | ### Controls one or many listeners for collectd data. 
326 | ### 327 | 328 | [[collectd]] 329 | # enabled = false 330 | # bind-address = ":25826" 331 | # database = "collectd" 332 | # retention-policy = "" 333 | # 334 | # The collectd service supports either scanning a directory for multiple types 335 | # db files, or specifying a single db file. 336 | # typesdb = "/usr/local/share/collectd" 337 | # 338 | # security-level = "none" 339 | # auth-file = "/etc/collectd/auth_file" 340 | 341 | # These next lines control how batching works. You should have this enabled 342 | # otherwise you could get dropped metrics or poor performance. Batching 343 | # will buffer points in memory if you have many coming in. 344 | 345 | # Flush if this many points get buffered 346 | # batch-size = 5000 347 | 348 | # Number of batches that may be pending in memory 349 | # batch-pending = 10 350 | 351 | # Flush at least this often even if we haven't hit buffer limit 352 | # batch-timeout = "10s" 353 | 354 | # UDP Read buffer size, 0 means OS default. UDP listener will fail if set above OS max. 355 | # read-buffer = 0 356 | 357 | ### 358 | ### [opentsdb] 359 | ### 360 | ### Controls one or many listeners for OpenTSDB data. 361 | ### 362 | 363 | [[opentsdb]] 364 | # enabled = false 365 | # bind-address = ":4242" 366 | # database = "opentsdb" 367 | # retention-policy = "" 368 | # consistency-level = "one" 369 | # tls-enabled = false 370 | # certificate= "/etc/ssl/influxdb.pem" 371 | 372 | # Log an error for every malformed point. 373 | # log-point-errors = true 374 | 375 | # These next lines control how batching works. You should have this enabled 376 | # otherwise you could get dropped metrics or poor performance. Only points 377 | # metrics received over the telnet protocol undergo batching. 378 | 379 | # Flush if this many points get buffered 380 | # batch-size = 1000 381 | 382 | # Number of batches that may be pending in memory 383 | # batch-pending = 5 384 | 385 | # Flush at least this often even if we haven't hit buffer limit 386 | # batch-timeout = "1s" 387 | 388 | ### 389 | ### [[udp]] 390 | ### 391 | ### Controls the listeners for InfluxDB line protocol data via UDP. 392 | ### 393 | 394 | [[udp]] 395 | # enabled = false 396 | # bind-address = ":8089" 397 | # database = "udp" 398 | # retention-policy = "" 399 | 400 | # These next lines control how batching works. You should have this enabled 401 | # otherwise you could get dropped metrics or poor performance. Batching 402 | # will buffer points in memory if you have many coming in. 403 | 404 | # Flush if this many points get buffered 405 | # batch-size = 5000 406 | 407 | # Number of batches that may be pending in memory 408 | # batch-pending = 10 409 | 410 | # Will flush at least this often even if we haven't hit buffer limit 411 | # batch-timeout = "1s" 412 | 413 | # UDP Read buffer size, 0 means OS default. UDP listener will fail if set above OS max. 414 | # read-buffer = 0 415 | 416 | ### 417 | ### [continuous_queries] 418 | ### 419 | ### Controls how continuous queries are run within InfluxDB. 420 | ### 421 | 422 | [continuous_queries] 423 | # Determines whether the continuous query service is enabled. 424 | # enabled = true 425 | 426 | # Controls whether queries are logged when executed by the CQ service. 
427 | # log-enabled = true 428 | 429 | # interval for how often continuous queries will be checked if they need to run 430 | # run-interval = "1s" 431 | -------------------------------------------------------------------------------- /config/yaml.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "gopkg.in/yaml.v2" 5 | "io/ioutil" 6 | _ "fmt" 7 | log "github.com/Sirupsen/logrus" 8 | "time" 9 | ) 10 | 11 | 12 | type Conf struct { 13 | Zk string `yaml:"zk"` 14 | Zk_path string `yaml:"zk_path"` 15 | Port string `yaml:"port"` 16 | Write_timeout int `yaml:write_timeout` 17 | 18 | Udp_path string `yaml:udp_path` 19 | Ser_status string `yaml:"ser_status"` 20 | Migra_slot string `yaml:"migra_slot"` 21 | 22 | Persi_queue string `yaml:persi_queue` 23 | 24 | Write_worker int `yaml:write_worker` 25 | 26 | Log_level string `yaml:"log_level"` 27 | 28 | Db_shard string `yaml:"db_shard"` 29 | 30 | //poc code 31 | Trigger string `yaml:"trigger"` 32 | 33 | Write_timeout_dur time.Duration 34 | } 35 | 36 | 37 | func (c *Conf) Get_conf( path string ) error { 38 | 39 | yamlFile, err := ioutil.ReadFile(path) 40 | if err != nil { 41 | return err 42 | } 43 | 44 | err = yaml.Unmarshal(yamlFile, c) 45 | if err != nil { 46 | return err 47 | } 48 | 49 | c.Write_timeout_dur = time.Second * time.Duration(c.Write_timeout) 50 | 51 | m := map[string] log.Level { 52 | "info": log.InfoLevel, 53 | "debug": log.DebugLevel, 54 | } 55 | d,ok := m[ c.Log_level ] 56 | if ok { 57 | log.SetLevel(d) 58 | } 59 | 60 | log.Info("fetch config: zk is:",c.Zk,", server port is:",c.Port,", write_timeout:",c.Write_timeout_dur," udp: ",c.Udp_path) 61 | return nil 62 | } -------------------------------------------------------------------------------- /config_example/conf: -------------------------------------------------------------------------------- 1 | #zk server 2 | zk: 127.0.0.1:2181 3 | 4 | #zk path of slot conf 5 | zk_path: /slot_conf 6 | 7 | #http server port 8 | port: 9000 9 | 10 | #http post to influxdb's timeout 11 | write_timeout: 180 12 | 13 | #zk path to store udp server conf 14 | udp_path: /udp_instance 15 | 16 | #zk path to store server status 17 | ser_status: /ser_status 18 | 19 | #zk path to store migra slot info 20 | migra_slot: /migra_slot 21 | 22 | 23 | #zk path to store tag sharding conf 24 | db_shard: /db_shard 25 | 26 | 27 | # local folder for persistent queue 28 | persi_queue: /tmp/cluster_queue 29 | 30 | #write to influxdb worker number 31 | write_worker: 20 32 | -------------------------------------------------------------------------------- /config_example/db_sharding: -------------------------------------------------------------------------------- 1 | #dbname measurement:tags use to sharding 2 | dbname *:cluster 3 | -------------------------------------------------------------------------------- /config_example/ser_status: -------------------------------------------------------------------------------- 1 | #instance write_flag read_flag if (1) can write(read) else disable group_id(相同group id的slot也一样) 2 | 127.0.0.1:8086 1 1 0 3 | 127.0.0.2:8086 1 1 1 4 | 127.0.0.3:8086 1 1 2 5 | 127.0.0.4:8087 1 1 0 6 | 127.0.0.5:8087 1 1 1 7 | 127.0.0.6:8087 1 1 2 8 | -------------------------------------------------------------------------------- /config_example/slot_conf: -------------------------------------------------------------------------------- 1 | #slot 总数 16383,两台机器一组的slot号一样 2 | 127.0.0.1:8086 0-5461 3 | 127.0.0.2:8086 5462-10922 4 | 127.0.0.3:8086 
10922-16383 5 | 127.0.0.4:8087 0-5461 6 | 127.0.0.5:8087 5462-10922 7 | 127.0.0.6:8087 10922-16383 8 | -------------------------------------------------------------------------------- /config_example/udp_server: -------------------------------------------------------------------------------- 1 | #local udp server port, dbname, server workers number 2 | 8088 nginx 7 3 | -------------------------------------------------------------------------------- /http/handle.go: -------------------------------------------------------------------------------- 1 | package http 2 | 3 | import ( 4 | "bytes" 5 | "compress/gzip" 6 | log "github.com/Sirupsen/logrus" 7 | "gopkg.in/kataras/iris.v6" 8 | "time" 9 | "fmt" 10 | "strings" 11 | "strconv" 12 | //"github.com/influxdata/influxdb/influxql" 13 | //"github.com/infP/midd" 14 | "github.com/influxdata/influxdb/models" 15 | "github.com/golang/snappy" 16 | "github.com/gogo/protobuf/proto" 17 | "github.com/influxdata/influxdb/prometheus/remote" 18 | "github.com/infP/utils" 19 | ) 20 | 21 | 22 | var ( 23 | _ = fmt.Sprintf("") 24 | ) 25 | 26 | func errJsonResp(ctx *iris.Context, status int, err string) { 27 | log.Errorf("errJsonResp:%v",err) 28 | 29 | ctx.JSON(status, map[string]string{"error": err}) 30 | } 31 | 32 | func (s *ApiService) index(ctx *iris.Context) { 33 | 34 | log.Debug("entry main page") 35 | ctx.HTML(iris.StatusOK, "ok") 36 | 37 | } 38 | 39 | func (s *ApiService) servePromRead(ctx *iris.Context) { 40 | 41 | var body = ctx.Request.Body 42 | defer ctx.Request.Body.Close() 43 | 44 | bodyBuf := bytes.NewBuffer(nil) 45 | _, err := bodyBuf.ReadFrom(body) 46 | if err != nil { 47 | errJsonResp(ctx, iris.StatusInternalServerError, "problem reading request body") 48 | return 49 | } 50 | 51 | reqBuf, err := snappy.Decode(nil, bodyBuf.Bytes()) 52 | if err != nil { 53 | errJsonResp(ctx, iris.StatusInternalServerError, fmt.Sprintf("failed to write data: %v", err)) 54 | return 55 | } 56 | 57 | var req remote.ReadRequest 58 | if err := proto.Unmarshal(reqBuf, &req); err != nil { 59 | errJsonResp(ctx, iris.StatusInternalServerError, fmt.Sprintf("failed to write data: %v", err)) 60 | return 61 | } 62 | 63 | // Query the DB and create a ReadResponse for Prometheus 64 | db := ctx.FormValue("db") 65 | rp := ctx.FormValue("rp") 66 | 67 | q, err := utils.ReadRequestToInfluxQLQuery( &req, db, rp ) 68 | if err != nil { 69 | errJsonResp(ctx, iris.StatusInternalServerError, fmt.Sprintf("failed to write data: %v", err)) 70 | return 71 | } 72 | 73 | epoch := "" 74 | chunk := "" 75 | timeout := 60 76 | 77 | log.Debugf("read:%v,%v,%v,%v,%v,%v",db,epoch,chunk,rp,q,timeout) 78 | 79 | resp := &remote.ReadResponse{ 80 | Results: []*remote.QueryResult{{}}, 81 | } 82 | 83 | rr,err := s.Operator.Query(db, rp, q.String(), epoch, chunk,timeout) 84 | 85 | for _,r := range rr.Results { 86 | 87 | // read the series data and convert into Prometheus samples 88 | for _, s := range r.Series { 89 | ts := &remote.TimeSeries{ 90 | Labels: utils.TagsToLabelPairs(s.Tags), 91 | } 92 | 93 | for _, v := range s.Values { 94 | t, ok := v[0].(time.Time) 95 | if !ok { 96 | errJsonResp(ctx, iris.StatusInternalServerError, fmt.Sprintf("failed to write data: %v", err)) 97 | return 98 | } 99 | val, ok := v[1].(float64) 100 | if !ok { 101 | errJsonResp(ctx, iris.StatusInternalServerError, fmt.Sprintf("failed to write data: %v", err)) 102 | return 103 | } 104 | timestamp := t.UnixNano() / int64(time.Millisecond) / int64(time.Nanosecond) 105 | ts.Samples = append(ts.Samples, &remote.Sample{ 106 | TimestampMs: 
timestamp, 107 | Value: val, 108 | }) 109 | } 110 | 111 | resp.Results[0].Timeseries = append(resp.Results[0].Timeseries, ts) 112 | } 113 | } 114 | 115 | data, err := proto.Marshal(resp) 116 | if err != nil { 117 | errJsonResp(ctx, iris.StatusInternalServerError, fmt.Sprintf("failed to write data: %v", err)) 118 | return 119 | } 120 | 121 | w := ctx.ResponseWriter 122 | 123 | w.Header().Set("Content-Type", "application/x-protobuf") 124 | w.Header().Set("Content-Encoding", "snappy") 125 | 126 | compressed := snappy.Encode(nil, data) 127 | 128 | if _, err := w.Write(compressed); err != nil { 129 | errJsonResp(ctx, iris.StatusInternalServerError, fmt.Sprintf("failed to write data: %v", err)) 130 | return 131 | } 132 | 133 | } 134 | 135 | 136 | 137 | func (s *ApiService) servePromWrite(ctx *iris.Context) { 138 | var body = ctx.Request.Body 139 | defer ctx.Request.Body.Close() 140 | dbname := ctx.FormValue("db") 141 | 142 | 143 | bodyBuf := bytes.NewBuffer(nil) 144 | _, err := bodyBuf.ReadFrom(body) 145 | if err != nil { 146 | errJsonResp(ctx, iris.StatusInternalServerError, "problem reading request body") 147 | return 148 | } 149 | 150 | reqBuf, err := snappy.Decode(nil, bodyBuf.Bytes()) 151 | if err != nil { 152 | errJsonResp(ctx, iris.StatusInternalServerError, fmt.Sprintf("failed to write data: %v", err)) 153 | return 154 | } 155 | 156 | // Convert the Prometheus remote write request to Influx Points 157 | var req remote.WriteRequest 158 | if err := proto.Unmarshal(reqBuf, &req); err != nil { 159 | errJsonResp(ctx, iris.StatusInternalServerError, fmt.Sprintf("failed to write data: %v", err)) 160 | return 161 | } 162 | 163 | points, err := utils.WriteRequestToPoints(&req) 164 | if err != nil { 165 | if err != utils.ErrNaNDropped { 166 | errJsonResp(ctx, iris.StatusInternalServerError, fmt.Sprintf("failed to write data: %v", err)) 167 | return 168 | } 169 | } 170 | 171 | err = s.Operator.WritePoints(points, dbname, "", "") 172 | 173 | if err != nil { 174 | errJsonResp(ctx, iris.StatusInternalServerError, fmt.Sprintf("failed to write data: %v", err)) 175 | } else { 176 | ctx.JSON(iris.StatusNoContent, nil) 177 | } 178 | 179 | } 180 | 181 | func (s *ApiService) handleInfluxDbWrite(ctx *iris.Context) { 182 | 183 | var body = ctx.Request.Body 184 | defer ctx.Request.Body.Close() 185 | if ctx.Request.Header.Get("Content-Encoding") == "gzip" { 186 | b, err := gzip.NewReader(ctx.Request.Body) 187 | if err != nil { 188 | errJsonResp(ctx, iris.StatusBadRequest, "unable to decode gzip body") 189 | return 190 | } 191 | defer b.Close() 192 | body = b 193 | } 194 | 195 | bodyBuf := bytes.NewBuffer(nil) 196 | _, err := bodyBuf.ReadFrom(body) 197 | if err != nil { 198 | errJsonResp(ctx, iris.StatusInternalServerError, "problem reading request body") 199 | return 200 | } 201 | 202 | consistency := ctx.FormValue("consistency") 203 | dbname := ctx.FormValue("db") 204 | rpname := ctx.FormValue("rp") //rp null means use default rp 205 | precision := ctx.FormValue("precision") 206 | 207 | 208 | points, parseError := models.ParsePointsWithPrecision(bodyBuf.Bytes(), time.Now().UTC(), consistency) 209 | 210 | if parseError != nil && len(points) == 0 { 211 | if parseError.Error() == "EOF" { 212 | ctx.JSON(iris.StatusOK, "") 213 | return 214 | } 215 | errJsonResp(ctx, iris.StatusInternalServerError, "parsed point fail") 216 | return 217 | } 218 | 219 | 220 | //log.Debugf("dbname: %v, rpname: %v, precision: %v", dbname, rpname, precision) 221 | 222 | err = s.Operator.WritePoints(points, dbname, rpname, precision) 223 | 
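// Note (assumption, midd/operator.go is not shown in this section): per the README,
// Operator.WritePoints is expected to compute a slot for each point from its
// measurement (plus any tags configured via db_sharding) and hand the point to
// every influxdb instance whose slot range covers that slot, including the
// replica (backup) nodes, so one HTTP write fans out to the whole slot group.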
224 | if err != nil { 225 | errJsonResp(ctx, iris.StatusInternalServerError, fmt.Sprintf("failed to write data: %v", err)) 226 | } else { 227 | ctx.JSON(iris.StatusNoContent, nil) 228 | } 229 | 230 | } 231 | 232 | 233 | func (s *ApiService) handleInfluxDbRead(ctx *iris.Context) { 234 | 235 | dbname := strings.TrimSpace(ctx.FormValue("db")) 236 | epoch := strings.TrimSpace(ctx.FormValue("epoch")) 237 | chunk := strings.TrimSpace(ctx.FormValue("chunk")) 238 | rpname := strings.TrimSpace(ctx.FormValue("rp")) 239 | q := strings.TrimSpace(ctx.FormValue("q")) 240 | timeoutstr := strings.TrimSpace(ctx.FormValue("timeout")) 241 | 242 | start := time.Now() 243 | var timeout int 244 | var err error 245 | if timeoutstr == "" { 246 | timeout = 10 247 | } else { 248 | timeout, err = strconv.Atoi(timeoutstr) 249 | if err != nil { 250 | errJsonResp(ctx, iris.StatusBadRequest, err.Error()) 251 | return 252 | } 253 | } 254 | 255 | log.Debugf("at time:%v read:%v,%v,%v,%v,%v,%v",start,dbname,epoch,chunk,rpname,q,timeout) 256 | 257 | r,err := s.Operator.Query(dbname, rpname, q, epoch, chunk,timeout) 258 | 259 | if err != nil { 260 | errJsonResp(ctx, iris.StatusInternalServerError, fmt.Sprintf("failed to write data: %v", err)) 261 | } else { 262 | ctx.JSON(iris.StatusOK, r) 263 | } 264 | } 265 | 266 | 267 | 268 | -------------------------------------------------------------------------------- /http/routes.go: -------------------------------------------------------------------------------- 1 | package http 2 | 3 | import ( 4 | "gopkg.in/kataras/iris.v6" 5 | "gopkg.in/kataras/iris.v6/middleware/pprof" 6 | ) 7 | 8 | var ( 9 | ROUTES_INFLUXDB_V1 = "/api/influxdb/v1" 10 | ) 11 | 12 | func (s *ApiService) Routes(app *iris.Framework) { 13 | 14 | 15 | app.Get("/debug/pprof/*action", pprof.New()) 16 | 17 | app.Get("/", s.index) 18 | 19 | app.Post("/write", s.handleInfluxDbWrite) 20 | 21 | //influxdbV1 := app.Party(ROUTES_INFLUXDB_V1) 22 | 23 | 24 | app.Get("/query", s.handleInfluxDbRead) 25 | app.Post("/query", s.handleInfluxDbRead) 26 | 27 | 28 | app.Post("/api/v1/prom/write", s.servePromWrite) 29 | 30 | app.Post("/api/v1/prom/read", s.servePromRead) 31 | } 32 | -------------------------------------------------------------------------------- /http/service.go: -------------------------------------------------------------------------------- 1 | package http 2 | 3 | import ( 4 | stdContext "context" 5 | log "github.com/Sirupsen/logrus" 6 | "github.com/infP/config" 7 | "github.com/infP/midd" 8 | "github.com/infP/meta" 9 | "gopkg.in/kataras/iris.v6" 10 | "gopkg.in/kataras/iris.v6/adaptors/cors" 11 | "gopkg.in/kataras/iris.v6/adaptors/httprouter" 12 | "time" 13 | "fmt" 14 | ) 15 | 16 | type ApiService struct { 17 | App *iris.Framework 18 | shutdownCh chan chan error 19 | Conf *config.Conf 20 | 21 | Cluster *meta.Cluster 22 | 23 | Operator *midd.Operator 24 | } 25 | 26 | func (s *ApiService) Run() { 27 | 28 | log.Info("server run,port:",s.Conf.Port) 29 | s.App.Listen( fmt.Sprintf(":%s",s.Conf.Port)) 30 | } 31 | 32 | 33 | 34 | func NewApiService(conf *config.Conf) (*ApiService, error) { 35 | 36 | app := iris.New(iris.Configuration{Gzip: true, Charset: "UTF-8"}) 37 | 38 | app.Adapt( 39 | iris.DevLogger(), 40 | httprouter.New(), 41 | cors.New(cors.Options{AllowedOrigins: []string{"*"}}), 42 | ) 43 | 44 | app.Config.MaxHeaderBytes = 10 << 20 45 | 46 | service := &ApiService{ 47 | App: app, 48 | shutdownCh: make(chan chan error), 49 | Conf: conf, 50 | Operator: &midd.Operator{}, 51 | 52 | Cluster: &meta.Cluster{}, 53 | } 54 | 55 
| service.Cluster.Init(conf) 56 | 57 | 58 | service.Operator.SetCluster(service.Cluster) 59 | 60 | service.Routes(app) 61 | 62 | go service.waitShutDown() 63 | 64 | return service, nil 65 | } 66 | 67 | func (s *ApiService) waitShutDown() { 68 | for { 69 | select { 70 | case rCh := <-s.shutdownCh: 71 | timeout := 5 * time.Second 72 | ctx, cancel := stdContext.WithTimeout(stdContext.Background(), timeout) 73 | defer cancel() 74 | err := s.App.Shutdown(ctx) 75 | rCh <- err 76 | } 77 | } 78 | } 79 | 80 | func (s *ApiService) Shutdown() error { 81 | rCh := make(chan error) 82 | s.shutdownCh <- rCh 83 | return <-rCh 84 | } 85 | -------------------------------------------------------------------------------- /introduction: -------------------------------------------------------------------------------- 1 | 1 mv influxdb-cluster infP 2 | vendor目录没有上传,不好也是,依赖得自己下载 3 | 2 go build cluser.go 4 | 3 go build cli.go 5 | 6 | 基础配置: 7 | 1 有个zk,配置到config_example/conf中 8 | 9 | 2 mv cli config_example 10 | 3 cd config_example 11 | 4 ./cli -conf=conf -db_sharding=db_sharding -migra_slot=migra_slot -ser_status=ser_status -slot=slot_conf -udp=udp_server 12 | 读取对应配置文件中的内容写到zk中 13 | 5 cd .. 14 | 6 ./cluster -conf=config_example/conf 启动server 15 | 16 | 7 写数据时http post到 server 17 | 8 读数据 granfana的 datasource 切换到 server地址 18 | 19 | 20 | 21 | timeshift函数使用说明: SELECT timeshift(sum("conn_total"),24) FROM "measurement" (前偏移24小时) 22 | 23 | slot配置: 24 | 1 slot总数16384个 25 | 2 在slot_conf中,由于使用crc算法计算的slot存在这样一个问题,指标名前缀一样的(cpu_idle,cpu_used) 他会算到 几个连号的slot中,因此尽量打散slot的分配 26 | 27 | db_sharding: 28 | 1 有时候单个measurement的series太多,按照measurement 只能sharding到单influxdb上,series太多会爆, 29 | 因此再按照某tag的维度再sharding,比如集群,机器,在db_sharding中配置 30 | 2 db_sharding中按行配置格式: dbname *:cluster 就是dbname这个库的所有表 再按 cluster的tag value再做sharding 31 | 3 也可以指定单一measurement dbname measurementA:tagA 32 | 4 至于通用的series sharding方式 还没开发 33 | 34 | 35 | 监控: grafana中配置 _internal数据库,地址配proxy-server地址,配图还是查询速率,表个数,serise数量,会显示proxy后面挂的所有influxdb实例的监控数据 36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /meta/cluster.go: -------------------------------------------------------------------------------- 1 | package meta 2 | 3 | import ( 4 | "github.com/infP/config" 5 | log "github.com/Sirupsen/logrus" 6 | "encoding/json" 7 | "reflect" 8 | "strings" 9 | "strconv" 10 | //"github.com/infP/service" 11 | //"github.com/influxdata/influxdb/models" 12 | "fmt" 13 | "sync" 14 | "math/rand" 15 | "time" 16 | ) 17 | 18 | var ( 19 | _ = fmt.Sprintf("") 20 | ) 21 | 22 | type Cluster struct { 23 | Conf *config.Conf 24 | Slot *Slot 25 | Influxs map[string] * Influx 26 | Zk *Zk 27 | slotMachine []string 28 | stop chan bool 29 | udp map[string] * Udp 30 | Migra *Slot 31 | Inf_sync sync.Mutex 32 | 33 | DbShard *DbShard 34 | 35 | ClassIds map[int] []string 36 | } 37 | 38 | func ( c *Cluster) FetchRandInfluxs() []*Influx { 39 | 40 | tmp := []*Influx{} 41 | 42 | for k,_ := range c.ClassIds { 43 | 44 | _len := len( c.ClassIds[k] ) 45 | _len = rand.Intn(_len) 46 | 47 | machine := c.ClassIds[k][ _len ] 48 | 49 | in,_ := c.Influxs[machine] 50 | 51 | tmp = append( tmp,in ) 52 | } 53 | return tmp 54 | } 55 | 56 | func ( c *Cluster) FetchAllInfluxs() []*Influx { 57 | 58 | tmp := []*Influx{} 59 | for _,v := range c.Influxs { 60 | tmp = append(tmp, v) 61 | } 62 | return tmp 63 | } 64 | 65 | func ( c *Cluster) FetchWriteInflux(ip string) *Influx { 66 | k,ok := c.Influxs[ip] 67 | if ok == false { 68 | return nil 69 | } 70 | 71 | if k.Write == false { 
72 | return nil 73 | } 74 | 75 | return k 76 | } 77 | 78 | func ( c *Cluster) FetchReadInflux(ip string) *Influx { 79 | k,ok := c.Influxs[ip] 80 | if ok == false { 81 | return nil 82 | } 83 | 84 | if k.Read == false { 85 | return nil 86 | } 87 | 88 | if k.Flag() == false { 89 | return nil 90 | } 91 | return k 92 | } 93 | 94 | func ( c *Cluster) RefreshSlot(tmp []string) { 95 | ret := c.refreshSlot(tmp) 96 | c.Slot.SetSlot( ret ) 97 | } 98 | 99 | func (c *Cluster) RefreshIns(list []string) { 100 | 101 | c.Inf_sync.Lock() 102 | defer c.Inf_sync.Unlock() 103 | 104 | tmp := map[string]int{} 105 | 106 | for _,v := range list { 107 | 108 | if v[0:1] == "#" { 109 | continue 110 | } 111 | 112 | ret := strings.Split(v," ") 113 | if len(ret) < 2 { 114 | continue 115 | } 116 | inst := strings.Trim(ret[0]," ") 117 | tmp[inst] = 0 118 | } 119 | 120 | for k,_ := range c.Influxs { 121 | _,ok := tmp[k] 122 | if ok == false { 123 | c.Influxs[k].Stop() 124 | delete(c.Influxs,k) 125 | log.Info("delete Influxdb,",k) 126 | } 127 | } 128 | 129 | for k,_ := range tmp { 130 | _,ok := c.Influxs[k] 131 | if ok == false { 132 | c.Influxs[k] = &Influx{} 133 | c.Influxs[k].Init(k, c.Conf.Persi_queue,c.Conf.Write_worker) 134 | log.Info("add Influx:",k) 135 | } 136 | } 137 | } 138 | 139 | 140 | //to do,all slot should be cover 141 | func ( c *Cluster) refreshSlot(tmp []string) map[uint16] []string { 142 | 143 | tmpSlot := map[uint16] []string{} 144 | 145 | for _,v := range tmp { 146 | 147 | if v[0:1] == "#" { 148 | continue 149 | } 150 | 151 | ret := strings.Split(v," ") 152 | if len(ret) < 2 { 153 | continue 154 | } 155 | inst := strings.Trim(ret[0]," ") 156 | 157 | for i := 1; i< len(ret); i ++ { 158 | slot := ret[ i ] 159 | tmp1 := strings.Split(slot,"-") 160 | if len(tmp1) != 2 { 161 | log.Error("sloting is not right",slot) 162 | continue 163 | } 164 | 165 | s,err := strconv.Atoi(tmp1[0]) 166 | if err != nil { 167 | log.Error("start slot not right",slot) 168 | continue 169 | } 170 | 171 | e,err := strconv.Atoi(tmp1[1]) 172 | if err != nil { 173 | log.Error("end slot not right",slot) 174 | continue 175 | } 176 | 177 | for i := s; i <= e; i++ { 178 | _,ok := tmpSlot[ uint16(i) ] 179 | if ok == false { 180 | tmpSlot[ uint16(i) ] = []string{} 181 | } 182 | tmpSlot[ uint16(i) ] = append( tmpSlot[ uint16(i) ], inst ) 183 | } 184 | } 185 | } 186 | return tmpSlot 187 | } 188 | 189 | func (c *Cluster) Close() { 190 | c.Zk.Close() 191 | } 192 | 193 | func ( c *Cluster ) Init(conf *config.Conf) { 194 | c.Conf = conf 195 | c.slotMachine = []string{} 196 | c.stop = make(chan bool) 197 | 198 | log.Info("cluster get conf, zk is:",conf.Zk) 199 | log.Info("cluster get conf, zk path:",conf.Zk_path) 200 | 201 | c.Slot = &Slot{} 202 | c.Slot.Init() 203 | 204 | c.Influxs = map[string] *Influx{} 205 | 206 | c.udp = map[string] * Udp{} 207 | 208 | c.Migra = &Slot{} 209 | c.Migra.Init() 210 | 211 | 212 | c.Zk = &Zk{} 213 | c.Zk.Init( c.Conf.Zk ) 214 | 215 | c.DbShard = &DbShard{} 216 | c.DbShard.Init() 217 | 218 | c.ClassIds = map[int] []string{} 219 | 220 | go c.StartWatchSlot() 221 | go c.StartWatchUdp() 222 | go c.StartWatchMigra() 223 | go c.StartWatchSerStatus() 224 | go c.StartWatchDbShard() 225 | 226 | // poc code 227 | go c.StartWatchTrigger() 228 | } 229 | 230 | func (c *Cluster) RefreshDbslot(coming []string) { 231 | c.DbShard.Set(coming) 232 | } 233 | 234 | // poc code 235 | func ( c *Cluster ) StartWatchTrigger() { 236 | 237 | tmp := make(chan []byte,800 ) 238 | 239 | time.Sleep( 5 * time.Second ) 240 | 241 | go 
c.Zk.Watch(c.Conf.Trigger,tmp) 242 | 243 | loop: 244 | for { 245 | select { 246 | case ret := <- tmp: 247 | log.Info("catch trigger change:", string(ret) ) 248 | 249 | tmp1 := []string{} 250 | 251 | err := json.Unmarshal ( ret , &tmp1 ) 252 | 253 | if err != nil { 254 | log.Error(" trigger context from zk Unmarshal fail:",err) 255 | continue 256 | } 257 | 258 | for _,v := range tmp1 { 259 | 260 | ret := strings.Split(v," ") 261 | m := strings.Split( ret[0], ":" ) 262 | instance := ret[1:] 263 | for _, measu := range m { 264 | ss := c.Slot.FetchSlot([]byte(measu)) 265 | c.Slot.Append(ss,instance) 266 | 267 | for _,vv := range instance { 268 | _,ok := c.Influxs[vv] 269 | if ok == false { 270 | c.Influxs[vv] = &Influx{} 271 | c.Influxs[vv].Init(vv, c.Conf.Persi_queue,1) 272 | log.Info("add trigger consumer:",vv) 273 | } 274 | } 275 | } 276 | 277 | } 278 | 279 | case <- c.stop: 280 | log.Info("stop cluster!") 281 | c.Close() 282 | break loop 283 | 284 | } 285 | } 286 | } 287 | 288 | func ( c *Cluster ) StartWatchDbShard() { 289 | 290 | DbShard := make(chan []byte,800 ) 291 | 292 | go c.Zk.Watch(c.Conf.Db_shard,DbShard) 293 | 294 | loop: 295 | for { 296 | select { 297 | case ret := <- DbShard: 298 | log.Info("catch dbShard change:", string(ret) ) 299 | tmp := []string{} 300 | err := json.Unmarshal ( ret , &tmp ) 301 | 302 | if err != nil { 303 | log.Error("Unmarshal fail:",err) 304 | continue 305 | } else { 306 | c.RefreshDbslot (tmp) 307 | 308 | } 309 | case <- c.stop: 310 | log.Info("stop cluster!") 311 | c.Close() 312 | break loop 313 | 314 | } 315 | } 316 | } 317 | 318 | func ( c *Cluster ) StartWatchSlot() { 319 | 320 | slotMachine := make(chan []byte,800 ) 321 | 322 | go c.Zk.Watch(c.Conf.Zk_path,slotMachine) 323 | 324 | loop: 325 | for { 326 | select { 327 | case ret := <- slotMachine: 328 | log.Info("catch slot change:", string(ret) ) 329 | tmp := []string{} 330 | err := json.Unmarshal ( ret , &tmp ) 331 | 332 | if err != nil { 333 | log.Error("Unmarshal fail:",err) 334 | continue 335 | } else { 336 | result := reflect.DeepEqual(c.slotMachine, tmp) 337 | if result == true { 338 | log.Info("catch zk the same as old,",tmp) 339 | continue 340 | } 341 | 342 | c.RefreshSlot(tmp) 343 | c.RefreshIns(tmp) 344 | c.slotMachine = tmp 345 | } 346 | case <- c.stop: 347 | log.Info("stop cluster!") 348 | c.Close() 349 | break loop 350 | 351 | } 352 | } 353 | } 354 | 355 | func (c *Cluster) CheckUdp(coming []string) { 356 | tmp := map[string] []string {} 357 | 358 | for _,v :=range coming { 359 | 360 | if len(v) <= 1 { 361 | continue 362 | } 363 | 364 | if v[0:1] == "#" { 365 | continue 366 | } 367 | 368 | t := strings.Split(v," ") 369 | if len(v) < 3 { 370 | continue 371 | } 372 | 373 | _,err := strconv.Atoi( t[2]) 374 | if err != nil { 375 | continue 376 | } 377 | 378 | port := t[0] 379 | db := t[1] 380 | 381 | tmp[port] = []string{ db, t[2] } 382 | } 383 | 384 | for k,v := range c.udp { 385 | _,ok := tmp[k] 386 | if ok == false { 387 | log.Info("close udp:",k) 388 | v.Close() 389 | delete(c.udp, k) 390 | } 391 | } 392 | 393 | for k,v := range tmp { 394 | _,ok := c.udp[ k ] 395 | if ok == false { 396 | log.Info("new udp:",k," worker:",v[1]) 397 | c.udp[k] = &Udp{} 398 | c.udp[k].Init(k,v[0],v[1],c) 399 | } else { 400 | if v[0] != c.udp[k].Db() { 401 | c.udp[k].Close() 402 | 403 | c.udp[k] = &Udp{} 404 | c.udp[k].Init(k,v[0],v[1],c) 405 | log.Info("renew udp:",k," worker:",v[1]) 406 | } else if v[1] != c.udp[k].Worker() { 407 | c.udp[k].Close() 408 | 409 | c.udp[k] = &Udp{} 410 | 
c.udp[k].Init(k,v[0],v[1],c) 411 | log.Info("renew udp:",k," worker:",v[1]) 412 | } 413 | } 414 | } 415 | } 416 | 417 | func ( c *Cluster ) StartWatchUdp() { 418 | 419 | udp := make( chan []byte,2000 ) 420 | 421 | go c.Zk.Watch(c.Conf.Udp_path,udp) 422 | 423 | loop: 424 | for { 425 | select { 426 | case ret := <- udp: 427 | log.Info("catch udp change:", string(ret) ) 428 | tmp := []string{} 429 | err := json.Unmarshal ( ret , &tmp ) 430 | 431 | if err != nil { 432 | log.Error("Unmarshal fail:",err) 433 | continue 434 | } else { 435 | c.CheckUdp(tmp) 436 | } 437 | case <- c.stop: 438 | log.Info("stop cluster!") 439 | c.Close() 440 | break loop 441 | 442 | } 443 | } 444 | } 445 | 446 | 447 | func (c *Cluster) CheckMigr(coming []string) { 448 | ret := c.refreshSlot(coming) 449 | c.Migra.Init() 450 | 451 | totalSlot := 0 452 | for _,_ = range ret { 453 | totalSlot = totalSlot + 1 454 | } 455 | 456 | log.Info("catch Migra conf, total slot is:",totalSlot) 457 | c.Migra.SetSlot( ret ) 458 | } 459 | 460 | func ( c *Cluster ) StartWatchMigra() { 461 | 462 | m := make( chan []byte,2000 ) 463 | 464 | go c.Zk.Watch(c.Conf.Migra_slot,m) 465 | 466 | loop: 467 | for { 468 | select { 469 | case ret := <- m: 470 | //log.Info("catch Migra_slot change:", string(ret) ) 471 | tmp := []string{} 472 | err := json.Unmarshal ( ret , &tmp ) 473 | 474 | if err != nil { 475 | log.Error("Unmarshal fail:",err) 476 | continue 477 | } else { 478 | c.CheckMigr(tmp) 479 | } 480 | case <- c.stop: 481 | log.Info("stop cluster!") 482 | c.Close() 483 | break loop 484 | 485 | } 486 | } 487 | } 488 | 489 | 490 | 491 | func (c *Cluster) CheckSerStatus(list []string) { 492 | 493 | c.Inf_sync.Lock() 494 | defer c.Inf_sync.Unlock() 495 | 496 | tmp := map[string] []bool{} 497 | 498 | classIds := map[int] []string{} 499 | 500 | for _,v := range list { 501 | 502 | if v[0:1] == "#" { 503 | continue 504 | } 505 | 506 | ret := strings.Split(v," ") 507 | if len(ret) != 4 { 508 | continue 509 | } 510 | inst := strings.Trim(ret[0]," ") 511 | 512 | 513 | write_flag,_ := strconv.ParseBool( ret[1] ) 514 | read_flag,_ := strconv.ParseBool( ret[2] ) 515 | 516 | class_id,_ := strconv.Atoi( ret[3] ) 517 | 518 | _,ok := classIds[class_id] 519 | 520 | if ok == false { 521 | classIds[class_id] = []string{} 522 | } 523 | classIds[class_id] = append(classIds[class_id], inst) 524 | 525 | tmp[inst] = []bool{write_flag,read_flag} 526 | } 527 | 528 | c.ClassIds = classIds 529 | log.Info("class ids is:",c.ClassIds) 530 | 531 | 532 | for k,_ := range c.Influxs { 533 | _,ok := tmp[k] 534 | if ok == false { 535 | c.Influxs[k].Stop() 536 | delete(c.Influxs,k) 537 | log.Info("delete Influxdb,",k) 538 | } 539 | } 540 | 541 | for k,v := range tmp { 542 | _,ok := c.Influxs[k] 543 | if ok == false { 544 | c.Influxs[k] = &Influx{} 545 | c.Influxs[k].Init(k, c.Conf.Persi_queue,c.Conf.Write_worker) 546 | c.Influxs[k].Write = v[0] 547 | c.Influxs[k].Read = v[1] 548 | log.Info("add Influx:",k," write flag, read flag:",v) 549 | } else { 550 | if c.Influxs[k].Write != v[0] || c.Influxs[k].Read != v[1] { 551 | c.Influxs[k].Write = v[0] 552 | c.Influxs[k].Read = v[1] 553 | log.Info("change Influx write flag:",v[0]," read_flag:",v[1]) 554 | } 555 | } 556 | } 557 | } 558 | 559 | func ( c *Cluster ) StartWatchSerStatus() { 560 | 561 | m := make( chan []byte,5000 ) 562 | 563 | go c.Zk.Watch(c.Conf.Ser_status,m) 564 | 565 | loop: 566 | for { 567 | select { 568 | case ret := <- m: 569 | //log.Info("catch Migra_slot change:", string(ret) ) 570 | tmp := []string{} 571 | err := 
json.Unmarshal ( ret , &tmp ) 572 | 573 | if err != nil { 574 | log.Error("Unmarshal fail:",err) 575 | continue 576 | } else { 577 | c.CheckSerStatus(tmp) 578 | } 579 | case <- c.stop: 580 | log.Info("stop cluster!") 581 | c.Close() 582 | break loop 583 | 584 | } 585 | } 586 | } -------------------------------------------------------------------------------- /meta/dbSharding.go: -------------------------------------------------------------------------------- 1 | package meta 2 | 3 | import ( 4 | "fmt" 5 | "github.com/influxdata/influxdb/models" 6 | log "github.com/Sirupsen/logrus" 7 | "github.com/influxdata/influxql" 8 | "strings" 9 | "bytes" 10 | "sort" 11 | "reflect" 12 | ) 13 | 14 | var ( 15 | _ = fmt.Sprintf("") 16 | ) 17 | 18 | type DbShard struct { 19 | //支持单个measurement+ 多tag的sharding 20 | db_tag map[string] []string 21 | db_s map[string] string 22 | } 23 | 24 | 25 | func (i *DbShard) Init() { 26 | i.db_tag = map[string] []string{} 27 | i.db_s = map[string] string{} 28 | } 29 | 30 | func (i *DbShard) Check(db string) bool { 31 | _,ok := i.db_s[ db ] 32 | return ok 33 | } 34 | 35 | func FetchEqual(conda interface{}, tmp map[string]string ) { 36 | types := reflect.TypeOf(conda).String() 37 | 38 | if types == "influxql.BinaryExpr" { 39 | cond := conda.(influxql.BinaryExpr) 40 | switch cond.Op { 41 | case influxql.EQ: 42 | key := cond.LHS.String() 43 | value := strings.Trim(cond.RHS.String(),"'") 44 | tmp[key] = value 45 | 46 | default: 47 | FetchEqual(cond.LHS,tmp) 48 | FetchEqual(cond.RHS,tmp) 49 | } 50 | } 51 | if types == "*influxql.BinaryExpr" { 52 | cond := conda.(*influxql.BinaryExpr) 53 | switch cond.Op { 54 | case influxql.EQ: 55 | key := cond.LHS.String() 56 | value := strings.Trim(cond.RHS.String(),"'") 57 | tmp[key] = value 58 | 59 | default: 60 | FetchEqual(cond.LHS,tmp) 61 | FetchEqual(cond.RHS,tmp) 62 | } 63 | } 64 | 65 | if types == "influxql.ParenExpr" { 66 | cond1 := conda.(influxql.ParenExpr) 67 | cond := cond1.Expr 68 | FetchEqual(cond,tmp) 69 | } 70 | 71 | if types == "*influxql.ParenExpr" { 72 | cond1 := conda.(*influxql.ParenExpr) 73 | cond := cond1.Expr 74 | FetchEqual(cond,tmp) 75 | } 76 | 77 | 78 | } 79 | 80 | func (i *DbShard) QueryE(db string,m string) bool { 81 | pn,ok := i.db_s[ db ] 82 | if ok == false { 83 | return true 84 | } 85 | if pn != "*" && m != pn { 86 | return true 87 | } 88 | return false 89 | } 90 | 91 | func (i *DbShard) QueryFetch(db string,stat1 *influxql.SelectStatement,m string) ( []byte,bool ) { 92 | 93 | pn,ok := i.db_s[ db ] 94 | if ok == false { 95 | return []byte(m),true 96 | } 97 | 98 | if pn != "*" && m != pn { 99 | return []byte(m),true 100 | } 101 | 102 | zbuf := bytes.NewBuffer(nil) 103 | zbuf.Write( []byte(m) ) 104 | 105 | tmp := map[string]string{} 106 | 107 | FetchEqual(stat1.Condition, tmp) 108 | 109 | flag := true 110 | 111 | tagList := i.db_tag[ db ] 112 | for k,_ := range tagList { 113 | 114 | data,ok := tmp[ tagList[k] ] 115 | if ok { 116 | zbuf.Write( []byte(data) ) 117 | } else { 118 | flag = false 119 | } 120 | } 121 | 122 | if flag { 123 | return zbuf.Bytes(),true 124 | } else { 125 | return []byte{},false 126 | } 127 | } 128 | 129 | func (i *DbShard) Fetch(db string, point models.Point) []byte { 130 | m,ok := i.db_s[ db ] 131 | pn := point.Name() 132 | 133 | if ok == false { 134 | return pn 135 | } 136 | 137 | if m != "*" && m != string(pn) { 138 | return pn 139 | } 140 | 141 | zbuf := bytes.NewBuffer(nil) 142 | zbuf.Write( pn ) 143 | 144 | t := point.Tags() 145 | tagList := i.db_tag[ db ] 146 | 147 | for _,v1 := 
range t { 148 | for k,_ := range tagList { 149 | if string(v1.Key) == tagList[k] { 150 | zbuf.Write( v1.Value ) 151 | } 152 | } 153 | } 154 | 155 | return zbuf.Bytes() 156 | } 157 | 158 | 159 | func (i *DbShard) Set(coming []string) { 160 | 161 | db_tag := map[string] []string{} 162 | db_s := map[string] string{} 163 | 164 | for _,v := range coming { 165 | 166 | if len(v) <= 2 { 167 | continue 168 | } 169 | 170 | if v[0:1] == "#" { 171 | continue 172 | } 173 | 174 | ret := strings.Split(v," ") 175 | if len(ret) < 2 { 176 | continue 177 | } 178 | 179 | keyList := strings.Split(ret[1],":") 180 | 181 | if len(keyList) != 2 { 182 | continue 183 | } 184 | 185 | m := keyList[0] 186 | tagListstr := keyList[1] 187 | tagList := strings.Split(tagListstr,",") 188 | 189 | sort.Strings( tagList ) 190 | 191 | db_s[ ret[0] ] = m 192 | db_tag[ ret[0] ] = tagList 193 | } 194 | 195 | i.db_tag = db_tag 196 | i.db_s = db_s 197 | log.Info("db sharding strategy: measurement list:", i.db_s," tag list:",i.db_tag ) 198 | } 199 | 200 | 201 | 202 | 203 | -------------------------------------------------------------------------------- /meta/influx.go: -------------------------------------------------------------------------------- 1 | package meta 2 | 3 | 4 | import ( 5 | "net/http" 6 | "net" 7 | "time" 8 | "crypto/tls" 9 | "github.com/influxdata/influxdb/models" 10 | "fmt" 11 | "bytes" 12 | "compress/gzip" 13 | log "github.com/Sirupsen/logrus" 14 | "net/url" 15 | "io/ioutil" 16 | "github.com/influxdata/influxdb/query" 17 | "encoding/json" 18 | "github.com/beeker1121/goque" 19 | //"github.com/infP/midd" 20 | "os" 21 | "math/rand" 22 | "strings" 23 | ) 24 | 25 | var ( 26 | _ = fmt.Sprintf("") 27 | FlushInterval = 1 * time.Second 28 | MAX_LEN = 2000 29 | ) 30 | 31 | var ( 32 | //TODO: move to function, support config 33 | httpClient = &http.Client{Transport: &http.Transport{ 34 | MaxIdleConns: 100, 35 | MaxIdleConnsPerHost: 100, 36 | IdleConnTimeout: 30 * time.Second, 37 | DisableCompression: false, 38 | TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, 39 | Dial: (&net.Dialer{ 40 | Timeout: 10 * time.Second, 41 | KeepAlive: 30 * time.Second, 42 | }).Dial, 43 | TLSHandshakeTimeout: 10 * time.Second, 44 | }} 45 | ) 46 | 47 | 48 | 49 | type Influx struct { 50 | Addr string 51 | httpClient *http.Client 52 | stop chan bool 53 | httpAddr string 54 | queryAddr string 55 | pingAddr string 56 | 57 | Read bool 58 | Write bool 59 | 60 | queue_path string 61 | queue *goque.Queue 62 | worker int 63 | worker_stop map[int] []chan bool 64 | 65 | flag bool 66 | 67 | queue1 chan *Data 68 | } 69 | 70 | type Response struct { 71 | Results []*query.Result `json:"results"` 72 | //Err error 73 | Index int 74 | } 75 | 76 | 77 | func CreatePath(path string) { 78 | 79 | _, err := os.Stat(path) 80 | 81 | if err != nil { 82 | err := os.MkdirAll(path, 0711) 83 | 84 | if err != nil { 85 | log.Error("Error creating directory",path) 86 | return 87 | } 88 | } 89 | } 90 | 91 | 92 | func (i *Influx) Init(add string,path string, worker int) { 93 | i.queue1 = make(chan *Data) 94 | 95 | i.Addr = add 96 | i.httpClient = httpClient 97 | i.stop = make(chan bool) 98 | i.httpAddr = fmt.Sprintf("http://%s/write", i.Addr) 99 | i.queryAddr = fmt.Sprintf("http://%s/query", i.Addr) 100 | i.pingAddr = fmt.Sprintf("http://%s/ping", i.Addr) 101 | 102 | i.Read = true 103 | i.Write = true 104 | i.queue_path = fmt.Sprintf("%s/%s",path,add) 105 | CreatePath(i.queue_path) 106 | i.queue,_ = goque.OpenQueue(i.queue_path) 107 | i.worker = worker 108 | i.worker_stop = 
map[int] []chan bool{} 109 | i.flag = false 110 | 111 | for k := 0; k < i.worker; k ++ { 112 | i.worker_stop[k] = []chan bool{} 113 | i.worker_stop[k] = append(i.worker_stop[k], make(chan bool)) 114 | 115 | go i.RunSer( k,i.worker_stop[k][0] ) 116 | } 117 | go i.alive( i.stop ) 118 | 119 | log.Info("run influxdb Ser:",i.Addr," worker:",i.worker) 120 | } 121 | 122 | func (i *Influx) Stop() { 123 | if i.queue != nil { 124 | i.queue.Close() 125 | } 126 | i.stop <- true 127 | for k,_ := range i.worker_stop { 128 | i.worker_stop[k][0] <- true 129 | //i.worker_stop[k][1] <- true 130 | } 131 | 132 | log.Info("close influxdb:", i.Addr) 133 | } 134 | 135 | 136 | func (i *Influx) PutPointsBytes(ret []models.Point, db string, rp string) { 137 | 138 | //count := len(ret) 139 | 140 | //_, err := i.queue.Enqueue( i.PointsString(ret) ) 141 | //if err != nil { 142 | // log.Error("Enqueue err:",err) 143 | //} 144 | 145 | //log.Info("PutPoints addr:", i.Addr," db:",db," points:", count ," err:",err) 146 | } 147 | 148 | 149 | type Data struct { 150 | Name string 151 | Dd []byte 152 | Leng int 153 | } 154 | 155 | 156 | func ( i *Influx) PointsString(points models.Points) ([]byte,int) { 157 | zbuf := bytes.NewBuffer(nil) 158 | 159 | c := 0 160 | 161 | for _, p := range points { 162 | t := p.String() 163 | 164 | zbuf.Write( []byte(t) ) 165 | 166 | if _, err := zbuf.Write([]byte("\n")); err != nil { 167 | break 168 | } 169 | c = c + 1 170 | } 171 | return zbuf.Bytes(),c 172 | } 173 | 174 | func (i *Influx) PutPoints(ret []models.Point,db string,rp string) { 175 | 176 | b,c := i.PointsString(ret) 177 | 178 | 179 | a := Data{ Name: fmt.Sprintf("%s:%s",db,rp), Dd: b,Leng:c } 180 | 181 | i.queue1 <- &a 182 | //_, err := i.queue.EnqueueObject( a ) 183 | 184 | //if err != nil { 185 | // log.Error("Enqueue err:",err) 186 | //} 187 | } 188 | 189 | func dialTimeout(network, addr string) (net.Conn, error) { 190 | var timeout = time.Duration(2 * time.Second) 191 | return net.DialTimeout(network, addr, timeout) 192 | } 193 | 194 | func (i *Influx) alive( stop chan bool ) { 195 | 196 | i.flag = true 197 | 198 | transport := http.Transport{ 199 | Dial: dialTimeout, 200 | } 201 | 202 | client := http.Client{ 203 | Transport: &transport, 204 | } 205 | 206 | tim := time.Tick(time.Second* 5) 207 | failCount := 0 208 | 209 | loop: 210 | for { 211 | select { 212 | case <- tim: 213 | 214 | resp, err := client.Get(i.pingAddr) 215 | if err != nil { 216 | failCount = failCount + 1 217 | if failCount >= 3 { 218 | log.Info("ping fail:",i.pingAddr) 219 | i.flag = false 220 | if resp != nil { 221 | resp.Body.Close() 222 | } 223 | continue 224 | } 225 | } 226 | 227 | if resp == nil { 228 | failCount = failCount + 1 229 | continue 230 | } 231 | if resp.StatusCode/100 != 2 { 232 | resp.Body.Close() 233 | failCount = failCount + 1 234 | if failCount >= 3 { 235 | log.Info("ping fail:",i.pingAddr) 236 | i.flag = false 237 | continue 238 | } 239 | } 240 | 241 | if failCount > 0 { 242 | i.flag = true 243 | failCount = 0 244 | } 245 | resp.Body.Close() 246 | 247 | //log.Info("ping:",i.pingAddr," failCount:",failCount," flag:",i.flag) 248 | case <- stop: 249 | break loop 250 | } 251 | } 252 | } 253 | 254 | func (i *Influx) Flag() bool { 255 | return i.flag 256 | } 257 | 258 | func (i *Influx) RunSer(count int,stop chan bool) { 259 | 260 | total := map[string] *bytes.Buffer{} 261 | 262 | num := rand.Int31n(999) 263 | time.Sleep(time.Duration(num) * time.Millisecond) 264 | 265 | 266 | startCollect := time.Tick(FlushInterval) 267 | 268 | tc := 
map[string] int{} 269 | 270 | queue1 := make(chan *Data) 271 | 272 | queue1 = i.queue1 273 | loop: 274 | for { 275 | select { 276 | 277 | case <- startCollect: 278 | if i.flag == false { 279 | if queue1 != nil { 280 | queue1 = nil 281 | } 282 | continue 283 | } else { 284 | if queue1 == nil { 285 | queue1 = i.queue1 286 | } 287 | } 288 | 289 | for k,_ := range total { 290 | //fmt.Printf("****len**kkk*%v\n", k ) 291 | i.WritePointsBytes(total[k].Bytes(),k) 292 | total[ k ].Reset() 293 | delete( total,k ) 294 | } 295 | case <- stop: 296 | break loop 297 | case ret := <- queue1: 298 | if i.flag == false { 299 | continue 300 | } 301 | 302 | //长度太长 直接发出去 303 | if ret.Leng >= MAX_LEN { 304 | //fmt.Printf("****directory***%v\n", ret.Leng ) 305 | i.WritePointsBytes( ret.Dd,ret.Name) 306 | continue 307 | } 308 | 309 | _,ok := total[ret.Name] 310 | if ok == false { 311 | total[ret.Name] = bytes.NewBuffer(nil) 312 | tc[ ret.Name ] = 0 313 | } 314 | 315 | tc[ ret.Name ] = tc[ ret.Name ] + ret.Leng 316 | total[ret.Name].Write( ret.Dd ) 317 | 318 | //长度太长 直接发出去 319 | if tc[ ret.Name ] > MAX_LEN { 320 | //fmt.Printf("*******3******\n") 321 | i.WritePointsBytes(total[ ret.Name ].Bytes(), ret.Name ) 322 | total[ ret.Name ].Reset() 323 | 324 | delete( total, ret.Name ) 325 | } 326 | } 327 | } 328 | log.Info("close RunSer",i.Addr," worker no:",count) 329 | } 330 | 331 | 332 | func ( i *Influx) mustGzipPoints(points models.Points, precision string) []byte { 333 | zbuf := bytes.NewBuffer(nil) 334 | 335 | gz := gzip.NewWriter(zbuf) 336 | for _, p := range points { 337 | if _, err := gz.Write([]byte(p.PrecisionString(precision))); err != nil { 338 | break 339 | } 340 | if _, err := gz.Write([]byte("\n")); err != nil { 341 | break 342 | } 343 | } 344 | gz.Flush() 345 | gz.Close() 346 | return zbuf.Bytes() 347 | } 348 | 349 | func ( i *Influx) WritePointsBytes(ret []byte,dbname string ) { 350 | 351 | //start := time.Now() 352 | 353 | u, err := url.Parse(i.httpAddr) 354 | 355 | if err != nil { 356 | log.Errorf("failed to parse influxdb instance addr: %v", err) 357 | return 358 | } 359 | p := url.Values{} 360 | 361 | t := strings.Split(dbname,":") 362 | 363 | p.Add("db", t[0]) 364 | p.Add("rp", t[1]) 365 | //p.Add("precision", precision) 366 | u.RawQuery = p.Encode() 367 | 368 | zbuf := bytes.NewBuffer(nil) 369 | gz := gzip.NewWriter(zbuf) 370 | gz.Write( ret ) 371 | gz.Flush() 372 | gz.Close() 373 | 374 | req, err := http.NewRequest("POST", u.String(), bytes.NewBuffer( zbuf.Bytes() ) ) 375 | if err != nil { 376 | log.Errorf("NewRequest Error: %v", err) 377 | return 378 | } 379 | 380 | // all data should be gzip encoded. 
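// The request body assembled above is already gzip-compressed (written through gzip.NewWriter into zbuf); this header only declares that encoding so the receiving InfluxDB /write endpoint knows to decompress the payload before parsing the line protocol.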
381 | req.Header.Add("Content-Encoding", "gzip") 382 | 383 | resp, err := i.httpClient.Do(req) 384 | if err != nil { 385 | log.Errorf("influx.PostPoints - post data failed: %v", err) 386 | //errCh <- err 387 | return 388 | } 389 | 390 | //log.Info("write data cost time:", time.Now().Sub(start)," addr:",i.httpAddr) 391 | 392 | defer resp.Body.Close() 393 | 394 | if resp.StatusCode/100 != 2 { 395 | 396 | if _, err := ioutil.ReadAll(resp.Body); err != nil { 397 | log.Error("status code not 2xx",err) 398 | return 399 | } 400 | } 401 | } 402 | 403 | func ( i *Influx) WritePoints(points models.Points, errCh chan error,dbname string, rp string, precision string ) { 404 | 405 | start := time.Now() 406 | 407 | u, err := url.Parse(i.httpAddr) 408 | 409 | if err != nil { 410 | log.Errorf("failed to parse influxdb instance addr: %v", err) 411 | errCh <- err 412 | return 413 | } 414 | p := url.Values{} 415 | p.Add("db", dbname) 416 | p.Add("rp", rp) 417 | p.Add("precision", precision) 418 | u.RawQuery = p.Encode() 419 | 420 | 421 | by := i.mustGzipPoints( points, precision ) 422 | 423 | req, err := http.NewRequest("POST", u.String(), bytes.NewBuffer( by ) ) 424 | if err != nil { 425 | log.Errorf("NewRequest Error: %v", err) 426 | errCh <- err 427 | return 428 | } 429 | 430 | // all data should be gzip encoded. 431 | req.Header.Add("Content-Encoding", "gzip") 432 | 433 | //fmt.Printf("******%+v*\n", req ) 434 | resp, err := i.httpClient.Do(req) 435 | if err != nil { 436 | log.Errorf("influx.PostPoints - post data failed: %v", err) 437 | //i.PutPoints(points,dbname) 438 | errCh <- err 439 | return 440 | } 441 | 442 | log.Info("write data cost time:", time.Now().Sub(start)," addr:",i.httpAddr) 443 | 444 | defer resp.Body.Close() 445 | 446 | if resp.StatusCode/100 != 2 { 447 | //i.PutPoints(points,dbname) 448 | 449 | if _, err := ioutil.ReadAll(resp.Body); err != nil { 450 | log.Error("status code not 2xx",err) 451 | errCh <- err 452 | return 453 | } 454 | } 455 | errCh <- nil 456 | } 457 | 458 | 459 | 460 | func ( i *Influx) Query(dbname, rpname, sql, epoch, chunk string, ret chan *Response,index int) { 461 | 462 | if i.flag == false { 463 | ret <- &Response{Results: []*query.Result{}, Index: index} 464 | return 465 | } 466 | 467 | start := time.Now() 468 | 469 | p := url.Values{} 470 | p.Add("db", dbname) 471 | p.Add("rp", rpname) 472 | p.Add("epoch", epoch) 473 | p.Add("chunk", chunk) 474 | p.Add("q", sql) 475 | 476 | 477 | req, err := http.NewRequest("GET", i.queryAddr, nil) 478 | if err != nil { 479 | log.Errorf("NewRequest Error: %v", err) 480 | ret <- &Response{Results: []*query.Result{}, Index: index} 481 | return 482 | } 483 | 484 | req.URL.RawQuery = p.Encode() 485 | 486 | //req.Header.Add("Accept-Encoding", "gzip") 487 | 488 | resp, err := i.httpClient.Do(req) 489 | 490 | if resp == nil { 491 | ret <- &Response{Results: []*query.Result{}, Index: index} 492 | return 493 | } 494 | 495 | defer resp.Body.Close() 496 | 497 | body, err := ioutil.ReadAll(resp.Body) 498 | if err != nil { 499 | ret <- &Response{Results: []*query.Result{}, Index: index} 500 | return 501 | } 502 | 503 | result := Response{} 504 | err = json.Unmarshal ( body , &result ) 505 | result.Index = index 506 | 507 | if err != nil { 508 | log.Error("Unmarshal fail,",err) 509 | ret <- &Response{Results: []*query.Result{}, Index: index} 510 | return 511 | } 512 | 513 | if resp.StatusCode/100 != 2 { 514 | 515 | if _, err := ioutil.ReadAll(resp.Body); err != nil { 516 | log.Error("status code not 2xx",err) 517 | ret <- 
&Response{Results: []*query.Result{}, Index: index} 518 | return 519 | } 520 | } 521 | ret <- &result 522 | log.Info("post time cost:",time.Now().Sub(start),", add:",i.queryAddr," sql:",sql," db:",dbname) 523 | return 524 | } 525 | 526 | 527 | 528 | 529 | -------------------------------------------------------------------------------- /meta/slot.go: -------------------------------------------------------------------------------- 1 | package meta 2 | 3 | 4 | import ( 5 | "github.com/infP/utils" 6 | "fmt" 7 | "reflect" 8 | log "github.com/Sirupsen/logrus" 9 | "sync" 10 | ) 11 | 12 | var ( 13 | kClusterSlots = uint16(16384) 14 | _ = fmt.Sprintf("") 15 | ) 16 | 17 | type Slot struct { 18 | meta map[uint16] []string 19 | sy sync.Mutex 20 | } 21 | 22 | 23 | func (a *Slot) Init() { 24 | a.sy.Lock() 25 | defer a.sy.Unlock() 26 | a.meta = map[uint16] []string {} 27 | } 28 | 29 | 30 | func (a *Slot) Append(slot uint16, instance []string) { 31 | v,ok := a.meta[slot] 32 | if ok == false { 33 | a.meta[slot] = []string{} 34 | } 35 | 36 | v = append( v, instance... ) 37 | a.meta[slot] = v 38 | log.Info( fmt.Sprintf("slot append****instance:%v****slot:%v**all:%+v",instance, slot,a.meta[slot]) ) 39 | 40 | } 41 | 42 | 43 | func (a *Slot) SetSlot(tmp map[uint16] []string) { 44 | a.sy.Lock() 45 | defer a.sy.Unlock() 46 | 47 | count := 0 48 | for k,v := range tmp { 49 | cv,ok := a.meta[k] 50 | if ok == false { 51 | a.meta[k] = v 52 | log.Debug("change slot:",k," list:",v) 53 | } 54 | result := reflect.DeepEqual(v, cv) 55 | if result == false { 56 | log.Debug("change slot:",k," list:",v) 57 | a.meta[k] = v 58 | count = count + 1 59 | } 60 | } 61 | if count != 0 { 62 | log.Info("**set slot***total*",count) 63 | } 64 | } 65 | 66 | func (s *Slot) FetchWriteInstances(measurement []byte) []string { 67 | slot := s.FetchSlot(measurement) 68 | //log.Debug("fetch slot:",string(measurement),slot) 69 | if s.meta == nil { 70 | return []string{} 71 | } 72 | 73 | s.sy.Lock() 74 | defer s.sy.Unlock() 75 | 76 | list,ok := s.meta[slot] 77 | if ok { 78 | return list 79 | } else { 80 | return []string{} 81 | } 82 | } 83 | 84 | func (sl *Slot) FetchSlot(key []byte) uint16 { 85 | var s, e int 86 | key_len := len(key) 87 | for s = 0; s < key_len; s++ { 88 | if key[s] == '{' { 89 | break 90 | } 91 | } 92 | 93 | if s == key_len { 94 | return utils.Crc16(key) & (kClusterSlots-1) 95 | } 96 | 97 | for e = s+1; e < key_len; e++ { 98 | if key[e] == '}' { 99 | break 100 | } 101 | } 102 | 103 | if e == key_len || e == s+1 { 104 | return utils.Crc16(key) & (kClusterSlots-1) 105 | } 106 | 107 | return utils.Crc16(key[s+1:e]) & (kClusterSlots-1) 108 | } -------------------------------------------------------------------------------- /meta/udp.go: -------------------------------------------------------------------------------- 1 | package meta 2 | 3 | import ( 4 | "strconv" 5 | "fmt" 6 | log "github.com/Sirupsen/logrus" 7 | //reuse "github.com/jbenet/go-reuseport" 8 | "golang.org/x/sys/unix" 9 | "net" 10 | "time" 11 | "github.com/influxdata/influxdb/models" 12 | "errors" 13 | ) 14 | 15 | const ( 16 | // MaxUDPPayload is largest payload size the UDP service will accept. 
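// Note: 64 KiB is large enough to hold the biggest possible UDP datagram read by Recvfrom below; the 548 noted underneath likely refers to the classic fragmentation-safe payload (576-byte minimum MTU minus IP/UDP headers), so senders should keep each batch far below the maximum.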
17 | MaxUDPPayload = 64 * 1024 18 | //548 19 | ) 20 | 21 | 22 | //Listener 23 | type Udp struct { 24 | addr string 25 | port int 26 | db string 27 | worker int 28 | inst map[int] chan bool 29 | 30 | Cluster *Cluster 31 | Slot *Slot 32 | Migra *Slot 33 | 34 | } 35 | 36 | func (s *Udp) Db() string { 37 | return s.db 38 | } 39 | 40 | func (c *Udp) Worker() string { 41 | return fmt.Sprintf("%d",c.worker) 42 | } 43 | 44 | func (s *Udp) Init(port string, db string,worker string,c *Cluster) { 45 | s.db = db 46 | s.addr = fmt.Sprintf("0.0.0.0:%s",port) 47 | 48 | s.port,_ = strconv.Atoi( port ) 49 | s.worker,_ = strconv.Atoi( worker ) 50 | s.inst = map[int] chan bool{} 51 | 52 | s.Cluster = c 53 | s.Slot = c.Slot 54 | s.Migra = c.Migra 55 | 56 | for k := 0; k < s.worker; k ++ { 57 | stop := make(chan bool,2) 58 | go s.Open(stop) 59 | s.inst[ k ] = stop 60 | } 61 | } 62 | 63 | 64 | 65 | func (s *Udp) initSocket() (int, error) { 66 | 67 | fd, err := unix.Socket(unix.AF_INET, unix.SOCK_DGRAM, unix.IPPROTO_UDP) 68 | 69 | err = unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_REUSEADDR, 1) 70 | if err != nil { 71 | log.Error("socket resue err:",err) 72 | return 0, err 73 | } 74 | 75 | if err = unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_REUSEPORT, 1); err != nil { 76 | log.Error("socket resue err:",err) 77 | return 0,err 78 | } 79 | 80 | sec := 20 81 | var l unix.Linger 82 | if sec >= 0 { 83 | l.Onoff = 1 84 | l.Linger = int32(sec) 85 | } else { 86 | l.Onoff = 0 87 | l.Linger = 0 88 | } 89 | 90 | err = unix.SetsockoptLinger(fd, unix.SOL_SOCKET, unix.SO_LINGER, &l) 91 | if err != nil { 92 | log.Error("socket set Linger fail;",err) 93 | } 94 | 95 | t := 5 * time.Second 96 | tv := unix.NsecToTimeval(t.Nanoseconds()) 97 | err = unix.SetsockoptTimeval(fd, unix.SOL_SOCKET, unix.SO_RCVTIMEO, &tv) 98 | if err != nil { 99 | log.Error("socket set time out fail;",err) 100 | } 101 | 102 | 103 | netadd,err := net.ResolveUDPAddr("udp", "0.0.0.0:12346") 104 | 105 | ip4 := netadd.IP.To4() 106 | var buf [4]byte 107 | copy(buf[:], ip4) 108 | n := unix.SockaddrInet4{ Addr: buf,Port: s.port } 109 | 110 | if err = unix.Bind(fd, &n); err != nil { 111 | log.Error("bind socket fail:",err) 112 | } 113 | 114 | return fd,nil 115 | } 116 | 117 | 118 | func (p *Udp) Write(dd []byte) error { 119 | 120 | points, parseError := models.ParsePointsWithPrecision(dd, time.Now().UTC(), "") 121 | 122 | if parseError != nil { 123 | log.Error("udp format points fail:",parseError) 124 | return parseError 125 | } 126 | 127 | if p.Slot == nil { 128 | return errors.New("slot fail!") 129 | } 130 | 131 | modelP := map[string] []models.Point{} 132 | 133 | instanceCount := 0 134 | 135 | //log.Info("******points*******",points) 136 | //log.Info("*****len*points*******",len(points) 137 | 138 | for _,v := range points { 139 | 140 | str := p.Cluster.DbShard.Fetch(p.db,v) 141 | 142 | machines := p.Slot.FetchWriteInstances( str ) 143 | if len(machines) == 0 { 144 | log.Warn("measurement does not get slot:", string( v.Name() ) ) 145 | } 146 | 147 | m_machines := p.Migra.FetchWriteInstances( str) 148 | if len(m_machines) != 0 { 149 | for k,_ := range m_machines { 150 | machines = append( machines, m_machines[k] ) 151 | } 152 | } 153 | 154 | for _,instance := range machines { 155 | _,ok := modelP[ instance ] 156 | if ok == false { 157 | modelP[ instance ] = []models.Point{} 158 | instanceCount = instanceCount + 1 159 | } 160 | modelP[ instance ] = append( modelP[ instance ], v ) 161 | } 162 | } 163 | 164 | 165 | for k,v := range modelP { 166 | ins := 
p.Cluster.FetchWriteInflux(k) 167 | if ins != nil { 168 | ins.PutPoints(v, p.db,"") 169 | //ins.PutPointsBytes(v,p.db) 170 | } 171 | } 172 | 173 | return nil 174 | } 175 | 176 | 177 | // Open starts the service. 178 | func (s *Udp) Open(stop chan bool) { 179 | 180 | fd, err := s.initSocket() 181 | if err != nil { 182 | log.Error("open socket fail:",err) 183 | return 184 | } 185 | 186 | buf := make([]byte, MaxUDPPayload) 187 | 188 | log.Info("open udp server successfully,port is:",s.port," fd is:",fd) 189 | loop: 190 | for { 191 | select { 192 | case <-stop: 193 | log.Info("close udp handle") 194 | //handle.Close() 195 | unix.Close(fd) 196 | break loop 197 | default: 198 | n,_,err := unix.Recvfrom(fd,buf,0) 199 | if err == nil { 200 | dd := make([]byte, n) 201 | copy(dd[:], buf) 202 | s.Write(dd) 203 | } 204 | } 205 | } 206 | 207 | } 208 | 209 | func (s *Udp) Close() { 210 | fmt.Printf("!!!close\n") 211 | for k,_ := range s.inst { 212 | s.inst[ k ] <- true 213 | } 214 | } 215 | 216 | -------------------------------------------------------------------------------- /meta/zk.go: -------------------------------------------------------------------------------- 1 | package meta 2 | 3 | 4 | import ( 5 | "github.com/samuel/go-zookeeper/zk" 6 | "time" 7 | "fmt" 8 | log "github.com/Sirupsen/logrus" 9 | "sync" 10 | "math/rand" 11 | ) 12 | 13 | 14 | type Zk struct { 15 | host string 16 | zk *zk.Conn 17 | stop chan bool 18 | crate_mut sync.Mutex 19 | } 20 | 21 | 22 | func (c* Zk) Re_new() { 23 | c.Close() 24 | c.Init(c.host) 25 | } 26 | 27 | func (c *Zk) Init(hostname string) error { 28 | 29 | c.crate_mut.Lock() 30 | 31 | defer c.crate_mut.Unlock() 32 | 33 | r_session := rand.New(rand.NewSource(time.Now().UnixNano())) 34 | a := r_session.Intn(18000 - 15000) + 15000 35 | 36 | conn, _, err := zk.Connect([]string{hostname}, time.Second * time.Duration(a) ) 37 | if err != nil { 38 | fmt.Println("create zk err:%v",err) 39 | return err 40 | } 41 | 42 | c.zk = conn 43 | c.host = hostname 44 | c.stop = make(chan bool,0) 45 | 46 | log.Info("zk init success:",hostname) 47 | return nil 48 | } 49 | 50 | func (c*Zk) Create(path string,data string) error { 51 | 52 | var flags int32 53 | flags = 0 54 | var acls = zk.WorldACL(zk.PermAll) 55 | 56 | var data_byte = []byte( data ) 57 | 58 | _,err_create := c.zk.Create(path,data_byte,flags,acls) 59 | 60 | if err_create != nil { 61 | fmt.Println(err_create) 62 | 63 | c.Re_new() 64 | return err_create 65 | } 66 | 67 | log.Info("create data success:",path) 68 | return nil 69 | } 70 | 71 | func (c *Zk) Set( path string,data []byte ) error { 72 | 73 | old_data, stat, err := c.zk.Get(path) 74 | if err != nil { 75 | //log.Error("*****set data******\n",err,path) 76 | c.Create(path,"") 77 | } 78 | 79 | old_data_string := string( old_data ) 80 | 81 | log.Info("zk old data:",old_data_string) 82 | log.Info("zk new data:",string(data) ) 83 | 84 | if old_data_string != string(data) { 85 | stat, err = c.zk.Set(path, data, stat.Version) 86 | log.Info("zk set data:",stat,err) 87 | } 88 | if err != nil { 89 | c.Re_new() 90 | } 91 | return err 92 | } 93 | 94 | 95 | func (c *Zk) Close() { 96 | if c.zk != nil { 97 | c.zk.Close() 98 | } 99 | } 100 | 101 | 102 | 103 | func (c *Zk) Watch(path string,res chan []byte) { 104 | 105 | var co_retry <-chan time.Time 106 | 107 | loop: 108 | for { 109 | data, tag, ch, err := c.zk.GetW(path) 110 | if err != nil { 111 | log.Error("GetW path :%v Error: %v", path , err) 112 | co_retry = time.After(time.Second * 60) 113 | } else { 114 | res <- data 115 | 
co_retry = nil 116 | } 117 | select { 118 | case <-co_retry: 119 | log.Warn("60 seconds end ,We try to watch ref path :%v again \n ",path) 120 | case <-c.stop: 121 | log.Warn("stop RunBody path:%v",path ) 122 | break loop 123 | case <-ch: 124 | log.Info("Watch Path has changed Event: ", path,tag ) 125 | } 126 | } 127 | 128 | 129 | } 130 | 131 | 132 | -------------------------------------------------------------------------------- /midd/operator.go: -------------------------------------------------------------------------------- 1 | package midd 2 | 3 | import ( 4 | "github.com/influxdata/influxdb/models" 5 | "github.com/infP/meta" 6 | log "github.com/Sirupsen/logrus" 7 | "fmt" 8 | "errors" 9 | "time" 10 | "github.com/influxdata/influxdb/query" 11 | "github.com/influxdata/influxql" 12 | //"github.com/abrander/influxdb/influxql" 13 | "strings" 14 | "math/rand" 15 | "runtime" 16 | ) 17 | 18 | var ( 19 | _ = fmt.Sprintf("") 20 | trace = make([]byte, 1024, 1024) 21 | ) 22 | 23 | 24 | type Operator struct { 25 | Cluster *meta.Cluster 26 | Slot *meta.Slot 27 | Migra *meta.Slot 28 | WriteTimeOut time.Duration 29 | } 30 | 31 | 32 | var ( 33 | MAX_KEY_SIZE = 200 34 | MAX_VAL_SIZE = 200 35 | ) 36 | 37 | 38 | 39 | func (p *Operator) SetCluster(c *meta.Cluster) { 40 | p.Cluster = c 41 | p.Slot = p.Cluster.Slot 42 | p.WriteTimeOut = c.Conf.Write_timeout_dur 43 | p.Migra = p.Cluster.Migra 44 | } 45 | 46 | 47 | 48 | func (p *Operator) Query(dbname, rpname, sql, epoch, chunk string, timeout int) (meta.Response, error) { 49 | 50 | defer func() { 51 | if r := recover(); r != nil { 52 | err := fmt.Errorf("query: Recover from panic: %s\n", r) 53 | count := runtime.Stack(trace, true) 54 | trace_msg := fmt.Sprintf("Stack of %d bytes: %s\n", count, trace) 55 | log.Error("query crashed: %v, %v", err, trace_msg) 56 | } 57 | }() 58 | 59 | qr := strings.NewReader(sql) 60 | pl := influxql.NewParser(qr) 61 | q, err := pl.ParseQuery() 62 | if err != nil { 63 | log.Error("parse query fail,",err) 64 | return meta.Response{},err 65 | } 66 | 67 | 68 | var results meta.Response 69 | 70 | results.Results = []*query.Result{ } 71 | /*** 72 | SELECT value FROM cpu_load_short WHERE region='us-west'; SELECT * from cpu_load_shortt 73 | split into two statements 74 | ***/ 75 | finish := make(chan *meta.Response, 0) 76 | count := 0 77 | 78 | mergeFlag := false 79 | 80 | over := map[int] map[string] interface{} {} 81 | 82 | for k, stat := range q.Statements { 83 | switch stat1 := stat.(type) { 84 | case *influxql.ShowFieldKeysStatement: 85 | 86 | sql := stat.String() 87 | if strings.Contains(sql,"LIMIT") == false { 88 | sql = fmt.Sprintf("%s LIMIT %d",sql,MAX_KEY_SIZE) 89 | } 90 | 91 | if p.Cluster.DbShard.Check(dbname) { 92 | ret := p.Cluster.FetchAllInfluxs() 93 | for kk,_ := range ret { 94 | go ret[kk].Query(dbname, rpname, sql, epoch, chunk, finish,k) 95 | count = count + 1 96 | } 97 | } 98 | 99 | for _, m := range stat1.Sources.Measurements() { 100 | 101 | b := p.Cluster.DbShard.QueryE(dbname, m.Name) 102 | 103 | if b == true { 104 | mergeFlag = false 105 | machines := p.Slot.FetchWriteInstances( []byte(m.Name) ) 106 | if len(machines) == 0 { 107 | log.Warn("measurement does not get slot:", m.Name ) 108 | continue 109 | } 110 | 111 | c_m := []* meta.Influx{} 112 | 113 | for _,v := range machines { 114 | ins := p.Cluster.FetchReadInflux(v) 115 | if ins != nil { 116 | c_m = append(c_m, ins) 117 | } 118 | } 119 | 120 | _len := len(c_m) 121 | _len = rand.Intn(_len) 122 | 123 | machine := c_m[ _len ] 124 | 125 | go 
machine.Query(dbname, rpname, sql, epoch, chunk, finish,k) 126 | count = count + 1 127 | //log.Info("query on ",machine.Addr," ind:",_len, ", sql: ",stat," db:",dbname) 128 | } else { 129 | mergeFlag = true 130 | ret := p.Cluster.FetchRandInfluxs() 131 | for kk,_ := range ret { 132 | go ret[kk].Query(dbname, rpname, stat.String(), epoch, chunk, finish,k) 133 | count = count + 1 134 | } 135 | } 136 | } 137 | 138 | case *influxql.ShowMeasurementsStatement: 139 | 140 | case *influxql.ShowTagValuesStatement: 141 | 142 | mergeFlag = true 143 | 144 | for _, m := range stat1.Sources.Measurements() { 145 | 146 | b := p.Cluster.DbShard.QueryE(dbname, m.Name) 147 | 148 | if b == true { 149 | mergeFlag = false 150 | machines := p.Slot.FetchWriteInstances( []byte(m.Name) ) 151 | if len(machines) == 0 { 152 | log.Warn("measurement does not get slot:", m.Name ) 153 | continue 154 | } 155 | 156 | c_m := []* meta.Influx{} 157 | 158 | for _,v := range machines { 159 | ins := p.Cluster.FetchReadInflux(v) 160 | if ins != nil { 161 | c_m = append(c_m, ins) 162 | } 163 | } 164 | 165 | _len := len(c_m) 166 | _len = rand.Intn(_len) 167 | 168 | machine := c_m[ _len ] 169 | sql := stat.String() 170 | if strings.Contains(sql,"LIMIT") == false { 171 | sql = fmt.Sprintf("%s LIMIT %d",sql,MAX_VAL_SIZE) 172 | } 173 | 174 | go machine.Query(dbname, rpname, sql, epoch, chunk, finish,k) 175 | count = count + 1 176 | //log.Info("query on ",machine.Addr," ind:",_len, ", sql: ",stat," db:",dbname) 177 | } else { 178 | mergeFlag = true 179 | ret := p.Cluster.FetchRandInfluxs() 180 | for kk,_ := range ret { 181 | go ret[kk].Query(dbname, rpname, stat.String(), epoch, chunk, finish,k) 182 | count = count + 1 183 | } 184 | } 185 | } 186 | 187 | case *influxql.ShowTagKeysStatement: 188 | 189 | mergeFlag = true 190 | for _, m := range stat1.Sources.Measurements() { 191 | 192 | //dbname 193 | b := p.Cluster.DbShard.QueryE(dbname, m.Name) 194 | 195 | if b == true { 196 | mergeFlag = false 197 | machines := p.Slot.FetchWriteInstances( []byte(m.Name) ) 198 | if len(machines) == 0 { 199 | log.Warn("measurement does not get slot:", m.Name ) 200 | continue 201 | } 202 | 203 | c_m := []* meta.Influx{} 204 | 205 | for _,v := range machines { 206 | ins := p.Cluster.FetchReadInflux(v) 207 | if ins != nil { 208 | c_m = append(c_m, ins) 209 | } 210 | } 211 | 212 | _len := len(c_m) 213 | _len = rand.Intn(_len) 214 | 215 | machine := c_m[ _len ] 216 | 217 | sql := stat.String() 218 | if strings.Contains(sql,"LIMIT") == false { 219 | sql = fmt.Sprintf("%s LIMIT %d",sql,MAX_KEY_SIZE) 220 | } 221 | 222 | go machine.Query(dbname, rpname, sql, epoch, chunk, finish,k) 223 | count = count + 1 224 | } else { 225 | mergeFlag = true 226 | ret := p.Cluster.FetchRandInfluxs() 227 | for kk,_ := range ret { 228 | go ret[kk].Query(dbname, rpname, stat.String(), epoch, chunk, finish,k) 229 | count = count + 1 230 | } 231 | } 232 | //log.Info("query on ",machine.Addr," ind:",_len, ", sql: ",stat," db:",dbname) 233 | } 234 | 235 | case *influxql.SelectStatement: 236 | 237 | mergeFlag = false 238 | if dbname == "_internal" { 239 | ret := p.Cluster.FetchAllInfluxs() 240 | for kk,_ := range ret { 241 | go ret[kk].Query(dbname, rpname, stat.String(), epoch, chunk, finish,k) 242 | count = count + 1 243 | } 244 | } else { 245 | 246 | tmp := QueryFilter(stat1) 247 | 248 | over[k] = tmp 249 | 250 | for _, m := range stat1.Sources.Measurements() { 251 | 252 | //dbname 253 | str,b := p.Cluster.DbShard.QueryFetch(dbname, stat1, m.Name) 254 | 255 | if b == true { 256 | 
machines := p.Slot.FetchWriteInstances( str ) 257 | if len(machines) == 0 { 258 | log.Warn("measurement does not get slot:", m.Name ) 259 | continue 260 | } 261 | 262 | c_m := []* meta.Influx{} 263 | 264 | for _,v := range machines { 265 | ins := p.Cluster.FetchReadInflux(v) 266 | if ins != nil { 267 | c_m = append(c_m, ins) 268 | } 269 | } 270 | 271 | _len := len(c_m) 272 | _len = rand.Intn(_len) 273 | 274 | machine := c_m[ _len ] 275 | go machine.Query(dbname, rpname, stat.String(), epoch, chunk, finish,k) 276 | count = count + 1 277 | } else { 278 | ret := p.Cluster.FetchRandInfluxs() 279 | for kk,_ := range ret { 280 | go ret[kk].Query(dbname, rpname, stat.String(), epoch, chunk, finish,k) 281 | count = count + 1 282 | } 283 | } 284 | } 285 | } 286 | 287 | } 288 | } 289 | 290 | //每个sql 有单独的 result 291 | total := len(q.Statements) 292 | if mergeFlag == false { 293 | for i:=0;i< total;i++ { 294 | t := &query.Result{ StatementID:i,Series: models.Rows{},Messages: []*query.Message{} } 295 | results.Results = append( results.Results, t ) 296 | } 297 | } else { 298 | t := &query.Result{ StatementID:0,Series: models.Rows{},Messages: []*query.Message{} } 299 | results.Results = append( results.Results, t ) 300 | } 301 | 302 | 303 | timeOut := time.After( time.Duration(timeout) * time.Second ) 304 | ansCount := 0 305 | loop: 306 | for { 307 | select { 308 | 309 | case <-timeOut: 310 | log.Warnf("influxdb query time out!!") 311 | err = errors.New("query data time out") 312 | break loop 313 | 314 | case ret := <-finish: 315 | 316 | ind := ret.Index 317 | for k,_ := range ret.Results { 318 | p.DataSumm(ret.Results[k].Series, over[ind] ) 319 | results.Results[ind].Series = append( results.Results[ind].Series, ret.Results[k].Series... ) 320 | } 321 | ansCount = ansCount + 1 322 | if ansCount == count { 323 | break loop 324 | } 325 | } 326 | } 327 | 328 | return results, nil 329 | } 330 | 331 | func (p *Operator) DataSumm(s models.Rows, tmp map[string]interface{} ) models.Rows { 332 | 333 | cc,ok := tmp["flag"] 334 | cquery := "" 335 | if ok == true { 336 | cquery = cc.(string) 337 | } 338 | 339 | switch cquery{ 340 | case "timeshift": 341 | 342 | ttt,ok := tmp["time"] 343 | if ok == false { 344 | return s 345 | } 346 | tt1 := float64( ttt.( int ) ) 347 | for k,_ := range s { 348 | for j,_ := range s[k].Values { 349 | aa := s[k].Values[j][0].(float64) + tt1 350 | s[k].Values[j][0] = aa 351 | //fmt.Printf( "******k******%T**%+v\n",s[k].Values[j], s[k].Values[j] ) 352 | } 353 | } 354 | } 355 | return s 356 | } 357 | 358 | func (p *Operator) WritePoints(points models.Points, 359 | dbname string, 360 | rpname string, 361 | precision string ) error { 362 | 363 | 364 | if p.Slot == nil { 365 | return errors.New("slot fail!") 366 | } 367 | 368 | modelP := map[string] []models.Point{} 369 | 370 | //instanceCount := 0 371 | 372 | for _,v := range points { 373 | str := p.Cluster.DbShard.Fetch(dbname,v) 374 | machines := p.Slot.FetchWriteInstances( str ) 375 | if len(machines) == 0 { 376 | log.Warn("measurement does not get slot:", string( v.Name() ) ) 377 | } 378 | 379 | m_machines := p.Migra.FetchWriteInstances( str ) 380 | if len(m_machines) != 0 { 381 | for k,_ := range m_machines { 382 | machines = append( machines, m_machines[k] ) 383 | //log.Debug("**migra***%v\n",m_machines[k]) 384 | } 385 | } 386 | 387 | for _,instance := range machines { 388 | _,ok := modelP[ instance ] 389 | if ok == false { 390 | modelP[ instance ] = []models.Point{} 391 | //instanceCount = instanceCount + 1 392 | } 393 | modelP[ 
instance ] = append( modelP[ instance ], v ) 394 | } 395 | } 396 | 397 | //instanceFinish := make(chan error, instanceCount+1) 398 | 399 | for k,v := range modelP { 400 | ins := p.Cluster.FetchWriteInflux(k) 401 | if ins != nil { 402 | //go ins.WritePoints(v,instanceFinish, dbname, rpname, precision) 403 | ins.PutPoints(v, dbname,rpname) 404 | //instanceFinish <- nil 405 | } else { 406 | //instanceCount = instanceCount - 1 407 | log.Error("catch influxdb not exist:",k) 408 | } 409 | } 410 | return nil 411 | } 412 | 413 | /***finishCount := 0 414 | timeOut := time.After( p.WriteTimeOut ) 415 | //loop: 416 | for { 417 | select { 418 | case <-timeOut: 419 | log.Warnf("influxdb write time out!!") 420 | return errors.New("write data time out") 421 | 422 | case err := <-instanceFinish: 423 | if err != nil { 424 | log.Error("write err",err) 425 | } else { 426 | finishCount = finishCount + 1 427 | } 428 | 429 | if finishCount == instanceCount { 430 | log.Debug("finish all influxdb write: ",finishCount,instanceCount) 431 | return nil 432 | } else { 433 | //return errors.New("some write data action fail") 434 | } 435 | } 436 | }***/ 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | -------------------------------------------------------------------------------- /midd/query.go: -------------------------------------------------------------------------------- 1 | package midd 2 | 3 | import ( 4 | //"github.com/influxdata/influxdb/models" 5 | //"github.com/infP/meta" 6 | //log "github.com/Sirupsen/logrus" 7 | "fmt" 8 | //"errors" 9 | //"time" 10 | //"github.com/influxdata/influxdb/query" 11 | "github.com/influxdata/influxql" 12 | //"github.com/abrander/influxdb/influxql" 13 | //"strings" 14 | //"math/rand" 15 | //"runtime" 16 | "reflect" 17 | "time" 18 | ) 19 | 20 | var ( 21 | _ = fmt.Sprintf("") 22 | ) 23 | 24 | 25 | func shTime(in string, h int) { 26 | 27 | } 28 | 29 | // now - 6h time shift 22h is > now - 28h and < now - 22h 30 | func findTime(conda interface{}, tmp map[string] time.Duration, h int ) { 31 | types := reflect.TypeOf(conda).String() 32 | 33 | if types == "influxql.BinaryExpr" { 34 | cond := conda.(influxql.BinaryExpr) 35 | key := cond.LHS.String() 36 | 37 | if key == "time" { 38 | dd := cond.RHS.(*influxql.BinaryExpr) 39 | switch dd.RHS.(type) { 40 | case *influxql.DurationLiteral: 41 | r1 := dd.RHS.(*influxql.DurationLiteral) 42 | v := r1.Val 43 | r1.Val = v + time.Duration( h) * time.Hour 44 | tmp[ "add" ] = time.Duration( h) * time.Hour 45 | } 46 | 47 | } else { 48 | findTime(cond.LHS,tmp,h) 49 | findTime(cond.RHS,tmp,h) 50 | } 51 | } 52 | 53 | if types == "*influxql.BinaryExpr" { 54 | 55 | cond := conda.(*influxql.BinaryExpr) 56 | 57 | key := cond.LHS.String() 58 | 59 | if key == "time" { 60 | dd := cond.RHS.(*influxql.BinaryExpr) 61 | switch dd.RHS.(type) { 62 | case *influxql.DurationLiteral: 63 | r1 := dd.RHS.(*influxql.DurationLiteral) 64 | v := r1.Val 65 | r1.Val = v + time.Duration( h) * time.Hour 66 | tmp[ "add" ] = time.Duration( h) * time.Hour 67 | } 68 | 69 | } else { 70 | findTime(cond.LHS,tmp,h) 71 | findTime(cond.RHS,tmp,h) 72 | } 73 | } 74 | 75 | if types == "influxql.ParenExpr" { 76 | cond1 := conda.(influxql.ParenExpr) 77 | cond := cond1.Expr 78 | findTime(cond,tmp,h) 79 | } 80 | 81 | if types == "*influxql.ParenExpr" { 82 | cond1 := conda.(*influxql.ParenExpr) 83 | cond := cond1.Expr 84 | findTime(cond,tmp,h) 85 | } 86 | } 87 | 88 | 89 | func timeShift(stat1 *influxql.SelectStatement,args [] influxql.Expr, k int) int{ 90 | //con := stat1.Condition 91 | 92 | //args len 
must 2 93 | if len(args) != 2 { 94 | return 0 95 | } 96 | 97 | stat1.Fields[k].Expr = args[0] 98 | h := args[1].(*influxql.IntegerLiteral).Val 99 | 100 | //fmt.Printf("******args***%v*%T*****%v\n",con,con, stat1) 101 | tmp := map[string]time.Duration{} 102 | findTime(stat1.Condition, tmp, int(h)) 103 | ti,ok := tmp["add"] 104 | if ok == true { 105 | sql := fmt.Sprintf("%s AND time < now() - %s",stat1.Condition.String(), ti.String()) 106 | stat1.Condition,_ = influxql.ParseExpr(sql) 107 | } 108 | return int(h)*3600*1000 109 | } 110 | 111 | 112 | func QueryFilter(stat1 *influxql.SelectStatement) map[string]interface{} { 113 | fields := stat1.Fields 114 | 115 | tmp := map[string]interface{} {} 116 | 117 | for k,_ := range fields { 118 | e := fields[k].Expr 119 | 120 | switch d := e.(type) { 121 | case *influxql.Call: 122 | f := d.Name 123 | //Args := e.Args 124 | switch f { 125 | case "timeshift": 126 | t := timeShift(stat1,d.Args,k) 127 | tmp["flag"] = "timeshift" 128 | tmp["time"] = t 129 | break 130 | } 131 | } 132 | } 133 | return tmp 134 | } 135 | -------------------------------------------------------------------------------- /test/1.py: -------------------------------------------------------------------------------- 1 | from socket import * 2 | import time 3 | 4 | HOST = '10.4.4.13' 5 | PORT = 8088 6 | BUFSIZE = 1024 7 | 8 | ADDR = (HOST, PORT) 9 | 10 | udpCliSock = socket(AF_INET, SOCK_DGRAM) 11 | 12 | a = 1 13 | while True: 14 | 15 | a = a + 1 16 | data = '''cpu_load_short777,host=server07,region=us-west4 value=0.6{2} {1}000000000 17 | cpu_load_short777,host=server0{0},region=us-west value=0.5{2} {1}000000000'''.format( 6,int(time.time()),a ) 18 | if not data: 19 | break 20 | udpCliSock.sendto(data,ADDR) 21 | time.sleep(1) 22 | 23 | udpCliSock.close() 24 | -------------------------------------------------------------------------------- /test/ben.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "net/http" 5 | "net" 6 | "time" 7 | "fmt" 8 | "net/url" 9 | "io/ioutil" 10 | "bytes" 11 | log "github.com/Sirupsen/logrus" 12 | "compress/gzip" 13 | "sync/atomic" 14 | "crypto/tls" 15 | "github.com/influxdata/influxdb/models" 16 | "os" 17 | "math/rand" 18 | "strconv" 19 | ) 20 | 21 | 22 | 23 | var ( 24 | //TODO: move to function, support config 25 | httpClient = &http.Client{Transport: &http.Transport{ 26 | MaxIdleConns: 100, 27 | MaxIdleConnsPerHost: 100, 28 | IdleConnTimeout: 30 * time.Second, 29 | DisableCompression: false, 30 | TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, 31 | Dial: (&net.Dialer{ 32 | Timeout: 10 * time.Second, 33 | KeepAlive: 30 * time.Second, 34 | }).Dial, 35 | TLSHandshakeTimeout: 10 * time.Second, 36 | }} 37 | 38 | 39 | count uint64 40 | total uint64 41 | 42 | LEN = 500 43 | ) 44 | 45 | 46 | func mustGzipPoints(points models.Points, precision string) []byte { 47 | zbuf := bytes.NewBuffer(nil) 48 | 49 | gz := gzip.NewWriter(zbuf) 50 | for _, p := range points { 51 | if _, err := gz.Write([]byte(p.PrecisionString(precision))); err != nil { 52 | break 53 | } 54 | if _, err := gz.Write([]byte("\n")); err != nil { 55 | break 56 | } 57 | } 58 | gz.Flush() 59 | gz.Close() 60 | return zbuf.Bytes() 61 | } 62 | 63 | 64 | func generate(ret chan []byte,c int) { 65 | 66 | ddd := rand.Intn(1000) 67 | 68 | zbuf := bytes.NewBuffer(nil) 69 | 70 | gz := gzip.NewWriter(zbuf) 71 | 72 | co := 0 73 | 74 | //for k:=200;k >= 0; k -- { 75 | for k:=0;k<50000; k++ { 76 | //for j:=120; j >= 0; j -- { 77 | for 
j:=0; j <= 120; j ++ { 78 | t := time.Now().Add( - time.Second * time.Duration( k*3600 + j*30 ) ).Unix() 79 | 80 | //fmt.Printf("***t********%v\n",t) 81 | // 10000 个 measrement 82 | for i:=1;i<=100; i ++ { 83 | //m := c * i 84 | co = co + 1 85 | a := []byte( fmt.Sprintf("meament_%s_%d,host=opshost%d,app=opsapp%d,pod=opspod%d,test=opstest%d value=%d %d000000000",os.Args[1],ddd,i,i,i,i,i,t) ) 86 | 87 | //points, _ := models.ParsePointsWithPrecision(a, time.Now().UTC(), "") 88 | 89 | //fmt.Printf("data****%v*****%v\n", points[0].PrecisionString(""),parseError) 90 | 91 | //gz.Write([]byte(points[0].PrecisionString(""))) 92 | gz.Write( []byte(a) ) 93 | gz.Write([]byte("\n")) 94 | 95 | if co >= 100 && co % 100 == 0 { 96 | co = 0 97 | gz.Flush() 98 | gz.Close() 99 | 100 | ret <- zbuf.Bytes() 101 | 102 | zbuf = bytes.NewBuffer(nil) 103 | gz = gzip.NewWriter(zbuf) 104 | } 105 | } 106 | } 107 | } 108 | 109 | } 110 | 111 | 112 | func static() { 113 | startCollect := time.Tick(1*time.Second) 114 | 115 | var last uint64 116 | last = 0 117 | var l uint64 118 | 119 | l = 0 120 | 121 | //loop: 122 | for { 123 | 124 | select { 125 | case <- startCollect: 126 | fmt.Printf("****summary***%v***%v***%v\n", count - last, total - l, float64( total - l ) / float64( count - last ) ) 127 | last = count 128 | l = total 129 | } 130 | } 131 | } 132 | 133 | 134 | func main() { 135 | 136 | count = 0 137 | total = 0 138 | tt := map[int] chan []byte{} 139 | 140 | tta,_:=strconv.Atoi( os.Args[2] ) 141 | 142 | for i:=1; i <= tta; i ++ { 143 | h := make( chan []byte ) 144 | tt[ i ] = h 145 | go generate(h,i) 146 | //go send(h,i) 147 | } 148 | 149 | time.Sleep(5* time.Second) 150 | 151 | for i:=1; i <= tta; i ++ { 152 | c,_ := tt[i] 153 | go send(c,i) 154 | } 155 | 156 | static() 157 | } 158 | 159 | func send(ret chan []byte,i int) { 160 | 161 | // end := time.Now() 162 | //loop: 163 | for { 164 | select { 165 | case data := <- ret: 166 | // fmt.Printf("*****tt***%v\n", time.Now().Sub( end ) ) 167 | post(data) 168 | //fmt.Printf("******end*****%v\n",time.Now() ) 169 | //end = time.Now() 170 | } 171 | } 172 | 173 | } 174 | 175 | func post(ret []byte) { 176 | 177 | 178 | atomic.AddUint64(&count, uint64(LEN)) 179 | 180 | //addr := "http://10.0.48.114:6666/write" 181 | //addr := "http://10.0.48.20:6666/write" 182 | 183 | var addr string 184 | 185 | if os.Args[1] == "0" { 186 | addr = "http://10.0.49.59:8086/write" 187 | } 188 | 189 | if os.Args[1] == "1" { 190 | addr = "http://10.0.48.20:6666/write" 191 | } 192 | 193 | start := time.Now().UnixNano() 194 | 195 | u, err := url.Parse(addr) 196 | 197 | if err != nil { 198 | log.Errorf("failed to parse influxdb instance addr: %v", err) 199 | //errCh <- err 200 | return 201 | } 202 | 203 | 204 | p := url.Values{} 205 | p.Add("db", "mydb") 206 | u.RawQuery = p.Encode() 207 | 208 | req, err := http.NewRequest("POST", u.String(), bytes.NewBuffer( ret ) ) 209 | if err != nil { 210 | log.Errorf("NewRequest Error: %v", err) 211 | return 212 | } 213 | 214 | req.Header.Add("Content-Encoding", "gzip") 215 | 216 | resp, err := httpClient.Do(req) 217 | if err != nil { 218 | fmt.Printf("influx.PostPoints - post data failed: %v\n", err) 219 | return 220 | } 221 | 222 | 223 | cost_time := (time.Now().UnixNano() - start)/1000000 224 | 225 | //log.Info("write data cost time:", cost_time) 226 | 227 | atomic.AddUint64(&total, uint64( cost_time )) 228 | defer resp.Body.Close() 229 | 230 | if resp.StatusCode/100 != 2 { 231 | if r, err := ioutil.ReadAll(resp.Body); err != nil { 232 | fmt.Printf("status 
code not 2xx:%v\n",err) 233 | return 234 | } else { 235 | fmt.Printf("***r*****%v\n", string(r) ) 236 | } 237 | } 238 | 239 | } 240 | -------------------------------------------------------------------------------- /utils/converters.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "math" 7 | "regexp" 8 | "time" 9 | 10 | "github.com/influxdata/influxdb/models" 11 | "github.com/influxdata/influxdb/prometheus/remote" 12 | "github.com/influxdata/influxql" 13 | ) 14 | 15 | const ( 16 | // measurementName is where all prometheus time series go to 17 | measurementName = "_" 18 | 19 | // fieldName is the field all prometheus values get written to 20 | fieldName = "f64" 21 | ) 22 | 23 | var ErrNaNDropped = errors.New("dropped NaN from Prometheus since they are not supported") 24 | 25 | // WriteRequestToPoints converts a Prometheus remote write request of time series and their 26 | // samples into Points that can be written into Influx 27 | func WriteRequestToPoints(req *remote.WriteRequest) ([]models.Point, error) { 28 | var maxPoints int 29 | for _, ts := range req.Timeseries { 30 | maxPoints += len(ts.Samples) 31 | } 32 | points := make([]models.Point, 0, maxPoints) 33 | 34 | var droppedNaN error 35 | 36 | for _, ts := range req.Timeseries { 37 | tags := make(map[string]string, len(ts.Labels)) 38 | for _, l := range ts.Labels { 39 | tags[l.Name] = l.Value 40 | } 41 | 42 | for _, s := range ts.Samples { 43 | // skip NaN values, which are valid in Prometheus 44 | if math.IsNaN(s.Value) { 45 | droppedNaN = ErrNaNDropped 46 | continue 47 | } 48 | 49 | measurementName,_ := tags["__name__"] 50 | delete(tags,"__name__") 51 | // convert and append 52 | t := time.Unix(0, s.TimestampMs*int64(time.Millisecond)) 53 | fields := map[string]interface{}{fieldName: s.Value} 54 | p, err := models.NewPoint(measurementName, models.NewTags(tags), fields, t) 55 | if err != nil { 56 | return nil, err 57 | } 58 | 59 | points = append(points, p) 60 | } 61 | } 62 | return points, droppedNaN 63 | } 64 | 65 | // ReadRequestToInfluxQLQuery converts a Prometheus remote read request to an equivalent InfluxQL 66 | // query that will return the requested data when executed 67 | func ReadRequestToInfluxQLQuery(req *remote.ReadRequest, db, rp string) (*influxql.Query, error) { 68 | if len(req.Queries) != 1 { 69 | return nil, errors.New("Prometheus read endpoint currently only supports one query at a time") 70 | } 71 | promQuery := req.Queries[0] 72 | 73 | stmt := &influxql.SelectStatement{ 74 | IsRawQuery: true, 75 | Fields: []*influxql.Field{ 76 | {Expr: &influxql.VarRef{Val: fieldName}}, 77 | }, 78 | Sources: []influxql.Source{&influxql.Measurement{ 79 | Name: measurementName, 80 | Database: db, 81 | RetentionPolicy: rp, 82 | }}, 83 | Dimensions: []*influxql.Dimension{{Expr: &influxql.Wildcard{}}}, 84 | } 85 | 86 | cond, err := condFromMatchers(promQuery, promQuery.Matchers) 87 | if err != nil { 88 | return nil, err 89 | } 90 | 91 | stmt.Condition = cond 92 | 93 | return &influxql.Query{Statements: []influxql.Statement{stmt}}, nil 94 | } 95 | 96 | // condFromMatcher converts a Prometheus LabelMatcher into an equivalent InfluxQL BinaryExpr 97 | func condFromMatcher(m *remote.LabelMatcher) (*influxql.BinaryExpr, error) { 98 | var op influxql.Token 99 | var rhs influxql.Expr 100 | 101 | switch m.Type { 102 | case remote.MatchType_EQUAL: 103 | op = influxql.EQ 104 | case remote.MatchType_NOT_EQUAL: 105 | op = influxql.NEQ 106 | case 
remote.MatchType_REGEX_MATCH: 107 | op = influxql.EQREGEX 108 | case remote.MatchType_REGEX_NO_MATCH: 109 | op = influxql.NEQREGEX 110 | default: 111 | return nil, fmt.Errorf("unknown match type %v", m.Type) 112 | } 113 | 114 | if op == influxql.EQREGEX || op == influxql.NEQREGEX { 115 | re, err := regexp.Compile(m.Value) 116 | if err != nil { 117 | return nil, err 118 | } 119 | 120 | // Convert regex values to InfluxDB format. 121 | rhs = &influxql.RegexLiteral{Val: re} 122 | } else { 123 | rhs = &influxql.StringLiteral{Val: m.Value} 124 | } 125 | 126 | return &influxql.BinaryExpr{ 127 | Op: op, 128 | LHS: &influxql.VarRef{Val: m.Name}, 129 | RHS: rhs, 130 | }, nil 131 | } 132 | 133 | // condFromMatchers converts a Prometheus remote query and a collection of Prometheus label matchers 134 | // into an equivalent influxql.BinaryExpr. This assume a schema that is written via the Prometheus 135 | // remote write endpoint, which uses a measurement name of _ and a field name of f64. Tags and labels 136 | // are kept equivalent. 137 | func condFromMatchers(q *remote.Query, matchers []*remote.LabelMatcher) (*influxql.BinaryExpr, error) { 138 | if len(matchers) > 0 { 139 | lhs, err := condFromMatcher(matchers[0]) 140 | if err != nil { 141 | return nil, err 142 | } 143 | rhs, err := condFromMatchers(q, matchers[1:]) 144 | if err != nil { 145 | return nil, err 146 | } 147 | 148 | return &influxql.BinaryExpr{ 149 | Op: influxql.AND, 150 | LHS: lhs, 151 | RHS: rhs, 152 | }, nil 153 | } 154 | 155 | return &influxql.BinaryExpr{ 156 | Op: influxql.AND, 157 | LHS: &influxql.BinaryExpr{ 158 | Op: influxql.GTE, 159 | LHS: &influxql.VarRef{Val: "time"}, 160 | RHS: &influxql.TimeLiteral{Val: time.Unix(0, q.StartTimestampMs*int64(time.Millisecond))}, 161 | }, 162 | RHS: &influxql.BinaryExpr{ 163 | Op: influxql.LTE, 164 | LHS: &influxql.VarRef{Val: "time"}, 165 | RHS: &influxql.TimeLiteral{Val: time.Unix(0, q.EndTimestampMs*int64(time.Millisecond))}, 166 | }, 167 | }, nil 168 | } 169 | 170 | // TagsToLabelPairs converts a map of Influx tags into a slice of Prometheus label pairs 171 | func TagsToLabelPairs(tags map[string]string) []*remote.LabelPair { 172 | pairs := make([]*remote.LabelPair, 0, len(tags)) 173 | for k, v := range tags { 174 | if v == "" { 175 | // If we select metrics with different sets of labels names, 176 | // InfluxDB returns *all* possible tag names on all returned 177 | // series, with empty tag values on series where they don't 178 | // apply. In Prometheus, an empty label value is equivalent 179 | // to a non-existent label, so we just skip empty ones here 180 | // to make the result correct. 181 | continue 182 | } 183 | pairs = append(pairs, &remote.LabelPair{ 184 | Name: k, 185 | Value: v, 186 | }) 187 | } 188 | return pairs 189 | } 190 | -------------------------------------------------------------------------------- /utils/crc16.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | /* use redis sloting code from redis-go-cluster package */ 4 | /* 5 | * Copyright 2001-2010 Georges Menie (www.menie.org) 6 | * Copyright 2010-2012 Salvatore Sanfilippo (adapted to Redis coding style) 7 | * All rights reserved. 8 | * 9 | * Redistribution and use in source and binary forms, with or without 10 | * modification, are permitted provided that the following conditions are met: 11 | * 12 | * * Redistributions of source code must retain the above copyright 13 | * notice, this list of conditions and the following disclaimer. 
14 | * * Redistributions in binary form must reproduce the above copyright 15 | * notice, this list of conditions and the following disclaimer in the 16 | * documentation and/or other materials provided with the distribution. 17 | * * Neither the name of the University of California, Berkeley nor the 18 | * names of its contributors may be used to endorse or promote products 19 | * derived from this software without specific prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY 22 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY 25 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 28 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | 33 | /* CRC16 implementation according to CCITT standards. 34 | * 35 | * Note by @antirez: this is actually the XMODEM CRC 16 algorithm, using the 36 | * following parameters: 37 | * 38 | * Name : "XMODEM", also known as "ZMODEM", "CRC-16/ACORN" 39 | * Width : 16 bit 40 | * Poly : 1021 (That is actually x^16 + x^12 + x^5 + 1) 41 | * Initialization : 0000 42 | * Reflect Input byte : False 43 | * Reflect Output CRC : False 44 | * Xor constant to output CRC : 0000 45 | * Output for "123456789" : 31C3 46 | */ 47 | 48 | 49 | var crc16tab = [256]uint16{ 50 | 0x0000,0x1021,0x2042,0x3063,0x4084,0x50a5,0x60c6,0x70e7, 51 | 0x8108,0x9129,0xa14a,0xb16b,0xc18c,0xd1ad,0xe1ce,0xf1ef, 52 | 0x1231,0x0210,0x3273,0x2252,0x52b5,0x4294,0x72f7,0x62d6, 53 | 0x9339,0x8318,0xb37b,0xa35a,0xd3bd,0xc39c,0xf3ff,0xe3de, 54 | 0x2462,0x3443,0x0420,0x1401,0x64e6,0x74c7,0x44a4,0x5485, 55 | 0xa56a,0xb54b,0x8528,0x9509,0xe5ee,0xf5cf,0xc5ac,0xd58d, 56 | 0x3653,0x2672,0x1611,0x0630,0x76d7,0x66f6,0x5695,0x46b4, 57 | 0xb75b,0xa77a,0x9719,0x8738,0xf7df,0xe7fe,0xd79d,0xc7bc, 58 | 0x48c4,0x58e5,0x6886,0x78a7,0x0840,0x1861,0x2802,0x3823, 59 | 0xc9cc,0xd9ed,0xe98e,0xf9af,0x8948,0x9969,0xa90a,0xb92b, 60 | 0x5af5,0x4ad4,0x7ab7,0x6a96,0x1a71,0x0a50,0x3a33,0x2a12, 61 | 0xdbfd,0xcbdc,0xfbbf,0xeb9e,0x9b79,0x8b58,0xbb3b,0xab1a, 62 | 0x6ca6,0x7c87,0x4ce4,0x5cc5,0x2c22,0x3c03,0x0c60,0x1c41, 63 | 0xedae,0xfd8f,0xcdec,0xddcd,0xad2a,0xbd0b,0x8d68,0x9d49, 64 | 0x7e97,0x6eb6,0x5ed5,0x4ef4,0x3e13,0x2e32,0x1e51,0x0e70, 65 | 0xff9f,0xefbe,0xdfdd,0xcffc,0xbf1b,0xaf3a,0x9f59,0x8f78, 66 | 0x9188,0x81a9,0xb1ca,0xa1eb,0xd10c,0xc12d,0xf14e,0xe16f, 67 | 0x1080,0x00a1,0x30c2,0x20e3,0x5004,0x4025,0x7046,0x6067, 68 | 0x83b9,0x9398,0xa3fb,0xb3da,0xc33d,0xd31c,0xe37f,0xf35e, 69 | 0x02b1,0x1290,0x22f3,0x32d2,0x4235,0x5214,0x6277,0x7256, 70 | 0xb5ea,0xa5cb,0x95a8,0x8589,0xf56e,0xe54f,0xd52c,0xc50d, 71 | 0x34e2,0x24c3,0x14a0,0x0481,0x7466,0x6447,0x5424,0x4405, 72 | 0xa7db,0xb7fa,0x8799,0x97b8,0xe75f,0xf77e,0xc71d,0xd73c, 73 | 0x26d3,0x36f2,0x0691,0x16b0,0x6657,0x7676,0x4615,0x5634, 74 | 0xd94c,0xc96d,0xf90e,0xe92f,0x99c8,0x89e9,0xb98a,0xa9ab, 75 | 0x5844,0x4865,0x7806,0x6827,0x18c0,0x08e1,0x3882,0x28a3, 76 | 0xcb7d,0xdb5c,0xeb3f,0xfb1e,0x8bf9,0x9bd8,0xabbb,0xbb9a, 77 | 
0x4a75,0x5a54,0x6a37,0x7a16,0x0af1,0x1ad0,0x2ab3,0x3a92, 78 | 0xfd2e,0xed0f,0xdd6c,0xcd4d,0xbdaa,0xad8b,0x9de8,0x8dc9, 79 | 0x7c26,0x6c07,0x5c64,0x4c45,0x3ca2,0x2c83,0x1ce0,0x0cc1, 80 | 0xef1f,0xff3e,0xcf5d,0xdf7c,0xaf9b,0xbfba,0x8fd9,0x9ff8, 81 | 0x6e17,0x7e36,0x4e55,0x5e74,0x2e93,0x3eb2,0x0ed1,0x1ef0, 82 | } 83 | 84 | func Crc16(buf []byte) uint16 { 85 | var crc uint16 86 | for _, n := range buf { 87 | crc = (crc<<uint16(8)) ^ crc16tab[((crc>>uint16(8))^uint16(n))&0x00FF] 88 | } 89 | return crc 90 | } 91 | --------------------------------------------------------------------------------
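End-to-end, every write and query in this proxy is routed by hashing a key (the measurement, or measurement plus the sharding tags built in meta/dbSharding.go) onto one of the 16384 slots via the CRC16 above. The snippet below is only an illustrative sketch of that routing, assuming this repository's meta package (github.com/infP/meta) is importable as-is; it exercises Slot.FetchSlot from meta/slot.go with both a plain measurement and the redis-style {hash tag} form, where only the text inside the braces is hashed so related measurements land in the same slot.

package main

import (
	"fmt"

	"github.com/infP/meta" // Slot / FetchSlot as defined in meta/slot.go of this repo
)

func main() {
	var s meta.Slot

	// Plain measurement: the whole name is CRC16-hashed, then masked to 0..16383.
	fmt.Println(s.FetchSlot([]byte("cpu_load_short")))

	// {hash tag}: only "cpu" is hashed, so both keys map to the same slot.
	fmt.Println(s.FetchSlot([]byte("{cpu}load_short")))
	fmt.Println(s.FetchSlot([]byte("{cpu}load_long")))
}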