├── .gitignore
├── Dockerfile_build_ubuntu64
├── Godeps
├── LICENSE
├── README.md
├── build.py
├── go.mod
├── go.sum
├── kapacitor.toml
├── main.go
├── relay
│   ├── config.go
│   ├── http.go
│   ├── relay.go
│   ├── retry.go
│   └── udp.go
├── sample.toml
├── sample_buffered.toml
└── scripts
    ├── influxdb-relay.service
    ├── init.sh
    ├── logrotate
    ├── post-install.sh
    └── post-uninstall.sh

/.gitignore:
--------------------------------------------------------------------------------
1 | *.swp
2 | /build
3 | 
--------------------------------------------------------------------------------
/Dockerfile_build_ubuntu64:
--------------------------------------------------------------------------------
1 | FROM ubuntu:trusty
2 | 
3 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
4 |     python-software-properties \
5 |     software-properties-common \
6 |     wget \
7 |     git \
8 |     mercurial \
9 |     make \
10 |     ruby \
11 |     ruby-dev \
12 |     rpm \
13 |     zip \
14 |     python \
15 |     python-boto
16 | 
17 | RUN gem install fpm
18 | 
19 | # Install go
20 | ENV GOPATH /root/go
21 | ENV GO_VERSION 1.6.2
22 | ENV GO_ARCH amd64
23 | RUN wget https://storage.googleapis.com/golang/go${GO_VERSION}.linux-${GO_ARCH}.tar.gz; \
24 |     tar -C /usr/local/ -xf /go${GO_VERSION}.linux-${GO_ARCH}.tar.gz ; \
25 |     rm /go${GO_VERSION}.linux-${GO_ARCH}.tar.gz
26 | ENV PATH /usr/local/go/bin:$PATH
27 | 
28 | ENV PROJECT_DIR $GOPATH/src/github.com/influxdata/influxdb-relay
29 | ENV PATH $GOPATH/bin:$PATH
30 | RUN mkdir -p $PROJECT_DIR
31 | WORKDIR $PROJECT_DIR
32 | 
33 | VOLUME $PROJECT_DIR
34 | 
35 | ENTRYPOINT [ "/root/go/src/github.com/influxdata/influxdb-relay/build.py" ]
36 | 
--------------------------------------------------------------------------------
/Godeps:
--------------------------------------------------------------------------------
1 | github.com/influxdata/influxdb1-client 02f0bf5dbca364fe200ee3f5709c8cc0c9bda0d0
2 | github.com/naoina/go-stringutil 6b638e95a32d0c1131db0e7fe83775cbea4a0d0b
3 | github.com/naoina/toml 751171607256bb66e64c9f0220c00662420c38e9
4 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 | 
3 | Copyright (c) 2016 InfluxData
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # InfluxDB Relay 2 | 3 | This project adds a basic high availability layer to InfluxDB. With the right architecture and disaster recovery processes, this achieves a highly available setup. 4 | 5 | *NOTE:* `influxdb-relay` must be built with Go 1.5+ 6 | 7 | ## Usage 8 | 9 | To build from source and run: 10 | 11 | ```sh 12 | $ # Install influxdb-relay to your $GOPATH/bin 13 | $ go get -u github.com/influxdata/influxdb-relay 14 | $ # Edit your configuration file 15 | $ cp $GOPATH/src/github.com/influxdata/influxdb-relay/sample.toml ./relay.toml 16 | $ vim relay.toml 17 | $ # Start relay! 18 | $ $GOPATH/bin/influxdb-relay -config relay.toml 19 | ``` 20 | 21 | ## Configuration 22 | 23 | ```toml 24 | [[http]] 25 | # Name of the HTTP server, used for display purposes only. 26 | name = "example-http" 27 | 28 | # TCP address to bind to, for HTTP server. 29 | bind-addr = "127.0.0.1:9096" 30 | 31 | # Enable HTTPS requests. 32 | ssl-combined-pem = "/etc/ssl/influxdb-relay.pem" 33 | 34 | # Array of InfluxDB instances to use as backends for Relay. 35 | output = [ 36 | # name: name of the backend, used for display purposes only. 37 | # location: full URL of the /write endpoint of the backend 38 | # timeout: Go-parseable time duration. Fail writes if incomplete in this time. 39 | # skip-tls-verification: skip verification for HTTPS location. WARNING: it's insecure. Don't use in production. 40 | { name="local1", location="http://127.0.0.1:8086/write", timeout="10s" }, 41 | { name="local2", location="http://127.0.0.1:7086/write", timeout="10s" }, 42 | ] 43 | 44 | [[udp]] 45 | # Name of the UDP server, used for display purposes only. 46 | name = "example-udp" 47 | 48 | # UDP address to bind to. 49 | bind-addr = "127.0.0.1:9096" 50 | 51 | # Socket buffer size for incoming connections. 52 | read-buffer = 0 # default 53 | 54 | # Precision to use for timestamps 55 | precision = "n" # Can be n, u, ms, s, m, h 56 | 57 | # Array of InfluxDB instances to use as backends for Relay. 58 | output = [ 59 | # name: name of the backend, used for display purposes only. 60 | # location: host and port of backend. 61 | # mtu: maximum output payload size 62 | { name="local1", location="127.0.0.1:8089", mtu=512 }, 63 | { name="local2", location="127.0.0.1:7089", mtu=1024 }, 64 | ] 65 | ``` 66 | 67 | ## Description 68 | 69 | The architecture is fairly simple and consists of a load balancer, two or more InfluxDB Relay processes and two or more InfluxDB processes. The load balancer should point UDP traffic and HTTP POST requests with the path `/write` to the two relays while pointing GET requests with the path `/query` to the two InfluxDB servers. 
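Writing through the relay looks just like writing to InfluxDB itself. As a quick illustration, here is a minimal Go client sketch; the relay address `127.0.0.1:9096` matches the sample config above, while the database `mydb` is an assumption and must already exist on the backends:

```go
package main

import (
	"fmt"
	"log"
	"net/http"
	"strings"
)

func main() {
	// One point in InfluxDB line protocol; the relay forwards the body
	// verbatim to every configured backend's /write endpoint.
	body := strings.NewReader("cpu,host=server01 value=0.64\n")

	// The "db" query parameter is required; the relay rejects requests
	// without it before contacting any backend.
	resp, err := http.Post("http://127.0.0.1:9096/write?db=mydb", "text/plain", body)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// 204 No Content means at least one backend accepted the write.
	fmt.Println(resp.Status)
}
```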
 70 | 
 71 | The setup should look like this:
 72 | 
 73 | ```
 74 |                ┌─────────────────┐
 75 |                │writes & queries │
 76 |                └─────────────────┘
 77 |                         │
 78 |                         ▼
 79 |                 ┌───────────────┐
 80 |                 │               │
 81 |        ┌────────│ Load Balancer │─────────┐
 82 |        │        │               │         │
 83 |        │        └──────┬─┬──────┘         │
 84 |        │               │ │                │
 85 |        │               │ │                │
 86 |        │        ┌──────┘ └────────┐       │
 87 |        │        │ ┌─────────────┐ │       │┌──────┐
 88 |        │        │ │/write or UDP│ │       ││/query│
 89 |        │        ▼ └─────────────┘ ▼       │└──────┘
 90 |        │  ┌──────────┐      ┌──────────┐  │
 91 |        │  │ InfluxDB │      │ InfluxDB │  │
 92 |        │  │  Relay   │      │  Relay   │  │
 93 |        │  └──┬────┬──┘      └────┬──┬──┘  │
 94 |        │     │    │              │  │     │
 95 |        │     │  ┌─┼──────────────┘  │     │
 96 |        │     │  │ └──────────────┐  │     │
 97 |        │     ▼  ▼                ▼  ▼     │
 98 |        │  ┌──────────┐      ┌──────────┐  │
 99 |        │  │          │      │          │  │
100 |        └─▶│ InfluxDB │      │ InfluxDB │◀─┘
101 |           │          │      │          │
102 |           └──────────┘      └──────────┘
103 | ```
104 | 
105 | 
106 | 
107 | The relay will listen for HTTP or UDP writes and write the data to each InfluxDB server via the HTTP write or UDP endpoint, as appropriate. If the write is sent via HTTP, the relay will return a success response as soon as one of the InfluxDB servers returns a success. If any InfluxDB server returns a 4xx response, that response is returned to the client immediately. If all servers return a 5xx, a 5xx is returned to the client. If some but not all servers return a 5xx, it is not returned to the client; you should monitor each instance's logs for 5xx errors.
108 | 
109 | With this setup, a failure of one Relay or one InfluxDB server can be sustained while still taking writes and serving queries. However, the recovery process might require operator intervention.
110 | 
111 | ## Buffering
112 | 
113 | The relay can be configured to buffer failed requests for HTTP backends.
114 | The intent of this logic is to reduce the number of failures during short outages or periodic network issues.
115 | > This retry logic is **NOT** sufficient for long periods of downtime, as all data is buffered in RAM
116 | 
117 | Buffering has the following configuration options (configured per HTTP backend):
118 | 
119 | * buffer-size-mb -- An upper limit on how much point data to keep in memory (in MB)
120 | * max-batch-kb -- A maximum size for the aggregated batches that will be submitted (in KB)
121 | * max-delay-interval -- The maximum delay between retry attempts per backend.
122 |   The initial retry delay is 500ms and is doubled after every failure.
123 | 
124 | If the buffer is full, requests are dropped and an error is logged.
125 | If a request makes it into the buffer, it is retried until it succeeds.
126 | 
127 | Retries are serialized to a single backend. In addition, writes will be aggregated and batched as long as the body of the aggregated request stays below `max-batch-kb`.
128 | If buffered requests succeed, there is no delay between subsequent attempts.
129 | 
130 | If the relay stays alive for the entire time a backend server is down without filling that server's allocated buffer, and remains online until the entire buffer is flushed, then no operator intervention is required to "recover" the data. The data will simply be batched together and written out to the recovered server in the order it was received.
131 | 
132 | *NOTE*: The limits for buffering are not hard limits on the memory usage of the application, and there will be additional overhead that would be much more challenging to account for. The limits listed are just for the amount of point line protocol (including any added timestamps, if applicable). Factors such as small incoming batch sizes and a smaller max batch size will increase the overhead in the buffer. There is also the general application memory overhead to account for. This means that a machine with 2GB of memory should not have buffers that sum up to _almost_ 2GB.
133 | 
134 | ## Recovery
135 | 
136 | InfluxDB organizes its data on disk into logical blocks of time called shards. We can use this to create a hot recovery process with zero downtime.
137 | 
138 | The length of time that a shard covers is typically 1 hour, 1 day, or 7 days, depending on the retention duration, but it can be set explicitly when creating the retention policy. For the sake of our example, let's assume a shard duration of 1 day.
139 | 
140 | Let's say one of the InfluxDB servers goes down for an hour on 2016-03-10. Once midnight UTC rolls over, all InfluxDB processes are now writing data to the shard for 2016-03-11 and the file(s) for 2016-03-10 have gone cold for writes. We can then restore things using these steps:
141 | 
142 | 1. Tell the load balancer to stop sending query traffic to the server that was down (this should be done as soon as an outage is detected to prevent partial or inconsistent query returns).
143 | 2. Create a backup of the 2016-03-10 shard from a server that was up the entire day.
144 | 3. Restore the backup of the shard from the good server to the server that had downtime.
145 | 4. Tell the load balancer to resume sending query traffic to the previously downed server.
146 | 
147 | During this entire process the Relays should be sending current writes to all servers, including the one that was down.
148 | 
149 | ## Sharding
150 | 
151 | It's possible to add another layer on top of this kind of setup to shard data. Depending on your needs, you could shard on the measurement name or on a specific tag like `customer_id`. The sharding layer would have to service both queries and writes.
152 | 
153 | As this relay does not handle queries, it will not implement any sharding logic. Any sharding would have to be done externally to the relay.
154 | 
155 | 
156 | ## Caveats
157 | 
158 | While `influxdb-relay` does provide some level of high availability, there are a few scenarios that need to be accounted for:
159 | 
160 | - `influxdb-relay` will not relay the `/query` endpoint, and this includes schema modification (create database, `DROP`s, etc.). This means that databases must be created before points are written to the backends.
161 | - Continuous queries will still only write their results locally. If a server goes down, the continuous query will have to be backfilled after the data has been recovered for that instance.
162 | - Overwriting points is potentially unpredictable. For example, given servers A and B, if B is down, and point X is written (we'll call the value X1) just before B comes back online, that write is queued behind every other write that occurred while B was offline. Once B is back online, the first buffered write succeeds, and all new writes are allowed to pass through. At this point (before X1 is written to B), X is written again (with value X2 this time) to both A and B. When the relay reaches the end of B's buffered writes, it will write X (with value X1) to B... at this point A has X2, but B has X1.
163 | - It is probably best to avoid re-writing points (if possible). Otherwise, please be aware that overwriting the same field for a given point can lead to data differences.
164 |   - This could potentially be mitigated by waiting for the buffer to flush before allowing writes to pass through again.
165 | 
166 | ## Building
167 | 
168 | The recommended method for building `influxdb-relay` is to use Docker
169 | and the included `Dockerfile_build_ubuntu64` Dockerfile, which
170 | includes all of the necessary dependencies.
171 | 
172 | To build the Docker image, you can run:
173 | 
174 | ```
175 | docker build -f Dockerfile_build_ubuntu64 -t influxdb-relay-builder:latest .
176 | ```
177 | 
178 | And then to build the project:
179 | 
180 | ```
181 | docker run --rm -v $(pwd):/root/go/src/github.com/influxdata/influxdb-relay influxdb-relay-builder
182 | ```
183 | 
184 | *NOTE:* By default, builds will be for AMD64 Linux (since the container
185 | is running AMD64 Linux); to change the target platform or
186 | architecture, use the `--platform` and `--arch` CLI options.
187 | 
188 | The `docker run` command above immediately calls the included `build.py`
189 | build script and leaves any build output in the `./build` directory. To
190 | see a list of available build commands, append `--help` to the command:
191 | 
192 | ```
193 | docker run -v $(pwd):/root/go/src/github.com/influxdata/influxdb-relay influxdb-relay-builder --help
194 | ```
195 | 
196 | ### Packages
197 | 
198 | To build system packages for Linux (`deb`, `rpm`, etc.), use the
199 | `--package` option:
200 | 
201 | ```
202 | docker run -v $(pwd):/root/go/src/github.com/influxdata/influxdb-relay influxdb-relay-builder --package
203 | ```
204 | 
205 | To build packages for other platforms or architectures, use the
206 | `--platform` and `--arch` options. For example, to build an amd64
207 | package for Mac OS X, use the options `--package --platform darwin`.
208 | 
209 | 
--------------------------------------------------------------------------------
/build.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2.7 -u
2 | 
3 | import sys
4 | import os
5 | import subprocess
6 | import time
7 | from datetime import datetime
8 | import shutil
9 | import tempfile
10 | import hashlib
11 | import re
12 | import logging
13 | import argparse
14 | 
15 | # Packaging variables
16 | PACKAGE_NAME = "influxdb-relay"
17 | INSTALL_ROOT_DIR = "/usr/bin"
18 | LOG_DIR = "/var/log/influxdb-relay"
19 | DATA_DIR = "/var/lib/influxdb-relay"
20 | SCRIPT_DIR = "/usr/lib/influxdb-relay/scripts"
21 | CONFIG_DIR = "/etc/influxdb-relay"
22 | LOGROTATE_DIR = "/etc/logrotate.d"
23 | 
24 | INIT_SCRIPT = "scripts/init.sh"
25 | SYSTEMD_SCRIPT = "scripts/influxdb-relay.service"
26 | POSTINST_SCRIPT = "scripts/post-install.sh"
27 | POSTUNINST_SCRIPT = "scripts/post-uninstall.sh"
28 | LOGROTATE_SCRIPT = "scripts/logrotate"
29 | DEFAULT_CONFIG = "sample.toml"
30 | 
31 | CONFIGURATION_FILES = [
32 |     CONFIG_DIR + '/influxdb-relay.conf',
33 |     LOGROTATE_DIR + '/influxdb-relay',
34 | ]
35 | 
36 | PACKAGE_LICENSE = "MIT"
37 | PACKAGE_URL = "https://github.com/influxdata/influxdb-relay"
38 | DESCRIPTION = "Service to replicate InfluxDB data for high availability."
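# NOTE: the paths in CONFIGURATION_FILES above are handed to fpm via
# --config-files (see fpm_common_args below), so deb/rpm upgrades will
# preserve locally edited copies of those files.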
39 | 40 | prereqs = [ 'git', 'go' ] 41 | go_vet_command = "go tool vet -composites=true ./" 42 | optional_prereqs = [ 'fpm', 'rpmbuild' ] 43 | 44 | fpm_common_args = "-f -s dir --log error \ 45 | --url {} \ 46 | --after-install {} \ 47 | --after-remove {} \ 48 | --license {} \ 49 | --directories {} \ 50 | --directories {} \ 51 | --description \"{}\"".format( 52 | PACKAGE_URL, 53 | POSTINST_SCRIPT, 54 | POSTUNINST_SCRIPT, 55 | PACKAGE_LICENSE, 56 | LOG_DIR, 57 | DATA_DIR, 58 | DESCRIPTION) 59 | 60 | for f in CONFIGURATION_FILES: 61 | fpm_common_args += " --config-files {}".format(f) 62 | 63 | targets = { 64 | 'influxdb-relay' : './main.go' 65 | } 66 | 67 | supported_builds = { 68 | 'darwin': [ "amd64" ], 69 | 'linux': [ "amd64", "static_amd64" ] 70 | } 71 | 72 | supported_packages = { 73 | "darwin": [ "tar" ], 74 | "linux": [ "deb", "rpm", "tar" ] 75 | } 76 | 77 | def print_banner(): 78 | logging.info(r""" 79 | ___ __ _ ___ ___ ___ _ 80 | |_ _|_ _ / _| |_ ___ _| \| _ )___| _ \___| |__ _ _ _ 81 | | || ' \| _| | || \ \ / |) | _ \___| / -_) / _` | || | 82 | |___|_||_|_| |_|\_,_/_\_\___/|___/ |_|_\___|_\__,_|\_, | 83 | |__/ 84 | Build Script 85 | """) 86 | 87 | def create_package_fs(build_root): 88 | """Create a filesystem structure to mimic the package filesystem. 89 | """ 90 | logging.debug("Creating package filesystem at location: {}".format(build_root)) 91 | # Using [1:] for the path names due to them being absolute 92 | # (will overwrite previous paths, per 'os.path.join' documentation) 93 | dirs = [ INSTALL_ROOT_DIR[1:], 94 | LOG_DIR[1:], 95 | DATA_DIR[1:], 96 | SCRIPT_DIR[1:], 97 | CONFIG_DIR[1:], 98 | LOGROTATE_DIR[1:] ] 99 | for d in dirs: 100 | os.makedirs(os.path.join(build_root, d)) 101 | os.chmod(os.path.join(build_root, d), 0o755) 102 | 103 | def package_scripts(build_root, config_only=False): 104 | """Copy the necessary scripts and configuration files to the package 105 | filesystem. 106 | """ 107 | if config_only: 108 | logging.debug("Copying configuration to build directory.") 109 | shutil.copyfile(DEFAULT_CONFIG, os.path.join(build_root, "influxdb-relay.conf")) 110 | os.chmod(os.path.join(build_root, "influxdb-relay.conf"), 0o644) 111 | else: 112 | logging.debug("Copying scripts and sample configuration to build directory.") 113 | shutil.copyfile(INIT_SCRIPT, os.path.join(build_root, SCRIPT_DIR[1:], INIT_SCRIPT.split('/')[1])) 114 | os.chmod(os.path.join(build_root, SCRIPT_DIR[1:], INIT_SCRIPT.split('/')[1]), 0o644) 115 | shutil.copyfile(SYSTEMD_SCRIPT, os.path.join(build_root, SCRIPT_DIR[1:], SYSTEMD_SCRIPT.split('/')[1])) 116 | os.chmod(os.path.join(build_root, SCRIPT_DIR[1:], SYSTEMD_SCRIPT.split('/')[1]), 0o644) 117 | shutil.copyfile(LOGROTATE_SCRIPT, os.path.join(build_root, LOGROTATE_DIR[1:], "influxdb-relay")) 118 | os.chmod(os.path.join(build_root, LOGROTATE_DIR[1:], "influxdb-relay"), 0o644) 119 | shutil.copyfile(DEFAULT_CONFIG, os.path.join(build_root, CONFIG_DIR[1:], "influxdb-relay.conf")) 120 | os.chmod(os.path.join(build_root, CONFIG_DIR[1:], "influxdb-relay.conf"), 0o644) 121 | 122 | def go_get(branch, update=False, no_uncommitted=False): 123 | """Retrieve build dependencies or restore pinned dependencies. 124 | """ 125 | if local_changes() and no_uncommitted: 126 | logging.error("There are uncommitted changes in the current directory.") 127 | return False 128 | return True 129 | 130 | def run(command, allow_failure=False, shell=False): 131 | """Run shell command (convenience wrapper around subprocess). 
132 | """ 133 | out = None 134 | logging.debug("{}".format(command)) 135 | try: 136 | if shell: 137 | out = subprocess.check_output(command, stderr=subprocess.STDOUT, shell=shell) 138 | else: 139 | out = subprocess.check_output(command.split(), stderr=subprocess.STDOUT) 140 | out = out.decode('utf-8').strip() 141 | # logging.debug("Command output: {}".format(out)) 142 | except subprocess.CalledProcessError as e: 143 | if allow_failure: 144 | logging.warn("Command '{}' failed with error: {}".format(command, e.output)) 145 | return None 146 | else: 147 | logging.error("Command '{}' failed with error: {}".format(command, e.output)) 148 | sys.exit(1) 149 | except OSError as e: 150 | if allow_failure: 151 | logging.warn("Command '{}' failed with error: {}".format(command, e)) 152 | return out 153 | else: 154 | logging.error("Command '{}' failed with error: {}".format(command, e)) 155 | sys.exit(1) 156 | else: 157 | return out 158 | 159 | def create_temp_dir(prefix = None): 160 | """ Create temporary directory with optional prefix. 161 | """ 162 | if prefix is None: 163 | return tempfile.mkdtemp(prefix="{}-build.".format(PACKAGE_NAME)) 164 | else: 165 | return tempfile.mkdtemp(prefix=prefix) 166 | 167 | def increment_minor_version(version): 168 | """Return the version with the minor version incremented and patch 169 | version set to zero. 170 | """ 171 | ver_list = version.split('.') 172 | if len(ver_list) != 3: 173 | logging.warn("Could not determine how to increment version '{}', will just use provided version.".format(version)) 174 | return version 175 | ver_list[1] = str(int(ver_list[1]) + 1) 176 | ver_list[2] = str(0) 177 | inc_version = '.'.join(ver_list) 178 | logging.debug("Incremented version from '{}' to '{}'.".format(version, inc_version)) 179 | return inc_version 180 | 181 | def get_current_version_tag(): 182 | """Retrieve the raw git version tag. 183 | """ 184 | version = run("git describe --always --tags --abbrev=0") 185 | return version 186 | 187 | def get_current_version(): 188 | """Parse version information from git tag output. 189 | """ 190 | version_tag = get_current_commit(short=True) 191 | return version_tag 192 | 193 | def get_current_commit(short=False): 194 | """Retrieve the current git commit. 195 | """ 196 | command = None 197 | if short: 198 | command = "git log --pretty=format:'%h' -n 1" 199 | else: 200 | command = "git rev-parse HEAD" 201 | out = run(command) 202 | return out.strip('\'\n\r ') 203 | 204 | def get_current_branch(): 205 | """Retrieve the current git branch. 206 | """ 207 | command = "git rev-parse --abbrev-ref HEAD" 208 | out = run(command) 209 | return out.strip() 210 | 211 | def local_changes(): 212 | """Return True if there are local un-committed changes. 213 | """ 214 | output = run("git diff-files --ignore-submodules --").strip() 215 | if len(output) > 0: 216 | return True 217 | return False 218 | 219 | def get_system_arch(): 220 | """Retrieve current system architecture. 221 | """ 222 | arch = os.uname()[4] 223 | if arch == "x86_64": 224 | arch = "amd64" 225 | elif arch == "386": 226 | arch = "i386" 227 | elif 'arm' in arch: 228 | # Prevent uname from reporting full ARM arch (eg 'armv7l') 229 | arch = "arm" 230 | return arch 231 | 232 | def get_system_platform(): 233 | """Retrieve current system platform. 234 | """ 235 | if sys.platform.startswith("linux"): 236 | return "linux" 237 | else: 238 | return sys.platform 239 | 240 | def get_go_version(): 241 | """Retrieve version information for Go. 
242 |     """
243 |     out = run("go version")
244 |     matches = re.search(r'go version go(\S+)', out)
245 |     if matches is not None:
246 |         return matches.groups()[0].strip()
247 |     return None
248 | 
249 | def check_path_for(b):
250 |     """Check the user's PATH for the provided binary.
251 |     """
252 |     def is_exe(fpath):
253 |         return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
254 | 
255 |     for path in os.environ["PATH"].split(os.pathsep):
256 |         path = path.strip('"')
257 |         full_path = os.path.join(path, b)
258 |         if is_exe(full_path):
259 |             return full_path
260 | 
261 | def check_environ(build_dir = None):
262 |     """Check environment for common Go variables.
263 |     """
264 |     logging.info("Checking environment...")
265 |     for v in [ "GOPATH", "GOBIN", "GOROOT" ]:
266 |         logging.debug("Using '{}' for {}".format(os.environ.get(v), v))
267 | 
268 |     cwd = os.getcwd()
269 |     if build_dir is None and os.environ.get("GOPATH") and os.environ.get("GOPATH") not in cwd:
270 |         logging.warn("Your current directory is not under your GOPATH. This may lead to build failures.")
271 |     return True
272 | 
273 | def check_prereqs():
274 |     """Check user path for required dependencies.
275 |     """
276 |     logging.info("Checking for dependencies...")
277 |     for req in prereqs:
278 |         if not check_path_for(req):
279 |             logging.error("Could not find dependency: {}".format(req))
280 |             return False
281 |     return True
282 | 
283 | def go_list(vendor=False, relative=False):
284 |     """
285 |     Return a list of packages.
286 |     If vendor is False, vendor packages are not included.
287 |     If relative is True, the package prefix defined by PACKAGE_URL is stripped.
288 |     """
289 |     p = subprocess.Popen(["go", "list", "./..."], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
290 |     out, err = p.communicate()
291 |     packages = out.split('\n')
292 |     if packages[-1] == '':
293 |         packages = packages[:-1]
294 |     if not vendor:
295 |         non_vendor = []
296 |         for p in packages:
297 |             if '/vendor/' not in p:
298 |                 non_vendor.append(p)
299 |         packages = non_vendor
300 |     if relative:
301 |         relative_pkgs = []
302 |         for p in packages:
303 |             r = p.replace(PACKAGE_URL, '.')
304 |             if r != '.':
305 |                 relative_pkgs.append(r)
306 |         packages = relative_pkgs
307 |     return packages
308 | 
309 | def build(version=None,
310 |           platform=None,
311 |           arch=None,
312 |           nightly=False,
313 |           clean=False,
314 |           outdir=".",
315 |           tags=[],
316 |           static=False):
317 |     """Build each target for the specified architecture and platform.
318 | """ 319 | logging.info("Starting build for {}/{}...".format(platform, arch)) 320 | logging.info("Using Go version: {}".format(get_go_version())) 321 | logging.info("Using git branch: {}".format(get_current_branch())) 322 | logging.info("Using git commit: {}".format(get_current_commit())) 323 | if static: 324 | logging.info("Using statically-compiled output.") 325 | if len(tags) > 0: 326 | logging.info("Using build tags: {}".format(','.join(tags))) 327 | 328 | logging.info("Sending build output to: {}".format(outdir)) 329 | if not os.path.exists(outdir): 330 | os.makedirs(outdir) 331 | elif clean and outdir != '/' and outdir != ".": 332 | logging.info("Cleaning build directory '{}' before building.".format(outdir)) 333 | shutil.rmtree(outdir) 334 | os.makedirs(outdir) 335 | 336 | logging.info("Using version '{}' for build.".format(version)) 337 | 338 | tmp_build_dir = create_temp_dir() 339 | for target, path in targets.items(): 340 | logging.info("Building target: {}".format(target)) 341 | build_command = "" 342 | 343 | # Handle static binary output 344 | if static is True or "static_" in arch: 345 | if "static_" in arch: 346 | static = True 347 | arch = arch.replace("static_", "") 348 | build_command += "CGO_ENABLED=0 " 349 | 350 | # Handle variations in architecture output 351 | if arch == "i386" or arch == "i686": 352 | arch = "386" 353 | elif "arm" in arch: 354 | arch = "arm" 355 | build_command += "GOOS={} GOARCH={} ".format(platform, arch) 356 | 357 | if "arm" in arch: 358 | if arch == "armel": 359 | build_command += "GOARM=5 " 360 | elif arch == "armhf" or arch == "arm": 361 | build_command += "GOARM=6 " 362 | elif arch == "arm64": 363 | # TODO(rossmcdonald) - Verify this is the correct setting for arm64 364 | build_command += "GOARM=7 " 365 | else: 366 | logging.error("Invalid ARM architecture specified: {}".format(arch)) 367 | logging.error("Please specify either 'armel', 'armhf', or 'arm64'.") 368 | return False 369 | if platform == 'windows': 370 | target = target + '.exe' 371 | build_command += "go build -o {} ".format(os.path.join(outdir, target)) 372 | if len(tags) > 0: 373 | build_command += "-tags {} ".format(','.join(tags)) 374 | if "1.4" in get_go_version(): 375 | if static: 376 | build_command += "-ldflags=\"-s -X main.version {} -X main.branch {} -X main.commit {}\" ".format(version, 377 | get_current_branch(), 378 | get_current_commit()) 379 | else: 380 | build_command += "-ldflags=\"-X main.version {} -X main.branch {} -X main.commit {}\" ".format(version, 381 | get_current_branch(), 382 | get_current_commit()) 383 | 384 | else: 385 | # Starting with Go 1.5, the linker flag arguments changed to 'name=value' from 'name value' 386 | if static: 387 | build_command += "-ldflags=\"-s -X main.version={} -X main.branch={} -X main.commit={}\" ".format(version, 388 | get_current_branch(), 389 | get_current_commit()) 390 | else: 391 | build_command += "-ldflags=\"-X main.version={} -X main.branch={} -X main.commit={}\" ".format(version, 392 | get_current_branch(), 393 | get_current_commit()) 394 | if static: 395 | build_command += "-a -installsuffix cgo " 396 | build_command += path 397 | start_time = datetime.utcnow() 398 | run(build_command, shell=True) 399 | end_time = datetime.utcnow() 400 | logging.info("Time taken: {}s".format((end_time - start_time).total_seconds())) 401 | return True 402 | 403 | def generate_md5_from_file(path): 404 | """Generate MD5 signature based on the contents of the file at path. 
405 | """ 406 | m = hashlib.md5() 407 | with open(path, 'rb') as f: 408 | for chunk in iter(lambda: f.read(4096), b""): 409 | m.update(chunk) 410 | return m.hexdigest() 411 | 412 | def package(build_output, version, nightly=False, iteration=1, static=False, release=False): 413 | """Package the output of the build process. 414 | """ 415 | outfiles = [] 416 | tmp_build_dir = create_temp_dir() 417 | logging.debug("Packaging for build output: {}".format(build_output)) 418 | logging.info("Using temporary directory: {}".format(tmp_build_dir)) 419 | try: 420 | for platform in build_output: 421 | # Create top-level folder displaying which platform (linux, etc) 422 | os.makedirs(os.path.join(tmp_build_dir, platform)) 423 | for arch in build_output[platform]: 424 | logging.info("Creating packages for {}/{}".format(platform, arch)) 425 | # Create second-level directory displaying the architecture (amd64, etc) 426 | current_location = build_output[platform][arch] 427 | 428 | # Create directory tree to mimic file system of package 429 | build_root = os.path.join(tmp_build_dir, 430 | platform, 431 | arch, 432 | '{}-{}-{}'.format(PACKAGE_NAME, version, iteration)) 433 | os.makedirs(build_root) 434 | 435 | # Copy packaging scripts to build directory 436 | if platform == "windows" or static or "static_" in arch: 437 | # For windows and static builds, just copy 438 | # binaries to root of package (no other scripts or 439 | # directories) 440 | package_scripts(build_root, config_only=True) 441 | else: 442 | create_package_fs(build_root) 443 | package_scripts(build_root) 444 | 445 | for binary in targets: 446 | # Copy newly-built binaries to packaging directory 447 | if platform == 'windows': 448 | binary = binary + '.exe' 449 | if platform == 'windows' or static or "static_" in arch: 450 | # Where the binary should go in the package filesystem 451 | to = os.path.join(build_root, binary) 452 | # Where the binary currently is located 453 | fr = os.path.join(current_location, binary) 454 | else: 455 | # Where the binary currently is located 456 | fr = os.path.join(current_location, binary) 457 | # Where the binary should go in the package filesystem 458 | to = os.path.join(build_root, INSTALL_ROOT_DIR[1:], binary) 459 | shutil.copy(fr, to) 460 | 461 | for package_type in supported_packages[platform]: 462 | # Package the directory structure for each package type for the platform 463 | logging.debug("Packaging directory '{}' as '{}'.".format(build_root, package_type)) 464 | name = PACKAGE_NAME 465 | # Reset version, iteration, and current location on each run 466 | # since they may be modified below. 
467 | package_version = "{}".format(version) 468 | package_iteration = iteration 469 | if "static_" in arch: 470 | # Remove the "static_" from the displayed arch on the package 471 | package_arch = arch.replace("static_", "") 472 | else: 473 | package_arch = arch 474 | package_build_root = build_root 475 | current_location = build_output[platform][arch] 476 | if package_type in ['zip', 'tar']: 477 | # For tars and zips, start the packaging one folder above 478 | # the build root (to include the package name) 479 | package_build_root = os.path.join('/', '/'.join(build_root.split('/')[:-1])) 480 | if nightly: 481 | if static or "static_" in arch: 482 | name = '{}-static-nightly_{}_{}'.format(name, 483 | platform, 484 | package_arch) 485 | else: 486 | name = '{}-nightly_{}_{}'.format(name, 487 | platform, 488 | package_arch) 489 | else: 490 | if static or "static_" in arch: 491 | name = '{}-{}-{}-static_{}_{}'.format(name, 492 | package_version, 493 | package_iteration, 494 | platform, 495 | package_arch) 496 | else: 497 | name = '{}-{}-{}_{}_{}'.format(name, 498 | package_version, 499 | package_iteration, 500 | platform, 501 | package_arch) 502 | current_location = os.path.join(os.getcwd(), current_location) 503 | if package_type == 'tar': 504 | tar_command = "cd {} && tar -cvzf {}.tar.gz ./*".format(build_root, name) 505 | run(tar_command, shell=True) 506 | run("mv {}.tar.gz {}".format(os.path.join(build_root, name), current_location), shell=True) 507 | outfile = os.path.join(current_location, name + ".tar.gz") 508 | outfiles.append(outfile) 509 | elif package_type == 'zip': 510 | zip_command = "cd {} && zip -r {}.zip ./*".format(build_root, name) 511 | run(zip_command, shell=True) 512 | run("mv {}.zip {}".format(os.path.join(build_root, name), current_location), shell=True) 513 | outfile = os.path.join(current_location, name + ".zip") 514 | outfiles.append(outfile) 515 | elif package_type not in ['zip', 'tar'] and static or "static_" in arch: 516 | logging.info("Skipping package type '{}' for static builds.".format(package_type)) 517 | else: 518 | fpm_command = "fpm {} --name {} -a {} -t {} --version {} --iteration {} -C {} -p {} ".format( 519 | fpm_common_args, 520 | name, 521 | package_arch, 522 | package_type, 523 | package_version, 524 | package_iteration, 525 | package_build_root, 526 | current_location) 527 | if package_type == "rpm": 528 | fpm_command += "--depends coreutils --rpm-posttrans {}".format(POSTINST_SCRIPT) 529 | out = run(fpm_command, shell=True) 530 | matches = re.search(':path=>"(.*)"', out) 531 | outfile = None 532 | if matches is not None: 533 | outfile = matches.groups()[0] 534 | if outfile is None: 535 | logging.warn("Could not determine output from packaging output!") 536 | else: 537 | if nightly: 538 | # Strip nightly version from package name 539 | new_outfile = outfile.replace("{}-{}".format(package_version, package_iteration), "nightly") 540 | os.rename(outfile, new_outfile) 541 | outfile = new_outfile 542 | else: 543 | if package_type == 'rpm': 544 | # rpm's convert any dashes to underscores 545 | package_version = package_version.replace("-", "_") 546 | new_outfile = outfile.replace("{}-{}".format(package_version, package_iteration), package_version) 547 | os.rename(outfile, new_outfile) 548 | outfile = new_outfile 549 | outfiles.append(os.path.join(os.getcwd(), outfile)) 550 | logging.debug("Produced package files: {}".format(outfiles)) 551 | return outfiles 552 | finally: 553 | # Cleanup 554 | shutil.rmtree(tmp_build_dir) 555 | 556 | def main(args): 557 | 
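    """Build (and optionally package) influxdb-relay according to the parsed CLI arguments."""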
    global PACKAGE_NAME
558 | 
559 |     if args.release and args.nightly:
560 |         logging.error("Cannot be both a nightly and a release.")
561 |         return 1
562 | 
563 |     if args.nightly:
564 |         args.version = increment_minor_version(args.version)
565 |         args.version = "{}~n{}".format(args.version,
566 |                                        datetime.utcnow().strftime("%Y%m%d%H%M"))
567 |         args.iteration = 0
568 | 
569 |     # Pre-build checks
570 |     check_environ()
571 |     if not check_prereqs():
572 |         return 1
573 |     if args.build_tags is None:
574 |         args.build_tags = []
575 |     else:
576 |         args.build_tags = args.build_tags.split(',')
577 | 
578 |     orig_commit = get_current_commit(short=True)
579 |     orig_branch = get_current_branch()
580 | 
581 |     if args.platform not in supported_builds and args.platform != 'all':
582 |         logging.error("Invalid build platform: {}".format(args.platform))
583 |         return 1
584 | 
585 |     build_output = {}
586 | 
587 |     if args.branch != orig_branch and args.commit != orig_commit:
588 |         logging.error("Can only specify one branch or commit to build from.")
589 |         return 1
590 |     elif args.branch != orig_branch:
591 |         logging.info("Moving to git branch: {}".format(args.branch))
592 |         run("git checkout {}".format(args.branch))
593 |     elif args.commit != orig_commit:
594 |         logging.info("Moving to git commit: {}".format(args.commit))
595 |         run("git checkout {}".format(args.commit))
596 | 
597 |     if not args.no_get:
598 |         if not go_get(args.branch, no_uncommitted=args.no_uncommitted):
599 |             return 1
600 | 
601 |     platforms = []
602 |     single_build = True
603 |     if args.platform == 'all':
604 |         platforms = supported_builds.keys()
605 |         single_build = False
606 |     else:
607 |         platforms = [args.platform]
608 | 
609 |     for platform in platforms:
610 |         build_output.update( { platform : {} } )
611 |         archs = []
612 |         if args.arch == "all":
613 |             single_build = False
614 |             archs = supported_builds.get(platform)
615 |         else:
616 |             archs = [args.arch]
617 | 
618 |         for arch in archs:
619 |             od = args.outdir
620 |             if not single_build:
621 |                 od = os.path.join(args.outdir, platform, arch)
622 |             if not build(version=args.version,
623 |                          platform=platform,
624 |                          arch=arch,
625 |                          nightly=args.nightly,
626 |                          clean=args.clean,
627 |                          outdir=od,
628 |                          tags=args.build_tags,
629 |                          static=args.static):
630 |                 return 1
631 |             build_output.get(platform).update( { arch : od } )
632 | 
633 |     # Build packages
634 |     if args.package:
635 |         if not check_path_for("fpm"):
636 |             logging.error("FPM ruby gem required for packaging. Stopping.")
637 |             return 1
638 |         packages = package(build_output,
639 |                            args.version,
640 |                            nightly=args.nightly,
641 |                            iteration=args.iteration,
642 |                            static=args.static,
643 |                            release=args.release)
644 |         logging.info("Packages created:")
645 |         for p in packages:
646 |             logging.info("{} (MD5={})".format(p.split('/')[-1:][0],
647 |                                               generate_md5_from_file(p)))
648 |     if orig_branch != get_current_branch():
649 |         logging.info("Moving back to original git branch: {}".format(orig_branch))
650 |         run("git checkout {}".format(orig_branch))
651 | 
652 |     return 0
653 | 
654 | if __name__ == '__main__':
655 |     LOG_LEVEL = logging.INFO
656 |     if '--debug' in sys.argv[1:]:
657 |         LOG_LEVEL = logging.DEBUG
658 |     log_format = '[%(levelname)s] %(funcName)s: %(message)s'
659 |     logging.basicConfig(level=LOG_LEVEL,
660 |                         format=log_format)
661 | 
662 |     parser = argparse.ArgumentParser(description='InfluxDB build and packaging script.')
663 |     parser.add_argument('--verbose','-v','--debug',
664 |                         action='store_true',
665 |                         help='Use debug output')
666 |     parser.add_argument('--outdir', '-o',
667 |                         metavar='',
668 |                         default='./build/',
669 |                         type=os.path.abspath,
670 |                         help='Output directory')
671 |     parser.add_argument('--name', '-n',
672 |                         metavar='',
673 |                         type=str,
674 |                         help='Name to use for package name (when package is specified)')
675 |     parser.add_argument('--arch',
676 |                         metavar='',
677 |                         type=str,
678 |                         default=get_system_arch(),
679 |                         help='Target architecture for build output')
680 |     parser.add_argument('--platform',
681 |                         metavar='',
682 |                         type=str,
683 |                         default=get_system_platform(),
684 |                         help='Target platform for build output')
685 |     parser.add_argument('--branch',
686 |                         metavar='',
687 |                         type=str,
688 |                         default=get_current_branch(),
689 |                         help='Build from a specific branch')
690 |     parser.add_argument('--commit',
691 |                         metavar='',
692 |                         type=str,
693 |                         default=get_current_commit(short=True),
694 |                         help='Build from a specific commit')
695 |     parser.add_argument('--version',
696 |                         metavar='',
697 |                         type=str,
698 |                         default=get_current_version(),
699 |                         help='Version information to apply to build output (ex: 0.1.0)')
700 |     parser.add_argument('--iteration',
701 |                         metavar='',
702 |                         type=str,
703 |                         default="1",
704 |                         help='Package iteration to apply to build output (defaults to 1)')
705 |     parser.add_argument('--nightly',
706 |                         action='store_true',
707 |                         help='Mark build output as nightly build (will increment the minor version)')
708 |     parser.add_argument('--package',
709 |                         action='store_true',
710 |                         help='Package binary output')
711 |     parser.add_argument('--release',
712 |                         action='store_true',
713 |                         help='Mark build output as release')
714 |     parser.add_argument('--clean',
715 |                         action='store_true',
716 |                         help='Clean output directory before building')
717 |     parser.add_argument('--no-get',
718 |                         action='store_true',
719 |                         help='Do not retrieve pinned dependencies when building')
720 |     parser.add_argument('--no-uncommitted',
721 |                         action='store_true',
722 |                         help='Fail if uncommitted changes exist in the working directory')
723 |     parser.add_argument('--generate',
724 |                         action='store_true',
725 |                         help='Run "go generate" before building')
726 |     parser.add_argument('--build-tags',
727 |                         metavar='',
728 |                         help='Optional build tags to use for compilation')
729 |     parser.add_argument('--static',
730 |                         action='store_true',
731 |                         help='Create statically-compiled binary output')
732 |     args = parser.parse_args()
733 |     print_banner()
734 |     sys.exit(main(args))
735 | 
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/influxdata/influxdb-relay
2 | 
3 | go 1.13
4 | 
5 | require (
6 | 	github.com/influxdata/influxdb1-client v0.0.0-20200515024757-02f0bf5dbca3
7 | 	github.com/kylelemons/godebug v1.1.0 // indirect
8 | 	github.com/naoina/go-stringutil v0.1.0 // indirect
9 | 	github.com/naoina/toml v0.1.1
10 | )
11 | 
--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
1 | github.com/influxdata/influxdb1-client v0.0.0-20200515024757-02f0bf5dbca3 h1:k3/6a1Shi7GGCp9QpyYuXsMM6ncTOjCzOE9Fd6CDA+Q=
2 | github.com/influxdata/influxdb1-client v0.0.0-20200515024757-02f0bf5dbca3/go.mod h1:qj24IKcXYK6Iy9ceXlo3Tc+vtHo9lIhSX5JddghvEPo=
3 | github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
4 | github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
5 | github.com/naoina/go-stringutil v0.1.0 h1:rCUeRUHjBjGTSHl0VC00jUPLz8/F9dDzYI70Hzifhks=
6 | github.com/naoina/go-stringutil v0.1.0/go.mod h1:XJ2SJL9jCtBh+P9q5btrd/Ylo8XwT/h1USek5+NqSA0=
7 | github.com/naoina/toml v0.1.1 h1:PT/lllxVVN0gzzSqSlHEmP8MJB4MY2U7STGxiouV4X8=
8 | github.com/naoina/toml v0.1.1/go.mod h1:NBIhNtsFMo3G2szEBne+bO4gS192HuIYRqfvOWb4i1E=
9 | 
--------------------------------------------------------------------------------
/kapacitor.toml:
--------------------------------------------------------------------------------
1 | # The relay can also be used to dual write data to Kapacitor.
2 | #
3 | # There are a few advantages of using the relay instead of relying on InfluxDB subscriptions:
4 | #
5 | # 1. Traffic is HTTP/TCP, meaning you don't have to worry about dropped UDP packets.
6 | # 2. Less load on InfluxDB since it doesn't have to fork the data.
7 | # 3. Slightly lower latency since there is one less hop.
8 | #
9 | # Kapacitor is wire compatible with InfluxDB for writes.
10 | # This means you can add a Kapacitor instance to the output list as if it were an InfluxDB instance.
11 | #
12 | 
13 | 
14 | # This example config sends all writes to
15 | # two InfluxDB hosts and a single Kapacitor host.
16 | [[http]]
17 | name = "kapacitor-http"
18 | bind-addr = "0.0.0.0:9096"
19 | default-retention-policy = "autogen"
20 | output = [
21 |     { name="influxdb1", location = "http://influxdb1:8086/write" },
22 |     { name="influxdb2", location = "http://influxdb2:8086/write" },
23 |     { name="kapacitor1", location = "http://kapacitor1:9092/write" },
24 | ]
25 | 
--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
1 | package main
2 | 
3 | import (
4 | 	"flag"
5 | 	"fmt"
6 | 	"log"
7 | 	"os"
8 | 	"os/signal"
9 | 
10 | 	"github.com/influxdata/influxdb-relay/relay"
11 | )
12 | 
13 | var (
14 | 	configFile = flag.String("config", "", "Configuration file to use")
15 | )
16 | 
17 | func main() {
18 | 	flag.Parse()
19 | 
20 | 	if *configFile == "" {
21 | 		fmt.Fprintln(os.Stderr, "Missing configuration file")
22 | 		flag.PrintDefaults()
23 | 		os.Exit(1)
24 | 	}
25 | 
26 | 	cfg, err := relay.LoadConfigFile(*configFile)
27 | 	if err != nil {
28 | 		fmt.Fprintln(os.Stderr, "Problem loading config file:", err)
29 | 		os.Exit(1)
30 | 	}
31 | 
32 | 	r, err := relay.New(cfg)
33 | 	if err != nil {
34 | 		log.Fatal(err)
35 | 	}
36 | 
37 | 	sigChan := make(chan os.Signal, 1)
38 | 	signal.Notify(sigChan, os.Interrupt)
39 | 
40 | 	go func() {
41 | 		<-sigChan
42 | 		r.Stop()
43 | 	}()
44 | 
45 | 	log.Println("starting relays...")
46 | 	r.Run()
47 | }
--------------------------------------------------------------------------------
/relay/config.go:
--------------------------------------------------------------------------------
1 | package relay
2 | 
3 | import (
4 | 	"os"
5 | 
6 | 	"github.com/naoina/toml"
7 | )
8 | 
9 | type Config struct {
10 | 	HTTPRelays []HTTPConfig `toml:"http"`
11 | 	UDPRelays  []UDPConfig  `toml:"udp"`
12 | }
13 | 
14 | type HTTPConfig struct {
15 | 	// Name identifies the HTTP relay
16 | 	Name string `toml:"name"`
17 | 
18 | 	// Addr should be set to the desired listening host:port
19 | 	Addr string `toml:"bind-addr"`
20 | 
21 | 	// Set certificate in order to handle HTTPS requests
22 | 	SSLCombinedPem string `toml:"ssl-combined-pem"`
23 | 
24 | 	// Default retention policy to set for forwarded requests
25 | 	DefaultRetentionPolicy string `toml:"default-retention-policy"`
26 | 
27 | 	// Outputs is a list of backend servers where writes will be forwarded
28 | 	Outputs []HTTPOutputConfig `toml:"output"`
29 | }
30 | 
31 | type HTTPOutputConfig struct {
32 | 	// Name of the backend server
33 | 	Name string `toml:"name"`
34 | 
35 | 	// Location should be set to the URL of the backend server's write endpoint
36 | 	Location string `toml:"location"`
37 | 
38 | 	// Timeout sets a per-backend timeout for write requests. (Default 10s)
39 | 	// The format used is the same seen in time.ParseDuration
40 | 	Timeout string `toml:"timeout"`
41 | 
42 | 	// Buffer failed writes up to this many megabytes. (Default 0: retry/buffering disabled)
43 | 	BufferSizeMB int `toml:"buffer-size-mb"`
44 | 
45 | 	// Maximum batch size in KB (Default 512)
46 | 	MaxBatchKB int `toml:"max-batch-kb"`
47 | 
48 | 	// Maximum delay between retry attempts.
49 | 	// The format used is the same seen in time.ParseDuration (Default 10s)
50 | 	MaxDelayInterval string `toml:"max-delay-interval"`
51 | 
52 | 	// Skip TLS verification in order to use self-signed certificates.
53 | 	// WARNING: It's insecure. Use it only for development and don't use it in production.
54 | SkipTLSVerification bool `toml:"skip-tls-verification"` 55 | } 56 | 57 | type UDPConfig struct { 58 | // Name identifies the UDP relay 59 | Name string `toml:"name"` 60 | 61 | // Addr is where the UDP relay will listen for packets 62 | Addr string `toml:"bind-addr"` 63 | 64 | // Precision sets the precision of the timestamps (input and output) 65 | Precision string `toml:"precision"` 66 | 67 | // ReadBuffer sets the socket buffer for incoming connections 68 | ReadBuffer int `toml:"read-buffer"` 69 | 70 | // Outputs is a list of backend servers where writes will be forwarded 71 | Outputs []UDPOutputConfig `toml:"output"` 72 | } 73 | 74 | type UDPOutputConfig struct { 75 | // Name identifies the UDP backend 76 | Name string `toml:"name"` 77 | 78 | // Location should be set to the host:port of the backend server 79 | Location string `toml:"location"` 80 | 81 | // MTU sets the maximum output payload size, default is 1024 82 | MTU int `toml:"mtu"` 83 | } 84 | 85 | // LoadConfigFile parses the specified file into a Config object 86 | func LoadConfigFile(filename string) (cfg Config, err error) { 87 | f, err := os.Open(filename) 88 | if err != nil { 89 | return cfg, err 90 | } 91 | defer f.Close() 92 | 93 | return cfg, toml.NewDecoder(f).Decode(&cfg) 94 | } 95 | -------------------------------------------------------------------------------- /relay/http.go: -------------------------------------------------------------------------------- 1 | package relay 2 | 3 | import ( 4 | "bytes" 5 | "compress/gzip" 6 | "crypto/tls" 7 | "errors" 8 | "fmt" 9 | "io/ioutil" 10 | "log" 11 | "net" 12 | "net/http" 13 | "strconv" 14 | "strings" 15 | "sync" 16 | "sync/atomic" 17 | "time" 18 | 19 | "github.com/influxdata/influxdb1-client/models" 20 | ) 21 | 22 | // HTTP is a relay for HTTP influxdb writes 23 | type HTTP struct { 24 | addr string 25 | name string 26 | schema string 27 | 28 | cert string 29 | rp string 30 | 31 | closing int64 32 | l net.Listener 33 | 34 | backends []*httpBackend 35 | } 36 | 37 | const ( 38 | DefaultHTTPTimeout = 10 * time.Second 39 | DefaultMaxDelayInterval = 10 * time.Second 40 | DefaultBatchSizeKB = 512 41 | 42 | KB = 1024 43 | MB = 1024 * KB 44 | ) 45 | 46 | func NewHTTP(cfg HTTPConfig) (Relay, error) { 47 | h := new(HTTP) 48 | 49 | h.addr = cfg.Addr 50 | h.name = cfg.Name 51 | 52 | h.cert = cfg.SSLCombinedPem 53 | h.rp = cfg.DefaultRetentionPolicy 54 | 55 | h.schema = "http" 56 | if h.cert != "" { 57 | h.schema = "https" 58 | } 59 | 60 | for i := range cfg.Outputs { 61 | backend, err := newHTTPBackend(&cfg.Outputs[i]) 62 | if err != nil { 63 | return nil, err 64 | } 65 | 66 | h.backends = append(h.backends, backend) 67 | } 68 | 69 | return h, nil 70 | } 71 | 72 | func (h *HTTP) Name() string { 73 | if h.name == "" { 74 | return fmt.Sprintf("%s://%s", h.schema, h.addr) 75 | } 76 | return h.name 77 | } 78 | 79 | func (h *HTTP) Run() error { 80 | l, err := net.Listen("tcp", h.addr) 81 | if err != nil { 82 | return err 83 | } 84 | 85 | // support HTTPS 86 | if h.cert != "" { 87 | cert, err := tls.LoadX509KeyPair(h.cert, h.cert) 88 | if err != nil { 89 | return err 90 | } 91 | 92 | l = tls.NewListener(l, &tls.Config{ 93 | Certificates: []tls.Certificate{cert}, 94 | }) 95 | } 96 | 97 | h.l = l 98 | 99 | log.Printf("Starting %s relay %q on %v", strings.ToUpper(h.schema), h.Name(), h.addr) 100 | 101 | err = http.Serve(l, h) 102 | if atomic.LoadInt64(&h.closing) != 0 { 103 | return nil 104 | } 105 | return err 106 | } 107 | 108 | func (h *HTTP) Stop() error { 109 | 
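	// Flag the relay as closing so Run treats the listener error from Close as a clean shutdown.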
	atomic.StoreInt64(&h.closing, 1)
110 | 	return h.l.Close()
111 | }
112 | 
113 | func (h *HTTP) ServeHTTP(w http.ResponseWriter, r *http.Request) {
114 | 	start := time.Now()
115 | 
116 | 	if r.URL.Path == "/ping" && (r.Method == "GET" || r.Method == "HEAD") {
117 | 		w.Header().Add("X-InfluxDB-Version", "relay")
118 | 		w.WriteHeader(http.StatusNoContent)
119 | 		return
120 | 	}
121 | 
122 | 	if r.URL.Path != "/write" {
123 | 		jsonError(w, http.StatusNotFound, "invalid write endpoint")
124 | 		return
125 | 	}
126 | 
127 | 	if r.Method != "POST" {
128 | 		w.Header().Set("Allow", "POST")
129 | 		if r.Method == "OPTIONS" {
130 | 			w.WriteHeader(http.StatusNoContent)
131 | 		} else {
132 | 			jsonError(w, http.StatusMethodNotAllowed, "invalid write method")
133 | 		}
134 | 		return
135 | 	}
136 | 
137 | 	queryParams := r.URL.Query()
138 | 
139 | 	// fail early if we're missing the database
140 | 	if queryParams.Get("db") == "" {
141 | 		jsonError(w, http.StatusBadRequest, "missing parameter: db")
142 | 		return
143 | 	}
144 | 
145 | 	if queryParams.Get("rp") == "" && h.rp != "" {
146 | 		queryParams.Set("rp", h.rp)
147 | 	}
148 | 
149 | 	var body = r.Body
150 | 
151 | 	if r.Header.Get("Content-Encoding") == "gzip" {
152 | 		b, err := gzip.NewReader(r.Body)
153 | 		if err != nil {
154 | 			jsonError(w, http.StatusBadRequest, "unable to decode gzip body")
155 | 			return
156 | 		}
157 | 		defer b.Close()
158 | 		body = b
159 | 	}
160 | 	bodyBuf := getBuf()
161 | 	_, err := bodyBuf.ReadFrom(body)
162 | 	if err != nil {
163 | 		putBuf(bodyBuf)
164 | 		jsonError(w, http.StatusInternalServerError, "problem reading request body")
165 | 		return
166 | 	}
167 | 
168 | 	precision := queryParams.Get("precision")
169 | 	points, err := models.ParsePointsWithPrecision(bodyBuf.Bytes(), start, precision)
170 | 	if err != nil {
171 | 		putBuf(bodyBuf)
172 | 		jsonError(w, http.StatusBadRequest, "unable to parse points")
173 | 		return
174 | 	}
175 | 
176 | 	outBuf := getBuf()
177 | 	for _, p := range points {
178 | 		if _, err = outBuf.WriteString(p.PrecisionString(precision)); err != nil {
179 | 			break
180 | 		}
181 | 		if err = outBuf.WriteByte('\n'); err != nil {
182 | 			break
183 | 		}
184 | 	}
185 | 
186 | 	// done with the input points
187 | 	putBuf(bodyBuf)
188 | 
189 | 	if err != nil {
190 | 		putBuf(outBuf)
191 | 		jsonError(w, http.StatusInternalServerError, "problem writing points")
192 | 		return
193 | 	}
194 | 
195 | 	// normalize query string
196 | 	query := queryParams.Encode()
197 | 
198 | 	outBytes := outBuf.Bytes()
199 | 
200 | 	// check for authorization performed via the header
201 | 	authHeader := r.Header.Get("Authorization")
202 | 
203 | 	var wg sync.WaitGroup
204 | 	wg.Add(len(h.backends))
205 | 
206 | 	var responses = make(chan *responseData, len(h.backends))
207 | 
208 | 	for _, b := range h.backends {
209 | 		b := b
210 | 		go func() {
211 | 			defer wg.Done()
212 | 			resp, err := b.post(outBytes, query, authHeader)
213 | 			if err != nil {
214 | 				log.Printf("Problem posting to relay %q backend %q: %v", h.Name(), b.name, err)
215 | 			} else {
216 | 				if resp.StatusCode/100 == 5 {
217 | 					log.Printf("5xx response for relay %q backend %q: %v", h.Name(), b.name, resp.StatusCode)
218 | 				}
219 | 				responses <- resp
220 | 			}
221 | 		}()
222 | 	}
223 | 
224 | 	go func() {
225 | 		wg.Wait()
226 | 		close(responses)
227 | 		putBuf(outBuf)
228 | 	}()
229 | 
230 | 	var errResponse *responseData
231 | 
232 | 	for resp := range responses {
233 | 		switch resp.StatusCode / 100 {
234 | 		case 2:
235 | 			w.WriteHeader(http.StatusNoContent)
236 | 			return
237 | 
238 | 		case 4:
239 | 			// user error
240 | 			resp.Write(w)
241 | 			return
242 | 
243 | 		default:
244 | 			// hold on to one of the responses to return back to the client
245 | 			errResponse = resp
246 | 		}
247 | 	}
248 | 
249 | 	// no successful writes
250 | 	if errResponse == nil {
251 | 		// failed to make any valid request...
252 | 		jsonError(w, http.StatusServiceUnavailable, "unable to write points")
253 | 		return
254 | 	}
255 | 
256 | 	errResponse.Write(w)
257 | }
258 | 
259 | type responseData struct {
260 | 	ContentType     string
261 | 	ContentEncoding string
262 | 	StatusCode      int
263 | 	Body            []byte
264 | }
265 | 
266 | func (rd *responseData) Write(w http.ResponseWriter) {
267 | 	if rd.ContentType != "" {
268 | 		w.Header().Set("Content-Type", rd.ContentType)
269 | 	}
270 | 
271 | 	if rd.ContentEncoding != "" {
272 | 		w.Header().Set("Content-Encoding", rd.ContentEncoding)
273 | 	}
274 | 
275 | 	w.Header().Set("Content-Length", strconv.Itoa(len(rd.Body)))
276 | 	w.WriteHeader(rd.StatusCode)
277 | 	w.Write(rd.Body)
278 | }
279 | 
280 | func jsonError(w http.ResponseWriter, code int, message string) {
281 | 	w.Header().Set("Content-Type", "application/json")
282 | 	data := fmt.Sprintf("{\"error\":%q}\n", message)
283 | 	w.Header().Set("Content-Length", fmt.Sprint(len(data)))
284 | 	w.WriteHeader(code)
285 | 	w.Write([]byte(data))
286 | }
287 | 
288 | type poster interface {
289 | 	post([]byte, string, string) (*responseData, error)
290 | }
291 | 
292 | type simplePoster struct {
293 | 	client   *http.Client
294 | 	location string
295 | }
296 | 
297 | func newSimplePoster(location string, timeout time.Duration, skipTLSVerification bool) *simplePoster {
298 | 	// Configure custom transport for http.Client
299 | 	// Used to support the skip-tls-verification option
300 | 	transport := &http.Transport{
301 | 		TLSClientConfig: &tls.Config{
302 | 			InsecureSkipVerify: skipTLSVerification,
303 | 		},
304 | 	}
305 | 
306 | 	return &simplePoster{
307 | 		client: &http.Client{
308 | 			Timeout:   timeout,
309 | 			Transport: transport,
310 | 		},
311 | 		location: location,
312 | 	}
313 | }
314 | 
315 | func (b *simplePoster) post(buf []byte, query string, auth string) (*responseData, error) {
316 | 	req, err := http.NewRequest("POST", b.location, bytes.NewReader(buf))
317 | 	if err != nil {
318 | 		return nil, err
319 | 	}
320 | 
321 | 	req.URL.RawQuery = query
322 | 	req.Header.Set("Content-Type", "text/plain")
323 | 	req.Header.Set("Content-Length", strconv.Itoa(len(buf)))
324 | 	if auth != "" {
325 | 		req.Header.Set("Authorization", auth)
326 | 	}
327 | 
328 | 	resp, err := b.client.Do(req)
329 | 	if err != nil {
330 | 		return nil, err
331 | 	}
332 | 
333 | 	data, err := ioutil.ReadAll(resp.Body)
334 | 	if err != nil {
335 | 		return nil, err
336 | 	}
337 | 
338 | 	if err = resp.Body.Close(); err != nil {
339 | 		return nil, err
340 | 	}
341 | 
342 | 	return &responseData{
343 | 		ContentType:     resp.Header.Get("Content-Type"),
344 | 		ContentEncoding: resp.Header.Get("Content-Encoding"),
345 | 		StatusCode:      resp.StatusCode,
346 | 		Body:            data,
347 | 	}, nil
348 | }
349 | 
350 | type httpBackend struct {
351 | 	poster
352 | 	name string
353 | }
354 | 
355 | func newHTTPBackend(cfg *HTTPOutputConfig) (*httpBackend, error) {
356 | 	if cfg.Name == "" {
357 | 		cfg.Name = cfg.Location
358 | 	}
359 | 
360 | 	timeout := DefaultHTTPTimeout
361 | 	if cfg.Timeout != "" {
362 | 		t, err := time.ParseDuration(cfg.Timeout)
363 | 		if err != nil {
364 | 			return nil, fmt.Errorf("error parsing HTTP timeout: %v", err)
365 | 		}
366 | 		timeout = t
367 | 	}
368 | 
369 | 	var p poster = newSimplePoster(cfg.Location, timeout, cfg.SkipTLSVerification)
370 | 
371 | 	// If configured, create a retryBuffer per backend.
372 | 	// This way we serialize retries against each backend.
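	// Buffering is opt-in: when buffer-size-mb is unset (zero), the plain
	// poster above is used and failed writes are reported straight back to
	// the client instead of being retried.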
373 |     if cfg.BufferSizeMB > 0 {
374 |         max := DefaultMaxDelayInterval
375 |         if cfg.MaxDelayInterval != "" {
376 |             m, err := time.ParseDuration(cfg.MaxDelayInterval)
377 |             if err != nil {
378 |                 return nil, fmt.Errorf("error parsing max retry time: %v", err)
379 |             }
380 |             max = m
381 |         }
382 | 
383 |         batch := DefaultBatchSizeKB * KB
384 |         if cfg.MaxBatchKB > 0 {
385 |             batch = cfg.MaxBatchKB * KB
386 |         }
387 | 
388 |         p = newRetryBuffer(cfg.BufferSizeMB*MB, batch, max, p)
389 |     }
390 | 
391 |     return &httpBackend{
392 |         poster: p,
393 |         name:   cfg.Name,
394 |     }, nil
395 | }
396 | 
397 | var ErrBufferFull = errors.New("retry buffer full")
398 | 
399 | var bufPool = sync.Pool{New: func() interface{} { return new(bytes.Buffer) }}
400 | 
401 | func getBuf() *bytes.Buffer {
402 |     if bb, ok := bufPool.Get().(*bytes.Buffer); ok {
403 |         return bb
404 |     }
405 |     return new(bytes.Buffer)
406 | }
407 | 
408 | func putBuf(b *bytes.Buffer) {
409 |     b.Reset()
410 |     bufPool.Put(b)
411 | }
412 | 
--------------------------------------------------------------------------------
/relay/relay.go:
--------------------------------------------------------------------------------
 1 | package relay
 2 | 
 3 | import (
 4 |     "fmt"
 5 |     "log"
 6 |     "sync"
 7 | )
 8 | 
 9 | type Service struct {
10 |     relays map[string]Relay
11 | }
12 | 
13 | func New(config Config) (*Service, error) {
14 |     s := new(Service)
15 |     s.relays = make(map[string]Relay)
16 | 
17 |     for _, cfg := range config.HTTPRelays {
18 |         h, err := NewHTTP(cfg)
19 |         if err != nil {
20 |             return nil, err
21 |         }
22 |         if s.relays[h.Name()] != nil {
23 |             return nil, fmt.Errorf("duplicate relay: %q", h.Name())
24 |         }
25 |         s.relays[h.Name()] = h
26 |     }
27 | 
28 |     for _, cfg := range config.UDPRelays {
29 |         u, err := NewUDP(cfg)
30 |         if err != nil {
31 |             return nil, err
32 |         }
33 |         if s.relays[u.Name()] != nil {
34 |             return nil, fmt.Errorf("duplicate relay: %q", u.Name())
35 |         }
36 |         s.relays[u.Name()] = u
37 |     }
38 | 
39 |     return s, nil
40 | }
41 | 
42 | func (s *Service) Run() {
43 |     var wg sync.WaitGroup
44 |     wg.Add(len(s.relays))
45 | 
46 |     for k := range s.relays {
47 |         relay := s.relays[k]
48 |         go func() {
49 |             defer wg.Done()
50 | 
51 |             if err := relay.Run(); err != nil {
52 |                 log.Printf("Error running relay %q: %v", relay.Name(), err)
53 |             }
54 |         }()
55 |     }
56 | 
57 |     wg.Wait()
58 | }
59 | 
60 | func (s *Service) Stop() {
61 |     for _, v := range s.relays {
62 |         v.Stop()
63 |     }
64 | }
65 | 
66 | type Relay interface {
67 |     Name() string
68 |     Run() error
69 |     Stop() error
70 | }
71 | 
--------------------------------------------------------------------------------
/relay/retry.go:
--------------------------------------------------------------------------------
 1 | package relay
 2 | 
 3 | import (
 4 |     "bytes"
 5 |     "sync"
 6 |     "sync/atomic"
 7 |     "time"
 8 | )
 9 | 
10 | const (
11 |     retryInitial    = 500 * time.Millisecond
12 |     retryMultiplier = 2
13 | )
14 | 
15 | type Operation func() error
16 | 
17 | // Buffers and retries operations; if the buffer is full, operations are dropped.
18 | // Only tries one operation at a time; the next operation is not attempted
19 | // until success or timeout of the previous operation.
20 | // There is no delay between attempts of different operations.
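    | //
    | // A worked example of the backoff below, assuming the defaults
    | // (retryInitial = 500ms, retryMultiplier = 2) and a max-delay-interval
    | // of "5s": after each failed attempt the interval doubles before the
    | // sleep, giving waits of 1s, 2s, 4s, 5s, 5s, ... until the post succeeds.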
21 | type retryBuffer struct {
22 |     buffering int32
23 | 
24 |     initialInterval time.Duration
25 |     multiplier      time.Duration
26 |     maxInterval     time.Duration
27 | 
28 |     maxBuffered int
29 |     maxBatch    int
30 | 
31 |     list *bufferList
32 | 
33 |     p poster
34 | }
35 | 
36 | func newRetryBuffer(size, batch int, max time.Duration, p poster) *retryBuffer {
37 |     r := &retryBuffer{
38 |         initialInterval: retryInitial,
39 |         multiplier:      retryMultiplier,
40 |         maxInterval:     max,
41 |         maxBuffered:     size,
42 |         maxBatch:        batch,
43 |         list:            newBufferList(size, batch),
44 |         p:               p,
45 |     }
46 |     go r.run()
47 |     return r
48 | }
49 | 
50 | func (r *retryBuffer) post(buf []byte, query string, auth string) (*responseData, error) {
51 |     if atomic.LoadInt32(&r.buffering) == 0 {
52 |         resp, err := r.p.post(buf, query, auth)
53 |         // TODO: a 5xx caused by the point data itself could cause the relay to buffer forever
54 |         if err == nil && resp.StatusCode/100 != 5 {
55 |             return resp, err
56 |         }
57 |         atomic.StoreInt32(&r.buffering, 1)
58 |     }
59 | 
60 |     // already buffering, or the request just failed
61 |     batch, err := r.list.add(buf, query, auth)
62 |     if err != nil {
63 |         return nil, err
64 |     }
65 | 
66 |     batch.wg.Wait()
67 |     return batch.resp, nil
68 | }
69 | 
70 | func (r *retryBuffer) run() {
71 |     buf := bytes.NewBuffer(make([]byte, 0, r.maxBatch))
72 |     for {
73 |         buf.Reset()
74 |         batch := r.list.pop()
75 | 
76 |         for _, b := range batch.bufs {
77 |             buf.Write(b)
78 |         }
79 | 
80 |         interval := r.initialInterval
81 |         for {
82 |             resp, err := r.p.post(buf.Bytes(), batch.query, batch.auth)
83 |             if err == nil && resp.StatusCode/100 != 5 {
84 |                 batch.resp = resp
85 |                 atomic.StoreInt32(&r.buffering, 0)
86 |                 batch.wg.Done()
87 |                 break
88 |             }
89 | 
90 |             if interval != r.maxInterval {
91 |                 interval *= r.multiplier
92 |                 if interval > r.maxInterval {
93 |                     interval = r.maxInterval
94 |                 }
95 |             }
96 | 
97 |             time.Sleep(interval)
98 |         }
99 |     }
100 | }
101 | 
102 | type batch struct {
103 |     query string
104 |     auth  string
105 |     bufs  [][]byte
106 |     size  int
107 |     full  bool
108 | 
109 |     wg   sync.WaitGroup
110 |     resp *responseData
111 | 
112 |     next *batch
113 | }
114 | 
115 | func newBatch(buf []byte, query string, auth string) *batch {
116 |     b := new(batch)
117 |     b.bufs = [][]byte{buf}
118 |     b.size = len(buf)
119 |     b.query = query
120 |     b.auth = auth
121 |     b.wg.Add(1)
122 |     return b
123 | }
124 | 
125 | type bufferList struct {
126 |     cond     *sync.Cond
127 |     head     *batch
128 |     size     int
129 |     maxSize  int
130 |     maxBatch int
131 | }
132 | 
133 | func newBufferList(maxSize, maxBatch int) *bufferList {
134 |     return &bufferList{
135 |         cond:     sync.NewCond(new(sync.Mutex)),
136 |         maxSize:  maxSize,
137 |         maxBatch: maxBatch,
138 |     }
139 | }
140 | 
141 | // pop will remove and return the first element of the list, blocking if necessary
142 | func (l *bufferList) pop() *batch {
143 |     l.cond.L.Lock()
144 | 
145 |     for l.size == 0 {
146 |         l.cond.Wait()
147 |     }
148 | 
149 |     b := l.head
150 |     l.head = l.head.next
151 |     l.size -= b.size
152 | 
153 |     l.cond.L.Unlock()
154 | 
155 |     return b
156 | }
157 | 
158 | func (l *bufferList) add(buf []byte, query string, auth string) (*batch, error) {
159 |     l.cond.L.Lock()
160 | 
161 |     if l.size+len(buf) > l.maxSize {
162 |         l.cond.L.Unlock()
163 |         return nil, ErrBufferFull
164 |     }
165 | 
166 |     l.size += len(buf)
167 |     l.cond.Signal()
168 | 
169 |     var cur **batch
170 | 
171 |     // skip past non-nil batches that either don't match the query string, don't match the auth
172 |     // credentials, or would be too large when adding the current set of points
173 |     // (auth must be checked to prevent potential problems in multi-user scenarios)
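    |     // For example (hypothetical values): two writes with the query string
    |     // "db=stats&rp=weekly" and the same Authorization header coalesce into
    |     // one batch until maxBatch bytes is reached; a write with a different
    |     // db, rp, or auth always starts a new batch.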
174 |     for cur = &l.head; *cur != nil; cur = &(*cur).next {
175 |         if (*cur).query != query || (*cur).auth != auth || (*cur).full {
176 |             continue
177 |         }
178 | 
179 |         if (*cur).size+len(buf) > l.maxBatch {
180 |             // prevent future writes from preceding this write
181 |             (*cur).full = true
182 |             continue
183 |         }
184 | 
185 |         break
186 |     }
187 | 
188 |     if *cur == nil {
189 |         // new tail element
190 |         *cur = newBatch(buf, query, auth)
191 |     } else {
192 |         // append to the current batch
193 |         b := *cur
194 |         b.size += len(buf)
195 |         b.bufs = append(b.bufs, buf)
196 |     }
197 | 
198 |     l.cond.L.Unlock()
199 |     return *cur, nil
200 | }
201 | 
--------------------------------------------------------------------------------
/relay/udp.go:
--------------------------------------------------------------------------------
 1 | package relay
 2 | 
 3 | import (
 4 |     "bytes"
 5 |     "errors"
 6 |     "log"
 7 |     "net"
 8 |     "sync"
 9 |     "sync/atomic"
10 |     "time"
11 | 
12 |     "github.com/influxdata/influxdb1-client/models"
13 | )
14 | 
15 | const (
16 |     defaultMTU = 1024
17 | )
18 | 
19 | // UDP is a relay for UDP InfluxDB writes
20 | type UDP struct {
21 |     addr      string
22 |     name      string
23 |     precision string
24 | 
25 |     closing int64
26 |     l       *net.UDPConn
27 |     c       *net.UDPConn
28 | 
29 |     backends []*udpBackend
30 | }
31 | 
32 | func NewUDP(config UDPConfig) (Relay, error) {
33 |     u := new(UDP)
34 | 
35 |     u.name = config.Name
36 |     u.addr = config.Addr
37 |     u.precision = config.Precision
38 | 
39 |     l, err := net.ListenPacket("udp", u.addr)
40 |     if err != nil {
41 |         return nil, err
42 |     }
43 | 
44 |     ul, ok := l.(*net.UDPConn)
45 |     if !ok {
46 |         return nil, errors.New("problem listening for UDP")
47 |     }
48 | 
49 |     if config.ReadBuffer != 0 {
50 |         if err := ul.SetReadBuffer(config.ReadBuffer); err != nil {
51 |             return nil, err
52 |         }
53 |     }
54 | 
55 |     u.l = ul
56 | 
57 |     // UDP doesn't really "listen"; this just gets us a socket with
58 |     // the local UDP address set to something random
59 |     u.c, err = net.ListenUDP("udp", nil)
60 |     if err != nil {
61 |         return nil, err
62 |     }
63 | 
64 |     for i := range config.Outputs {
65 |         cfg := &config.Outputs[i]
66 |         if cfg.Name == "" {
67 |             cfg.Name = cfg.Location
68 |         }
69 | 
70 |         if cfg.MTU == 0 {
71 |             cfg.MTU = defaultMTU
72 |         }
73 | 
74 |         addr, err := net.ResolveUDPAddr("udp", cfg.Location)
75 |         if err != nil {
76 |             return nil, err
77 |         }
78 | 
79 |         u.backends = append(u.backends, &udpBackend{u, cfg.Name, addr, cfg.MTU})
80 |     }
81 | 
82 |     return u, nil
83 | }
84 | 
85 | func (u *UDP) Name() string {
86 |     if u.name == "" {
87 |         return u.addr
88 |     }
89 |     return u.name
90 | }
91 | 
92 | // udpPool is used to reuse and auto-size payload buffers; if incoming packets
93 | // are never larger than 2K, then none of the buffers will be larger than that.
94 | // This prevents having to manually tune the UDP buffer size, or having every
95 | // buffer be 64K to hold the maximum possible payload.
96 | var udpPool = sync.Pool{New: func() interface{} { return new(bytes.Buffer) }}
97 | 
98 | func getUDPBuf() *bytes.Buffer {
99 |     return udpPool.Get().(*bytes.Buffer)
100 | }
101 | 
102 | func putUDPBuf(b *bytes.Buffer) {
103 |     b.Reset()
104 |     udpPool.Put(b)
105 | }
106 | 
107 | type packet struct {
108 |     timestamp time.Time
109 |     data      *bytes.Buffer
110 |     from      *net.UDPAddr
111 | }
112 | 
113 | func (u *UDP) Run() error {
114 | 
115 |     // buffer that can hold the largest possible UDP payload
116 |     var buf [65536]byte
117 | 
118 |     // arbitrary queue size for now
119 |     queue := make(chan packet, 1024)
120 | 
121 |     var wg sync.WaitGroup
122 | 
123 |     go func() {
124 |         for p := range queue {
125 |             u.post(&p)
126 |             wg.Done()
127 |         }
128 |     }()
129 | 
130 |     log.Printf("Starting UDP relay %q on %v", u.Name(), u.l.LocalAddr())
131 | 
132 |     for {
133 |         n, remote, err := u.l.ReadFromUDP(buf[:])
134 |         if err != nil {
135 |             if atomic.LoadInt64(&u.closing) == 0 {
136 |                 log.Printf("Error reading packet in relay %q from %v: %v", u.name, remote, err)
137 |             } else {
138 |                 err = nil
139 |             }
140 |             close(queue)
141 |             wg.Wait()
142 |             return err
143 |         }
144 |         start := time.Now()
145 | 
146 |         wg.Add(1)
147 | 
148 |         // copy the data into a buffer and queue it for processing
149 |         b := getUDPBuf()
150 |         b.Grow(n)
151 |         // bytes.Buffer.Write always returns a nil error, and will panic if out of memory
152 |         _, _ = b.Write(buf[:n])
153 |         queue <- packet{start, b, remote}
154 |     }
155 | }
156 | 
157 | func (u *UDP) Stop() error {
158 |     atomic.StoreInt64(&u.closing, 1)
159 |     return u.l.Close()
160 | }
161 | 
162 | func (u *UDP) post(p *packet) {
163 |     points, err := models.ParsePointsWithPrecision(p.data.Bytes(), p.timestamp, u.precision)
164 |     if err != nil {
165 |         log.Printf("Error parsing packet in relay %q from %v: %v", u.Name(), p.from, err)
166 |         putUDPBuf(p.data)
167 |         return
168 |     }
169 | 
170 |     out := getUDPBuf()
171 |     for _, pt := range points {
172 |         if _, err = out.WriteString(pt.PrecisionString(u.precision)); err != nil {
173 |             break
174 |         }
175 |         if err = out.WriteByte('\n'); err != nil {
176 |             break
177 |         }
178 |     }
179 | 
180 |     putUDPBuf(p.data)
181 | 
182 |     if err != nil {
183 |         putUDPBuf(out)
184 |         log.Printf("Error writing points in relay %q: %v", u.Name(), err)
185 |         return
186 |     }
187 | 
188 |     for _, b := range u.backends {
189 |         if err := b.post(out.Bytes()); err != nil {
190 |             log.Printf("Error writing points in relay %q to backend %q: %v", u.Name(), b.name, err)
191 |         }
192 |     }
193 | 
194 |     putUDPBuf(out)
195 | }
196 | 
197 | type udpBackend struct {
198 |     u    *UDP
199 |     name string
200 |     addr *net.UDPAddr
201 |     mtu  int
202 | }
203 | 
204 | var errPacketTooLarge = errors.New("payload larger than MTU")
205 | 
206 | func (b *udpBackend) post(data []byte) error {
207 |     var err error
208 |     for len(data) > b.mtu {
209 |         // find the last line that will fit within the MTU
210 |         idx := bytes.LastIndexByte(data[:b.mtu], '\n')
211 |         if idx < 0 {
212 |             // first line is larger than MTU
213 |             return errPacketTooLarge
214 |         }
215 |         _, err = b.u.c.WriteToUDP(data[:idx+1], b.addr)
216 |         if err != nil {
217 |             return err
218 |         }
219 |         data = data[idx+1:]
220 |     }
221 | 
222 |     _, err = b.u.c.WriteToUDP(data, b.addr)
223 |     return err
224 | }
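    | 
    | // A worked example of the MTU splitting in post above, with hypothetical
    | // sizes: given mtu=512, a 1300-byte batch of line-protocol points is sent
    | // as several datagrams, each cut at the last '\n' that fits within 512
    | // bytes; a single line longer than the MTU is rejected with
    | // errPacketTooLarge rather than being fragmented mid-line.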
225 | 
--------------------------------------------------------------------------------
/sample.toml:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | [[http]]
 4 | name = "example-http"
 5 | bind-addr = "127.0.0.1:9096"
 6 | output = [
 7 |     { name="local1", location = "http://127.0.0.1:8086/write" },
 8 |     { name="local2", location = "http://127.0.0.1:7086/write" },
 9 | ]
10 | 
11 | [[udp]]
12 | name = "example-udp"
13 | bind-addr = "127.0.0.1:9096"
14 | read-buffer = 0 # default
15 | output = [
16 |     { name="local1", location="127.0.0.1:8089", mtu=512 },
17 |     { name="local2", location="127.0.0.1:7089", mtu=1024 },
18 | ]
19 | 
--------------------------------------------------------------------------------
/sample_buffered.toml:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | [[http]]
 4 | name = "example-http"
 5 | bind-addr = "127.0.0.1:9096"
   | # Buffered backends retry failed writes: buffer-size-mb caps the in-memory
   | # buffer, max-batch-kb caps each retried batch, and max-delay-interval caps
   | # the retry backoff (see relay/retry.go).
 6 | output = [
 7 |     { name="local1", location = "http://127.0.0.1:8086/write", buffer-size-mb = 100, max-batch-kb = 50, max-delay-interval = "5s" },
 8 |     { name="local2", location = "http://127.0.0.1:7086/write", buffer-size-mb = 100, max-batch-kb = 50, max-delay-interval = "5s" },
 9 | ]
10 | 
--------------------------------------------------------------------------------
/scripts/influxdb-relay.service:
--------------------------------------------------------------------------------
 1 | # If you modify this, please also make sure to edit init.sh
 2 | 
 3 | [Unit]
 4 | Description=InfluxDB-Relay is a service to replicate InfluxDB data for high availability
 5 | Documentation=https://github.com/influxdata/influxdb-relay
 6 | After=network.target
 7 | 
 8 | [Service]
 9 | User=influxdb-relay
10 | Group=influxdb-relay
11 | LimitNOFILE=65536
12 | ExecStart=/usr/bin/influxdb-relay -config /etc/influxdb-relay/influxdb-relay.conf
13 | KillMode=control-group
14 | Restart=on-failure
15 | 
16 | [Install]
17 | WantedBy=multi-user.target
18 | Alias=influxdb-relay.service
19 | 
--------------------------------------------------------------------------------
/scripts/init.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | ### BEGIN INIT INFO
 3 | # Provides:          influxdb-relay
 4 | # Required-Start:    $all
 5 | # Required-Stop:     $remote_fs $syslog
 6 | # Default-Start:     2 3 4 5
 7 | # Default-Stop:      0 1 6
 8 | # Short-Description: Start the InfluxDB-Relay process
 9 | ### END INIT INFO
10 | 
11 | # If you modify this, please make sure to also edit influxdb-relay.service
12 | 
13 | # Command-line options that can be set in /etc/default/influxdb-relay.
14 | # These will override any config file values.
15 | DEFAULT=/etc/default/influxdb-relay
16 | 
17 | # Daemon options
18 | DAEMON_OPTS=
19 | 
20 | # Process name (for display)
21 | NAME=influxdb-relay
22 | 
23 | # User and group
24 | USER=influxdb-relay
25 | GROUP=influxdb-relay
26 | 
27 | # Check for sudo or root privileges before continuing
28 | if [ "$UID" != "0" ]; then
29 |     echo "You must be root to run this script"
30 |     exit 1
31 | fi
32 | 
33 | # Path to the daemon executable. If the daemon is not
34 | # there, then exit.
35 | DAEMON=/usr/bin/influxdb-relay
36 | if [ ! -x $DAEMON ]; then
37 |     echo "Executable $DAEMON does not exist!"
38 |     exit 5
39 | fi
40 | 
41 | # Configuration file
42 | CONFIG=/etc/influxdb-relay/influxdb-relay.conf
43 | 
44 | # PID file for the daemon
45 | PIDFILE=/var/run/influxdb-relay/influxdb-relay.pid
46 | PIDDIR=`dirname $PIDFILE`
-d "$PIDDIR" ]; then 48 | mkdir -p $PIDDIR 49 | chown $USER:$GROUP $PIDDIR 50 | fi 51 | 52 | # Max open files 53 | OPEN_FILE_LIMIT=65536 54 | 55 | if [ -r /lib/lsb/init-functions ]; then 56 | source /lib/lsb/init-functions 57 | fi 58 | 59 | # Logging 60 | if [ -z "$STDOUT" ]; then 61 | STDOUT=/var/log/influxdb-relay/influxdb-relay.log 62 | fi 63 | 64 | if [ ! -f "$STDOUT" ]; then 65 | mkdir -p $(dirname $STDOUT) 66 | fi 67 | 68 | if [ -z "$STDERR" ]; then 69 | STDERR=/var/log/influxdb-relay/influxdb-relay.log 70 | fi 71 | 72 | if [ ! -f "$STDERR" ]; then 73 | mkdir -p $(dirname $STDERR) 74 | fi 75 | 76 | # Override init script variables with DEFAULT values 77 | if [ -r $DEFAULT ]; then 78 | source $DEFAULT 79 | fi 80 | 81 | function log_failure_msg() { 82 | echo "$@" "[ FAILED ]" 83 | } 84 | 85 | function log_success_msg() { 86 | echo "$@" "[ OK ]" 87 | } 88 | 89 | function start() { 90 | # Check if config file exist 91 | if [ ! -r $CONFIG ]; then 92 | log_failure_msg "config file $CONFIG doesn't exist (or you don't have permission to view)" 93 | exit 4 94 | fi 95 | 96 | # Check that the PID file exists, and check the actual status of process 97 | if [ -f $PIDFILE ]; then 98 | PID="$(cat $PIDFILE)" 99 | if kill -0 "$PID" &>/dev/null; then 100 | # Process is already up 101 | log_success_msg "$NAME process is already running" 102 | return 0 103 | fi 104 | else 105 | su -s /bin/sh -c "touch $PIDFILE" $USER &>/dev/null 106 | if [ $? -ne 0 ]; then 107 | log_failure_msg "$PIDFILE not writable, check permissions" 108 | exit 5 109 | fi 110 | fi 111 | 112 | # Bump the file limits, before launching the daemon. These will 113 | # carry over to launched processes. 114 | ulimit -n $OPEN_FILE_LIMIT 115 | if [ $? -ne 0 ]; then 116 | log_failure_msg "Unable to set ulimit to $OPEN_FILE_LIMIT" 117 | exit 1 118 | fi 119 | 120 | # Launch process 121 | echo "Starting $NAME..." 122 | if which start-stop-daemon &>/dev/null; then 123 | start-stop-daemon \ 124 | --chuid $GROUP:$USER \ 125 | --start \ 126 | --quiet \ 127 | --pidfile $PIDFILE \ 128 | --exec $DAEMON \ 129 | -- \ 130 | -config $CONFIG \ 131 | $DAEMON_OPTS \ 132 | >>$STDOUT 2>>$STDERR & \ 133 | echo $! > "$PIDFILE" 134 | else 135 | local CMD="$DAEMON -config $CONFIG $DAEMON_OPTS >>$STDOUT 2>>$STDERR & echo \$!" 136 | su -s /bin/sh -c "$CMD" $USER > "$PIDFILE" 137 | fi 138 | 139 | # Sleep to verify process is still up 140 | sleep 1 141 | if [ -f $PIDFILE ]; then 142 | # PIDFILE exists 143 | if kill -0 $(cat $PIDFILE) &>/dev/null; then 144 | # PID up, service running 145 | log_success_msg "$NAME process was started" 146 | return 0 147 | fi 148 | fi 149 | log_failure_msg "$NAME process was unable to start" 150 | exit 1 151 | } 152 | 153 | function stop() { 154 | # Stop the daemon. 155 | if [ -f $PIDFILE ]; then 156 | local PID="$(cat $PIDFILE)" 157 | if kill -0 $PID &>/dev/null; then 158 | echo "Stopping $NAME..." 159 | # Process still up, send SIGTERM and remove PIDFILE 160 | kill -s SIGTERM $PID &>/dev/null && rm -f "$PIDFILE" &>/dev/null 161 | while true; do 162 | # Enter loop to ensure process is stopped 163 | kill -0 $PID &>/dev/null 164 | if [ "$?" != "0" ]; then 165 | # Process stopped, break from loop 166 | log_success_msg "$NAME process was stopped" 167 | return 0 168 | fi 169 | 170 | # Process still up after signal, sleep and wait 171 | sleep 1 172 | n=$(expr $n + 1) 173 | if [ $n -eq 30 ]; then 174 | # After 30 seconds, send SIGKILL 175 | echo "Timeout exceeded, sending SIGKILL..." 
176 |                     kill -s SIGKILL $PID &>/dev/null
177 |                 elif [ $n -eq 40 ]; then
178 |                     # After 40 seconds, error out
179 |                     log_failure_msg "could not stop $NAME process"
180 |                     exit 1
181 |                 fi
182 |             done
183 |         fi
184 |     fi
185 |     log_success_msg "$NAME process already stopped"
186 | }
187 | 
188 | function restart() {
189 |     # Restart the daemon.
190 |     stop
191 |     start
192 | }
193 | 
194 | function status() {
195 |     # Check the status of the process.
196 |     if [ -f $PIDFILE ]; then
197 |         PID="$(cat $PIDFILE)"
198 |         if kill -0 $PID &>/dev/null; then
199 |             log_success_msg "$NAME process is running"
200 |             exit 0
201 |         fi
202 |     fi
203 |     log_failure_msg "$NAME process is not running"
204 |     exit 1
205 | }
206 | 
207 | case $1 in
208 |     start)
209 |         start
210 |         ;;
211 | 
212 |     stop)
213 |         stop
214 |         ;;
215 | 
216 |     restart)
217 |         restart
218 |         ;;
219 | 
220 |     status)
221 |         status
222 |         ;;
223 | 
224 |     version)
225 |         $DAEMON version
226 |         ;;
227 | 
228 |     *)
229 |         # For invalid arguments, print the usage message.
230 |         echo "Usage: $0 {start|stop|restart|status|version}"
231 |         exit 2
232 |         ;;
233 | esac
234 | 
--------------------------------------------------------------------------------
/scripts/logrotate:
--------------------------------------------------------------------------------
1 | /var/log/influxdb-relay/influxdb-relay.log {
2 |     daily
3 |     rotate 7
4 |     missingok
5 |     dateext
6 |     copytruncate
7 |     compress
8 | }
9 | 
--------------------------------------------------------------------------------
/scripts/post-install.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | BIN_DIR=/usr/bin
 4 | DATA_DIR=/var/lib/influxdb-relay
 5 | LOG_DIR=/var/log/influxdb-relay
 6 | SCRIPT_DIR=/usr/lib/influxdb-relay/scripts
 7 | LOGROTATE_DIR=/etc/logrotate.d
 8 | 
 9 | function install_init {
10 |     cp -f $SCRIPT_DIR/init.sh /etc/init.d/influxdb-relay
11 |     chmod +x /etc/init.d/influxdb-relay
12 | }
13 | 
14 | function install_systemd {
15 |     cp -f $SCRIPT_DIR/influxdb-relay.service /lib/systemd/system/influxdb-relay.service
16 |     systemctl enable influxdb-relay
17 | }
18 | 
19 | function install_update_rcd {
20 |     update-rc.d influxdb-relay defaults
21 | }
22 | 
23 | function install_chkconfig {
24 |     chkconfig --add influxdb-relay
25 | }
26 | 
27 | id influxdb-relay &>/dev/null
28 | if [[ $? -ne 0 ]]; then
29 |     useradd --system -U -M influxdb-relay -s /bin/false -d $DATA_DIR
30 | fi
31 | 
32 | chown -R -L influxdb-relay:influxdb-relay $DATA_DIR
33 | chown -R -L influxdb-relay:influxdb-relay $LOG_DIR
34 | 
35 | # Remove legacy symlink, if it exists
36 | if [[ -L /etc/init.d/influxdb-relay ]]; then
37 |     rm -f /etc/init.d/influxdb-relay
38 | fi
39 | 
40 | # Distribution-specific logic
41 | if [[ -f /etc/redhat-release ]]; then
42 |     # RHEL-variant logic
43 |     which systemctl &>/dev/null
44 |     if [[ $? -eq 0 ]]; then
45 |         install_systemd
46 |     else
47 |         # Assuming sysv
48 |         install_init
49 |         install_chkconfig
50 |     fi
51 | elif [[ -f /etc/debian_version ]]; then
52 |     # Debian/Ubuntu logic
53 |     which systemctl &>/dev/null
54 |     if [[ $? -eq 0 ]]; then
55 |         install_systemd
56 |     else
57 |         # Assuming sysv
58 |         install_init
59 |         install_update_rcd
60 |     fi
61 | elif [[ -f /etc/os-release ]]; then
62 |     source /etc/os-release
63 |     if [[ $ID = "amzn" ]]; then
64 |         # Amazon Linux logic
65 |         install_init
66 |         install_chkconfig
67 |     fi
68 | fi
69 | 
--------------------------------------------------------------------------------
/scripts/post-uninstall.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | function disable_systemd {
 4 |     systemctl disable influxdb-relay
 5 |     rm -f /lib/systemd/system/influxdb-relay.service
 6 | }
 7 | 
 8 | function disable_update_rcd {
 9 |     update-rc.d -f influxdb-relay remove
10 |     rm -f /etc/init.d/influxdb-relay
11 | }
12 | 
13 | function disable_chkconfig {
14 |     chkconfig --del influxdb-relay
15 |     rm -f /etc/init.d/influxdb-relay
16 | }
17 | 
18 | if [[ -f /etc/redhat-release ]]; then
19 |     # RHEL-variant logic
20 |     if [[ "$1" = "0" ]]; then
21 |         # InfluxDB-Relay is no longer installed, remove it from the init system
22 |         which systemctl &>/dev/null
23 |         if [[ $? -eq 0 ]]; then
24 |             disable_systemd
25 |         else
26 |             # Assuming sysv
27 |             disable_chkconfig
28 |         fi
29 |     fi
30 | elif [[ -f /etc/lsb-release ]]; then
31 |     # Debian/Ubuntu logic
32 |     if [[ "$1" != "upgrade" ]]; then
33 |         # Remove/purge
34 |         which systemctl &>/dev/null
35 |         if [[ $? -eq 0 ]]; then
36 |             disable_systemd
37 |         else
38 |             # Assuming sysv
39 |             disable_update_rcd
40 |         fi
41 |     fi
42 | elif [[ -f /etc/os-release ]]; then
43 |     source /etc/os-release
44 |     if [[ $ID = "amzn" ]]; then
45 |         # Amazon Linux logic
46 |         if [[ "$1" = "0" ]]; then
47 |             # InfluxDB-Relay is no longer installed, remove it from the init system
48 |             disable_chkconfig
49 |         fi
50 |     fi
51 | fi
52 | 
--------------------------------------------------------------------------------