├── .gitignore ├── LICENSE.md ├── README.md └── out_clickhouse.rb /.gitignore: -------------------------------------------------------------------------------- 1 | td-agent.conf 2 | test.sh 3 | *.swp 4 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 2 | Version 2, December 2004 3 | 4 | Copyright (C) 2004 Sam Hocevar 5 | 6 | Everyone is permitted to copy and distribute verbatim or modified 7 | copies of this license document, and changing it is allowed as long 8 | as the name is changed. 9 | 10 | DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 11 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 12 | 13 | 0. You just DO WHAT THE FUCK YOU WANT TO. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # What is this? 2 | It's an output plugin for [Fluentd](https://www.fluentd.org/) that sends data into a [Yandex ClickHouse](https://clickhouse.yandex) database. So far it supports buffered output (*I still don't know how*) and handles a few exceptions. 3 | # How to use it? 4 | I'm not a Ruby programmer who knows how to write gems, so **just put [out_clickhouse.rb](out_clickhouse.rb) into /etc/td-agent/plugin**. 
5 | Here's an example td-agent.conf: 6 | ``` 7 | <source> 8 | @type http 9 | port 8888 10 | </source> 11 | <match **> 12 | @type clickhouse 13 | host 127.0.0.1 14 | port 8123 15 | table FLUENT 16 | datetime_name DateTime # name for internal fluentd datetime field 17 | fields DateTime,tag,Num # in this order values will be inserted in CH 18 | </match> 19 | ``` 20 | Before launching td-agent, create the table in ClickHouse (column names must match the names listed in `fields`): 21 | `CREATE TABLE FLUENT ( Date Date MATERIALIZED toDate(DateTime), DateTime DateTime, tag String, Num Int32) ENGINE = MergeTree(Date, Date, DateTime, 8192)` 22 | Start td-agent and send a few events to fluentd: 23 | ``` 24 | curl -X POST -d 'json={"Num":1}' http://localhost:8888/inp 25 | curl -X POST -d 'json={"Num":2}' http://localhost:8888/inp 26 | curl -X POST -d 'json={"Num":3}' http://localhost:8888/inp 27 | ``` 28 | After a few seconds, when the buffer flushes, you should see this in ClickHouse: 29 | ```:) SELECT * FROM FLUENT ; 30 | ┌───────Date─┬────────────DateTime─┬─tag─┬─Num─┐ 31 | │ 2017-11-06 │ 2017-11-06 14:42:03 │ inp │ 1 │ 32 | │ 2017-11-06 │ 2017-11-06 14:42:06 │ inp │ 2 │ 33 | │ 2017-11-06 │ 2017-11-06 14:42:09 │ inp │ 3 │ 34 | └────────────┴─────────────────────┴─────┴─────┘ 35 | ``` 36 | # Wow, it doesn't even support HTTP auth 37 | Yes, and besides auth, there's still work to do: 38 | * SSL 39 | * Timezones that don't suck 40 | * GZIP. ClickHouse supports compression, so why not? 
require 'fluent/output'
require 'fluent/config/error'
require 'net/http'
require 'date'
require 'csv'

module Fluent
  # Buffered Fluentd output plugin registered as "clickhouse".
  #
  # Each event is formatted as one CSV line (columns in the order given by the
  # +fields+ parameter) and every buffer chunk is POSTed to the ClickHouse HTTP
  # interface as the body of an "INSERT INTO <table> (<fields>) FORMAT CSV"
  # query.
  class ClickhouseOutput < BufferedOutput
    Fluent::Plugin.register_output("clickhouse", self)

    # Default buffer timekey: one day, in seconds.
    DEFAULT_TIMEKEY = 60 * 60 * 24

    desc "IP or fqdn of ClickHouse node"
    config_param :host, :string
    desc "Port of ClickHouse HTTP interface"
    config_param :port, :integer, default: 8123
    desc "Database to use"
    config_param :database, :string, default: "default"
    desc "Table to use"
    config_param :table, :string
    desc "Offset in minutes, could be useful to substract timestamps because of timezones"
    config_param :tz_offset, :integer, default: 0
    # TODO auth and SSL params. and maybe gzip
    desc "Order of fields while insert"
    config_param :fields, :array, value_type: :string
    desc "Which part of tag should be taken"
    config_param :tag_part, :integer, default: nil
    desc "Name of internal fluentd time field (if need to use)"
    config_param :datetime_name, :string, default: nil
    config_section :buffer do
      config_set_default :@type, "file"
      config_set_default :chunk_keys, ["time"]
      config_set_default :flush_at_shutdown, true
      config_set_default :timekey, DEFAULT_TIMEKEY
    end

    # Reads the plugin settings from +conf+, builds the base URI and verifies
    # that the ClickHouse server is reachable.
    #
    # @param conf [#[]] plugin configuration, indexed by parameter name
    # @raise [Fluent::ConfigError] if the connectivity check fails
    def configure(conf)
      super
      @uri, @uri_params = make_uri(conf)
      @database = conf["database"] || "default"
      @table = conf["table"]
      # Drop empty column names (e.g. produced by a trailing comma in the list).
      @fields = fields.reject(&:empty?)
      @tz_offset = conf["tz_offset"].to_i
      # Kept as the raw config value (nil when unset); converted with #to_i in #format.
      @tag_part = conf["tag_part"]
      @datetime_name = conf["datetime_name"]
      test_connection(conf)
    end

    # Issues a "SHOW TABLES" query against the configured server to validate
    # connectivity at configuration time.
    #
    # @param conf [#[]] plugin configuration (unused here; kept for interface compatibility)
    # @raise [Fluent::ConfigError] if the server cannot be reached or answers
    #   with a non-200 status code
    def test_connection(conf)
      uri = @uri.clone
      uri.query = URI.encode_www_form(@uri_params.merge("query" => "SHOW TABLES"))
      begin
        res = Net::HTTP.get_response(uri)
      rescue Errno::ECONNREFUSED
        raise Fluent::ConfigError, "Couldn't connect to ClickHouse at #{ @uri } - connection refused"
      rescue SystemCallError, SocketError, Timeout::Error => e
        # Unknown host, unreachable network, timeouts, ...: report them as a
        # configuration problem as well instead of leaking a raw exception.
        raise Fluent::ConfigError, "Couldn't connect to ClickHouse at #{ @uri } - #{ e.class }: #{ e.message }"
      end
      unless res.code == "200"
        raise Fluent::ConfigError, "ClickHouse server responded non-200 code: #{ res.body }"
      end
    end

    # Builds the base HTTP URI and the query parameters shared by all requests.
    #
    # @param conf [#[]] plugin configuration; reads "host", "port" (falls back
    #   to 8123) and "database" (falls back to "default")
    # @return [Array(URI, Hash)] the base URI and a hash holding the "database" parameter
    def make_uri(conf)
      uri = URI("http://#{ conf["host"] }:#{ conf["port"] || 8123 }/")
      params = {"database" => conf["database"] || "default"}
      [uri, params]
    end

    # Serializes one event into a CSV line whose columns follow @fields.
    #
    # Column values are chosen as follows:
    # * "tag"            - the event tag, or only its +tag_part+-th
    #                      dot-separated segment when +tag_part+ is configured;
    # * +datetime_name+  - the event timestamp shifted by +tz_offset+ minutes;
    # * anything else    - the record value stored under that key (nil if absent).
    #
    # The +record+ hash is not modified.
    #
    # @param tag [String] event tag
    # @param timestamp [#+] event time in seconds
    # @param record [Hash] event payload
    # @return [String] one CSV-formatted line
    def format(tag, timestamp, record)
      row = @fields.map do |key|
        if key == "tag"
          # Only split the tag when a part was actually requested.
          @tag_part ? tag.split(".")[@tag_part.to_i] : tag
        elsif @datetime_name && key == @datetime_name
          timestamp + @tz_offset * 60
        else
          record[key]
        end
      end
      CSV.generate_line(row)
    end

    # Sends the content of a buffer chunk to ClickHouse as a CSV INSERT.
    #
    # @param chunk [#read] buffer chunk holding the lines produced by #format
    def write(chunk)
      uri = @uri.clone
      query = {"query" => "INSERT INTO #{@table} (#{@fields.join(",")}) FORMAT CSV"}
      uri.query = URI.encode_www_form(@uri_params.merge(query))
      req = Net::HTTP::Post.new(uri)
      req.body = chunk.read
      http = Net::HTTP.new(uri.hostname, uri.port)
      resp = http.request(req)
      # NOTE(review): a failed insert is only logged, so the chunk is not
      # retried by this method - confirm that dropping it is intended.
      if resp.code != "200"
        log.warn "Clickhouse responded: #{resp.body}"
      end
    end
  end
end