├── .gitignore ├── README.md ├── cmd ├── zeek-clickhouse-hec-server │ ├── main.go │ └── main_test.go └── zeek-clickhouse-load-stdin │ └── main.go ├── dns_view.sql ├── go.mod ├── go.sum ├── inserter.go ├── schema.sql ├── test_data ├── conn.log ├── files.log ├── http.log └── log.json ├── types.go ├── zeek_to_view.py ├── zeekjson.go ├── zeekjson_test.go ├── zeektsv.go └── zeektsv_test.go /.gitignore: -------------------------------------------------------------------------------- 1 | cmd/zeek-clickhouse-hec-server/zeek-clickhouse-hec-server 2 | cmd/zeek-clickhouse-load-stdin/zeek-clickhouse-load-stdin 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # zeek-clickhouse 2 | 3 | WIP Implementation of [Fast and Reliable Schema-Agnostic Log Analytics Platform](https://eng.uber.com/logging/) 4 | -------------------------------------------------------------------------------- /cmd/zeek-clickhouse-hec-server/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "errors" 6 | "flag" 7 | "fmt" 8 | "io/ioutil" 9 | "log" 10 | "net/http" 11 | "sync" 12 | "time" 13 | 14 | zeekclickhouse "github.com/JustinAzoff/zeek-clickhouse" 15 | ) 16 | 17 | var beginning = []byte(`"event":`) 18 | 19 | func extractEvent(message []byte) ([]byte, error) { 20 | //Should probably just use jsonparser here, but I know the structure 21 | start := bytes.Index(message, []byte(beginning)) 22 | if start == -1 { 23 | return message, errors.New("Can't find start of event") 24 | } 25 | return message[start+len(beginning) : len(message)-1], nil 26 | } 27 | 28 | type HECClickhouse struct { 29 | ch *zeekclickhouse.Inserter 30 | lock sync.Mutex 31 | } 32 | 33 | func NewHECClickhouse(URI string) (*HECClickhouse, error) { 34 | inserter, err := zeekclickhouse.NewInserter(URI) 35 | if err != nil { 36 | return nil, err 37 | } 38 | err = inserter.LoadSchema() 39 | if err != nil { 40 | log.Fatal(err) 41 | } 42 | return &HECClickhouse{ 43 | ch: inserter, 44 | }, nil 45 | } 46 | func (h *HECClickhouse) doInsert(records []zeekclickhouse.DBRecord) error { 47 | var err error 48 | h.lock.Lock() 49 | defer h.lock.Unlock() 50 | 51 | err = h.ch.Begin() 52 | if err != nil { 53 | return fmt.Errorf("Error starting tx: %w", err) 54 | } 55 | 56 | for _, rec := range records { 57 | err = h.ch.Insert(rec) 58 | if err != nil { 59 | return fmt.Errorf("Error inserting : %w", err) 60 | } 61 | } 62 | err = h.ch.Commit() 63 | if err != nil { 64 | return fmt.Errorf("Error commiting : %w", err) 65 | } 66 | return nil 67 | } 68 | 69 | func (h *HECClickhouse) Ingest(w http.ResponseWriter, req *http.Request) { 70 | defer req.Body.Close() 71 | body, err := ioutil.ReadAll(req.Body) 72 | if err != nil { 73 | log.Printf("Error reading body: %v", err) 74 | w.WriteHeader(http.StatusInternalServerError) 75 | w.Write([]byte(`{"text":"Failure","code":503}` + "\n")) 76 | return 77 | } 78 | separated := bytes.Replace(body, []byte("}{"), []byte("}\n{"), -1) 79 | splitIntoLines := bytes.Split(separated, []byte("\n")) 80 | count := len(splitIntoLines) 81 | log.Printf("Got %d lines", count) 82 | records := make([]zeekclickhouse.DBRecord, 0, count) 83 | for _, line := range splitIntoLines { 84 | zeek, err := extractEvent(line) 85 | if err != nil { 86 | log.Printf("Error extracting event: %v", err) 87 | continue 88 | } 89 | rec, err := zeekclickhouse.ZeekToDBRecord(zeek) 90 | if err != nil { 91 | log.Printf("Error converting record: %v", err) 92 | continue 93 | } 94 | records = append(records, rec) 95 | } 96 | err = h.doInsert(records) 97 | 98 | if err != nil { 99 | log.Printf("Error: %v", err) 100 | w.WriteHeader(http.StatusInternalServerError) 101 | w.Write([]byte(`{"text":"Failure","code":503}` + "\n")) 102 | return 103 | } 104 | w.Write([]byte(`{"text":"Success","code":0}` + "\n")) 105 | } 106 | 107 | func main() { 108 | var uri string 109 | var bindAddr string 110 | flag.StringVar(&uri, "uri", "tcp://192.168.2.68:9000?debug=false", "server uri") 111 | flag.StringVar(&bindAddr, "bind", ":8090", "bind addr") 112 | flag.Parse() 113 | 114 | myHandler := http.NewServeMux() 115 | srv := &http.Server{ 116 | Addr: bindAddr, 117 | Handler: myHandler, 118 | ReadTimeout: 60 * time.Second, 119 | WriteTimeout: 120 * time.Second, 120 | MaxHeaderBytes: 1 << 20, 121 | } 122 | 123 | hec, err := NewHECClickhouse(uri) 124 | if err != nil { 125 | log.Fatal(err) 126 | } 127 | myHandler.HandleFunc("/ingest", hec.Ingest) 128 | log.Printf("Listening on %s\n", bindAddr) 129 | if err := srv.ListenAndServe(); err != http.ErrServerClosed { 130 | // Error starting or closing listener: 131 | log.Fatalf("HTTP server ListenAndServe: %v", err) 132 | } 133 | 134 | } 135 | -------------------------------------------------------------------------------- /cmd/zeek-clickhouse-hec-server/main_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "testing" 4 | 5 | var rec = []byte(`{"time":1615153266.918483,"sourcetype":"corelight_weird","event":{"_path":"weird","_system_name":"ja-ap200.home","_write_ts":"2021-03-07T21:41:06.918483Z","ts":"2021-03-07T21:41:06.918483Z","name":"non_ip_packet_in_ethernet","notice":false}}`) 6 | 7 | func TestExtractEvent(t *testing.T) { 8 | out, err := extractEvent(rec) 9 | if err != nil { 10 | t.Errorf("Error extracting event: %v", err) 11 | } 12 | t.Logf("Got %s", out) 13 | } 14 | -------------------------------------------------------------------------------- /cmd/zeek-clickhouse-load-stdin/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "log" 7 | "os" 8 | "time" 9 | 10 | clickhouse "github.com/ClickHouse/clickhouse-go" 11 | zeekclickhouse "github.com/JustinAzoff/zeek-clickhouse" 12 | ) 13 | 14 | func main() { 15 | var uri string 16 | var batchSize int 17 | var format string 18 | flag.StringVar(&format, "format", "json", "json or csv") 19 | flag.StringVar(&uri, "uri", "tcp://192.168.2.68:9000?debug=false", "server uri") 20 | flag.IntVar(&batchSize, "batchsize", 100_000, "commit batch size") 21 | flag.Parse() 22 | 23 | inserter, err := zeekclickhouse.NewInserter(uri) 24 | if err != nil { 25 | log.Fatal(err) 26 | } 27 | if err := inserter.Ping(); err != nil { 28 | if exception, ok := err.(*clickhouse.Exception); ok { 29 | fmt.Printf("[%d] %s \n%s\n", exception.Code, exception.Message, exception.StackTrace) 30 | } else { 31 | fmt.Println(err) 32 | } 33 | return 34 | } 35 | err = inserter.LoadSchema() 36 | if err != nil { 37 | log.Fatal(err) 38 | } 39 | err = inserter.Begin() 40 | if err != nil { 41 | log.Fatal(err) 42 | } 43 | var z zeekclickhouse.DBConverter 44 | switch format { 45 | case "tsv": 46 | z = zeekclickhouse.NewZeekTSVReader(os.Stdin) 47 | case "json": 48 | z = zeekclickhouse.NewZeekJSONReader(os.Stdin) 49 | default: 50 | log.Fatalf("Invalid format: %v. Not tsv or json", format) 51 | } 52 | n := 0 53 | totalRecords := 0 54 | startTime := time.Now() 55 | for { 56 | rec, err := z.Next() 57 | if err != nil { 58 | log.Printf("Next: %v", err) 59 | break 60 | } 61 | err = inserter.Insert(rec) 62 | if err != nil { 63 | log.Printf("Error inserting: %v: %v", rec, err) 64 | continue 65 | } 66 | n++ 67 | totalRecords++ 68 | if n%batchSize == 0 { 69 | log.Printf("Committing %d records", n) 70 | n = 0 71 | if err := inserter.Commit(); err != nil { 72 | log.Fatal(err) 73 | } 74 | err = inserter.Begin() 75 | if err != nil { 76 | log.Fatal(err) 77 | } 78 | } 79 | 80 | } 81 | 82 | if n > 0 { 83 | log.Printf("Committing %d records", n) 84 | if err := inserter.Commit(); err != nil { 85 | log.Fatal(err) 86 | } 87 | } 88 | 89 | if totalRecords > 0 { 90 | duration := time.Since(startTime) 91 | log.Printf("Inserted %d records in %.1f seconds, %.0f records/sec", totalRecords, duration.Seconds(), float64(totalRecords)/duration.Seconds()) 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /dns_view.sql: -------------------------------------------------------------------------------- 1 | create view dns as 2 | select 3 | _timestamp, 4 | ts, 5 | day, 6 | _hostname, 7 | _source, 8 | 9 | uid, 10 | "id.orig_h", "id.orig_p", 11 | "id.resp_h", "id.resp_p", 12 | `string.values`[indexOf(`string.names`, 'proto')] AS proto, 13 | `number.values`[indexOf(`number.names`, 'trans_id')] AS trans_id, 14 | `number.values`[indexOf(`number.names`, 'rtt')] AS rtt, 15 | `string.values`[indexOf(`string.names`, 'query')] AS query, 16 | `number.values`[indexOf(`number.names`, 'qclass')] AS qclass, 17 | `string.values`[indexOf(`string.names`, 'qclass_name')] AS qclass_name, 18 | `number.values`[indexOf(`number.names`, 'qtype')] AS qtype, 19 | `string.values`[indexOf(`string.names`, 'qtype_name')] AS qtype_name, 20 | `number.values`[indexOf(`number.names`, 'rcode')] AS rcode, 21 | `string.values`[indexOf(`string.names`, 'rcode_name')] AS rcode_name, 22 | `bool.values`[indexOf(`bool.names`, 'AA')] AS AA, 23 | `bool.values`[indexOf(`bool.names`, 'TC')] AS TC, 24 | `bool.values`[indexOf(`bool.names`, 'RD')] AS RD, 25 | `bool.values`[indexOf(`bool.names`, 'RA')] AS RA, 26 | `number.values`[indexOf(`number.names`, 'Z')] AS Z, 27 | `array.values`[indexOf(`array.names`, 'answers')] AS answers, 28 | `array.values`[indexOf(`array.names`, 'TTLs')] AS TTLs, 29 | `bool.values`[indexOf(`bool.names`, 'rejected')] AS rejected 30 | FROM logs 31 | WHERE _path='dns' 32 | ; 33 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/JustinAzoff/zeek-clickhouse 2 | 3 | go 1.16 4 | 5 | require ( 6 | github.com/ClickHouse/clickhouse-go v1.4.3 7 | github.com/buger/jsonparser v1.1.1 // indirect 8 | ) 9 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/ClickHouse/clickhouse-go v1.4.3 h1:iAFMa2UrQdR5bHJ2/yaSLffZkxpcOYQMCUuKeNXGdqc= 2 | github.com/ClickHouse/clickhouse-go v1.4.3/go.mod h1:EaI/sW7Azgz9UATzd5ZdZHRUhHgv5+JMS9NSr2smCJI= 3 | github.com/bkaradzic/go-lz4 v1.0.0/go.mod h1:0YdlkowM3VswSROI7qDxhRvJ3sLhlFrRRwjwegp5jy4= 4 | github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs= 5 | github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0= 6 | github.com/cloudflare/golz4 v0.0.0-20150217214814-ef862a3cdc58/go.mod h1:EOBUe0h4xcZ5GoxqC5SDxFQ8gwyZPKQoEzownBlhI80= 7 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 8 | github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= 9 | github.com/jmoiron/sqlx v1.2.0/go.mod h1:1FEQNm3xlJgrMD+FBdI9+xvCksHtbpVBBw5dYhBSsks= 10 | github.com/lib/pq v1.0.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= 11 | github.com/mattn/go-sqlite3 v1.9.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc= 12 | github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= 13 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 14 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 15 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 16 | -------------------------------------------------------------------------------- /inserter.go: -------------------------------------------------------------------------------- 1 | package zeekclickhouse 2 | 3 | import ( 4 | "database/sql" 5 | _ "embed" 6 | "log" 7 | 8 | "github.com/ClickHouse/clickhouse-go" 9 | ) 10 | 11 | //go:embed schema.sql 12 | var schema string 13 | 14 | type Inserter struct { 15 | URI string 16 | conn *sql.DB 17 | tx *sql.Tx 18 | stmt *sql.Stmt 19 | } 20 | 21 | func NewInserter(URI string) (*Inserter, error) { 22 | connect, err := sql.Open("clickhouse", URI) 23 | if err != nil { 24 | log.Fatal(err) 25 | } 26 | return &Inserter{ 27 | URI: URI, 28 | conn: connect, 29 | }, nil 30 | } 31 | func (i *Inserter) Ping() error { 32 | return i.conn.Ping() 33 | } 34 | func (i *Inserter) LoadSchema() error { 35 | _, err := i.conn.Exec(schema) 36 | return err 37 | } 38 | func (i *Inserter) Begin() error { 39 | tx, err := i.conn.Begin() 40 | if err != nil { 41 | return err 42 | } 43 | stmt, err := tx.Prepare(` 44 | INSERT INTO logs ( 45 | _timestamp, _path, _hostname, _source, 46 | "string.names", "string.values", 47 | "number.names", "number.values", 48 | "bool.names", "bool.values", 49 | "array.names", "array.values" 50 | ) VALUES ( 51 | ?, ?, ?, ?, 52 | ?, ?, 53 | ?, ?, 54 | ?, ?, 55 | ?, ? 56 | )`) 57 | if err != nil { 58 | return err 59 | } 60 | i.tx = tx 61 | i.stmt = stmt 62 | return nil 63 | } 64 | func (i *Inserter) Commit() error { 65 | if err := i.tx.Commit(); err != nil { 66 | return err 67 | } 68 | i.stmt.Close() 69 | return nil 70 | } 71 | 72 | func (i *Inserter) Insert(rec DBRecord) error { 73 | _, err := i.stmt.Exec( 74 | rec.Timestamp, rec.Path, rec.Hostname, rec.Source, 75 | clickhouse.Array(rec.string_names), clickhouse.Array(rec.string_values), 76 | clickhouse.Array(rec.number_names), clickhouse.Array(rec.number_values), 77 | clickhouse.Array(rec.bool_names), clickhouse.Array(rec.bool_values), 78 | clickhouse.Array(rec.array_names), clickhouse.Array(rec.array_values), 79 | ) 80 | return err 81 | } 82 | -------------------------------------------------------------------------------- /schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE if not exists logs 2 | ( 3 | _timestamp UInt64, 4 | _path String, 5 | _hostname String, 6 | 7 | //raw log event 8 | _source String, 9 | 10 | //type specific fields names and field values 11 | "string.names" Array(LowCardinality(String)), 12 | "string.values" Array(String), 13 | "number.names" Array(LowCardinality(String)), 14 | "number.values" Array(Float64), 15 | "bool.names" Array(LowCardinality(String)), 16 | "bool.values" Array(UInt8), 17 | 18 | "array.names" Array(LowCardinality(String)), 19 | "array.values" Array(Array(String)), 20 | 21 | //Materialized fields 22 | ts DateTime DEFAULT FROM_UNIXTIME(toUInt64(_timestamp/1000)), 23 | day Date DEFAULT toDate(FROM_UNIXTIME(toUInt64(_timestamp/1000))), 24 | uid String DEFAULT "string.values"[indexOf("string.names", 'uid')], 25 | "id.orig_h" String DEFAULT "string.values"[indexOf("string.names", 'id.orig_h')], 26 | "id.orig_p" String DEFAULT "number.values"[indexOf("number.names", 'id.orig_p')], 27 | "id.resp_h" String DEFAULT "string.values"[indexOf("string.names", 'id.resp_h')], 28 | "id.resp_p" String DEFAULT "number.values"[indexOf("number.names", 'id.resp_p')] 29 | ) 30 | ENGINE = MergeTree() 31 | ORDER BY (_timestamp, _path, uid, cityHash64(uid)) 32 | SAMPLE BY (cityHash64(uid)) 33 | PARTITION BY (toYYYYMM(FROM_UNIXTIME(toUInt64(_timestamp/1000))), _path); 34 | -------------------------------------------------------------------------------- /test_data/conn.log: -------------------------------------------------------------------------------- 1 | #separator \x09 2 | #set_separator , 3 | #empty_field (empty) 4 | #unset_field - 5 | #path conn 6 | #open 2021-03-13-11-01-13 7 | #fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p proto service duration orig_bytes resp_bytes conn_state local_orig local_resp missed_bytes history orig_pkts orig_ip_bytes resp_pkts resp_ip_bytes tunnel_parents 8 | #types time string addr port addr port enum string interval count count string bool bool count string count count count count set[string] 9 | 1389719053.175317 CtzNEOAu9XflFepEg 10.0.2.15 55127 192.150.187.43 80 tcp http 3.860107 347 4213 S1 - - 0 ShADad 6 607 5 4417 - 10 | #close 2021-03-13-11-01-13 11 | -------------------------------------------------------------------------------- /test_data/files.log: -------------------------------------------------------------------------------- 1 | #separator \x09 2 | #set_separator , 3 | #empty_field (empty) 4 | #unset_field - 5 | #path files 6 | #open 2021-03-13-11-01-13 7 | #fields ts fuid tx_hosts rx_hosts conn_uids source depth analyzers mime_type filename duration local_orig is_orig seen_bytes total_bytes missing_bytes overflow_bytes timedout parent_fuid md5 sha1 sha256 extracted extracted_cutoff extracted_size 8 | #types time string set[addr] set[addr] set[string] string count set[string] string string interval bool bool count count count count bool string string string string string bool count 9 | 1389719057.033246 Fwy0p54cZCKE34pDQd 192.150.187.43 10.0.2.15 CtzNEOAu9XflFepEg HTTP 0 (empty) text/plain - 0.002178 - F 3912 3912 0 0 F - - - - - - - 10 | #close 2021-03-13-11-01-13 11 | -------------------------------------------------------------------------------- /test_data/http.log: -------------------------------------------------------------------------------- 1 | #separator \x09 2 | #set_separator , 3 | #empty_field (empty) 4 | #unset_field - 5 | #path http 6 | #open 2021-03-13-11-01-13 7 | #fields ts uid id.orig_h id.orig_p id.resp_h id.resp_p trans_depth method host uri referrer version user_agent origin request_body_len response_body_len status_code status_msg info_code info_msg tags username password proxied orig_fuids orig_filenames orig_mime_types resp_fuids resp_filenames resp_mime_types 8 | #types time string addr port addr port count string string string string string string string count count count string count string set[enum] string string set[string] vector[string] vector[string] vector[string] vector[string] vector[string] vector[string] 9 | 1389719056.899932 CtzNEOAu9XflFepEg 10.0.2.15 55127 192.150.187.43 80 1 GET bro.org /download/CHANGES.binpac.txt http://bro.org/download/index.html 1.1 Mozilla/5.0 (X11; Linux i686; rv:24.0) Gecko/20100101 Firefox/24.0 - 0 3912 200 OK - - (empty) - - - - - - Fwy0p54cZCKE34pDQd - text/plain 10 | #close 2021-03-13-11-01-13 11 | -------------------------------------------------------------------------------- /test_data/log.json: -------------------------------------------------------------------------------- 1 | {"_path":"conn","_system_name":"ja-ap200.home","_write_ts":"2021-03-02T17:26:50.530727Z","ts":"2021-03-02T17:26:40.530716Z","uid":"Ch4oAU2XJZ1NzniWbd","id.orig_h":"192.168.2.3","id.orig_p":49828,"id.resp_h":"8.8.8.8","id.resp_p":53,"proto":"udp","service":"dns","duration":0.007523059844970703,"orig_bytes":34,"resp_bytes":93,"conn_state":"SF","local_orig":true,"local_resp":false,"missed_bytes":0,"history":"Dd","orig_pkts":1,"orig_ip_bytes":62,"resp_pkts":1,"resp_ip_bytes":121,"resp_cc":"US","community_id":"1:yi9CjBFhqwMf5qGWaWHHreCj8DA="} 2 | -------------------------------------------------------------------------------- /types.go: -------------------------------------------------------------------------------- 1 | package zeekclickhouse 2 | 3 | type DBRecord struct { 4 | Timestamp int64 5 | Path string 6 | Hostname string 7 | Source string 8 | 9 | string_names []string 10 | string_values []string 11 | 12 | number_names []string 13 | number_values []float64 14 | 15 | bool_names []string 16 | bool_values []bool 17 | 18 | array_names []string 19 | array_values [][]string 20 | } 21 | 22 | type DBConverter interface { 23 | Next() (DBRecord, error) 24 | } 25 | -------------------------------------------------------------------------------- /zeek_to_view.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | 4 | f = sys.argv[1] 5 | d = open(f).read().splitlines() 6 | fields = d[0].split() 7 | types = d[1].split() 8 | pairs = zip(fields, types) 9 | 10 | def to_sql_type(type): 11 | if type.startswith(("set[", "vector[")): 12 | return "array" 13 | 14 | type_mapping = { 15 | 'uid': 'string', 16 | 'string': 'string', 17 | 'addr': 'string', 18 | 'enum': 'string', 19 | 20 | 'time': 'number', 21 | 'port': 'number', 22 | 'count': 'number', 23 | 'interval': 'number', 24 | 'double': 'number', 25 | 26 | 'bool': 'bool', 27 | } 28 | 29 | sql_type = type_mapping[type] 30 | return sql_type 31 | 32 | def gen_field(field, type): 33 | sql_type = to_sql_type(type) 34 | 35 | return f" `{sql_type}.values`[indexOf(`{sql_type}.names`, '{field}')] AS `{field}`," 36 | 37 | 38 | for f, t in pairs: 39 | if f.startswith("#"): continue 40 | print(gen_field(f, t)) 41 | -------------------------------------------------------------------------------- /zeekjson.go: -------------------------------------------------------------------------------- 1 | package zeekclickhouse 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "io" 7 | "log" 8 | "strconv" 9 | "time" 10 | 11 | "github.com/buger/jsonparser" 12 | ) 13 | 14 | func ZeekToDBRecord(line []byte) (DBRecord, error) { 15 | var rec DBRecord 16 | path, err := jsonparser.GetString(line, "_path") 17 | if err != nil { 18 | return rec, fmt.Errorf("Invalid record %q, missing _path", line) 19 | } 20 | rec.Source = string(line) 21 | rec.Path = path 22 | 23 | hostname, err := jsonparser.GetString(line, "_system_name") 24 | if err == nil { 25 | rec.Hostname = hostname 26 | } 27 | ts, err := jsonparser.GetUnsafeString(line, "ts") 28 | if err != nil { 29 | return rec, fmt.Errorf("Invalid record %q, missing ts", line) 30 | } 31 | t, err := time.Parse("2006-01-02T15:04:05.000000Z", ts) 32 | if err != nil { 33 | return rec, fmt.Errorf("Failed to parse timestamp %s: %v", ts, err) 34 | } 35 | rec.Timestamp = t.UnixNano() / 1e6 36 | err = jsonparser.ObjectEach(line, func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) error { 37 | skey := string(key) 38 | //Already pulled out 39 | if skey == "_path" || skey == "_system_name" { 40 | return nil 41 | } 42 | switch dataType { 43 | case jsonparser.String: 44 | rec.string_names = append(rec.string_names, skey) 45 | rec.string_values = append(rec.string_values, string(value)) 46 | case jsonparser.Number: 47 | rec.number_names = append(rec.number_names, skey) 48 | val, err := strconv.ParseFloat(string(value), 64) 49 | if err != nil { 50 | return err 51 | } 52 | rec.number_values = append(rec.number_values, val) 53 | case jsonparser.Boolean: 54 | rec.bool_names = append(rec.bool_names, skey) 55 | val, err := strconv.ParseBool(string(value)) 56 | if err != nil { 57 | return err 58 | } 59 | rec.bool_values = append(rec.bool_values, val) 60 | case jsonparser.Array: 61 | rec.array_names = append(rec.array_names, skey) 62 | var items []string 63 | jsonparser.ArrayEach(value, func(nestedvalue []byte, dataType jsonparser.ValueType, offset int, err error) { 64 | items = append(items, string(nestedvalue)) 65 | }) 66 | rec.array_values = append(rec.array_values, items) 67 | default: 68 | log.Printf("Don't handle: Key: '%s' Value: '%s' Type: %s", skey, string(value), dataType) 69 | } 70 | return nil 71 | }) 72 | return rec, err 73 | } 74 | 75 | type ZeekJSONReader struct { 76 | r *bufio.Reader 77 | } 78 | 79 | func (z *ZeekJSONReader) Next() (DBRecord, error) { 80 | var rec DBRecord 81 | line, err := z.r.ReadSlice('\n') 82 | if err != nil { 83 | return rec, err 84 | } 85 | return ZeekToDBRecord(line) 86 | } 87 | 88 | func NewZeekJSONReader(r io.Reader) *ZeekJSONReader { 89 | br := bufio.NewReaderSize(r, 16*1024*1024) 90 | return &ZeekJSONReader{ 91 | r: br, 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /zeekjson_test.go: -------------------------------------------------------------------------------- 1 | package zeekclickhouse 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | var LogExample = `{"_path":"dns","_system_name":"ja-ap200.home","_write_ts":"2021-03-05T14:04:11.711171Z","ts":"2021-03-05T14:04:11.710918Z","uid":"CFl2ra4eJhk9DpOFYk","id.orig_h":"192.168.2.11","id.orig_p":57844,"id.resp_h":"192.168.2.1","id.resp_p":53,"proto":"udp","trans_id":30780,"rtt":0.0002529621124267578,"query":"unifi.home","qclass":1,"qclass_name":"C_INTERNET","qtype":1,"qtype_name":"A","rcode":0,"rcode_name":"NOERROR","AA":true,"TC":false,"RD":true,"RA":true,"Z":0,"answers":["192.168.2.25"],"TTLs":[86400.0],"rejected":false}` 8 | 9 | func TestConvert(t *testing.T) { 10 | rec, err := ZeekToDBRecord([]byte(LogExample)) 11 | if err != nil { 12 | t.Fatalf("ZeekToDBRecord failed: %v", err) 13 | } 14 | t.Logf("Got: %+v", rec) 15 | 16 | if rec.Path != "dns" { 17 | t.Errorf("Expected rec.path=dns, got %v", rec.Path) 18 | } 19 | if rec.Hostname != "ja-ap200.home" { 20 | t.Errorf("Expected rec.Hostname=ja-ap200.home, got %v", rec.Hostname) 21 | } 22 | //FIXME: is this even what i want? it throws away the .000918 23 | if rec.Timestamp != 1614953051710 { 24 | t.Errorf("Expected rec.Hostname=1614953051710, got %v", rec.Timestamp) 25 | } 26 | //TODO: test the kv fields 27 | } 28 | 29 | func BenchmarkConvert(b *testing.B) { 30 | // run the Fib function b.N times 31 | var r string 32 | brec := []byte(LogExample) 33 | for n := 0; n < b.N; n++ { 34 | rec, err := ZeekToDBRecord(brec) 35 | r = rec.Path 36 | if err != nil { 37 | b.Fatalf("ZeekToDBRecord failed: %v", err) 38 | } 39 | } 40 | if r != "dns" { 41 | b.Errorf("Expected rec.path=dns, got %v", r) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /zeektsv.go: -------------------------------------------------------------------------------- 1 | package zeekclickhouse 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "encoding/hex" 7 | "fmt" 8 | "io" 9 | "log" 10 | "strconv" 11 | "strings" 12 | ) 13 | 14 | func decodeHexEscaped(escaped string) ([]byte, error) { 15 | cleaned := strings.ReplaceAll(escaped, "\\x", "") 16 | decoded, err := hex.DecodeString(cleaned) 17 | return decoded, err 18 | } 19 | 20 | type ZeekTSVReader struct { 21 | r *bufio.Reader 22 | path string 23 | fields []string 24 | types []string 25 | separator string 26 | setSeparator string 27 | emptyField string 28 | unsetField string 29 | } 30 | 31 | func (z *ZeekTSVReader) Next() (DBRecord, error) { 32 | var rec DBRecord 33 | line, err := z.r.ReadSlice('\n') 34 | if err != nil { 35 | return rec, err 36 | } 37 | line = line[0 : len(line)-1] 38 | sline := string(line) 39 | if bytes.HasPrefix(line, []byte{'#'}) { 40 | //Handle #separator separately 41 | if bytes.HasPrefix(line, []byte("#separator")) { 42 | sep := string(line[len("#separator "):]) 43 | decoded_sep, err := decodeHexEscaped(sep) 44 | if err != nil { 45 | return rec, fmt.Errorf("Invalid separator: %v: %w", sep, err) 46 | } 47 | z.separator = string(decoded_sep) 48 | return z.Next() 49 | } 50 | parts := strings.Split(sline, string(z.separator)) 51 | key := parts[0] 52 | switch key { 53 | case "#path": 54 | z.path = parts[1] 55 | case "#set_separator": 56 | z.setSeparator = parts[1] 57 | case "#empty_field": 58 | z.emptyField = parts[1] 59 | case "#unset_field": 60 | z.unsetField = parts[1] 61 | case "#fields": 62 | z.fields = parts[1:] 63 | case "#types": 64 | z.types = parts[1:] 65 | case "#open": 66 | //do nothing 67 | case "#close": 68 | //do nothing 69 | default: 70 | log.Printf("unhandled header line: %s", line) 71 | } 72 | return z.Next() 73 | } 74 | parts := strings.Split(sline, z.separator) 75 | var fname string 76 | var ftype string 77 | for i, val := range parts { 78 | if val == z.emptyField || val == z.unsetField { 79 | continue 80 | } 81 | fname = z.fields[i] 82 | ftype = z.types[i] 83 | if strings.HasPrefix(ftype, "vector") || strings.HasPrefix(ftype, "set") { 84 | vval := strings.Split(val, z.setSeparator) 85 | rec.array_names = append(rec.array_names, fname) 86 | rec.array_values = append(rec.array_values, vval) 87 | continue 88 | } 89 | if fname == "ts" { 90 | nval, err := strconv.ParseFloat(string(val), 64) 91 | if err != nil { 92 | return rec, err 93 | } 94 | rec.Timestamp = int64(nval * 1000) 95 | } 96 | switch ftype { 97 | case "uid", "string", "addr", "enum": 98 | rec.string_names = append(rec.string_names, fname) 99 | rec.string_values = append(rec.string_values, val) 100 | case "time", "port", "count", "interval", "double", "int": 101 | nval, err := strconv.ParseFloat(string(val), 64) 102 | if err != nil { 103 | return rec, err 104 | } 105 | rec.number_names = append(rec.number_names, fname) 106 | rec.number_values = append(rec.number_values, nval) 107 | case "bool": 108 | bval, err := strconv.ParseBool(string(val)) 109 | if err != nil { 110 | return rec, err 111 | } 112 | rec.bool_names = append(rec.bool_names, fname) 113 | rec.bool_values = append(rec.bool_values, bval) 114 | default: 115 | log.Printf("Unhandled %v %v", ftype, fname) 116 | } 117 | } 118 | rec.Path = z.path 119 | return rec, nil 120 | } 121 | 122 | func NewZeekTSVReader(r io.Reader) *ZeekTSVReader { 123 | br := bufio.NewReaderSize(r, 16*1024*1024) 124 | return &ZeekTSVReader{ 125 | r: br, 126 | separator: "\t", 127 | setSeparator: ",", 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /zeektsv_test.go: -------------------------------------------------------------------------------- 1 | package zeekclickhouse 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | ) 7 | 8 | func TestZeekTSVReader(t *testing.T) { 9 | r, err := os.Open("test_data/http.log") 10 | if err != nil { 11 | t.Fatalf("failed to open test data: %v", err) 12 | } 13 | 14 | z := NewZeekTSVReader(r) 15 | 16 | rec, err := z.Next() 17 | if err != nil { 18 | t.Fatalf("Next() failed: %v", err) 19 | } 20 | t.Logf("Got: %+v", rec) 21 | 22 | if rec.Path != "http" { 23 | t.Errorf("Expected rec.path=http, got %v", rec.Path) 24 | } 25 | //if rec.Hostname != "ja-ap200.home" { 26 | // t.Errorf("Expected rec.Hostname=ja-ap200.home, got %v", rec.Hostname) 27 | //} 28 | //FIXME: is this even what i want? it throws away the .000932 29 | if rec.Timestamp != 1389719056899 { 30 | t.Errorf("Expected rec.Timestamp=1389719056899, got %v", rec.Timestamp) 31 | } 32 | //TODO: test the kv fields 33 | } 34 | --------------------------------------------------------------------------------