├── logd ├── res ├── hosts_white_list.txt ├── ip_blacklist.txt ├── common_spiderlist.txt └── colsmap.json ├── connection.go ├── loglib ├── log.go ├── file_log.go ├── agent.go └── net_log.go ├── README.md ├── conf ├── monitor.ini └── config.ini ├── lib ├── net.go ├── wait_quit.go ├── globalList.go ├── rotate_writer.go ├── cfg_reader.go ├── quit_list.go └── lib.go ├── tcpClient.go ├── collector.go ├── main.go ├── heart_beat ├── heart_beat_checker.go └── heart_beat.go ├── test.go ├── singleConnection.go ├── multiConnection.go ├── monitor ├── monitor.go └── log_receiver.go ├── tcp_pack └── pack.go ├── receiver.go ├── fileOutputer.go ├── db └── mysql.go ├── runner.go ├── integrity └── integrity_checker.go ├── sender.go ├── etl_outputer.go ├── tcpReceiver.go ├── LICENSE ├── tailer.go └── mongodb_outputer.go /logd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/logd-team/logd/HEAD/logd -------------------------------------------------------------------------------- /res/hosts_white_list.txt: -------------------------------------------------------------------------------- 1 | ^[a-z,A-Z,\.]{1,8}\.hao[12]2[23]\.com$ 2 | www\.cliponyu\.com 3 | -------------------------------------------------------------------------------- /connection.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "net" 5 | ) 6 | 7 | type Connection interface { 8 | reconnect(conn *net.TCPConn) 9 | getConn() *net.TCPConn 10 | close() 11 | } -------------------------------------------------------------------------------- /res/ip_blacklist.txt: -------------------------------------------------------------------------------- 1 | #国际化机器 2 | 119.63.195.248 3 | 180.76.4.31 4 | 185.10.107.69 5 | 63.217.158.62 6 | 103.247.62.62 7 | 105.203.253.66 8 | #Monitor机器 9 | 180.149.143.26 10 | 180.149.143.27 11 | 180.149.143.153 12 | 180.149.143.154 13 | 
-------------------------------------------------------------------------------- /loglib/log.go: -------------------------------------------------------------------------------- 1 | package loglib 2 | 3 | const ( 4 | DEBUG = 0 5 | INFO = 1 6 | WARNING = 2 7 | ERROR = 3 8 | ) 9 | 10 | var prefixes = []string{"DEBUG", "INFO", "WARNING", "ERROR"} 11 | 12 | type Log interface { 13 | SetLevel(level int) bool 14 | Debug(msg string) 15 | Info(msg string) 16 | Warning(msg string) 17 | Error(msg string) 18 | } 19 | 20 | 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | Logd is an open-source distributed, reliable, high-performance and available service which is based on streaming data flows and used for collecting huge amounts of log data. 3 | 4 | ## Features 5 | 6 | * Written in [Go](http://golang.org) 7 | * Easy to get running (3 or 4 commands) 8 | * no other dependencies 9 | 10 | ## download 11 | please download the zip file directly. 
12 | -------------------------------------------------------------------------------- /conf/monitor.ini: -------------------------------------------------------------------------------- 1 | ;log receiver相关 2 | [receiver] 3 | db_host = hk01-hao123-hunter00.hk01.baidu.com 4 | db_port = 3306 5 | db_uname = root 6 | db_passwd = 123456 7 | db_db = db_logd_test 8 | db_charset= utf8 9 | recv_port = 4040 10 | ;心跳相关 11 | [heart_beat] 12 | ;检测间隔时间,单位是秒 13 | check_interval = 30 14 | [logAgent] 15 | ;可选level: debug, info, warning, error,不区分大小写 16 | level = info 17 | local_dir = logs 18 | -------------------------------------------------------------------------------- /res/common_spiderlist.txt: -------------------------------------------------------------------------------- 1 | Googlebot 2 | msnbot 3 | Baidu QA Crawler 4 | Tsinghua AI Lab Robot 5 | Wget 6 | PKU CS TEST 7 | Sosospider 8 | Baiduspider 9 | PycURL 10 | Jakarta Commons 11 | msnbot 12 | Java 13 | BaiduInternalRobot 14 | Twisted PageGetter 15 | WebSpider 16 | Incutio 17 | hylanda 18 | AA bot 19 | UNTRUSTED 20 | TestAgent 21 | AppEngine-Google 22 | Huasai 23 | Python 24 | robot 25 | Sogou web spider 26 | QihooBot 27 | SikooCVS/Nutch 28 | msrabot 29 | user-agent 30 | Yahoo! 
Slurp 31 | Iaskspider 32 | sogou spider 33 | Sosoblogspider 34 | YoudaoBot 35 | spider 36 | crawler 37 | slurp 38 | mediapartners-google 39 | yandex 40 | feedfetcher-google 41 | agentname 42 | HTMLParser 43 | bingbot 44 | Indy Library 45 | -------------------------------------------------------------------------------- /lib/net.go: -------------------------------------------------------------------------------- 1 | package lib 2 | 3 | import ( 4 | "net" 5 | "log" 6 | "runtime" 7 | "strings" 8 | "bytes" 9 | "encoding/binary" 10 | ) 11 | 12 | func GetConnection(addr string) (*net.TCPConn, error) { 13 | tcpAddr, err := net.ResolveTCPAddr("tcp4", addr) 14 | if err != nil { 15 | _, f, l, _ := runtime.Caller(1) 16 | f = strings.Replace(f, GetBinPath(), "", -1) 17 | log.Println(f, ":", l, "[GetConnection] resolve tcp address failed", err) 18 | return nil, err 19 | } 20 | conn, err := net.DialTCP("tcp4", nil, tcpAddr) 21 | if err != nil { 22 | _, f, l, _ := runtime.Caller(1) 23 | f = strings.Replace(f, GetBinPath(), "", -1) 24 | log.Println(f, ":", l, "[GetConnection] connect to address:" , addr, "failed!") 25 | } 26 | return conn, err 27 | } 28 | 29 | func IpToUint32(ip string) uint32 { 30 | ipo := net.ParseIP(ip) 31 | r := bytes.NewReader([]byte(ipo.To4())) 32 | var ipl uint32 = 0 33 | err := binary.Read(r, binary.BigEndian, &ipl) 34 | if err != nil { 35 | log.Println("ip to long error:", err) 36 | } 37 | return ipl 38 | } 39 | -------------------------------------------------------------------------------- /lib/wait_quit.go: -------------------------------------------------------------------------------- 1 | /* 2 | 封装一个类用于等待类退出,分为两步: 3 | 1、类处理完退出操作后调用AllDone() 4 | 2、类的Quit()方法中调用Quit() 5 | 6 | */ 7 | 8 | package lib 9 | 10 | import ( 11 | "time" 12 | "logd/loglib" 13 | ) 14 | 15 | type WaitQuit struct { 16 | name string 17 | allowTimeout int //允许的超时次数, 小于0表示不限次数 18 | nTimeout int //已超时次数 19 | ch chan bool 20 | } 21 | 22 | func NewWaitQuit(modName string, allow ...int) 
*WaitQuit { 23 | ch := make(chan bool) 24 | a := 2 25 | if len(allow) > 0 { 26 | a = allow[0] 27 | } 28 | return &WaitQuit{modName, a, 0, ch} 29 | } 30 | 31 | func (this *WaitQuit) AllDone() { 32 | this.ch <- true 33 | } 34 | 35 | func (this *WaitQuit) Quit() bool { 36 | ret := false 37 | select { 38 | case <- this.ch: 39 | loglib.Info(this.name + " safe quit.") 40 | ret = true 41 | case <- time.After(2 * time.Second): 42 | loglib.Info(this.name + " quit timeout") 43 | this.nTimeout++ 44 | if this.allowTimeout > 0 && this.nTimeout >= this.allowTimeout { 45 | ret = true 46 | } 47 | } 48 | return ret 49 | 50 | } 51 | -------------------------------------------------------------------------------- /lib/globalList.go: -------------------------------------------------------------------------------- 1 | package lib 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | // "time" 7 | "container/list" 8 | ) 9 | 10 | 11 | type GlobalList struct { 12 | list *list.List 13 | m *sync.Mutex 14 | // once sync.Once 15 | } 16 | 17 | func GlobalListInit() (globalList *GlobalList) { 18 | 19 | globalList = new(GlobalList) 20 | globalList.list = list.New() 21 | globalList.m = new(sync.Mutex) 22 | 23 | return globalList 24 | } 25 | 26 | // var globaleFileList *list.List = list.New() 27 | // var m *sync.Mutex = new(sync.Mutex) 28 | // var once sync.Once 29 | 30 | func (gl *GlobalList) Setup() { 31 | for i:=10;i<20;i++ { 32 | gl.list.PushBack(i) 33 | } 34 | fmt.Println("setup over") 35 | } 36 | 37 | func (gl *GlobalList) Remove() (e *list.Element) { 38 | 39 | gl.m.Lock() 40 | // var result int 41 | if gl.list.Len() > 0 { 42 | e = gl.list.Front() 43 | // result = front.Value.(int) 44 | gl.list.Remove(e) 45 | }else { 46 | e = nil 47 | } 48 | // fmt.Println("gl len:",gl.list.Len()) 49 | gl.m.Unlock() 50 | 51 | return e 52 | } 53 | 54 | func (gl *GlobalList) PushBack(v interface{}) { 55 | 56 | gl.m.Lock() 57 | gl.list.PushBack(v) 58 | gl.m.Unlock() 59 | 60 | } 61 | 
-------------------------------------------------------------------------------- /lib/rotate_writer.go: -------------------------------------------------------------------------------- 1 | package lib 2 | 3 | import ( 4 | "os" 5 | "log" 6 | "io" 7 | "errors" 8 | "time" 9 | ) 10 | 11 | var nilWriterErr = errors.New("get nil rotate writer") 12 | 13 | type RotateWriter struct { 14 | fout io.Writer 15 | currHour string 16 | outDir string 17 | } 18 | 19 | func NewRotateWriter(outDir string) *RotateWriter { 20 | currHour, fout := initCurrentHourWriter(outDir) 21 | return &RotateWriter{fout, currHour, outDir} 22 | } 23 | 24 | func initCurrentHourWriter(outDir string) (string, io.Writer) { 25 | currHour := time.Now().Format("2006010215") 26 | logFile,err := os.OpenFile(outDir + "/" + currHour, os.O_RDWR|os.O_APPEND|os.O_CREATE, 0666) 27 | if err != nil { 28 | log.Println("[error] init rotate writer", currHour, "failed!") 29 | } 30 | return currHour, logFile 31 | } 32 | 33 | func (this *RotateWriter) Write(p []byte) (n int, err error) { 34 | hour := time.Now().Format("2006010215") 35 | if this.currHour != hour { 36 | this.currHour, this.fout = initCurrentHourWriter(this.outDir) 37 | } 38 | if this.fout != nil { 39 | return this.fout.Write(p) 40 | } 41 | return 0, nilWriterErr 42 | } 43 | -------------------------------------------------------------------------------- /tcpClient.go: -------------------------------------------------------------------------------- 1 | package main 2 | import ( 3 | "net" 4 | "logd/lib" 5 | "time" 6 | "fmt" 7 | ) 8 | 9 | type TcpClient struct { 10 | logChan chan map[string]string 11 | } 12 | 13 | //工厂初始化函数 14 | func TcpClientInit(c chan map[string]string) (tc TcpClient) { 15 | // var tc TcpClient 16 | tc.logChan = c 17 | 18 | return tc 19 | } 20 | 21 | func (tc TcpClient) StartLogAgentServer() { 22 | service := ":1202" 23 | tcpAddr, err := net.ResolveTCPAddr("tcp4", service) 24 | lib.CheckError(err) 25 | listener, err := net.ListenTCP("tcp", 
tcpAddr) 26 | lib.CheckError(err) 27 | 28 | 29 | for { 30 | conn, err := listener.Accept() 31 | lib.CheckError(err) 32 | 33 | go tc.handleConnnection(conn,tc.logChan) 34 | 35 | } 36 | } 37 | 38 | func (tc TcpClient) handleConnnection(conn net.Conn,c chan map[string]string) { 39 | defer conn.Close() 40 | 41 | conn.SetReadDeadline(time.Now().Add(2 * time.Minute)) 42 | request := make([]byte, 128) 43 | 44 | 45 | //get consumer id 46 | requestLen, _ := conn.Read(request) 47 | if requestLen == 0 { 48 | return 49 | } 50 | msg := string(request) 51 | fmt.Println(msg) 52 | m := map[string]string{"hour":time.Now().Format("2006010215"), "line":msg} 53 | c <- m 54 | conn.Write([]byte("ok")) 55 | } 56 | -------------------------------------------------------------------------------- /lib/cfg_reader.go: -------------------------------------------------------------------------------- 1 | package lib 2 | 3 | /************* 4 | * 配置读取函数,配置格式为ini格式 5 | * 6 | *****************/ 7 | import ( 8 | "bufio" 9 | "os" 10 | "log" 11 | "strings" 12 | ) 13 | 14 | func ReadConfig(cfgFile string) map[string]map[string]string { 15 | fin, err := os.Open(cfgFile) 16 | if err != nil { 17 | log.Fatal(err) 18 | } 19 | config := make(map[string]map[string]string) 20 | config[""] = make(map[string]string) 21 | var section = "" 22 | scanner := bufio.NewScanner(fin) 23 | //逐行读取 24 | for scanner.Scan() { 25 | line := strings.Trim(scanner.Text(), " ") 26 | if line == "" || line[0] == ';' { 27 | //这行是注释,跳过 28 | continue 29 | } 30 | lSqr := strings.Index(line, "[") 31 | rSqr := strings.Index(line, "]") 32 | if lSqr == 0 && rSqr == len(line)-1 { 33 | section = line[lSqr+1 : rSqr] 34 | _, ok := config[section] 35 | if !ok { 36 | config[section] = make(map[string]string) 37 | } 38 | continue; 39 | } 40 | 41 | parts := strings.Split(line, "=") 42 | if len(parts) == 2 { 43 | key := strings.Trim(parts[0], " ") 44 | val := strings.Trim(parts[1], " ") 45 | config[section][key] = val 46 | } 47 | } 48 | fin.Close() 49 
| return config 50 | } 51 | -------------------------------------------------------------------------------- /conf/config.ini: -------------------------------------------------------------------------------- 1 | ;这是一份logd组件的配置实例,具体的角色可根据需要保留想应的配置 2 | ;例如前端log agent可能只需要[tail] 和 [logAgent] 3 | ; 4 | [tail] 5 | ;支持带时间格式的路径,会根据格式自动获取下一小时的文件 6 | ;格式需要用<>括起 7 | log_file = /tmp/access.log.<%Y%m%d%H> 8 | record_file = 9 | ;多少条发送一次 10 | recv_buffer_size = 2000 11 | send_to = localhost:1302 12 | senders = 2 13 | line_pattern= 14 | 15 | [collector] 16 | ;don't use localhost:port 17 | listen = :1302 18 | send_to = localhost:1306 19 | senders = 50 20 | 21 | [fcollector] 22 | listen = :1306 23 | save_dir = tmp 24 | 25 | [etlcollector] 26 | listen = :1306 27 | save_dir = etl 28 | spider_list = res/common_spiderlist.txt 29 | columns_file = res/colsmap.json 30 | hosts_white_list = res/hosts_white_list.txt 31 | ip_black_list = res/ip_blacklist.txt 32 | 33 | [mgocollector] 34 | listen = 1306 35 | mongos = hk01-bl235.hk01:27018,hk01-bl187.hk01:27018,hk01-bl233.hk01:27018,hk01-bl231.hk01:27018 36 | db = mytest 37 | collection = logdata 38 | ;并发写入的goroutine个数 39 | savers = 20 40 | ;是否使用upsert 41 | upsert = true 42 | ;批量插入的记录数,upsert=false才有用 43 | bulk_size = 50 44 | 45 | [logAgent] 46 | ;可选level: debug, info, warning, error,不区分大小写 47 | local_level = debug 48 | local_dir = - 49 | tcp_level = warning 50 | ;monitor的ip:port 51 | tcp_addr = localhost:4040 52 | 53 | ;监控相关的配置 54 | [monitor] 55 | ;心跳端口 56 | hb_port = 4000 57 | mon_addr = localhost:4040 58 | -------------------------------------------------------------------------------- /lib/quit_list.go: -------------------------------------------------------------------------------- 1 | /* 2 | 用于串行的等待各模块退出,各模块需提供一个钩子方法:Quit() bool 3 | */ 4 | 5 | package lib 6 | 7 | import ( 8 | "sync" 9 | "container/list" 10 | "os" 11 | "os/signal" 12 | "syscall" 13 | "log" 14 | ) 15 | 16 | type QuitFunc func() bool 17 | 18 | type QuitList struct { 19 | lst 
*list.List 20 | mutex *sync.RWMutex 21 | } 22 | 23 | func NewQuitList() *QuitList { 24 | lst := list.New() 25 | mutex := &sync.RWMutex{} 26 | 27 | return &QuitList{lst, mutex} 28 | } 29 | 30 | func (this *QuitList) Append(f QuitFunc) (lstLen int) { 31 | this.mutex.Lock() 32 | this.lst.PushBack(f) 33 | lstLen = this.lst.Len() 34 | this.mutex.Unlock() 35 | 36 | return 37 | } 38 | 39 | //自带信号处理方法,需要在ExecQuit()执行前调用 40 | //也可用其他信号处理方法替代 41 | func (this *QuitList) HandleQuitSignal() { 42 | //signal handling, for elegant quit 43 | ch := make(chan os.Signal) 44 | signal.Notify(ch, syscall.SIGINT, syscall.SIGQUIT) 45 | s := <-ch 46 | log.Println("get signal:", s) 47 | } 48 | 49 | func (this *QuitList) ExecQuit() { 50 | log.Println("begin quit...") 51 | for this.lst.Len() > 0 { 52 | 53 | this.mutex.Lock() 54 | for e := this.lst.Front(); e != nil; e = e.Next() { 55 | f, ok := e.Value.(QuitFunc) 56 | if ok { 57 | if f() { 58 | this.lst.Remove(e) 59 | } 60 | }else{ 61 | log.Println("trans error") 62 | this.lst.Remove(e) 63 | } 64 | 65 | } 66 | 67 | this.mutex.Unlock() 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /collector.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "net" 5 | "logd/lib" 6 | "time" 7 | "fmt" 8 | "compress/zlib" 9 | "bytes" 10 | "io" 11 | "os" 12 | // "io/ioutil" 13 | ) 14 | 15 | type Collector struct { 16 | 17 | } 18 | 19 | //工厂初始化函数 20 | func CollectorInit() (collector Collector) { 21 | // var tc TcpClient 22 | 23 | return collector 24 | } 25 | 26 | func (collector Collector) StartCollectorServer() { 27 | service := ":1312" 28 | tcpAddr, err := net.ResolveTCPAddr("tcp4", service) 29 | lib.CheckError(err) 30 | listener, err := net.ListenTCP("tcp", tcpAddr) 31 | lib.CheckError(err) 32 | 33 | 34 | for { 35 | conn, err := listener.Accept() 36 | lib.CheckError(err) 37 | 38 | go collector.handleConnnection(conn) 39 | 40 | } 41 | } 42 
| 43 | func (collector Collector) handleConnnection(conn net.Conn) { 44 | defer conn.Close() 45 | filename := "test.log" 46 | logFile,err := os.OpenFile(filename,os.O_RDWR|os.O_APPEND|os.O_CREATE, 0666) 47 | lib.CheckError(err) 48 | defer logFile.Close() 49 | 50 | conn.SetReadDeadline(time.Now().Add(30 * time.Minute)) 51 | request := make([]byte, 12800) 52 | 53 | for { 54 | //get consumer id 55 | requestLen, _ := conn.Read(request) 56 | if requestLen == 0 { 57 | return 58 | } 59 | // msg := string(request) 60 | fmt.Println("received:") 61 | b := bytes.NewBuffer(request) 62 | r, err := zlib.NewReader(b) 63 | if err != nil { 64 | panic(err) 65 | } 66 | 67 | io.Copy(os.Stdout, r) 68 | // result,err := ioutil.ReadAll(r) 69 | // lib.CheckError(err) 70 | // _,err = logFile.Write(result) 71 | // lib.CheckError(err) 72 | r.Close() 73 | 74 | // c <- msg 75 | conn.Write([]byte("ok")) 76 | // conn.Close() 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | import ( 3 | "os" 4 | "fmt" 5 | "runtime" 6 | "runtime/pprof" 7 | // "bytes" 8 | // "time" 9 | "logd/monitor" 10 | // "logd/heart_beat" 11 | "logd/lib" 12 | // "strconv" 13 | // "strings" 14 | "logd/loglib" 15 | "flag" 16 | "log" 17 | ) 18 | func main() { 19 | runtime.GOMAXPROCS(runtime.NumCPU()) 20 | 21 | var cpuProfile = flag.String("cpuprofile", "", "profile file") 22 | var memProfile = flag.String("memprofile", "", "mem profile") 23 | flag.Parse() 24 | 25 | if *cpuProfile != "" { 26 | f, err := os.Create(*cpuProfile) 27 | if err != nil { 28 | log.Fatal(err) 29 | } 30 | pprof.StartCPUProfile(f) 31 | defer pprof.StopCPUProfile() 32 | } 33 | 34 | cfgFile := flag.Arg(1) 35 | cfg := lib.ReadConfig(cfgFile) 36 | loglib.Init(cfg["logAgent"]) 37 | hbPort, ok := cfg["monitor"]["hb_port"] 38 | if ok { 39 | loglib.HeartBeatPort = hbPort 40 | } 41 | 42 | savePid() 43 | 44 | 
switch flag.Arg(0) { 45 | case "logd": 46 | logdGo(cfg) 47 | 48 | case "tail": 49 | tailerGo(cfg) 50 | 51 | case "client": 52 | testClient2() 53 | case "collector": 54 | collectorGo(cfg) 55 | 56 | case "fcollector": 57 | fcollectorGo(cfg) 58 | 59 | case "etlcollector": 60 | etlcollectorGo(cfg) 61 | case "mgocollector": 62 | mgocollectorGo(cfg) 63 | case "monitor": 64 | mon := monitor.New(cfgFile) 65 | mon.Run() 66 | case "test": 67 | testClient2() 68 | default: 69 | fmt.Println("unknown parameters") 70 | os.Exit(1) 71 | } 72 | 73 | if *memProfile != "" { 74 | f, err := os.Create(*memProfile) 75 | if err != nil { 76 | log.Fatal(err) 77 | } 78 | pprof.WriteHeapProfile(f) 79 | f.Close() 80 | } 81 | } 82 | 83 | func savePid() { 84 | var d = lib.GetBinPath() + "/var" 85 | os.MkdirAll(d, 0775) 86 | fout, _ := os.Create(d + "/logd.pid") 87 | fmt.Fprintf(fout, "%d", os.Getpid()) 88 | fout.Close() 89 | } 90 | -------------------------------------------------------------------------------- /heart_beat/heart_beat_checker.go: -------------------------------------------------------------------------------- 1 | package heart_beat 2 | 3 | import ( 4 | "net" 5 | "logd/loglib" 6 | "time" 7 | ) 8 | 9 | type HeartBeatChecker struct { 10 | } 11 | 12 | type CheckResult struct { 13 | Addr string 14 | Err bool //false表示成功 15 | Msg string 16 | } 17 | 18 | func NewHeartBeatChecker() *HeartBeatChecker { 19 | return &HeartBeatChecker{} 20 | } 21 | 22 | func (this *HeartBeatChecker) Run(addrs []string, interval int, processor ResultProcessor) { 23 | for { 24 | this.CheckAround(addrs, processor) 25 | time.Sleep(time.Duration(interval) * time.Second) 26 | } 27 | } 28 | func (this *HeartBeatChecker) CheckAround(addrs []string, processor ResultProcessor) { 29 | 30 | nAddrs := len(addrs) 31 | if nAddrs > 0 { 32 | chans := make(chan CheckResult , nAddrs) 33 | results := make([]CheckResult, nAddrs) 34 | 35 | for _, addr := range addrs { 36 | go check(addr, chans) 37 | } 38 | 39 | for i:=0; i 0 { 102 
| err = json.Unmarshal(ret, &m) 103 | r, ok := m["err"] 104 | if err == nil && ok && r == "0" { 105 | loglib.Info(req + " to monitor success!") 106 | return true 107 | } 108 | loglib.Error(req + " heart beat failed!") 109 | } 110 | } 111 | return false 112 | } 113 | -------------------------------------------------------------------------------- /loglib/file_log.go: -------------------------------------------------------------------------------- 1 | package loglib 2 | 3 | import ( 4 | "log" 5 | "time" 6 | "os" 7 | "path" 8 | "sync" 9 | ) 10 | 11 | type FileLog struct{ 12 | logger *log.Logger 13 | fout *os.File 14 | level int 15 | dir string 16 | dateStr string 17 | mutex *sync.Mutex 18 | } 19 | 20 | 21 | func NewFileLog(dir string, level int) *FileLog { 22 | if dir != "-" { 23 | if _, err := os.Stat(dir); err != nil && os.IsNotExist(err) { 24 | err = os.MkdirAll(dir, 0775) 25 | if err != nil{ 26 | log.Println("Try to create dir [" + dir + "] failed") 27 | dir = "-" //创建目录失败,则使用stdout 28 | } 29 | } 30 | } 31 | logger, fout := initFileLogger(dir) 32 | dateStr := time.Now().Format("20060102") 33 | mutex := &sync.Mutex{} 34 | return &FileLog{logger:logger, fout:fout, dir:dir, level: level, dateStr: dateStr, mutex:mutex} 35 | } 36 | 37 | func initFileLogger(dir string) (logger *log.Logger, w *os.File) { 38 | //增加输出到标准输出,便于程序调试 39 | if dir == "-" { 40 | logger = log.New(os.Stdout, "", log.LstdFlags) 41 | w = os.Stdout 42 | }else{ 43 | var fname = "logd.log." 
+ time.Now().Format("20060102") 44 | logFile := path.Join(dir, fname) 45 | fout, err := os.OpenFile(logFile, os.O_RDWR | os.O_APPEND | os.O_CREATE, 0644) 46 | if err != nil { 47 | log.Println("Open log file [" + logFile + "] failed: ", err) 48 | }else{ 49 | logger = log.New(fout, "", log.LstdFlags | log.Lshortfile) 50 | w = fout 51 | } 52 | } 53 | return logger, w 54 | } 55 | 56 | func (l *FileLog) logging(level int, msg string) { 57 | if level >= 0 && level < len(prefixes) { 58 | if l.dir != "-" { 59 | dateStr := time.Now().Format("20060102") 60 | l.mutex.Lock() 61 | if dateStr != l.dateStr { 62 | logger, fout := initFileLogger(l.dir) //自动切割 63 | if logger != nil && fout != nil { //出错则不切割 64 | l.fout.Close() 65 | l.dateStr = dateStr 66 | l.logger, l.fout = logger, fout 67 | } 68 | } 69 | l.mutex.Unlock() 70 | } 71 | l.logger.Println("[" + prefixes[level] + "] " + msg) 72 | } 73 | } 74 | 75 | //设置日志级别,低于该级别的日志将不输出 76 | func (l *FileLog) SetLevel(level int) bool { 77 | if level >= 0 && level < len(prefixes) { 78 | l.level = level 79 | return true 80 | }else { 81 | log.Println("invalid log level") 82 | } 83 | return false 84 | 85 | } 86 | 87 | func (l *FileLog) Debug(msg string) { 88 | if l.level == DEBUG { 89 | l.logging(DEBUG, msg) 90 | } 91 | } 92 | 93 | func (l *FileLog) Info(msg string) { 94 | if l.level <= INFO { 95 | l.logging(INFO, msg) 96 | } 97 | } 98 | 99 | func (l *FileLog) Warning(msg string) { 100 | if l.level <= WARNING { 101 | l.logging(WARNING, msg) 102 | } 103 | } 104 | 105 | func (l *FileLog) Error(msg string) { 106 | if l.level <= ERROR { 107 | l.logging(ERROR, msg) 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /res/colsmap.json: -------------------------------------------------------------------------------- 1 | { 2 | "access":{ 3 | "columns": ["", "event_product", "event_time", "event_hour", "", "event_ip", "event_ipinlong", "", "", "", "event_baiduid", "event_userid", "event_cookie", 
"event_referer", "event_useragent", "event_url", "event_urlpath", "event_urlparams", "", "event_httpstatus", "event_others", "event_isspider", "event_spiderdetail", "", "", "", "", "", "", "globalhao123_httpmethod", "globalhao123_httpversion", "globalhao123_tn", "globalhao123_level", "globalhao123_page", "globalhao123_flashid", "globalhao123_bdtime", "globalhao123_ftime", "globalhao123_bddate", "globalhao123_fdate", "globalhao123_bdhour", "globalhao123_fhour", "globalhao123_bdminute", "globalhao123_fminute"], 4 | "partitions": ["event_day", "globalhao123_host", "globalhao123_type"] 5 | }, 6 | "click": { 7 | "columns": ["", "event_product", "event_time", "event_hour", "", "event_ip", "event_ipinlong", "", "", "", "event_baiduid", "event_userid", "event_cookie", "event_referer", "event_useragent", "event_url", "event_urlpath", "event_urlparams", "", "event_httpstatus", "event_others", "event_isspider", "event_spiderdetail", "", "", "", "", "", "", "globalhao123_httpmethod", "globalhao123_httpversion", "globalhao123_tn", "globalhao123_level", "globalhao123_page", "globalhao123_flashid", "globalhao123_bdtime", "globalhao123_ftime", "globalhao123_bddate", "globalhao123_fdate", "globalhao123_bdhour", "globalhao123_fhour", "globalhao123_bdminute", "globalhao123_fminute", "globalhao123click_sort", "globalhao123click_position", "globalhao123click_value"], 8 | "partitions": ["event_day", "globalhao123_host", "globalhao123_type"] 9 | }, 10 | "open": { 11 | "columns": ["event_time", "", "event_ip", "event_ipinlong", "", "", "", "event_baiduid", "event_userid", "event_cookie", "event_referer", "event_useragent", "event_url", "event_urlpath", "event_urlparams", "", "event_httpstatus", "event_others", "event_isspider", "event_spiderdetail", "", "", "", "", "", "", "globalhao123_httpmethod", "globalhao123_httpversion", "globalhao123_tn", "globalhao123_level", "globalhao123_page", "globalhao123_channel", "globalhao123_flashid", "globalhao123_bdtime", "globalhao123_ftime", 
"globalhao123_bddate", "globalhao123_fdate", "globalhao123_bdhour", "globalhao123_fhour", "globalhao123_bdminute", "globalhao123_fminute", "globalhao123click_position", "globalhao123click_sort", "globalhao123click_value", "globalhao123click_url", "globalhao123open_appid"], 12 | "partitions": ["event_action", "event_day", "event_hour", "event_product", "globalhao123_host", "globalhao123_type"] 13 | }, 14 | "others": { 15 | "columns": ["", "event_product", "event_time", "event_hour", "", "event_ip", "event_ipinlong", "", "", "", "event_baiduid", "event_userid", "event_cookie", "event_referer", "event_useragent", "event_url", "event_urlpath", "event_urlparams", "", "event_httpstatus", "event_others", "event_isspider", "event_spiderdetail", "", "", "", "", "", "", "globalhao123_httpmethod", "globalhao123_httpversion", "globalhao123_tn", "globalhao123_level", "globalhao123_page", "globalhao123_flashid", "globalhao123_bdtime", "globalhao123_ftime", "globalhao123_bddate", "globalhao123_fdate", "globalhao123_bdhour", "globalhao123_fhour", "globalhao123_bdminute", "globalhao123_fminute"], 16 | "partitions": ["event_day", "globalhao123_host", "globalhao123_type"] 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /loglib/agent.go: -------------------------------------------------------------------------------- 1 | /************** 2 | * 3 | * 根据配置文件中的local_dir和tcp_addr分别生成logger,在本地和远程上报错误日志 4 | * 5 | */ 6 | 7 | package loglib 8 | 9 | import ( 10 | "strings" 11 | // "os" 12 | "runtime" 13 | "fmt" 14 | ) 15 | 16 | var logAgent *LogAgent 17 | var HeartBeatPort = "" 18 | 19 | func Init(config map[string]string) { 20 | logAgent = newLogAgent(config) 21 | } 22 | 23 | type LogAgent struct { 24 | logs []Log 25 | // message map[string]string 26 | } 27 | 28 | var levels = map[string]int{"debug":0, "info":1, "warning":2, "error":3} 29 | 30 | func newLogAgent(config map[string]string) *LogAgent { 31 | var level = DEBUG 32 | levelStr, ok 
:= config["local_level"] 33 | if ok && levelStr != "" { 34 | tmp, ok := levels[ strings.ToLower(levelStr) ] 35 | if ok { 36 | level = tmp 37 | } 38 | } 39 | 40 | agent := new(LogAgent) 41 | 42 | dir, ok := config["local_dir"] 43 | if ok { 44 | fileLog := NewFileLog(dir, level) 45 | agent.logs = append(agent.logs, fileLog) 46 | } 47 | 48 | level = WARNING //tcp 默认level 49 | levelStr, ok = config["tcp_level"] 50 | if ok && levelStr != "" { 51 | tmp, ok := levels[ strings.ToLower(levelStr) ] 52 | if ok { 53 | level = tmp 54 | } 55 | } 56 | addr, ok := config["tcp_addr"] 57 | if ok { 58 | netLog := NewNetLog(addr, level) 59 | agent.logs = append(agent.logs, netLog) 60 | } 61 | return agent 62 | } 63 | 64 | func SetLevel(level int) { 65 | logAgent.setLevel(level) 66 | } 67 | 68 | func Debug(msg string) { 69 | logAgent.debug(makeDebugMessage(msg)) 70 | } 71 | 72 | func makeDebugMessage(msg string) string { 73 | //test 74 | var m string 75 | var pc []uintptr = make([]uintptr, 100) 76 | i := runtime.Callers(0,pc) 77 | 78 | for j := 0; j < i; j++ { 79 | // _,_,line,_ := runtime.Caller(j) 80 | f := runtime.FuncForPC(pc[j]) 81 | pName := getPackageName(f.Name() ) 82 | if pName == "runtime" || pName == "logd/loglib" { 83 | continue 84 | }else { 85 | file,line := f.FileLine(pc[j]) 86 | m = fmt.Sprintf("file:%s,func_name:%s,line:%d,msg:%s",file,f.Name(),line,msg) 87 | break 88 | } 89 | 90 | } 91 | return m 92 | } 93 | 94 | func Info(msg string) { 95 | logAgent.info(msg) 96 | } 97 | 98 | func Warning(msg string) { 99 | logAgent.warning(msg) 100 | } 101 | func Error(msg string) { 102 | logAgent.error(msg) 103 | 104 | } 105 | 106 | func (a *LogAgent) setLevel(level int) { 107 | for _, l := range a.logs { 108 | l.SetLevel(level) 109 | } 110 | } 111 | 112 | func (a *LogAgent) debug(msg string) { 113 | for _, l := range a.logs { 114 | l.Debug(msg) 115 | } 116 | } 117 | 118 | func (a *LogAgent) info(msg string) { 119 | for _, l := range a.logs { 120 | l.Info(msg) 121 | } 122 | 123 | } 
124 | 125 | func (a *LogAgent) warning(msg string) { 126 | for _, l := range a.logs { 127 | l.Warning(msg) 128 | } 129 | 130 | } 131 | func (a *LogAgent) error(msg string) { 132 | for _, l := range a.logs { 133 | l.Error(msg) 134 | } 135 | 136 | } 137 | 138 | func getPackageName(function string) string { 139 | a := strings.Split(function,".") 140 | 141 | if len(a) != 2 { 142 | return function 143 | }else { 144 | return a[0] 145 | } 146 | 147 | 148 | } 149 | 150 | 151 | -------------------------------------------------------------------------------- /loglib/net_log.go: -------------------------------------------------------------------------------- 1 | package loglib 2 | 3 | import ( 4 | "time" 5 | "net" 6 | "log" 7 | "logd/tcp_pack" 8 | "encoding/json" 9 | "runtime" 10 | "strings" 11 | "os" 12 | "os/exec" 13 | "path/filepath" 14 | "sync" 15 | ) 16 | 17 | type NetLog struct { 18 | conn *net.TCPConn 19 | level int 20 | addr string //tcp address 21 | ip string //self ip 22 | mutex *sync.Mutex 23 | } 24 | 25 | func NewNetLog(addr string, level int) *NetLog { 26 | conn, _ := getConnection(addr) 27 | mutex := &sync.Mutex{} 28 | return &NetLog{conn, level, addr, getIp(), mutex} 29 | } 30 | 31 | func getIp() string { 32 | out, _ := exec.Command("/bin/sh", "-c", `/sbin/ifconfig | awk -F"[: ]+" '/inet addr/{print $4}' | head -n 1`).Output() 33 | return strings.Trim(string(out), "\n") 34 | } 35 | 36 | //获取可执行文件的所在路径 37 | func getBinPath() string { 38 | d, err := filepath.Abs(filepath.Dir(os.Args[0])) 39 | if err != nil { 40 | log.Println("[get bin path] error:", err) 41 | } 42 | return d 43 | } 44 | 45 | func getConnection(addr string) (*net.TCPConn, error) { 46 | tcpAddr, err := net.ResolveTCPAddr("tcp4", addr) 47 | if err != nil { 48 | _, f, l, _ := runtime.Caller(1) 49 | f = strings.Replace(f, getBinPath(), "", -1) 50 | log.Println(f, ":", l, "[GetConnection] resolve tcp address failed", err) 51 | return nil, err 52 | } 53 | conn, err := net.DialTCP("tcp4", nil, tcpAddr) 
54 | if err != nil { 55 | _, f, l, _ := runtime.Caller(1) 56 | f = strings.Replace(f, getBinPath(), "", -1) 57 | log.Println(f, ":", l, "[GetConnection] connect to address:" , addr, "failed!") 58 | } 59 | return conn, err 60 | } 61 | 62 | func (l *NetLog) logging(level int, msg string) { 63 | if level >= 0 && level < len(prefixes) { 64 | l.mutex.Lock() 65 | 66 | if l.conn == nil { 67 | l.conn, _ = getConnection(l.addr) //重连一次 68 | } 69 | 70 | if l.conn != nil { 71 | m := map[string]string{"time": time.Now().Format("2006/01/02 15:04:05"), "type": prefixes[level], "msg":msg, "ip": l.ip, "port": HeartBeatPort} 72 | data, err := json.Marshal(m) 73 | if err != nil { 74 | log.Println("marshal net log error:", err) 75 | return 76 | } 77 | data = tcp_pack.Pack(data) 78 | _, err = l.conn.Write(data) 79 | if err != nil { 80 | log.Println("send log failed: ", msg, "error:", err) 81 | }else{ 82 | log.Println("send log :" + msg) 83 | } 84 | }else{ 85 | log.Println("send log failed: ", msg, "error: no connection") 86 | } 87 | l.mutex.Unlock() 88 | } 89 | 90 | } 91 | //设置日志级别,低于该级别的日志将不输出 92 | func (l *NetLog) SetLevel(level int) bool { 93 | if level >= 0 && level < len(prefixes) { 94 | l.level = level 95 | return true 96 | }else { 97 | log.Println("invalid log level") 98 | } 99 | return false 100 | 101 | } 102 | 103 | func (l *NetLog) Debug(msg string) { 104 | if l.level == DEBUG { 105 | l.logging(DEBUG, msg) 106 | } 107 | } 108 | 109 | func (l *NetLog) Info(msg string) { 110 | if l.level <= INFO { 111 | l.logging(INFO, msg) 112 | } 113 | } 114 | 115 | func (l *NetLog) Warning(msg string) { 116 | if l.level <= WARNING { 117 | l.logging(WARNING, msg) 118 | } 119 | } 120 | 121 | func (l *NetLog) Error(msg string) { 122 | if l.level <= ERROR { 123 | l.logging(ERROR, msg) 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /monitor/monitor.go: -------------------------------------------------------------------------------- 1 | 
package monitor 2 | 3 | import ( 4 | "strconv" 5 | "time" 6 | "sync" 7 | "logd/heart_beat" 8 | "logd/db" 9 | "logd/lib" 10 | "logd/loglib" 11 | ) 12 | 13 | var registerTable = "registered_node" 14 | 15 | type Monitor struct { 16 | configFile string 17 | ipRoleMap map[string]string 18 | receiver *LogReceiver 19 | hbChecker *heart_beat.HeartBeatChecker 20 | mutex *sync.RWMutex 21 | checkInterval int 22 | dbConn *db.Mysql 23 | } 24 | 25 | func New(configFile string) *Monitor { 26 | //new Monitor 27 | monitor := new(Monitor) 28 | monitor.configFile = configFile 29 | config := lib.ReadConfig(configFile) 30 | 31 | // heart beat checker 32 | cfg, ok := config["heart_beat"] 33 | if !ok { 34 | loglib.Error("miss heart beat config") 35 | return nil 36 | } 37 | checkInterval, _ := strconv.Atoi(cfg["check_interval"]) 38 | delete(cfg, "check_interval") 39 | mutex := &sync.RWMutex{} 40 | 41 | monitor.mutex = mutex 42 | monitor.checkInterval = checkInterval 43 | 44 | hbChecker := heart_beat.NewHeartBeatChecker() 45 | monitor.hbChecker = hbChecker 46 | 47 | //log receiver 48 | cfg, ok = config["receiver"] 49 | if !ok { 50 | loglib.Error("miss receiver config!") 51 | return nil 52 | } 53 | mysql := db.NewMysql(cfg["db_host"], cfg["db_port"], cfg["db_uname"], cfg["db_passwd"], cfg["db_db"], cfg["db_charset"]) 54 | monitor.dbConn = mysql 55 | monitor.ipRoleMap = getIpRoleMap(mysql) 56 | recvPort, _ := strconv.Atoi(cfg["recv_port"]) 57 | receiver := NewLogReceiver(recvPort, mysql, monitor.ipRoleMap, monitor.mutex) 58 | monitor.receiver = receiver 59 | 60 | return monitor 61 | } 62 | 63 | func (this *Monitor) Run() { 64 | //错误日志 65 | go this.receiver.Run() 66 | 67 | //心跳检测 68 | i := 0 69 | for { 70 | ips := make([]string, 0) 71 | this.mutex.Lock() 72 | //没检查3次就从数据库同步一次ip信息 73 | //避免有些节点没有注册上就无法被监控 74 | //没有注册上可以直接在数据库添加 75 | if i > 2 { 76 | this.ipRoleMap = getIpRoleMap(this.dbConn) 77 | i = 0 78 | } 79 | for k, _ := range this.ipRoleMap { 80 | ips = append(ips, k) 81 | } 82 | 
this.mutex.Unlock() 83 | this.hbChecker.CheckAround(ips, this) 84 | i++ 85 | time.Sleep(time.Duration(this.checkInterval) * time.Second) 86 | } 87 | } 88 | 89 | func getIpRoleMap(dbConn *db.Mysql) map[string]string { 90 | sql := "select ip, port, role from " + registerTable 91 | res, err := dbConn.Query(sql) 92 | m := make(map[string]string) 93 | if err != nil { 94 | loglib.Error("read registered nodes failed! Error: " + err.Error()) 95 | }else{ 96 | for _, row := range res.Rows { 97 | ip := row[0] 98 | port := row[1] 99 | role := row[2] 100 | if port != "" { 101 | ip = ip + ":" + port 102 | } 103 | m[ip] = role 104 | } 105 | loglib.Info("readed working nodes from database.") 106 | } 107 | return m 108 | } 109 | func (this *Monitor) Process(results []heart_beat.CheckResult) { 110 | for _, res := range results { 111 | role, ok := this.ipRoleMap[ res.Addr ] 112 | if !ok { 113 | role = "" 114 | } 115 | //错误记录 116 | if res.Err { 117 | this.receiver.AddLog(time.Now().Format("2006-01-02 15:04:05"), res.Addr, role, "no heart-beat", res.Msg) 118 | }else{ 119 | //this.receiver.AddLog(time.Now().Format("2006-01-02 15:04:05"), res.Addr, role, "alive", res.Msg) 120 | } 121 | } 122 | } 123 | 124 | -------------------------------------------------------------------------------- /tcp_pack/pack.go: -------------------------------------------------------------------------------- 1 | package tcp_pack 2 | 3 | import ( 4 | "encoding/binary" 5 | "encoding/json" 6 | "net" 7 | "bytes" 8 | "log" 9 | "errors" 10 | ) 11 | 12 | //将字节数组加上4字节的长度头 13 | func Pack(data []byte) []byte { 14 | lenBuf := make([]byte, 4) 15 | nData := len(data) 16 | binary.PutUvarint(lenBuf, uint64(nData)) 17 | data = append(lenBuf, data...) 
// UnPack reads one length-prefixed packet from conn and returns the
// declared packet length together with the payload bytes. A length of -1
// means no packet could be read (nil conn, or the 4-byte header could not
// be fully read).
//
// BUG FIX: the original read the 4-byte length header with a single
// conn.Read; on TCP a Read may return fewer bytes than requested, which
// decoded a garbage length. The header is now read in a loop, and read
// errors terminate the payload loop instead of being ignored.
func UnPack(conn net.Conn) (int, []byte) {
	b := new(bytes.Buffer)
	if conn == nil {
		return -1, b.Bytes()
	}

	// read the full 4-byte (varint-encoded) length header
	head := make([]byte, 4)
	got := 0
	for got < len(head) {
		n, err := conn.Read(head[got:])
		got += n
		if err != nil || n == 0 {
			break
		}
	}
	if got < len(head) {
		return -1, b.Bytes()
	}
	l, _ := binary.Uvarint(head)
	packLen := int(l)

	// read the payload; a single chunk is capped at 10MB so very large
	// packs are assembled from multiple reads
	bufSize := packLen
	if bufSize > 10485760 {
		bufSize = 10485760
	}
	chunk := make([]byte, bufSize)
	curr := 0
	for curr < packLen {
		n, err := conn.Read(chunk)
		if n == 0 {
			break
		}
		curr += n
		b.Write(chunk[:n])
		if err != nil {
			break
		}
	}
	return packLen, b.Bytes()
}
83 | return buf 84 | } 85 | 86 | func ExtractHeader(data []byte) (PackHeader, int, error) { 87 | var header PackHeader 88 | var err = errors.New("pack header doesn't have enough bytes") 89 | var l = uint64(0) 90 | if len(data) > 4 { 91 | buf := data[0:4] 92 | l, _ = binary.Uvarint(buf) 93 | //header 94 | buf = data[4: 4+l] 95 | err = json.Unmarshal(buf, &header) 96 | if err != nil { 97 | log.Println("wrong format pack header") 98 | } 99 | } 100 | return header, int(l), err 101 | 102 | } 103 | 104 | func GetPackId(data []byte) string { 105 | header, _, err := ExtractHeader(data) 106 | packId := "unkown" 107 | if err == nil && len(header.Route) > 0 { 108 | route := header.Route[0] 109 | hour, _ := route["hour"] 110 | done, ok := route["done"] 111 | if ok { 112 | done = "_done" 113 | } 114 | packId = route["ip"] + "_" + hour + "_" + route["id"] + done 115 | } 116 | return packId 117 | } 118 | 119 | func ParseHeader(vbytes []byte) map[string]string { 120 | m := map[string]string{"ip":"", "hour":"", "done":"", "lines":"0"} 121 | var header PackHeader 122 | err := json.Unmarshal(vbytes, &header) 123 | if err != nil { 124 | log.Println("wrong format pack header") 125 | }else{ 126 | if len(header.Route) > 0 { 127 | m = header.Route[0] 128 | } 129 | } 130 | return m 131 | } 132 | -------------------------------------------------------------------------------- /receiver.go: -------------------------------------------------------------------------------- 1 | 2 | package main 3 | 4 | import ( 5 | "time" 6 | "container/list" 7 | "bytes" 8 | "compress/zlib" 9 | "fmt" 10 | "logd/tcp_pack" 11 | "logd/lib" 12 | "logd/loglib" 13 | ) 14 | 15 | type Receiver struct { 16 | sendBuffer chan bytes.Buffer 17 | logList *list.List 18 | listBufferSize int //多少条日志发送一次 19 | receiveChan chan map[string]string 20 | nTailedLines int //tailler重启时用于计算开始的id 21 | wq *lib.WaitQuit 22 | } 23 | 24 | //工厂初始化函数 25 | func ReceiverInit(buffer chan bytes.Buffer,c chan map[string]string, listBufferSize int, 
nTailedLines int) (r Receiver) { 26 | // var r Receiver 27 | r.sendBuffer = buffer 28 | r.logList = list.New() 29 | r.receiveChan=c 30 | r.listBufferSize = listBufferSize 31 | r.wq = lib.NewWaitQuit("receiver") 32 | r.nTailedLines = nTailedLines 33 | return r 34 | } 35 | 36 | func (r Receiver) clearList() (b bytes.Buffer){ 37 | var result bytes.Buffer 38 | for (r.logList.Len() >0 ) { 39 | a := r.logList.Front() 40 | r.logList.Remove(a) 41 | result.WriteString(a.Value.(string)) 42 | // fmt.Println("removed : ",a.Value) 43 | } 44 | // fmt.Println("removed : ",result) 45 | // var b bytes.Buffer 46 | 47 | w := zlib.NewWriter(&b) 48 | w.Write(result.Bytes()) 49 | w.Close() 50 | 51 | // fmt.Println("ziped : ",b) 52 | return b 53 | 54 | } 55 | 56 | //goroutine 57 | //clear list & zipping & send_to_buffer 58 | func (r Receiver) writeList() { 59 | //收尾工作 60 | defer func(){ 61 | if err := recover(); err != nil { 62 | loglib.Error(fmt.Sprintf("receiver panic:%v", err)) 63 | } 64 | close(r.sendBuffer) 65 | }() 66 | 67 | st := time.Now() 68 | var nLines = 0 69 | var id = r.initId() 70 | ip := lib.GetIp() 71 | var changed = false 72 | 73 | for logMap := range r.receiveChan { 74 | logLine := logMap["line"] 75 | changed = false 76 | 77 | if logLine == "logfile changed" { 78 | changed = true 79 | }else{ 80 | r.logList.PushBack(logLine) 81 | } 82 | nLines = r.logList.Len() 83 | //达到指定行数或发现日志rotate 84 | //因此每小时只有最后一个包比listBufferSize小 85 | //如果quit时包小于listBufferSize就丢弃,重启后再读 86 | if nLines >= r.listBufferSize || changed { 87 | hour := logMap["hour"] 88 | repull, ok := logMap["repull"] //兼容补拉 89 | 90 | b := r.clearList() 91 | //r.sendBuffer <- b 92 | ed := time.Now() 93 | elapse := ed.Sub(st) 94 | loglib.Info(fmt.Sprintf("add a pack, id: %s_%d, lines:%d, elapse: %s", hour, id, nLines, elapse)) 95 | 96 | //route信息 97 | m := make(map[string]string) 98 | m["ip"] = ip 99 | m["hour"] = hour 100 | m["id"] = fmt.Sprintf("%d", id) 101 | m["lines"] = fmt.Sprintf("%d", nLines) 102 | m["stage"] = 
"make pack" 103 | m["st"] = st.Format("2006-01-02 15:04:05.000") 104 | m["ed"] = ed.Format("2006-01-02 15:04:05.000") 105 | m["elapse"] = elapse.String() 106 | if ok && repull == "1" { 107 | m["repull"] = "1" 108 | } 109 | 110 | if changed { 111 | m["done"] = "1" 112 | //这种空包用于给那些日志行数正好是listBufferSize倍数的小时标记结束 113 | //设置repull为1以便空包能够不被拦截 114 | if nLines == 0 { 115 | m["repull"] = "1" 116 | } 117 | } 118 | 119 | vbytes := tcp_pack.Packing(b.Bytes(), m, false) 120 | b.Reset() 121 | b.Write(vbytes) 122 | r.sendBuffer <- b 123 | id++ 124 | st = time.Now() 125 | nLines = 0 126 | } 127 | 128 | if changed { 129 | id = 1 //每小时id刷新 130 | } 131 | 132 | } 133 | 134 | if nLines > 0 { 135 | loglib.Info(fmt.Sprintf("receiver abandon %d lines", nLines)) 136 | } 137 | 138 | } 139 | 140 | 141 | 142 | 143 | // //goroutine 144 | // func sender() { 145 | // for { 146 | // fmt.Println("ready to get data from sendBuffer") 147 | // b := <- sendBuffer 148 | // fmt.Println("sender get:",b) 149 | // } 150 | // } 151 | 152 | 153 | func (r Receiver) Start() { 154 | r.writeList() 155 | r.wq.AllDone() 156 | } 157 | 158 | func (r Receiver) Quit() bool { 159 | return r.wq.Quit() 160 | } 161 | 162 | func (r Receiver) initId() int { 163 | return (r.nTailedLines / r.listBufferSize) + 1 164 | } 165 | -------------------------------------------------------------------------------- /fileOutputer.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "logd/lib" 5 | "logd/loglib" 6 | "compress/zlib" 7 | "bytes" 8 | "os" 9 | "fmt" 10 | "strconv" 11 | "encoding/binary" 12 | "io" 13 | "path/filepath" 14 | "logd/integrity" 15 | "logd/tcp_pack" 16 | "time" 17 | ) 18 | 19 | type fileOutputer struct { 20 | 21 | buffer chan bytes.Buffer 22 | saveDir string 23 | dataDir string 24 | headerDir string 25 | icDir string 26 | writers map[string]*os.File //存日志的fd 27 | headerWriters map[string]*os.File //存header的fd 28 | ic *integrity.IntegrityChecker 
29 | checkTime time.Time 30 | 31 | wq *lib.WaitQuit 32 | } 33 | 34 | 35 | //工厂初始化函数 36 | func FileOutputerInit(buffer chan bytes.Buffer, saveDir string) (f fileOutputer) { 37 | 38 | f.buffer = buffer 39 | f.saveDir = saveDir 40 | f.dataDir = filepath.Join(saveDir, "log_data") 41 | f.headerDir = filepath.Join(saveDir, "headers") 42 | f.icDir = filepath.Join(saveDir, "received") 43 | f.ic = integrity.NewIntegrityChecker(f.icDir) 44 | 45 | os.MkdirAll(f.dataDir, 0775) 46 | os.MkdirAll(f.headerDir, 0775) 47 | 48 | f.writers = make(map[string]*os.File) 49 | f.headerWriters = make(map[string]*os.File) 50 | f.checkTime = time.Now().Add(2 * time.Minute) 51 | 52 | f.wq = lib.NewWaitQuit("file outputer", -1) 53 | return f 54 | } 55 | 56 | func (f *fileOutputer) Start() { 57 | defer func(){ 58 | if err := recover(); err != nil { 59 | loglib.Error(fmt.Sprintf("file outputer panic:%v", err)) 60 | } 61 | 62 | f.ic.SaveStatus() 63 | f.closeWriters(f.writers) 64 | f.closeWriters(f.headerWriters) 65 | f.wq.AllDone() 66 | }() 67 | 68 | //使用range遍历,方便安全退出,只要发送方退出时关闭chan,这里就可以退出了 69 | for b := range f.buffer { 70 | f.extract(&b) 71 | } 72 | } 73 | 74 | func (f *fileOutputer) Quit() bool { 75 | return f.wq.Quit() 76 | } 77 | 78 | func (f *fileOutputer) extract(bp *bytes.Buffer) { 79 | buf := make([]byte, 4) 80 | bp.Read(buf) 81 | 82 | l, _ := binary.Uvarint(buf) 83 | headerLen := int(l) 84 | //get pack header 85 | buf = make([]byte, headerLen) 86 | bp.Read(buf) 87 | header := tcp_pack.ParseHeader(buf) 88 | 89 | r, err := zlib.NewReader(bp) 90 | if err != nil { 91 | loglib.Error("zlib reader Error: " + err.Error()) 92 | }else{ 93 | lines, _ := strconv.Atoi(header["lines"]) 94 | done := false 95 | if header["done"] == "1" { 96 | done = true 97 | } 98 | f.ic.Add(header["ip"], header["hour"], header["id"], lines, done) 99 | 100 | writerKey := header["ip"] + "_" + header["hour"] 101 | fout := f.getWriter(f.writers, f.dataDir, writerKey) 102 | 103 | //一头一尾写头信息,节省硬盘 104 | buf = append(buf, 
'\n') 105 | //fout.Write(buf) 106 | nn, err := io.Copy(fout, r) 107 | if err != nil { 108 | loglib.Warning(fmt.Sprintf("save %s_%s_%s error:%s, saved:%d", header["ip"], header["hour"], header["id"], err, nn)) 109 | } 110 | //fout.Write(buf) 111 | 112 | //单独存一份header便于查数 113 | fout = f.getWriter(f.headerWriters, f.headerDir, writerKey) 114 | n, err := fout.Write(buf) 115 | if err != nil { 116 | loglib.Info(fmt.Sprintf("writer header %s %d %s", writerKey, n, err.Error())) 117 | } 118 | 119 | if done || time.Now().Unix() > f.checkTime.Unix() { 120 | hourFinish, _ := f.ic.Check() 121 | for ip, hours := range hourFinish { 122 | for _, hour := range hours { 123 | writerKey = ip + "_" + hour 124 | } 125 | } 126 | f.closeWriters(f.writers) 127 | f.closeWriters(f.headerWriters) 128 | f.checkTime.Add(2 * time.Minute) 129 | } 130 | 131 | r.Close() 132 | } 133 | } 134 | 135 | func (f *fileOutputer) getWriter(writers map[string]*os.File, parentDir string, key string) *os.File { 136 | w, ok := writers[key] 137 | if !ok || w == nil { 138 | fname := filepath.Join(parentDir, key) 139 | w1, err := os.OpenFile(fname, os.O_WRONLY | os.O_APPEND | os.O_CREATE, 0666) 140 | writers[key] = w1 141 | w = w1 142 | if err != nil { 143 | loglib.Error(fmt.Sprintf("file outputer create writer: %s error: %s", fname, err.Error())) 144 | } 145 | } 146 | return w 147 | } 148 | 149 | func (f *fileOutputer) closeWriter(writers map[string]*os.File, key string) { 150 | w, ok := writers[key] 151 | if ok { 152 | if w != nil { 153 | w.Close() 154 | } 155 | delete(writers, key) 156 | } 157 | } 158 | //关闭全部writer 159 | func (f *fileOutputer) closeWriters(writers map[string]*os.File) { 160 | for key, w := range writers { 161 | if w != nil { 162 | w.Close() 163 | } 164 | delete(writers, key) 165 | } 166 | 167 | } 168 | -------------------------------------------------------------------------------- /monitor/log_receiver.go: -------------------------------------------------------------------------------- 1 | 
package monitor 2 | 3 | import ( 4 | "net" 5 | "fmt" 6 | "logd/loglib" 7 | "logd/db" 8 | "logd/tcp_pack" 9 | "encoding/json" 10 | "sync" 11 | "time" 12 | ) 13 | 14 | var errorLogTable = "service_error_log" 15 | 16 | type LogReceiver struct { 17 | port int 18 | dbConn *db.Mysql 19 | ipRoleMap map[string]string 20 | mutex *sync.RWMutex 21 | } 22 | 23 | func NewLogReceiver(port int, dbConn *db.Mysql, ipRoleMap map[string]string, mutex *sync.RWMutex) *LogReceiver { 24 | return &LogReceiver{port, dbConn, ipRoleMap, mutex} 25 | } 26 | 27 | func (lr *LogReceiver) Run() { 28 | l, err := net.Listen("tcp", fmt.Sprintf(":%d", lr.port)) 29 | if err != nil { 30 | loglib.Error("[log receiver] " + err.Error()) 31 | return 32 | } 33 | defer l.Close() 34 | for { 35 | conn, err := l.Accept() 36 | if err != nil { 37 | loglib.Error("[log receiver] " + err.Error()) 38 | return 39 | } 40 | go lr.handleConnection(conn) 41 | } 42 | 43 | } 44 | 45 | func (lr *LogReceiver) handleConnection(conn net.Conn){ 46 | defer conn.Close() 47 | for { 48 | var m map[string]string 49 | n, buf := tcp_pack.UnPack(conn) 50 | if n <= 0 { 51 | break 52 | } 53 | err := json.Unmarshal(buf, &m) 54 | if err == nil { 55 | _, ok := m["req"] 56 | if ok { 57 | m = lr.handleRegister(m) 58 | data, _ := json.Marshal(m) 59 | conn.Write(tcp_pack.Pack(data)) 60 | }else{ 61 | lr.handleLog(m) 62 | } 63 | } 64 | } 65 | } 66 | 67 | func (lr *LogReceiver) AddLog(timeStr string, ip string, role string, logType string, msg string) (db.MysqlResult, error) { 68 | return lr.dbConn.Exec("insert into " + errorLogTable + "(ctime, ip, role, error_type, error_msg) values(?,?,?,?,?)", timeStr, ip, role, logType, msg) 69 | } 70 | 71 | //处理日志上报 72 | func (lr *LogReceiver) handleLog(logInfo map[string]string) { 73 | ip, ok := logInfo["ip"] 74 | port, ok := logInfo["port"] 75 | remoteAddr := ip 76 | if port != "" { 77 | remoteAddr += ":" + port 78 | } 79 | role, ok := lr.ipRoleMap[remoteAddr] 80 | if !ok { 81 | role = "" 82 | } 83 | res, err 
:= lr.AddLog(logInfo["time"], remoteAddr, role, logInfo["type"], logInfo["msg"]) 84 | if err == nil { 85 | loglib.Info(fmt.Sprintf("[log receiver] add %d error log", res.NumRows )) 86 | } 87 | 88 | } 89 | 90 | //处理注册请求 91 | func (lr *LogReceiver) handleRegister(registerInfo map[string]string) map[string]string { 92 | req, ok := registerInfo["req"] 93 | m := map[string]string{"err":"-1", "msg":"unkown request <" + req + ">"} 94 | ip, _ := registerInfo["ip"] 95 | port, _ := registerInfo["port"] 96 | hostname, _ := registerInfo["hostname"] 97 | role, _ := registerInfo["role"] 98 | addr := ip 99 | loglib.Info(ip + ":" + port + " " + req) 100 | 101 | if ok && (req == "register" || req == "unregister") { 102 | if port != "" { 103 | addr = ip + ":" + port 104 | } 105 | if req == "register" { 106 | ret, err := lr.register(ip, port, role, hostname) 107 | if ret { 108 | lr.mutex.Lock() 109 | lr.ipRoleMap[addr] = role 110 | lr.mutex.Unlock() 111 | 112 | m["err"] = "0" 113 | m["msg"] = "success" 114 | }else{ 115 | m["err"] = "1" 116 | if err != nil { 117 | m["msg"] = err.Error() 118 | }else{ 119 | m["msg"] = "done" 120 | } 121 | } 122 | }else if req == "unregister" { 123 | ret, err := lr.unRegister(ip, port) 124 | if ret { 125 | lr.mutex.Lock() 126 | delete(lr.ipRoleMap, addr) 127 | lr.mutex.Unlock() 128 | 129 | m["err"] = "0" 130 | m["msg"] = "success" 131 | }else{ 132 | m["err"] = "1" 133 | if err != nil { 134 | m["msg"] = err.Error() 135 | }else{ 136 | m["msg"] = "done" 137 | } 138 | } 139 | } 140 | } 141 | loglib.Info("[monitor] " + ip + ":" + port + " " + role + " " + req + ": " + m["msg"]) 142 | return m 143 | } 144 | 145 | //工作节点注册 146 | func (lr *LogReceiver) register(ip string, port string, role string, hostname string) (bool, error) { 147 | t := time.Now().Format("2006-01-02 15:04:05") 148 | res, err := lr.dbConn.Exec("insert into " + registerTable + "(ip, port, role, hostname, ctime) values(?, ?, ?, ?, ?) 
on duplicate key update role=values(role), hostname=values(hostname), ctime=values(ctime)", ip, port, role, hostname, t) 149 | if err == nil && res.NumRows >= 0 { 150 | return true, err 151 | } 152 | return false, err 153 | } 154 | 155 | //工作节点取消注册 156 | func (lr *LogReceiver) unRegister(ip string, port string) (bool, error) { 157 | res, err := lr.dbConn.Exec("delete from " + registerTable + " where ip=? and port=?", ip, port) 158 | if err == nil && res.NumRows >= 0 { 159 | return true, err 160 | } 161 | return false, err 162 | } 163 | -------------------------------------------------------------------------------- /db/mysql.go: -------------------------------------------------------------------------------- 1 | /******************* 2 | * 3 | * 对database/sql做简单封装,便于使用 4 | * mysql驱动使用go-sql-driver 5 | * 6 | *********************/ 7 | 8 | package db 9 | 10 | import ( 11 | "fmt" 12 | "log" 13 | "regexp" 14 | "strings" 15 | "database/sql" 16 | "reflect" 17 | _ "github.com/go-sql-driver/mysql" 18 | ) 19 | 20 | type Mysql struct { 21 | db *sql.DB 22 | } 23 | 24 | func NewMysql(host string, port string, uname string, passwd string, db string, charset string) *Mysql{ 25 | dsn := fmt.Sprintf("%s:%s@(%s:%s)/%s?charset=%s", uname, passwd, host, port, db, charset) 26 | dbObj, err := sql.Open("mysql", dsn) 27 | if err != nil { 28 | log.Fatalf("create db obj error for %s:%s/%s", host, port, db) 29 | } 30 | err = dbObj.Ping() 31 | if err != nil { 32 | log.Fatalf("connect to %s:%s/%s failed. quit...", host, port, db) 33 | } 34 | return &Mysql{dbObj} 35 | } 36 | 37 | func (my *Mysql) Query(sqlStr string, args ...interface{} ) (result *MysqlResult, reterr error) { 38 | defer func(){ 39 | if err := recover(); err != nil { 40 | log.Println(fmt.Sprintf("Mysql.Query panic:%v", err)) 41 | result = nil 42 | reterr, _ = err.(error) 43 | } 44 | }() 45 | 46 | rows, err := my.db.Query(sqlStr, args...) 
47 | 48 | if err != nil { 49 | log.Println(err) 50 | return result, err 51 | } 52 | 53 | columns, err := rows.Columns() 54 | nCols := len(columns) 55 | 56 | //列名与列号的映射 57 | colIndexMap := make(map[string]int) 58 | for i, v := range columns { 59 | colIndexMap[v] = i 60 | } 61 | result.ColIndexMap = colIndexMap 62 | 63 | row := make([]interface{}, nCols) 64 | valueArgs := make([]interface{}, nCols) 65 | for i, _ := range valueArgs { 66 | valueArgs[i] = &row[i] //用于存数据的参数 67 | } 68 | var i uint = 0 69 | for rows.Next() { 70 | err = rows.Scan(valueArgs...) 71 | if err == nil { 72 | strRow := make([]string, nCols) 73 | for i, v := range row { 74 | newv, ok := v.([]byte) 75 | if ok { 76 | strRow[i] = string(newv) 77 | }else{ 78 | strRow[i] = "" 79 | } 80 | } 81 | result.Rows = append(result.Rows, strRow) 82 | i++ 83 | }else{ 84 | log.Printf("scan row %d error\n", i) 85 | } 86 | } 87 | if rows != nil { 88 | rows.Close() 89 | } 90 | result.NumRows = i 91 | return result, err 92 | } 93 | 94 | func (my *Mysql) Exec(sqlStr string, args ...interface{}) (MysqlResult, error) { 95 | var result MysqlResult 96 | 97 | defer func(){ 98 | if err := recover(); err != nil { 99 | log.Println(fmt.Sprintf("Mysql.Exec panic:%v", err)) 100 | } 101 | }() 102 | 103 | if isInsert(sqlStr) { 104 | sqlStr, args = makeMultiInsert(sqlStr, args...) 105 | } 106 | res, err := my.db.Exec(sqlStr, args...) 
// isInsert reports whether the statement is an INSERT or REPLACE,
// ignoring surrounding spaces and letter case. The trailing space in the
// prefixes ensures a whole-keyword match.
func isInsert(s string) bool {
	stmt := strings.ToLower(strings.Trim(s, " "))
	for _, prefix := range []string{"insert ", "replace "} {
		if strings.HasPrefix(stmt, prefix) {
			return true
		}
	}
	return false
}
( 4 | "bytes" 5 | "time" 6 | "fmt" 7 | "logd/heart_beat" 8 | "logd/lib" 9 | "logd/loglib" 10 | "strconv" 11 | "strings" 12 | ) 13 | func logdGo(cfg map[string]map[string]string) { 14 | 15 | receiveChan := make(chan map[string]string) 16 | sendBuffer := make(chan bytes.Buffer) 17 | r := ReceiverInit(sendBuffer,receiveChan, 2000, 0) 18 | 19 | tc := TcpClientInit(receiveChan) 20 | //start tcp listener to receive log 21 | go tc.StartLogAgentServer() 22 | //start receiver to receive log from tcp listenser 23 | go r.Start() 24 | 25 | addr := "localhost:1306" 26 | s := SenderInit(sendBuffer,addr, addr, 0) 27 | go s.Start() 28 | for { 29 | time.Sleep(1000 * time.Second) 30 | } 31 | 32 | } 33 | func tailerGo(cfg map[string]map[string]string) { 34 | qlst := lib.NewQuitList() 35 | 36 | receiveChan := make(chan map[string]string, 10000) //非阻塞 37 | sendBuffer := make(chan bytes.Buffer, 500) 38 | recvBufferSize, _ := strconv.Atoi(cfg["tail"]["recv_buffer_size"]) 39 | tailler := NewTailler(cfg["tail"]) 40 | r := ReceiverInit(sendBuffer, receiveChan, recvBufferSize, tailler.GetLineNum()) 41 | 42 | //make a new log tailler 43 | go tailler.Tailling(receiveChan) 44 | //start receiver to receive log 45 | go r.Start() 46 | 47 | //一定要发送方先退出 48 | qlst.Append(tailler.Quit) 49 | qlst.Append(r.Quit) 50 | // heart beat 51 | port, _ := cfg["monitor"]["hb_port"] 52 | monAddr, _ := cfg["monitor"]["mon_addr"] 53 | if port != "" && monAddr != "" { 54 | hb := heart_beat.NewHeartBeat(port, monAddr, "tail") 55 | go hb.Run() 56 | qlst.Append(hb.Quit) 57 | } 58 | 59 | addrs := strings.Split(cfg["tail"]["send_to"], ",") 60 | addr := strings.Trim(addrs[0], " ") 61 | bakAddr := addr 62 | //有备用地址? 
63 | if len(addrs) > 1 { 64 | bakAddr = strings.Trim(addrs[1], " ") 65 | } 66 | 67 | //加大发送并发,sender阻塞会影响tail的进度 68 | nSenders := 2 69 | senders, ok := cfg["tail"]["senders"] 70 | if ok { 71 | tmp, err := strconv.Atoi(senders) 72 | if err == nil { 73 | nSenders = tmp 74 | } 75 | } 76 | for i:=1;i<=nSenders;i++ { 77 | s := SenderInit(sendBuffer, addr, bakAddr, i) 78 | go s.Start() 79 | qlst.Append(s.Quit) 80 | } 81 | loglib.Info(fmt.Sprintf("total senders %d", nSenders)) 82 | 83 | qlst.HandleQuitSignal() 84 | qlst.ExecQuit() 85 | } 86 | 87 | func collectorGo(cfg map[string]map[string]string) { 88 | qlst := lib.NewQuitList() 89 | 90 | bufferChan := make(chan bytes.Buffer, 500) 91 | rAddr := cfg["collector"]["listen"] 92 | tr := TcpReceiverInit(bufferChan,rAddr) 93 | go tr.Start() 94 | 95 | qlst.Append(tr.Quit) 96 | 97 | addrs := strings.Split(cfg["collector"]["send_to"], ",") 98 | addr := addrs[0] 99 | bakAddr := addr 100 | //有备用地址? 101 | if len(addrs) > 1 { 102 | bakAddr = addrs[1] 103 | } 104 | 105 | nSenders := 10 106 | senders, ok := cfg["collector"]["senders"] 107 | if ok { 108 | tmp, err := strconv.Atoi(senders) 109 | if err == nil { 110 | nSenders = tmp 111 | } 112 | } 113 | for i:=1;i<=nSenders;i++ { 114 | s := SenderInit(bufferChan, addr, bakAddr, i) 115 | go s.Start() 116 | qlst.Append(s.Quit) 117 | } 118 | loglib.Info(fmt.Sprintf("total senders %d", nSenders)) 119 | 120 | // heart beat 121 | port, _ := cfg["monitor"]["hb_port"] 122 | monAddr, _ := cfg["monitor"]["mon_addr"] 123 | if port != "" && monAddr != "" { 124 | hb := heart_beat.NewHeartBeat(port, monAddr, "collector") 125 | go hb.Run() 126 | qlst.Append(hb.Quit) 127 | } 128 | 129 | qlst.HandleQuitSignal() 130 | qlst.ExecQuit() 131 | } 132 | 133 | func fcollectorGo(cfg map[string]map[string]string) { 134 | bufferChan := make(chan bytes.Buffer, 100) 135 | addr := cfg["fcollector"]["listen"] 136 | 137 | fo := FileOutputerInit(bufferChan, cfg["fcollector"]["save_dir"]) 138 | go fo.Start() 139 | 140 | tr 
:= TcpReceiverInit(bufferChan,addr) 141 | go tr.Start() 142 | 143 | qlst := lib.NewQuitList() 144 | 145 | // heart beat 146 | port, _ := cfg["monitor"]["hb_port"] 147 | monAddr, _ := cfg["monitor"]["mon_addr"] 148 | if port != "" && monAddr != "" { 149 | hb := heart_beat.NewHeartBeat(port, monAddr, "fcollector") 150 | go hb.Run() 151 | 152 | qlst.Append(hb.Quit) 153 | } 154 | 155 | qlst.Append(tr.Quit) //tcpReceiver要先退出 156 | qlst.Append(fo.Quit) 157 | qlst.HandleQuitSignal() 158 | qlst.ExecQuit() 159 | } 160 | 161 | func etlcollectorGo(cfg map[string]map[string]string) { 162 | qlst := lib.NewQuitList() 163 | 164 | bufferChan := make(chan bytes.Buffer, 100) 165 | addr := cfg["etlcollector"]["listen"] 166 | 167 | eo := EtlOutputerInit(bufferChan, cfg["etlcollector"]) 168 | go eo.Start() 169 | 170 | tr := TcpReceiverInit(bufferChan,addr) 171 | go tr.Start() 172 | //一定要发送方先退出 173 | qlst.Append(tr.Quit) 174 | qlst.Append(eo.Quit) 175 | 176 | // heart beat 177 | port, _ := cfg["monitor"]["hb_port"] 178 | monAddr, _ := cfg["monitor"]["mon_addr"] 179 | if port != "" && monAddr != "" { 180 | hb := heart_beat.NewHeartBeat(port, monAddr, "etlcollector") 181 | go hb.Run() 182 | qlst.Append(hb.Quit) 183 | } 184 | 185 | qlst.HandleQuitSignal() 186 | qlst.ExecQuit() 187 | } 188 | 189 | func mgocollectorGo(cfg map[string]map[string]string) { 190 | qlst := lib.NewQuitList() 191 | 192 | bufferChan := make(chan bytes.Buffer, 300) //mongodb写入并发可能开的比较高 193 | addr := cfg["mgocollector"]["listen"] 194 | 195 | mgo := MongoDbOutputerInit(bufferChan, cfg["mgocollector"]) 196 | go mgo.Start() 197 | 198 | tr := TcpReceiverInit(bufferChan,addr) 199 | go tr.Start() 200 | //一定要发送方先退出 201 | qlst.Append(tr.Quit) 202 | qlst.Append(mgo.Quit) 203 | 204 | // heart beat 205 | port, _ := cfg["monitor"]["hb_port"] 206 | monAddr, _ := cfg["monitor"]["mon_addr"] 207 | if port != "" && monAddr != "" { 208 | hb := heart_beat.NewHeartBeat(port, monAddr, "mgocollector") 209 | go hb.Run() 210 | 
qlst.Append(hb.Quit) 211 | } 212 | 213 | qlst.HandleQuitSignal() 214 | qlst.ExecQuit() 215 | } 216 | 217 | -------------------------------------------------------------------------------- /integrity/integrity_checker.go: -------------------------------------------------------------------------------- 1 | package integrity 2 | 3 | import ( 4 | "io/ioutil" 5 | "os" 6 | "fmt" 7 | "path/filepath" 8 | "strconv" 9 | "strings" 10 | "encoding/json" 11 | "time" 12 | "logd/loglib" 13 | "logd/lib" 14 | ) 15 | 16 | //日志完整性检查类 17 | type IntegrityChecker struct { 18 | dir string 19 | statusFile string 20 | hourReceived map[string]map[string]map[string]int // [ip][hour][id] = 1 21 | dayReceived map[string]map[string]map[string]int // [ip][day][hour] = 1 22 | } 23 | 24 | func NewIntegrityChecker(dir string) *IntegrityChecker { 25 | err := os.MkdirAll(dir, 0755) 26 | if err != nil { 27 | loglib.Error("make integrity dir error:" + err.Error()) 28 | return nil 29 | } 30 | statusFile := getFilePath() 31 | ic := &IntegrityChecker{} 32 | ic.dir = dir 33 | ic.statusFile = statusFile 34 | 35 | status := ic.LoadStatus(ic.statusFile) 36 | ic.hourReceived = status["hour_received"] 37 | ic.dayReceived = status["day_received"] 38 | return ic 39 | } 40 | 41 | func getFilePath() string { 42 | var d = lib.GetBinPath() + "/var" 43 | if ! 
lib.FileExists(d) { 44 | os.MkdirAll(d, 0775) 45 | } 46 | return d + "/log_received.json" 47 | } 48 | 49 | func (this *IntegrityChecker) LoadStatus(filename string) map[string]map[string]map[string]map[string]int { 50 | m := make(map[string]map[string]map[string]map[string]int) 51 | m["hour_received"] = make(map[string]map[string]map[string]int) 52 | m["day_received"] = make(map[string]map[string]map[string]int) 53 | if lib.FileExists(filename) { 54 | vbytes, err := ioutil.ReadFile( filename ) 55 | if err != nil { 56 | loglib.Error("read log received file error:" + err.Error()) 57 | }else{ 58 | err = json.Unmarshal(vbytes, &m) 59 | if err != nil { 60 | loglib.Error("unmarshal log received error:" + err.Error()) 61 | }else{ 62 | loglib.Info("load log received success !") 63 | } 64 | } 65 | }else{ 66 | loglib.Warning("log received file " + filename + " not found!") 67 | } 68 | return m 69 | } 70 | 71 | func (this *IntegrityChecker) SaveStatus() { 72 | m := make(map[string]map[string]map[string]map[string]int) 73 | m["hour_received"] = this.hourReceived 74 | m["day_received"] = this.dayReceived 75 | vbytes, err := json.Marshal(m) 76 | if err != nil { 77 | loglib.Error("marshal log received error:" + err.Error()) 78 | return 79 | } 80 | err = ioutil.WriteFile(this.statusFile, vbytes, 0664) 81 | if err == nil { 82 | loglib.Info("save log received success !") 83 | }else{ 84 | loglib.Error("save log received error:" + err.Error()) 85 | } 86 | } 87 | 88 | func (this *IntegrityChecker) Add(ip string, hour string, packId string, lines int, isDone bool) { 89 | _, ok := this.hourReceived[ip] 90 | if !ok { 91 | this.hourReceived[ip] = make(map[string]map[string]int) 92 | } 93 | _, ok = this.hourReceived[ip][hour] 94 | if !ok { 95 | this.hourReceived[ip][hour] = map[string]int{"total_lines": 0, "total_packs": 0} 96 | } 97 | this.hourReceived[ip][hour][packId] = 1 98 | this.hourReceived[ip][hour]["total_lines"] += lines 99 | if isDone { 100 | id, _ := strconv.Atoi(packId) 101 | 
this.hourReceived[ip][hour]["total_packs"] = id 102 | //this.Check() //改为手动调用 103 | } 104 | } 105 | 106 | func (this *IntegrityChecker) addHour(ip string, hour string) bool { 107 | if len(hour) > 8 { 108 | day := hour[0:8] 109 | _, ok := this.dayReceived[ip] 110 | if !ok { 111 | this.dayReceived[ip] = make(map[string]map[string]int) 112 | } 113 | _, ok = this.dayReceived[ip][day] 114 | if !ok { 115 | this.dayReceived[ip][day] = make(map[string]int) 116 | } 117 | this.dayReceived[ip][day][hour] = 1 118 | return true 119 | } 120 | return false 121 | } 122 | //检查日志是否完整,返回当前这次检查已完成的小时和日期 123 | func (this *IntegrityChecker) Check() (hourFinish map[string][]string, dayFinish map[string][]string) { 124 | hourFinish = make(map[string][]string) 125 | dayFinish = make(map[string][]string) 126 | interval := int64(86400 * 4) //4天前的不完整数据将被删除 127 | now := time.Now().Unix() 128 | //检查每小时是否完整 129 | for ip, m1 := range this.hourReceived { 130 | for hour, m2 := range m1 { 131 | totalPacks, ok := m2["total_packs"] 132 | if ok && totalPacks > 0 { 133 | miss := make([]string, 0) 134 | var id = "" 135 | //这小时已接收到最后一个包,可以check了 136 | for i:=1; i<=totalPacks; i++ { 137 | id = strconv.Itoa(i) 138 | _, ok = m2[id] 139 | if !ok { 140 | miss = append(miss, id) 141 | } 142 | } 143 | //if条件顺序不要错 144 | if len(miss) == 0 && this.makeHourTag(ip, hour, m2["total_lines"]) && this.addHour(ip, hour) { 145 | _, ok1 := hourFinish[ip] 146 | if !ok1 { 147 | hourFinish[ip] = make([]string, 0) 148 | } 149 | hourFinish[ip] = append(hourFinish[ip], hour) 150 | 151 | delete(this.hourReceived[ip], hour) 152 | if len(this.hourReceived[ip]) == 0 { 153 | delete(this.hourReceived, ip) 154 | } 155 | }else{ 156 | loglib.Warning(fmt.Sprintf("%s_%s total %d, miss %s", ip, hour, totalPacks, strings.Join(miss, ","))) 157 | } 158 | } 159 | 160 | tm, err := time.Parse("2006010215", hour) 161 | if err != nil || (now - tm.Unix()) > interval { 162 | delete(this.hourReceived[ip], hour) 163 | loglib.Info(fmt.Sprintf("hour 
integrity: %s %s overtime", ip, hour)) 164 | } 165 | } 166 | } 167 | 168 | //检查每天是否完整 169 | for ip, m1 := range this.dayReceived { 170 | for day, m2 := range m1 { 171 | if len(m2) == 24 && this.makeDayTag(ip, day) { 172 | loglib.Info(ip + "_" + day + " all received") 173 | 174 | _, ok1 := dayFinish[ip] 175 | if !ok1 { 176 | dayFinish[ip] = make([]string, 0) 177 | } 178 | dayFinish[ip] = append(dayFinish[ip], day) 179 | 180 | delete(this.dayReceived[ip], day) 181 | if len(this.dayReceived[ip]) == 0 { 182 | delete(this.dayReceived, ip) 183 | } 184 | } 185 | tm, err := time.Parse("20060102", day) 186 | if err != nil || (now - tm.Unix()) > interval { 187 | delete(this.dayReceived[ip], day) 188 | loglib.Info(fmt.Sprintf("day integrity: %s %s overtime", ip, day)) 189 | } 190 | } 191 | } 192 | 193 | return 194 | } 195 | 196 | //touch一个文件表明某一小时接收完 197 | func (this *IntegrityChecker) makeHourTag(ip string, hour string, lines int) bool { 198 | fname := fmt.Sprintf("%s_%s_%d", ip, hour, lines) 199 | filename := filepath.Join(this.dir, fname) 200 | fout, err := os.Create(filename) 201 | if err != nil { 202 | loglib.Error("tag " + fname + " error: " + err.Error()) 203 | return false 204 | }else{ 205 | fout.Close() 206 | } 207 | return true 208 | } 209 | //touch一个文件表明某一天接收完 210 | func (this *IntegrityChecker) makeDayTag(ip string, day string) bool { 211 | fname := fmt.Sprintf("%s_%s", ip, day) 212 | filename := filepath.Join(this.dir, fname) 213 | fout, err := os.Create(filename) 214 | if err != nil { 215 | loglib.Error("tag " + fname + " error: " + err.Error()) 216 | return false 217 | }else{ 218 | fout.Close() 219 | } 220 | return true 221 | } 222 | -------------------------------------------------------------------------------- /sender.go: -------------------------------------------------------------------------------- 1 | package main 2 | import ( 3 | "fmt" 4 | "bytes" 5 | "io/ioutil" 6 | "os" 7 | "logd/lib" 8 | "logd/tcp_pack" 9 | "net" 10 | "time" 11 | "strconv" 12 | 
"sync" 13 | "logd/loglib" 14 | ) 15 | 16 | var once sync.Once 17 | var fileList *lib.GlobalList = lib.GlobalListInit() 18 | 19 | type Sender struct { 20 | id int 21 | sBuffer chan bytes.Buffer 22 | file_mem_folder_name string 23 | memBuffer chan bytes.Buffer //sender自己的chan,用于保证sBuffer不阻塞 24 | connection Connection 25 | status *int 26 | sendToAddress string 27 | 28 | wq *lib.WaitQuit 29 | } 30 | 31 | //工厂初始化函数 32 | //增加备用地址,暂时支持一个备用地址 33 | func SenderInit(buffer chan bytes.Buffer, addr string, bakAddr string, id int) (s Sender) { 34 | // var s Sender 35 | // s = new(Sender) 36 | s.id = id 37 | s.sBuffer = buffer 38 | s.memBuffer = make(chan bytes.Buffer, 20) 39 | s.file_mem_folder_name = "tempfile" 40 | //auto make dir 41 | if _,err := os.Stat(s.file_mem_folder_name); err != nil && os.IsNotExist(err) { 42 | os.MkdirAll(s.file_mem_folder_name, 0775) 43 | } 44 | s.sendToAddress = addr 45 | s.connection = SingleConnectionInit(s.sendToAddress, bakAddr) 46 | a := 1 47 | s.status = &a 48 | s.wq = lib.NewWaitQuit("sender", -1) 49 | 50 | return s 51 | } 52 | 53 | //should be run by once 54 | func (s *Sender) reloadFileCache() { 55 | list := lib.GetFilelist(s.file_mem_folder_name) 56 | for _,filename := range list { 57 | // s.fileCacheList.PushBack(filename) 58 | loglib.Info("reloading:" + filename) 59 | fileList.PushBack(filename) 60 | } 61 | } 62 | //从公用的chan读pack到私有的chan,若私有chan已满则写入文件缓存 63 | //保证公用chan不会阻塞 64 | func (s *Sender) pickPacks() { 65 | for buf := range s.sBuffer { 66 | select { 67 | case s.memBuffer <- buf: 68 | break 69 | default: 70 | loglib.Info(fmt.Sprintf("sender%d mem buffer is full, total %d, pub chan:%d", s.id, len(s.memBuffer), len(s.sBuffer))) 71 | s.writeToFile(buf) 72 | } 73 | } 74 | close(s.memBuffer) 75 | } 76 | //goroutine 77 | func (s *Sender) Start() { 78 | // conn := s.getConnection() 79 | //初始化fileCacheList 80 | once.Do(s.reloadFileCache) 81 | 82 | //收尾工作 83 | defer func(){ 84 | if err := recover(); err != nil { 85 | 
loglib.Error(fmt.Sprintf("sender %d panic:%v", s.id, err)) 86 | } 87 | 88 | s.saveBufferInChan() 89 | 90 | //s.saveMemCache() 91 | 92 | s.connection.close() 93 | 94 | s.wq.AllDone() 95 | 96 | }() 97 | 98 | go s.pickPacks() 99 | //var connLost = 0 100 | var quit = false 101 | go lib.HandleQuitSignal(func(){ 102 | quit = true 103 | s.connection.close() 104 | }) 105 | 106 | var sendInterval = time.Duration(2000) //间隔稍大,避免发送文件缓存时因无连接或其他错误进入死循环 107 | 108 | var timeoutChan = time.After(sendInterval * time.Millisecond) 109 | for ; !quit; { 110 | 111 | select { 112 | case b := <- s.memBuffer: 113 | //send b 114 | result := s.sendBuffer(b) 115 | if result == false { 116 | //改为直接放入文件缓存 117 | s.writeToFile(b) 118 | } 119 | 120 | case <- timeoutChan : 121 | timeoutChan = time.After(sendInterval * time.Millisecond) 122 | 123 | // send from file 124 | e := fileList.Remove() 125 | if e != nil { // file list is not empty 126 | filename := e.Value.(string) 127 | // fmt.Println("sender ",s.id,": get file :",filename) 128 | data,err := ioutil.ReadFile(filename) 129 | if (err != nil) { 130 | // fmt.Println("sender ",s.id,":",err) 131 | if _, ok := err.(*os.PathError); !ok { 132 | fileList.PushBack(filename) 133 | } 134 | loglib.Error(fmt.Sprintf("sender%d read file cache %s error:%s", s.id, filename, err.Error())) 135 | }else{ 136 | 137 | packId := tcp_pack.GetPackId(data)//debug info 138 | loglib.Info(fmt.Sprintf("sender%d read pack %s from file: %s, len: %d", s.id, packId, filename, len(data)))//debug info 139 | result := s.sendData2(data) 140 | if result == true { 141 | // s.fileCacheList.Remove(front) 142 | // log.Println("sender ",s.id,":removed file:",filename, "for pack", packId)//debug info 143 | err = os.Remove(filename) 144 | lib.CheckError(err) 145 | timeoutChan = time.After(time.Millisecond) //发送成功,不用再等待 146 | }else { 147 | fileList.PushBack(filename) 148 | // fmt.Println("sender ",s.id,": pushback file :",filename) 149 | } 150 | } 151 | } 152 | 153 | } 154 | } 155 | 156 | 
} 157 | 158 | func (s *Sender) Quit() bool { 159 | return s.wq.Quit() 160 | } 161 | 162 | func (s *Sender) saveBufferInChan() { 163 | loglib.Info(fmt.Sprintf("sender%d begin to save pack in chan", s.id)) 164 | i := 0 165 | for b := range s.memBuffer { 166 | s.writeToFile(b) 167 | i++ 168 | } 169 | loglib.Info(fmt.Sprintf("sender%d saved num of pack in chan: %d", s.id, i)) 170 | } 171 | 172 | func (s *Sender) writeToFile(data bytes.Buffer) { 173 | //写入文件 174 | filename := createFileName(s.id) 175 | //创建文件 176 | _,err := os.Create(filename) 177 | lib.CheckError(err) 178 | 179 | d := data.Bytes() 180 | 181 | packId := tcp_pack.GetPackId(d) 182 | 183 | loglib.Info(fmt.Sprintf("sender%d save pack %s to file %s len:%d", s.id, packId, filename, len(d) )) 184 | err = ioutil.WriteFile(filename, d, 0666) 185 | if (err != nil) { 186 | loglib.Warning("write to file " + filename + " error:" + err.Error()) 187 | lib.CheckError(err) 188 | }else{ 189 | //追加fileCacheList 190 | fileList.PushBack(filename) 191 | } 192 | } 193 | 194 | 195 | func (s *Sender) sendBuffer(data bytes.Buffer) bool { 196 | result := s.sendData(data.Bytes(),s.connection.getConn()) 197 | //发送失败,tcp连接可能已经失效,重新建立tcp连接 198 | if result == false { 199 | s.connection.reconnect(s.connection.getConn()) 200 | *s.status = -1 201 | loglib.Info(fmt.Sprintf("sender%d reconnected by sendBuffer(),status:%d",s.id, *s.status)) 202 | }else { 203 | *s.status = 1 204 | } 205 | return result 206 | } 207 | 208 | func (s *Sender) sendData2(data []byte) bool { 209 | result := s.sendData(data,s.connection.getConn()) 210 | //发送失败,tcp连接可能已经失效,重新建立tcp连接 211 | if result == false { 212 | s.connection.reconnect(s.connection.getConn()) 213 | *s.status = -1 214 | loglib.Info(fmt.Sprintf("sender%d reconnected by sendData2(),status:%d", s.id, *s.status)) 215 | } 216 | return result 217 | } 218 | 219 | func (s Sender) sendData(data []byte, conn *net.TCPConn) bool { 220 | if len(data) == 0 { 221 | return true 222 | } 223 | 224 | if conn == nil { 
225 | return false 226 | } 227 | /* 228 | lenBuf := make([]byte, 4) 229 | nData := len(data) 230 | binary.PutUvarint(lenBuf, uint64(nData)) 231 | data = append(lenBuf, data...) 232 | */ 233 | 234 | st := time.Now() 235 | packId := tcp_pack.GetPackId(data) 236 | 237 | conn.SetDeadline(time.Now().Add(5 * time.Minute)) //设置超时 238 | loglib.Info(fmt.Sprintf("sender%d start sending pack:%s length:%d", s.id, packId, len(data))) 239 | n,err := conn.Write(data) 240 | ed := time.Now() 241 | loglib.Info(fmt.Sprintf("sender%d end sending pack:%s length:%d elapse:%s", s.id, packId, n, ed.Sub(st)) ) 242 | 243 | lib.CheckError(err) 244 | 245 | //写失败了就不用等应答了,肯定拿不到 246 | if err == nil { 247 | conn.SetReadDeadline(time.Now().Add(8 * time.Minute)) //设置超时 248 | time1 := time.Now() 249 | var temp []byte = make([]byte,128) 250 | count,err := conn.Read(temp) 251 | if err == nil { 252 | loglib.Info(fmt.Sprintf("sender%d get anwser data len:%d for pack:%s elapse:%s", s.id, count, packId, time.Now().Sub(time1))) 253 | }else{ 254 | loglib.Info(fmt.Sprintf("sender%d get anwser data len:%d for pack:%s elapse:%s, error:%s", s.id, count, packId, time.Now().Sub(time1), err.Error())) 255 | } 256 | 257 | temp = temp[:count] 258 | if (string(temp) == "ok") {//发送成功 259 | return true 260 | }else if(string(temp) == "wrong header"){ 261 | //包头错误,丢弃 262 | loglib.Info(packId + " has wrong header, retry later!") 263 | return false 264 | }else {//发送失败 265 | //报警 266 | return false 267 | } 268 | }else{ 269 | loglib.Warning(fmt.Sprintf("write pack %s error:%s", packId, err.Error())) 270 | } 271 | return false 272 | } 273 | func createFileName(id int) string { 274 | t := time.Now() 275 | nanoSecond :=strconv.FormatInt(t.UnixNano(),10) 276 | filename := "tempfile/senderBufferTempFile_"+strconv.Itoa(id)+"_"+nanoSecond 277 | 278 | return filename 279 | 280 | } 281 | 282 | -------------------------------------------------------------------------------- /etl_outputer.go: 
-------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "compress/zlib" 5 | "bytes" 6 | "os" 7 | "fmt" 8 | "strconv" 9 | "encoding/binary" 10 | "path/filepath" 11 | "baidu.com/etl" 12 | "logd/lib" 13 | "io" 14 | "sync" 15 | "time" 16 | "logd/tcp_pack" 17 | "logd/integrity" 18 | "logd/loglib" 19 | ) 20 | 21 | type etlOutputer struct { 22 | 23 | buffer chan bytes.Buffer 24 | saveDir string 25 | dataDir string 26 | headerDir string 27 | icDir string 28 | etlDir string 29 | etlDoneDir string 30 | etlFailDir string 31 | writers map[string]*os.File //存日志的fd 32 | headerWriters map[string]*os.File //存header的fd 33 | config map[string]string 34 | ic *integrity.IntegrityChecker 35 | wq *lib.WaitQuit 36 | } 37 | 38 | 39 | //工厂初始化函数 40 | func EtlOutputerInit(buffer chan bytes.Buffer, config map[string]string) (e etlOutputer) { 41 | 42 | e.buffer = buffer 43 | saveDir, _ := config["save_dir"] 44 | e.saveDir = saveDir 45 | e.dataDir = filepath.Join(saveDir, "log_data") 46 | e.headerDir = filepath.Join(saveDir, "headers") 47 | e.icDir = filepath.Join(saveDir, "received") 48 | e.etlDir = filepath.Join(saveDir, "etl") 49 | e.etlDoneDir = filepath.Join(saveDir, "etl_done") 50 | e.etlFailDir = filepath.Join(saveDir, "etl_fail") 51 | e.ic = integrity.NewIntegrityChecker(e.icDir) 52 | 53 | os.MkdirAll(e.dataDir, 0775) 54 | os.MkdirAll(e.headerDir, 0775) 55 | os.MkdirAll(e.etlDir, 0775) 56 | os.MkdirAll(e.etlDoneDir, 0775) 57 | os.MkdirAll(e.etlFailDir, 0775) 58 | 59 | e.writers = make(map[string]*os.File) 60 | e.headerWriters = make(map[string]*os.File) 61 | e.config = config 62 | e.wq = lib.NewWaitQuit("etl outputer", -1) //不限退出超时时间,以便etl能做完 63 | 64 | return e 65 | } 66 | 67 | func (e *etlOutputer) Start() { 68 | defer func(){ 69 | if err := recover(); err != nil { 70 | loglib.Error(fmt.Sprintf("etl outputer panic:%v", err)) 71 | } 72 | 73 | e.wq.AllDone() 74 | }() 75 | 76 | spiderList, _ := e.config["spider_list"] 77 | 
colsFile , _ := e.config["columns_file"] 78 | hostsList, _ := e.config["hosts_white_list"] 79 | ipBlackList, _ := e.config["ip_black_list"] 80 | 81 | if colsFile != "" { 82 | e.runEtl(spiderList, colsFile, hostsList, ipBlackList) 83 | }else{ 84 | loglib.Error("[error] miss columns map file!") 85 | } 86 | } 87 | 88 | func (e *etlOutputer) Quit() bool { 89 | return e.wq.Quit() 90 | } 91 | 92 | func (e *etlOutputer) runEtl(spiderList string, colsFile string, hostsList string, ipBlackList string) { 93 | wg := &sync.WaitGroup{} 94 | fkeyChan := make(chan string, 100) 95 | defer func(){ 96 | if err := recover(); err != nil { 97 | loglib.Error(fmt.Sprintf("runEtl() panic:%v", err)) 98 | } 99 | 100 | e.ic.SaveStatus() 101 | e.closeWriters(e.writers) 102 | e.closeWriters(e.headerWriters) 103 | close(fkeyChan) 104 | //等待etl routine结束 105 | wg.Wait() 106 | }() 107 | 108 | for i:=0; i<5; i++ { 109 | wg.Add(1) 110 | go e.doEtl(fkeyChan, e.dataDir, e.etlDir, e.etlDoneDir, e.etlFailDir, spiderList, colsFile, hostsList, ipBlackList, wg) 111 | } 112 | nextCheckTime := time.Now().Add(2 * time.Minute) 113 | //使用range遍历,方便安全退出,只要发送方退出时关闭chan,这里就可以退出了 114 | for b := range e.buffer { 115 | loglib.Info(fmt.Sprintf("pack in chan: %d", len(e.buffer))) 116 | buf := make([]byte, 4) 117 | bp := &b 118 | bp.Read(buf) 119 | 120 | l, _ := binary.Uvarint(buf) 121 | headerLen := int(l) 122 | //get pack header 123 | buf = make([]byte, headerLen) 124 | bp.Read(buf) 125 | header := tcp_pack.ParseHeader(buf) 126 | 127 | r, err := zlib.NewReader(bp) 128 | if err != nil { 129 | loglib.Error("zlib reader Error: " + err.Error()) 130 | }else{ 131 | lines, _ := strconv.Atoi(header["lines"]) 132 | done := false 133 | if header["done"] == "1" { 134 | done = true 135 | } 136 | e.ic.Add(header["ip"], header["hour"], header["id"], lines, done) 137 | 138 | writerKey := header["ip"] + "_" + header["hour"] 139 | fout := e.getWriter(e.writers, e.dataDir, writerKey) 140 | 141 | buf = append(buf, '\n') 142 | /* 143 | 
//一头一尾写头信息,节省硬盘 144 | n, err := fout.Write(buf) 145 | if err != nil { 146 | loglib.Info(fmt.Sprintf("write %s %d %s", writerKey, n, err.Error())) 147 | } 148 | */ 149 | nn, err := io.Copy(fout, r) 150 | if err != nil { 151 | loglib.Warning(fmt.Sprintf("save %s_%s_%s error:%s, saved:%d", header["ip"], header["hour"], header["id"], err, nn)) 152 | } 153 | //fout.Write(buf) 154 | //单独存一份header便于查数 155 | fout = e.getWriter(e.headerWriters, e.headerDir, writerKey) 156 | n, err := fout.Write(buf) 157 | if err != nil { 158 | loglib.Info(fmt.Sprintf("writer header %s %d %s", writerKey, n, err.Error())) 159 | } 160 | //增加2分钟check一次的规则,避免done包先到,其他的包未到,则可能要等到下一小时才能check 161 | if done || time.Now().Unix() > nextCheckTime.Unix() { 162 | hourFinish, _ := e.ic.Check() 163 | for ip, hours := range hourFinish { 164 | for _, hour := range hours { 165 | writerKey = ip + "_" + hour 166 | loglib.Info(fmt.Sprintf("fkeychan %d", len(fkeyChan))) 167 | fkeyChan <- writerKey 168 | } 169 | } 170 | e.closeWriters(e.writers) 171 | e.closeWriters(e.headerWriters) 172 | e.ic.SaveStatus() 173 | nextCheckTime = time.Now().Add(2 * time.Minute) 174 | } 175 | 176 | r.Close() 177 | } 178 | } 179 | } 180 | 181 | func (e *etlOutputer) getWriter(writers map[string]*os.File, parentDir string, key string) *os.File { 182 | w, ok := writers[key] 183 | if !ok || w == nil { 184 | fname := filepath.Join(parentDir, key) 185 | w1, err := os.OpenFile(fname, os.O_WRONLY | os.O_APPEND | os.O_CREATE, 0666) 186 | writers[key] = w1 187 | w = w1 188 | if err != nil { 189 | loglib.Error(fmt.Sprintf("etl outputer create writer: %s error: %s", fname, err.Error())) 190 | } 191 | } 192 | return w 193 | } 194 | 195 | func (e *etlOutputer) closeWriter(writers map[string]*os.File, key string) { 196 | w, ok := writers[key] 197 | if ok { 198 | if w != nil { 199 | w.Close() 200 | } 201 | delete(writers, key) 202 | } 203 | } 204 | //关闭全部writer 205 | func (e *etlOutputer) closeWriters(writers map[string]*os.File) { 206 | for key, w 
:= range writers { 207 | if w != nil { 208 | w.Close() 209 | } 210 | delete(writers, key) 211 | } 212 | 213 | } 214 | func (e *etlOutputer) doEtl(fkeyChan chan string, logDataDir string, etlDir string, etlDoneDir string, etlFailDir string, spiderList string, colsFile string, hostsList string, ipBlackList string, wg *sync.WaitGroup) { 215 | defer func(){ 216 | if err := recover(); err != nil { 217 | loglib.Error(fmt.Sprintf("doEtl() panic:%v", err)) 218 | } 219 | 220 | wg.Done() 221 | }() 222 | loglib.Info("etl routine start") 223 | for fkey := range fkeyChan { 224 | sv := etl.NewFileSaver(colsFile, etlDir, fkey) 225 | d := etl.NewDispatcher(sv, 6, hostsList, ipBlackList) 226 | g := etl.NewGlobalHao123(spiderList, 100, 200, 8, d) 227 | go g.Start(false) 228 | 229 | fname := filepath.Join(logDataDir, fkey) 230 | loglib.Info("start etl for " + fname) 231 | 232 | err := g.ParseFile(fname) 233 | g.Wait() 234 | // etl success 235 | // mark success 236 | if err == nil { 237 | //采用循环,增加打tag的成功率 238 | for i:=0; i<5; i++ { 239 | fd, err := os.Create(filepath.Join(etlDoneDir, fkey)) 240 | if err == nil { 241 | fd.Close() 242 | loglib.Info("finish etl for " + fname) 243 | break 244 | }else{ 245 | loglib.Warning("mark etl done for " + fname + " failed! error: " + err.Error()) 246 | } 247 | } 248 | }else{ 249 | //采用循环,增加打tag的成功率 250 | for i:=0; i<5; i++ { 251 | fd, err := os.Create(filepath.Join(etlFailDir, fkey)) 252 | if err == nil { 253 | fd.Close() 254 | loglib.Info("failed etl for " + fname) 255 | break 256 | }else{ 257 | loglib.Warning("mark etl fail for " + fname + " failed! 
error: " + err.Error()) 258 | } 259 | } 260 | 261 | } 262 | } 263 | loglib.Info("etl routine finish") 264 | } 265 | -------------------------------------------------------------------------------- /tcpReceiver.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "net" 5 | "logd/lib" 6 | "logd/loglib" 7 | "time" 8 | "fmt" 9 | "bytes" 10 | "strings" 11 | "io/ioutil" 12 | "os" 13 | "sync" 14 | "encoding/binary" 15 | "encoding/json" 16 | "crypto/md5" 17 | "logd/tcp_pack" 18 | ) 19 | 20 | type TcpReceiver struct { 21 | 22 | buffer chan bytes.Buffer 23 | receiveFromAddress string 24 | 25 | footPrint map[string]PackAppear //记录包的MD5, key是md5 26 | footPrintFile string 27 | mutex *sync.RWMutex 28 | 29 | wq *lib.WaitQuit //用于安全退出 30 | } 31 | 32 | type PackAppear struct { 33 | Time int64 //包首次出现的时间戳 34 | Id string //包的id 35 | } 36 | 37 | //工厂初始化函数 38 | func TcpReceiverInit(buffer chan bytes.Buffer, addr string) (t TcpReceiver) { 39 | 40 | t.buffer = buffer 41 | t.receiveFromAddress = addr 42 | 43 | t.footPrintFile = getFilePath() 44 | t.footPrint = t.loadFootPrint(t.footPrintFile) 45 | t.mutex = &sync.RWMutex{} 46 | t.wq = lib.NewWaitQuit("tcp receiver", -1) 47 | return t 48 | } 49 | 50 | 51 | func getFilePath() string { 52 | var d = lib.GetBinPath() + "/var" 53 | if !lib.FileExists(d) { 54 | os.MkdirAll(d, 0775) 55 | } 56 | return d + "/footprint.json" 57 | } 58 | func (t *TcpReceiver) Start() { 59 | 60 | tcpAddr, err := net.ResolveTCPAddr("tcp4", t.receiveFromAddress) 61 | lib.CheckError(err) 62 | listener, err := net.ListenTCP("tcp", tcpAddr) 63 | lib.CheckError(err) 64 | 65 | wg := &sync.WaitGroup{} 66 | 67 | wg.Add(1) 68 | go t.clearFootPrint(wg) 69 | 70 | //主routine信号处理 71 | go lib.HandleQuitSignal(func(){ 72 | //接收到信号关闭listenner,此时Accept会马上返回一个nil 的conn 73 | listener.Close() 74 | loglib.Info("close tcp receiver's listener.") 75 | }) 76 | 77 | defer func(){ 78 | if err := recover(); err != nil { 79 | 
loglib.Error(fmt.Sprintf("tcp receiver panic:%v", err)) 80 | } 81 | 82 | loglib.Info("wait connections finish...") 83 | wg.Wait() 84 | loglib.Info("all connections have been processed. quit.") 85 | close(t.buffer) //关闭chan 86 | t.saveFootPrint() 87 | 88 | t.wq.AllDone() 89 | 90 | }() 91 | 92 | for { 93 | conn, err := listener.Accept() 94 | if conn == nil { 95 | break 96 | } 97 | lib.CheckError(err) 98 | wg.Add(1) 99 | go t.handleConnnection(conn, wg) 100 | } 101 | 102 | } 103 | 104 | func (t *TcpReceiver) Quit() bool { 105 | return t.wq.Quit() 106 | } 107 | 108 | //包是否出现过 109 | func (t *TcpReceiver) hasAppeared(buf *bytes.Buffer) (PackAppear, bool, string) { 110 | h := md5.New() 111 | h.Write(buf.Bytes()) 112 | code := fmt.Sprintf("%x", h.Sum(nil)) 113 | t.mutex.RLock() 114 | appeared, ok := t.footPrint[code] 115 | t.mutex.RUnlock() 116 | return appeared, ok, code 117 | } 118 | 119 | func (t *TcpReceiver) clearFootPrint(wg *sync.WaitGroup) { 120 | defer wg.Done() 121 | 122 | ch1 := make(chan bool) //用于安全退出 123 | ch2 := time.After(time.Hour) //用于定时任务 124 | go lib.HandleQuitSignal(func(){ 125 | ch1 <- true 126 | }) 127 | 128 | loop: 129 | for { 130 | select { 131 | //监听一个chan以便安全退出 132 | case <- ch1: 133 | break loop 134 | case <- ch2: 135 | //若这个case未执行完,而ch1已可读,select会保证这个case执行完 136 | now := time.Now().Unix() 137 | t.mutex.Lock() 138 | for code, appear := range t.footPrint { 139 | if now - appear.Time >= 86400 { 140 | delete(t.footPrint, code) 141 | } 142 | } 143 | t.saveFootPrint() 144 | t.mutex.Unlock() 145 | ch2 = time.After(time.Hour) //用于定时任务 146 | } 147 | } 148 | loglib.Info("clear footprint quit!") 149 | } 150 | 151 | func (t *TcpReceiver) handleConnnection (conn net.Conn, wg *sync.WaitGroup) { 152 | defer func(){ 153 | if err := recover(); err != nil { 154 | loglib.Error(fmt.Sprintf("tcp receiver connection panic:%v", err)) 155 | } 156 | conn.Close() 157 | wg.Done() 158 | }() 159 | /* 160 | 用于标识收到退出信号后,能否直接退出 161 | 只要接收信号时,包没有收完,都是可退出的, 162 | 发送方会缓存以后重传; 
163 | 如果收完了就不能直接退出,可能包已传给下一级处理但是 164 | 却告诉发送方发送失败 165 | */ 166 | var quit = false //用于标识是否要退出 167 | 168 | go lib.HandleQuitSignal(func(){ 169 | //关闭连接,避免阻塞在网络io上 170 | conn.Close() 171 | quit = true 172 | }) 173 | 174 | request := make([]byte, 512 * 1024) //缓冲为512k 175 | 176 | var packLen int = 0 177 | currLen := 0 178 | var b = new(bytes.Buffer) 179 | var content = new(bytes.Buffer) 180 | inAddr := conn.RemoteAddr().String() 181 | parts := strings.Split(inAddr, ":") 182 | inIp := parts[0] 183 | 184 | packId := "unkown" 185 | 186 | var routeInfo map[string]string 187 | var rePull = false //是否补拉,如果是补拉就不做重复包检验 188 | 189 | loglib.Info("incoming: " + inAddr) 190 | 191 | outer: 192 | for ; !quit; { 193 | 194 | st := time.Now() 195 | if packLen == 0 { 196 | conn.SetReadDeadline(time.Now().Add(5 * time.Minute)) 197 | time1 := time.Now() //时间打点 198 | // read zlib pack header length 199 | buf := make([]byte, 4) 200 | _, err := conn.Read(buf) 201 | if err != nil { 202 | loglib.Warning(fmt.Sprintf("conn:%s, get header len, tcp receiver read error:%s, elapse:%s", inAddr, err.Error(), time.Now().Sub(time1))) 203 | break 204 | } 205 | l, _ := binary.Uvarint(buf) 206 | headerLen := int(l) 207 | //get pack header 208 | headerBuf := make([]byte, headerLen) 209 | time2 := time.Now() 210 | _, err = conn.Read(headerBuf) 211 | if err != nil { 212 | loglib.Warning(fmt.Sprintf("conn:%s, get header, tcp receiver read error:%s, elapse:%s", inAddr, err.Error(), time.Now().Sub(time2))) 213 | break 214 | } 215 | 216 | //是否补拉 217 | route0 := tcp_pack.ParseHeader(headerBuf) 218 | if v, ok := route0["repull"]; ok && v == "1" { 219 | rePull = true 220 | }else{ 221 | rePull = false 222 | } 223 | 224 | buf = append(buf, headerBuf...) 
225 | header,_, err := tcp_pack.ExtractHeader(buf) 226 | if err != nil { 227 | loglib.Error("wrong format header " + string(headerBuf) + " " + err.Error()) 228 | conn.Write([]byte("wrong header")) 229 | break 230 | } 231 | 232 | packId = tcp_pack.GetPackId(buf) 233 | packLen = header.PackLen 234 | currLen = 0 235 | routeInfo = make(map[string]string) 236 | b = new(bytes.Buffer) 237 | content = new(bytes.Buffer) 238 | 239 | loglib.Info(fmt.Sprintf("conn:%s, start receive pack %s, pack len:%d, header len:%d, header elapse:%s", inAddr, packId, packLen, headerLen, time.Now().Sub(time1))) 240 | b.Write(buf) 241 | 242 | routeInfo["ip"] = lib.GetIp() 243 | routeInfo["stage"] = "tcp recv" 244 | routeInfo["st"] = st.Format("2006-01-02 15:04:05.000") 245 | } 246 | //读包体的超时 247 | conn.SetReadDeadline(time.Now().Add(5 * time.Minute)) 248 | time3 := time.Now() 249 | //read enough bytes 250 | for currLen < packLen { 251 | requestLen, err := conn.Read(request) 252 | if requestLen == 0 || err != nil { 253 | //sender有重发机制,所以可丢弃 254 | packLen = 0 //设为0以便读取新的包 255 | 256 | ed := time.Now() 257 | loglib.Warning(fmt.Sprintf("conn:%s, not full! 
ip:%s, packid:%s, received:%d, end recv:%s, elapse:%s, body elapse:%s, error:%s", inAddr, inIp, packId, currLen, ed, ed.Sub(st), ed.Sub(time3), err.Error())) 258 | break outer //连接出错直接跳出外层循环 259 | } 260 | currLen += requestLen 261 | content.Write(request[:requestLen]) 262 | } 263 | if packLen > 0 && currLen >= packLen{ 264 | //收完马上应答 265 | _, err := conn.Write([]byte("ok")) 266 | if err != nil { 267 | loglib.Warning(fmt.Sprintf("ip:%s, packid:%s received, but response back error:%s", inIp, packId, err.Error())) 268 | }else{ 269 | loglib.Info(fmt.Sprintf("conn:%s, response to packid:%s", inAddr, packId)) 270 | } 271 | //避免收到重复包(补拉例外) 272 | appeared, ok, code := t.hasAppeared(content) 273 | if !ok || rePull { 274 | ed := time.Now() 275 | routeInfo["ed"] = ed.Format("2006-01-02 15:04:05.000") 276 | routeInfo["elapse"] = ed.Sub(st).String() 277 | b.Write(content.Bytes()) 278 | vbytes := tcp_pack.Packing(b.Bytes(), routeInfo, true) 279 | b = bytes.NewBuffer(vbytes) 280 | t.buffer <- *b 281 | packAppear := PackAppear{time.Now().Unix(), packId} 282 | t.mutex.Lock() 283 | t.footPrint[code] = packAppear //这里挂过 284 | t.mutex.Unlock() 285 | 286 | loglib.Info(fmt.Sprintf("conn:%s, finish ip:%s, packid:%s, repull:%v, received:%d, elapse:%s, body elapse:%s", inAddr, inIp, packId, rePull, currLen, ed.Sub(st), ed.Sub(time3))) 287 | }else{ 288 | loglib.Info(fmt.Sprintf("conn:%s, pack %s repeat %s already appear at %s", inAddr, packId, appeared.Id, time.Unix(appeared.Time, 0))) 289 | } 290 | packLen = 0 291 | } 292 | 293 | } 294 | loglib.Info("conn finish: " + inAddr) 295 | } 296 | 297 | 298 | //保存footprint 299 | func (t *TcpReceiver) saveFootPrint() { 300 | vbytes, err := json.Marshal(t.footPrint) 301 | if err != nil { 302 | loglib.Error("marshal footprint error:" + err.Error()) 303 | return 304 | } 305 | err = ioutil.WriteFile(t.footPrintFile, vbytes, 0664) 306 | if err == nil { 307 | loglib.Info("save footprint success !") 308 | }else{ 309 | loglib.Error("save footprint error:" + 
err.Error()) 310 | } 311 | } 312 | 313 | func (t *TcpReceiver) loadFootPrint(fname string) map[string]PackAppear { 314 | fp := make(map[string]PackAppear) 315 | if lib.FileExists( fname ) { 316 | vbytes, err := ioutil.ReadFile( fname ) 317 | if err != nil { 318 | loglib.Error("read footprint file error:" + err.Error()) 319 | }else{ 320 | err = json.Unmarshal(vbytes, &fp) 321 | if err != nil { 322 | loglib.Error("unmarshal footprint error:" + err.Error()) 323 | }else{ 324 | loglib.Info("load footprint success !") 325 | } 326 | } 327 | }else{ 328 | loglib.Warning("footprint file " + fname + " not found!") 329 | } 330 | return fp 331 | } 332 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /tailer.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "time" 7 | "os/exec" 8 | "path/filepath" 9 | "runtime" 10 | "strconv" 11 | "strings" 12 | "bufio" 13 | "logd/lib" 14 | "logd/loglib" 15 | ) 16 | 17 | var logFileKey = "log_file" //配置文件中的key名 18 | var recordFileKey = "record_file" 19 | var recordFile = "line.rec" 20 | var changeStr = "logfile changed" 21 | 22 | type Tailler struct{ 23 | logPath string //日志路径(带时间格式) 24 | nLT []int //logPath中时间格式前后的字符数 25 | currFile string //当前tail的文件 26 | fileHour time.Time //当前日志文件名上的小时 27 | hourStrFmt string 28 | lineNum int //记录已扫过的行数 29 | goFmt string //时间格式 30 | recordPath string 31 | config map[string]string 32 | //receiver的buffer size,每tail这么多行就记录,不够就不记录, 33 | //简化重启后包的id设置逻辑,这样只有最后一个包可能少于buffer size 34 | recvBufSize int 35 | wq *lib.WaitQuit 36 | } 37 | 38 | func NewTailler(config 
map[string]string) *Tailler{ 39 | val, ok := config[logFileKey] 40 | if !ok || val == "" { 41 | loglib.Error("config need log_file!") 42 | os.Exit(1) 43 | } 44 | logPath := val 45 | val, ok = config[recordFileKey] 46 | if !ok || val == "" { 47 | config[recordFileKey] = getRecordPath() 48 | } 49 | lineNum, fname := getLineRecord(config[recordFileKey]) 50 | goFmt, nLT := extractTimeFmt(logPath) 51 | if goFmt == "" { 52 | loglib.Error("log path has no time format!") 53 | os.Exit(1) 54 | } 55 | wq := lib.NewWaitQuit("tailler") 56 | bufSize, _ := strconv.Atoi(config["recv_buffer_size"]) 57 | 58 | return &Tailler{logPath: logPath, nLT:nLT, currFile: fname, hourStrFmt: "2006010215", lineNum: lineNum, goFmt: goFmt, recordPath: config[recordFileKey], config: config, recvBufSize: bufSize, wq: wq} 59 | } 60 | 61 | func getRecordPath() string { 62 | d, _ := filepath.Abs(filepath.Dir(os.Args[0])) 63 | d = filepath.Join(d, "var") 64 | if _, err := os.Stat(d); err != nil && os.IsNotExist(err) { 65 | os.MkdirAll(d, 0775) 66 | } 67 | return d + "/" + recordFile 68 | } 69 | /* 70 | * 行数为非负值表示已tail的行数 71 | * 行数为负值则都将从文件末尾开始 72 | * 自动保存的行数只可能是bufSize的倍数或者文件总行数 73 | */ 74 | func getLineRecord(path string) (line int, fname string) { 75 | fin, err := os.Open(path) 76 | if err != nil { 77 | _, f, l, _ := runtime.Caller(0) 78 | loglib.Error(fmt.Sprintf("%s:%d open line record `%s` error\n", f, l, path)) 79 | return -1, "" //从最后开始读 80 | } 81 | var txt string 82 | var lineStr = "" 83 | //只读第一行 84 | scanner := bufio.NewScanner(fin) 85 | for scanner.Scan() { 86 | txt = strings.Trim(scanner.Text(), " ") 87 | break 88 | } 89 | fin.Close() 90 | parts := strings.Split(txt, " ") 91 | if len(parts) == 2 { 92 | fname = parts[0] 93 | lineStr = parts[1] 94 | }else{ 95 | lineStr = parts[0] 96 | } 97 | line, err = strconv.Atoi(lineStr) 98 | if err != nil { 99 | loglib.Error("convert line record error:" + err.Error()) 100 | line = -1 101 | } 102 | return line, fname 103 | } 104 | 105 | func 
saveLineRecord(path string, fname string, lineNum int) { 106 | fout, err := os.Create(path) 107 | defer fout.Close() 108 | if err != nil { 109 | loglib.Error("save line record error: " + err.Error()) 110 | return 111 | } 112 | _, err = fmt.Fprintf(fout, "%s %d", fname, lineNum) 113 | if err != nil { 114 | loglib.Error("Write line record error" + err.Error()) 115 | return 116 | } 117 | loglib.Info("save line record success!") 118 | } 119 | //从带时间格式的路径中分离出时间格式,并转为go的格式 120 | //格式由<>括起 121 | func extractTimeFmt(logPath string) (goFmt string, nLT []int ) { 122 | size := len(logPath) 123 | unixFmt := "" 124 | // 格式前面的字符数 125 | nLeading := size 126 | // '<'的位置 127 | lPos := strings.Index(logPath, "<") 128 | // 格式后面的字符数 129 | nTailling := 0 130 | // '>'的位置 131 | tPos := strings.LastIndex(logPath, ">") 132 | if lPos > 0 && tPos > 0 && lPos < tPos { 133 | nLeading = lPos 134 | nTailling = size - tPos - 1 135 | unixFmt = logPath[ lPos + 1 : tPos ] //+1,-1是扔掉<> 136 | } 137 | goFmt = transFmt(unixFmt) 138 | nLT = []int{nLeading, nTailling} 139 | return 140 | 141 | } 142 | //unix的时间格式的文件名转为go时间格式的 143 | func transFmt(unixFmt string) string { 144 | if unixFmt == "" { 145 | return "" 146 | } 147 | var timeFmtMap = map[string]string{"%Y":"2006", "%m":"01", "%d":"02", "%H":"15"} 148 | fmt := unixFmt 149 | for k, v := range timeFmtMap { 150 | fmt = strings.Replace(fmt, k, v, -1) 151 | } 152 | return fmt 153 | } 154 | //从日志文件名获取时间,不依赖系统时间 155 | func (this *Tailler) getTimeFromLogName(name string) (time.Time, error) { 156 | size := len(name) 157 | timePart := "" 158 | if this.nLT[0] < size && this.nLT[1] < size { 159 | timePart = name[ this.nLT[0] : size - this.nLT[1] ] 160 | } 161 | layout := this.goFmt 162 | 163 | loc, _ := time.LoadLocation("Local") 164 | t, err := time.ParseInLocation(layout, timePart, loc) 165 | if err != nil { 166 | loglib.Error("parse " + timePart + " against " + layout + " error:" + err.Error()) 167 | } 168 | return t, err 169 | } 170 | //根据时间得到日志文件 171 | func 
(this *Tailler) getLogFileByTime(tm time.Time) string { 172 | size := len(this.logPath) 173 | prefix := this.logPath[ 0 : this.nLT[0] ] 174 | suffix := this.logPath[ size-this.nLT[1] : ] 175 | return prefix + tm.Format(this.goFmt) + suffix 176 | } 177 | 178 | func (this *Tailler) Tailling(receiveChan chan map[string]string) { 179 | if this.currFile == "" { 180 | //兼容老格式,老格式无文件路径 181 | this.currFile = this.getLogFileByTime(time.Now()) 182 | } 183 | var err error 184 | this.fileHour, err = this.getTimeFromLogName(this.currFile) 185 | if err != nil { 186 | loglib.Error("can't get time from current log file:" + this.currFile + "error:" + err.Error()) 187 | os.Exit(1) 188 | } 189 | isQuit := false 190 | for time.Since(this.fileHour).Hours() >= 1 { 191 | //说明重启时已经跟记录行号时不属于同一个小时了 192 | isQuit = this.taillingPrevious(this.currFile, this.lineNum, this.fileHour.Format(this.hourStrFmt), receiveChan) 193 | if isQuit { 194 | break 195 | } 196 | //继续下一个小时 197 | this.fileHour = this.fileHour.Add(time.Hour) 198 | this.currFile = this.getLogFileByTime(this.fileHour) 199 | this.lineNum = 0 200 | } 201 | if !isQuit { 202 | //处理当前这个小时 203 | this.taillingCurrent(receiveChan) 204 | } 205 | close(receiveChan) 206 | this.wq.AllDone() 207 | } 208 | 209 | func (this *Tailler) taillingPrevious(filePath string, lineNum int, hourStr string, receiveChan chan map[string]string) bool { 210 | var n_lines = "" 211 | if lineNum >= 0 { 212 | n_lines = fmt.Sprintf("+%d", lineNum+1) //略过已经tail过的行 213 | }else{ 214 | n_lines = "0" //从最后开始 215 | } 216 | 217 | loglib.Info("begin previous log: " + filePath + " from line: " + n_lines) 218 | var quit = false 219 | //收尾工作 220 | defer func(){ 221 | if err := recover(); err != nil { 222 | loglib.Error(fmt.Sprintf("tailler panic:%v", err)) 223 | } 224 | 225 | //如果是quit,丢弃不完整的包 226 | if quit { 227 | lineNum -= lineNum % this.recvBufSize 228 | } 229 | saveLineRecord(this.recordPath, filePath, lineNum) 230 | }() 231 | 232 | //启动时读取行号,以后都从首行开始 233 | cmd := 
exec.Command("tail", "-n", n_lines, filePath) 234 | stdout, err := cmd.StdoutPipe() 235 | 236 | if err != nil { 237 | loglib.Error("open pipe error") 238 | } 239 | 240 | //系统信号监听 241 | go lib.HandleQuitSignal(func(){ 242 | quit = true 243 | if cmd.Process != nil { 244 | cmd.Process.Kill() //关闭tail命令,不然读取循环无法终止 245 | } 246 | }) 247 | 248 | 249 | cmd.Start() 250 | rd := bufio.NewReader(stdout) 251 | for line, err := rd.ReadString('\n'); err == nil; line, err = rd.ReadString('\n'){ 252 | //fmt.Print(line) 253 | if quit { 254 | break 255 | } 256 | lineNum++ 257 | m := map[string]string{"hour":hourStr, "line":line} 258 | receiveChan <- m 259 | if lineNum % this.recvBufSize == 0 { 260 | saveLineRecord(this.recordPath, filePath, lineNum) 261 | } 262 | } 263 | if err := cmd.Wait(); err != nil { 264 | loglib.Info("wait sys tail error!" + err.Error()) 265 | } 266 | loglib.Info(fmt.Sprintf("%s tailed %d lines", filePath, lineNum)) 267 | if !quit { 268 | // 完整tail一个文件 269 | m := map[string]string{"hour":hourStr, "line": changeStr} 270 | receiveChan <- m 271 | saveLineRecord(this.recordPath, filePath, lineNum) 272 | } 273 | return quit 274 | 275 | } 276 | func (this *Tailler) taillingCurrent(receiveChan chan map[string]string) { 277 | var n_lines = "" 278 | if this.lineNum >= 0 { 279 | n_lines = fmt.Sprintf("+%d", this.lineNum+1) //略过已经tail过的行 280 | }else{ 281 | n_lines = "0" //从最后开始 282 | } 283 | 284 | var quit = false 285 | //收尾工作 286 | defer func(){ 287 | if err := recover(); err != nil { 288 | loglib.Error(fmt.Sprintf("tailler panic:%v", err)) 289 | } 290 | 291 | //如果是quit,丢弃不完整的包 292 | if quit { 293 | this.lineNum -= this.lineNum % this.recvBufSize 294 | } 295 | saveLineRecord(this.recordPath, this.currFile, this.lineNum) 296 | 297 | this.wq.AllDone() 298 | }() 299 | 300 | //启动时读取行号,以后都从首行开始 301 | cmd := exec.Command("tail", "-F", "-n", n_lines, this.currFile) 302 | n_lines = "+1" 303 | stdout, err := cmd.StdoutPipe() 304 | 305 | if err != nil { 306 | loglib.Error("open 
pipe error") 307 | } 308 | 309 | //系统信号监听 310 | go lib.HandleQuitSignal(func(){ 311 | quit = true 312 | if cmd.Process != nil { 313 | cmd.Process.Kill() //关闭tail命令,不然读取循环无法终止 314 | } 315 | }) 316 | 317 | //日志切割检测 318 | go func(){ 319 | nextHour := this.fileHour.Add(time.Hour) 320 | nextHourFile := this.getLogFileByTime(nextHour) 321 | timeToWait := 10 * time.Minute //到达下一小时后,等待日志文件的最长时间,10分钟 322 | for { 323 | if quit { 324 | break 325 | } 326 | if lib.FileExists(nextHourFile) || time.Now().Sub(nextHour) > timeToWait { 327 | currFile := this.currFile 328 | totalLines := this.GetTotalLines(currFile) 329 | loglib.Info(fmt.Sprintf("log rotated! previous file: %s, total lines: %d", currFile, totalLines)) 330 | 331 | //在kill前进行文件切换,避免kill后新的tail启动时文件名还是旧的 332 | this.fileHour = nextHour 333 | this.currFile = nextHourFile 334 | nextHour = nextHour.Add(time.Hour) 335 | nextHourFile = this.getLogFileByTime(nextHour) 336 | 337 | //发现日志切割,等待1分钟 338 | i := 0 339 | done := false 340 | for { 341 | if this.lineNum >= totalLines { 342 | done = true 343 | } 344 | if done || i > 60 { 345 | if cmd.Process != nil { 346 | cmd.Process.Kill() //关闭tail命令,不然读取循环无法终止 347 | } 348 | if done { 349 | loglib.Info("finish tail " + currFile) 350 | }else{ 351 | loglib.Info("tail " + currFile + " timeout") 352 | } 353 | break 354 | } 355 | i++ 356 | time.Sleep(time.Second) 357 | } 358 | } 359 | time.Sleep(time.Second) 360 | } 361 | 362 | }() 363 | 364 | outer: 365 | for { 366 | currFile := this.currFile //缓存当前tail的文件名 367 | hourStr := this.fileHour.Format( this.hourStrFmt ) 368 | cmd.Start() 369 | loglib.Info("begin current log: " + currFile) 370 | rd := bufio.NewReader(stdout) 371 | for line, err := rd.ReadString('\n'); err == nil; line, err = rd.ReadString('\n'){ 372 | //fmt.Print(line) 373 | if quit { 374 | break outer 375 | } 376 | this.lineNum++ 377 | m := map[string]string{"hour":hourStr, "line":line} 378 | receiveChan <- m 379 | if this.lineNum % this.recvBufSize == 0 { 380 | 
saveLineRecord(this.recordPath, currFile, this.lineNum) 381 | } 382 | } 383 | if err := cmd.Wait(); err != nil { 384 | loglib.Info("wait sys tail error!" + err.Error()) 385 | } 386 | loglib.Info(fmt.Sprintf("%s tailed %d lines", currFile, this.lineNum)) 387 | if quit { 388 | break 389 | } 390 | // 完整tail一个文件 391 | m := map[string]string{"hour":hourStr, "line": changeStr } 392 | receiveChan <- m 393 | saveLineRecord(this.recordPath, currFile, this.lineNum) 394 | //begin a new file 395 | this.lineNum = 0 396 | cmd = exec.Command("tail", "-F", "-n", n_lines, this.currFile) 397 | stdout, err = cmd.StdoutPipe() 398 | 399 | if err != nil { 400 | loglib.Error("open pipe error") 401 | break 402 | } 403 | } 404 | } 405 | 406 | func (this *Tailler) Quit() bool { 407 | return this.wq.Quit() 408 | } 409 | 410 | func (this *Tailler) GetLineNum() int { 411 | return this.lineNum 412 | } 413 | 414 | func (this *Tailler) GetTotalLines(fname string) int { 415 | cmd := exec.Command("/bin/sh", "-c", `wc -l ` + fname + ` | awk '{print $1}'`) 416 | out, err := cmd.Output() 417 | if err == nil { 418 | n, err := strconv.Atoi(strings.Trim(string(out), "\n")) 419 | if err != nil { 420 | loglib.Error("trans total lines " + string(out) + " error: " + err.Error()) 421 | } 422 | return n 423 | } 424 | return 0 425 | } 426 | -------------------------------------------------------------------------------- /mongodb_outputer.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "logd/lib" 5 | "logd/loglib" 6 | "compress/zlib" 7 | "bytes" 8 | "os" 9 | "fmt" 10 | "strconv" 11 | "strings" 12 | "encoding/binary" 13 | "sync" 14 | "io" 15 | "io/ioutil" 16 | "bufio" 17 | "encoding/json" 18 | "logd/tcp_pack" 19 | "time" 20 | "errors" 21 | "net/url" 22 | "gopkg.in/mgo.v2" 23 | "gopkg.in/mgo.v2/bson" 24 | ) 25 | 26 | type MongoDbOutputer struct { 27 | 28 | buffer chan bytes.Buffer 29 | mongosAddr string 30 | session *mgo.Session 31 | db 
string 32 | collection string 33 | isUpsert bool 34 | bulkSize int 35 | savers int 36 | file_mem_folder_name string 37 | transactionIdKey string 38 | fileList *lib.GlobalList 39 | wq *lib.WaitQuit 40 | } 41 | 42 | func MongoDbOutputerInit(buffer chan bytes.Buffer, config map[string]string) (mo MongoDbOutputer) { 43 | mo.buffer = buffer 44 | mo.wq = lib.NewWaitQuit("mongodb outputer", -1) 45 | mo.mongosAddr, _ = config["mongos"] 46 | mo.db , _ = config["db"] 47 | mo.collection , _ = config["collection"] 48 | mo.session = initMongoDbSession(mo.mongosAddr) 49 | //暂时出错直接退出 50 | if mo.session == nil { 51 | loglib.Error("init mongodb session failed") 52 | os.Exit(1) 53 | } 54 | 55 | upsert, _ := config["upsert"] 56 | if upsert == "true" { 57 | mo.isUpsert = true 58 | }else{ 59 | mo.isUpsert = false 60 | } 61 | 62 | bulkSize, _ := config["bulk_size"] 63 | nBulk, err := strconv.Atoi(bulkSize) 64 | if err == nil { 65 | mo.bulkSize = nBulk 66 | }else{ 67 | mo.bulkSize = 50 68 | } 69 | 70 | savers, _ := config["savers"] 71 | nSavers, err := strconv.Atoi(savers) 72 | if err == nil { 73 | mo.savers = nSavers 74 | }else{ 75 | mo.savers = 20 76 | } 77 | 78 | //创建文件缓存目录 79 | mo.file_mem_folder_name = "tempfile" 80 | if !lib.FileExists(mo.file_mem_folder_name) { 81 | os.MkdirAll(mo.file_mem_folder_name, 0775) 82 | } 83 | 84 | mo.transactionIdKey = "transaction_id" 85 | mo.fileList = lib.GlobalListInit() 86 | return mo 87 | } 88 | 89 | func initMongoDbSession(mongosAddr string) *mgo.Session { 90 | session, err := mgo.Dial(mongosAddr) 91 | if err != nil { 92 | loglib.Error(fmt.Sprintf("init mongodb session error:%v", err)) 93 | return nil 94 | } 95 | 96 | session.SetMode(mgo.Monotonic, true) //设置read preference 97 | session.SetSafe(&mgo.Safe{W:2}) //设置write concern 98 | return session 99 | } 100 | //用于检验session可用性并适时重连的routine func 101 | //用于重连main session 102 | func (this *MongoDbOutputer) reConnMongoDb() { 103 | nPingFail := 0 //ping失败次数 104 | reDial := false 105 | for { 106 | 
reDial = false 107 | if this.session == nil { 108 | //session未初始化 109 | reDial = true 110 | }else if(this.session.Ping() != nil) { 111 | //session连接丢失 112 | nPingFail++ 113 | if nPingFail == 3 { 114 | reDial = true 115 | } 116 | } 117 | 118 | if reDial { 119 | nPingFail = 0 120 | this.session = initMongoDbSession(this.mongosAddr) 121 | if this.session == nil { 122 | loglib.Info("session re-dial failed!") 123 | }else{ 124 | loglib.Info("session re-dial success!") 125 | } 126 | } 127 | time.Sleep(time.Second) 128 | } 129 | } 130 | //用于routine重新clone session, main session未重连,则继续用旧的session 131 | func (this *MongoDbOutputer) reCloneRoutineSession(psession **mgo.Session) { 132 | if this.session != nil { 133 | //re-clone a session 134 | *psession = this.session.Clone() 135 | } 136 | } 137 | 138 | func (this *MongoDbOutputer) Start() { 139 | wg := &sync.WaitGroup{} 140 | defer func(){ 141 | if err := recover(); err != nil { 142 | loglib.Error(fmt.Sprintf("mongodb outputer panic:%v", err)) 143 | } 144 | if this.session != nil { 145 | this.session.Close() 146 | } 147 | this.wq.AllDone() 148 | }() 149 | 150 | this.reloadFileCache() 151 | 152 | go this.reConnMongoDb() 153 | 154 | wg.Add(this.savers) 155 | 156 | for i:=0; i 0 { 324 | arr = append(arr, m) 325 | cnt++ 326 | if cnt >= this.bulkSize { 327 | err := this.bulkSaveBson(coll, arr...) 328 | if err != nil { 329 | this.cacheData(arr, "bulk", date, routineId) 330 | nCached += cnt 331 | //ping fail, re-connect, clone main session 332 | if (*psession).Ping() != nil { 333 | //refresh go-routine's session if possible 334 | this.reCloneRoutineSession(psession) 335 | if (*psession).Ping() == nil { 336 | loglib.Info(fmt.Sprintf("parse routine %d re-conn", routineId)) 337 | } 338 | } 339 | }else{ 340 | nInserted += cnt 341 | } 342 | arr = make([]interface{}, 0) 343 | cnt = 0 344 | } 345 | }else{ 346 | nDiscard++ 347 | } 348 | 349 | } 350 | cnt = len(arr) 351 | if cnt > 0 { 352 | err := this.bulkSaveBson(coll, arr...) 
353 | if err != nil { 354 | this.cacheData(arr, "bulk", date, routineId) 355 | nCached += cnt 356 | //ping fail, re-connect, clone main session 357 | if (*psession).Ping() != nil { 358 | //refresh go-routine's session if possible 359 | this.reCloneRoutineSession(psession) 360 | if (*psession).Ping() == nil { 361 | loglib.Info(fmt.Sprintf("parse routine %d re-conn", routineId)) 362 | } 363 | } 364 | }else{ 365 | nInserted += cnt 366 | } 367 | } 368 | 369 | loglib.Info(fmt.Sprintf("save pack %s: inserted:%d, cached:%d, discard %d items", packId, nInserted, nCached, nDiscard)) 370 | } 371 | func (this *MongoDbOutputer) bulkSaveBson(coll *mgo.Collection, docs ...interface{}) (err error) { 372 | if coll != nil { 373 | err = coll.Insert(docs...) 374 | if err != nil { 375 | tmp := make([]string, 0) 376 | for _, doc := range docs { 377 | m, _ := doc.(bson.M) 378 | tid, _ := m[this.transactionIdKey].(string) 379 | tmp = append(tmp, tid) 380 | } 381 | tids := strings.Join(tmp, ",") 382 | loglib.Error(fmt.Sprintf("save %d bsons [%s] error:%v", len(docs), tids, err)) 383 | } 384 | }else{ 385 | err = errors.New("bulk: collection is nil") 386 | loglib.Error(fmt.Sprintf("save bsons error:%v", err)) 387 | 388 | } 389 | return 390 | } 391 | //更新插入,按字段更新 392 | func (this *MongoDbOutputer) upsert(psession **mgo.Session, r io.Reader, packId string, date string, routineId int) { 393 | nDiscard := 0 394 | nCached := 0 395 | nUpdated := 0 396 | nInserted := 0 397 | var coll *mgo.Collection = nil 398 | 399 | if *psession != nil { 400 | coll = (*psession).DB(this.db + date).C(this.collection) //按天分库 401 | } 402 | scanner := bufio.NewScanner(r) 403 | for scanner.Scan() { 404 | line := scanner.Text() 405 | m := this.parseLogLine(line) 406 | if len(m) > 0 { 407 | selector := bson.M{ this.transactionIdKey: m[ this.transactionIdKey ] } 408 | up := bson.M{"$set" : m} 409 | info, err := this.upsertBson(coll, selector, up) 410 | if err != nil { 411 | this.cacheData(m, "upsert", date, routineId) 
412 | nCached++ 413 | //ping fail, re-connect, clone main session 414 | if (*psession).Ping() != nil { 415 | //refresh go-routine's session if possible 416 | this.reCloneRoutineSession(psession) 417 | if (*psession).Ping() == nil { 418 | loglib.Info(fmt.Sprintf("parse routine %d re-conn", routineId)) 419 | } 420 | } 421 | }else{ 422 | nInserted++ 423 | nUpdated += info.Updated 424 | } 425 | }else{ 426 | nDiscard++ 427 | } 428 | } 429 | 430 | loglib.Info(fmt.Sprintf("save pack %s: inserted:%d, updated:%d, cached:%d, discard %d items", packId, nInserted, nUpdated, nCached, nDiscard)) 431 | } 432 | func (this *MongoDbOutputer) upsertBson(coll *mgo.Collection, selector interface{}, doc interface{}) (info *mgo.ChangeInfo, err error) { 433 | m, _ := selector.(bson.M) 434 | tid, _ := m[this.transactionIdKey].(string) 435 | 436 | if coll != nil { 437 | info, err = coll.Upsert(selector, doc) 438 | 439 | if err != nil { 440 | loglib.Error(fmt.Sprintf("save bson [%s] error:%v", tid, err)) 441 | }else{ 442 | if info.Updated > 0 { 443 | loglib.Info(fmt.Sprintf("bson [%s] updated", tid)) 444 | } 445 | } 446 | }else{ 447 | info = &mgo.ChangeInfo{} 448 | err = errors.New("upsert: collection is nil") 449 | loglib.Error(fmt.Sprintf("save bson [%s] error:%v", tid, err)) 450 | } 451 | return 452 | } 453 | //缓存写入mongodb失败的数据 454 | //typeStr为bulk或upsert 455 | func (this *MongoDbOutputer) cacheData(data interface{}, typeStr string, date string, routineId int) { 456 | mp := bson.M{"type": typeStr, "date": date, "data": data} 457 | saveTry := 3 458 | b, err := json.Marshal(mp) 459 | arr, ok := data.([]bson.M) 460 | cnt := 1 461 | if ok { 462 | cnt = len(arr) 463 | } 464 | if err != nil { 465 | loglib.Error(fmt.Sprintf("cache data error when marshal, discard %d item(s), error:%v", cnt, err)) 466 | return 467 | } 468 | fname := this.createFileName(routineId) 469 | for i:=0; i 0 && p1 < slen-1 { 493 | p := strings.Index(line[p1+1:], " ") 494 | if p > 0 { 495 | p2 = p + p1 + 1 
//注意!p只是slice中的index,不是line中的 496 | } 497 | }else{ 498 | p1 = 0 499 | } 500 | ipInLong := lib.IpToUint32(line[p1+1 : p2]) 501 | // host第一段 502 | p1 = strings.Index(line, ".") 503 | hostPrefix := line[:p1] + "_" 504 | //截取时间 505 | p1 = strings.Index(line, "[") 506 | p2 = strings.Index(line, "]") 507 | hourStr := line[p1+1 : p2] 508 | var timestamp int64 = 0 509 | var day int = 0 510 | var hour int = -1 511 | tm, err := time.ParseInLocation("02/Jan/2006:15:04:05 -0700", hourStr, time.Local) 512 | if err != nil { 513 | loglib.Warning("parse time error" + err.Error()) 514 | }else{ 515 | timestamp = tm.Unix() 516 | dayStr := tm.Format("20060102") 517 | day, err = strconv.Atoi(dayStr) 518 | if err != nil { 519 | loglib.Error(fmt.Sprintf("conv %s to int error: %v", dayStr, err)) 520 | } 521 | hour = tm.Hour() 522 | } 523 | //截取请求url 524 | urlStr := "" 525 | p3 := strings.Index(line, "\"") 526 | p4 := strings.Index(line[p3+1: ], "\"") + p3 + 1 527 | reqStr := line[p3+1 : p4] 528 | parts := strings.Split(reqStr, " ") 529 | 530 | m = make(bson.M) 531 | if len(parts) == 3 { 532 | urlStr = parts[1] 533 | u, err := url.Parse(urlStr) 534 | if err == nil { 535 | q := u.Query() 536 | tid := q.Get( this.transactionIdKey ) //检验有无transaction id 537 | if tid != "" { 538 | //参数对放入bson 539 | for k, _ := range q { 540 | newK := k 541 | if k != this.transactionIdKey { 542 | newK = hostPrefix + k 543 | } 544 | m[newK] = q.Get(k) 545 | } 546 | m[hostPrefix + "ipinlong"] = ipInLong 547 | m[hostPrefix + "time"] = timestamp 548 | m[hostPrefix + "day"] = day 549 | m[hostPrefix + "hour"] = hour 550 | } 551 | } 552 | } 553 | return 554 | } 555 | 556 | func (this *MongoDbOutputer) createFileName(id int) string { 557 | t := time.Now() 558 | filename := fmt.Sprintf("%s/writeFailedTempFile_%d_%d", this.file_mem_folder_name, id, t.UnixNano()) 559 | return filename 560 | } 561 | 562 | --------------------------------------------------------------------------------