// Helpers, etcd key prefixes and error numbers shared by the master and the
// worker (src/helper/common: common.go, const.go, errinfo.go).

// rand_gen is the shared pseudo-random source, seeded once at start-up.
// A *rand.Rand is not safe for concurrent use, so every access must hold lk.
var rand_gen = rand.New(rand.NewSource(time.Now().UnixNano()))

// lk serialises access to rand_gen.
var lk sync.Mutex

// GetLocalIP returns the first non-loopback IPv4 address reported by
// net.InterfaceAddrs. It returns an error when the address list cannot be
// read or when no such address exists.
func GetLocalIP() (ipv4 string, err error) {
	addrs, err := net.InterfaceAddrs()
	if err != nil {
		return "", err
	}
	for _, addr := range addrs {
		ipNet, isIPNet := addr.(*net.IPNet)
		if !isIPNet || ipNet.IP.IsLoopback() {
			continue
		}
		// To4 is nil for IPv6 addresses; those are skipped.
		if ipNet.IP.To4() != nil {
			return ipNet.IP.String(), nil
		}
	}
	return "", errors.New("没有IP网卡")
}

// ExtractWorkerIP strips the worker registration prefix from an etcd key,
// e.g. "/cron/workers/192.168.2.1" -> "192.168.2.1".
func ExtractWorkerIP(regKey string) string {
	return strings.TrimPrefix(regKey, JOB_RGEISTER_PREFIX)
}

// CallerName returns the unqualified name of the function that called it
// (the text after the last '.' of the runtime function name). It returns ""
// when the caller cannot be determined.
func CallerName() string {
	pc, _, _, ok := runtime.Caller(1)
	if !ok {
		return ""
	}
	fn := runtime.FuncForPC(pc)
	if fn == nil {
		return ""
	}
	name := fn.Name()
	// LastIndex yields -1 when there is no dot, which keeps the whole name.
	return name[strings.LastIndex(name, ".")+1:]
}

// CallerLocation returns "[file:line]qualified.FuncName" for the function
// that called it, or "" when the caller cannot be determined. CallerName used
// to build this string and then throw it away; it is exposed here instead.
func CallerLocation() string {
	pc, file, line, ok := runtime.Caller(1)
	if !ok {
		return ""
	}
	name := ""
	if fn := runtime.FuncForPC(pc); fn != nil {
		name = fn.Name()
	}
	return "[" + path.Base(file) + ":" + strconv.Itoa(line) + "]" + name
}

// RandInt returns a non-negative pseudo-random int from the shared source.
// It is safe for concurrent use.
func RandInt() int {
	lk.Lock()
	defer lk.Unlock()
	return rand_gen.Int()
}

// RandIntn returns a pseudo-random int in [0, max) from the shared source.
// It is safe for concurrent use. Like rand.Intn it panics when max <= 0; the
// deferred unlock keeps such a panic from leaving the mutex locked.
func RandIntn(max int) int {
	lk.Lock()
	defer lk.Unlock()
	return rand_gen.Intn(max)
}

// NowInS returns the current Unix time in seconds.
func NowInS() int64 {
	return time.Now().Unix()
}

// NowInNs returns the current Unix time in nanoseconds.
func NowInNs() int64 {
	return time.Now().UnixNano()
}

// NowInMs returns the current Unix time in milliseconds.
func NowInMs() int64 {
	return time.Now().UnixNano() / int64(time.Millisecond)
}

// Abs returns the absolute value of x. The most negative int32 has no
// positive counterpart, so it is returned unchanged (two's-complement wrap).
func Abs(x int32) int32 {
	if x < 0 {
		return -x
	}
	return x
}

// Distance returns the straight-line (Euclidean) distance between the two
// coordinate pairs, multiplied by 100000 and truncated to int32.
func Distance(flat float64, flng float64,
	tlat float64, tlng float64) (r int32) {
	dLat, dLng := flat-tlat, flng-tlng
	return int32(math.Sqrt(dLat*dLat+dLng*dLng) * 100000)
}

// String converts data to a string, or returns "" when err is non-nil. It is
// shaped to wrap a call that returns ([]byte, error).
func String(data []byte, err error) string {
	if err != nil {
		return ""
	}
	return string(data)
}

// InArray reports whether e occurs in l.
func InArray(l []string, e string) bool {
	for _, v := range l {
		if v == e {
			return true
		}
	}
	return false
}

// InIntArray reports whether e occurs in l.
func InIntArray(l []int, e int) bool {
	for _, v := range l {
		if v == e {
			return true
		}
	}
	return false
}

// InInt32Array reports whether e occurs in l.
func InInt32Array(l []int32, e int32) bool {
	for _, v := range l {
		if v == e {
			return true
		}
	}
	return false
}

// ArrInIntArray reports whether arr1 and arr2 share at least one element.
func ArrInIntArray(arr1 []int, arr2 []int) bool {
	for _, v1 := range arr1 {
		if InIntArray(arr2, v1) {
			return true
		}
	}
	return false
}

// IsDaytime reports whether the comma-separated flag list daytime has "1" at
// index hour. An empty list, a negative hour, or an hour beyond the end of
// the list yields false.
func IsDaytime(hour int, daytime string) bool {
	if hour < 0 || len(daytime) == 0 {
		return false
	}
	flags := strings.Split(daytime, ",")
	if hour >= len(flags) {
		return false
	}
	return flags[hour] == "1"
}

// etcd key prefixes, HTTP form field names, queue sizes and job event types.
// The identifiers (including the JOB_RGEISTER_PREFIX spelling) are referenced
// from other packages and are therefore kept as they are.
const (
	JOB_KEY_PREFIX      = "/cron/jobs/"
	JOB_KILL_PREFIX     = "/cron/kill/"
	JOB_LOCK_PREFIX     = "/cron/lock/"
	JOB_RGEISTER_PREFIX = "/cron/workers/"
	JOB                 = "job"
	JOB_NAME            = "name"
	JOB_SKIP            = "skip"
	JOB_LIMIT           = "limit"
	MAX_NUM_JOB_QUEUE   = 1000
	MAX_NUM_LOG_QUEUE   = 1000
	JOB_EVENT_SAVE      = 0
	JOB_EVENT_DELETE    = 1
	JOB_EVENT_KILL      = 2
)

// Error numbers written to logs and HTTP responses.
const (
	INIT_LOG_FAILED     = 100000
	INIT_SERVCIE_FAILED = 100001

	ERRNO_KILL_JOB_FAILED    = 200000
	ERRNO_GET_LOCAL_IP_ERROR = 200001

	ERRNO_CRON_PARSE_FAILD = 300000

	ERRNO_PANIC                 = 500000
	ERRNO_JSON_MARSHAL_FAILED   = 500001
	ERRNO_JSON_UNMARSHAL_FAILED = 500002
	ERRNO_PARSEPOST_FAILED      = 500003

	ERRNO_ETCD_PUT_FAILED         = 800000
	ERRNO_ETCD_DELETE_FAILED      = 800001
	ERRNO_ETCD_GET_FAILED         = 800002
	ERRNO_ETCD_GRANT_LEASE_FAILED = 800003
	ERRNO_LOG_SAVE_FAILED         = 800004
	ERRNO_LOG_GET_FAILED          = 800005

	ERRNO_HTTP_RESPONSE_JSON_FAILED = 900000
)
/src/helper/common/job.go: -------------------------------------------------------------------------------- 1 | package common 2 | 3 | import ( 4 | "encoding/json" 5 | "strings" 6 | ) 7 | 8 | type Job struct { 9 | Name string `json:"name"` //任务名 10 | Command string `json:"command"` //shell命令 11 | CronExpr string `json:"cronExpr"` //cron表达式 12 | } 13 | 14 | func UnpackJob(val []byte) (ret *Job, err error) { 15 | ret = &Job{} 16 | err = json.Unmarshal(val,ret) 17 | return 18 | } 19 | 20 | func ExtractJobName(jobKey string) string { 21 | return strings.TrimPrefix(jobKey,JOB_KEY_PREFIX) 22 | } 23 | 24 | func ExtractKillJobName(jobKey string) string { 25 | return strings.TrimPrefix(jobKey,JOB_KILL_PREFIX) 26 | } 27 | 28 | type JobEvent struct { 29 | Type int 30 | Job *Job 31 | } 32 | 33 | func BuildEvent(evenType int, job *Job) *JobEvent { 34 | return &JobEvent{ 35 | Type:evenType, 36 | Job:job, 37 | } 38 | } -------------------------------------------------------------------------------- /src/helper/common/log.go: -------------------------------------------------------------------------------- 1 | package common 2 | 3 | 4 | type CronLog struct{ 5 | Name string `bson:"name" json:"name"`//任务名 6 | Command string `bson:"command" json:"command"`//命令 7 | OutPut string `bson:"output" json:"output"`//输出结果 8 | Err string `bson:"err" json:"err"`//错误 9 | StartTime int64 `bson:"startTime" json:"startTime"` //任务开始时间 10 | EndTime int64 `bson:"endTime" json:"endTime"` //任务结束时间 11 | ScheduleTime int64 `bson:"scheduleTime" json:"scheduleTime"` //计划调度时间 12 | } 13 | 14 | type LogBach struct { 15 | Logs []interface{} 16 | } 17 | 18 | type LogFilter struct { 19 | Name string `bson:"name"` 20 | } 21 | 22 | type SortLogByStartTime struct { 23 | SortOrder int `bson:"startTime"` //按照startTime倒序排列 24 | } -------------------------------------------------------------------------------- /src/helper/logid/logid.go: -------------------------------------------------------------------------------- 1 | 
// Master configuration (src/master/config/config.go).

// Default file locations, relative to the process working directory.
var (
	LogFile     = "./src/master/config/log.json"
	ConfFile    = "./src/master/config/master.conf"
	WebrootPath = "./src/master/webroot"
)

type (
	// HttpConfig holds the HTTP listener settings.
	HttpConfig struct {
		Port string
	}

	// JobMgrConfig holds the etcd connection settings.
	JobMgrConfig struct {
		Endpoints []string
		TimeOut   int // NOTE(review): presumably milliseconds — confirm against the code that reads it
	}

	// LogMgrConfig holds the MongoDB connection and log batching settings.
	LogMgrConfig struct {
		MongodbUri           string
		TimeOut              int // NOTE(review): presumably milliseconds — confirm against the code that reads it
		Database, Collection string
		MaxBatchSize         int
		LogTimeOut           int // NOTE(review): unit is not visible here — confirm against the code that reads it
	}

	// MasterCfg is the root of the master configuration file.
	MasterCfg struct {
		Http   HttpConfig
		JobMgr JobMgrConfig
		LogMgr LogMgrConfig
	}
)

// Cfg is the process-wide configuration; it starts out as the zero value.
var Cfg MasterCfg
2 | port = "8970" 3 | [jobmgr] 4 | endpoints = ["127.0.0.1:2379"] 5 | timeout = 500 6 | [logmgr] 7 | mongodburi = "mongodb://127.0.0.1:27017" 8 | timeout = 500 9 | database = "distribute_cron" 10 | collection = "log" 11 | maxbatchsize = 10 12 | logtimeout = 10 -------------------------------------------------------------------------------- /src/master/jobmgr/jobmgr.go: -------------------------------------------------------------------------------- 1 | package jobmgr 2 | 3 | import ( 4 | "go.etcd.io/etcd/clientv3" 5 | "time" 6 | "master/config" 7 | "helper/common" 8 | logger "github.com/shengkehua/xlog4go" 9 | "encoding/json" 10 | "context" 11 | ) 12 | 13 | var ( 14 | G_JobMgr *JobMgr 15 | ) 16 | 17 | type JobMgr struct { 18 | Client *clientv3.Client 19 | } 20 | 21 | //初始化管理器 22 | func Init() error{ 23 | config := clientv3.Config{ 24 | Endpoints:config.Cfg.JobMgr.Endpoints, 25 | DialTimeout: time.Duration(config.Cfg.JobMgr.TimeOut) * time.Millisecond, 26 | } 27 | 28 | if client,err := clientv3.New(config);err !=nil { 29 | return err 30 | }else { 31 | G_JobMgr = &JobMgr{ 32 | Client:client, 33 | } 34 | } 35 | return nil 36 | } 37 | 38 | //保存job到etcd 39 | func (j *JobMgr) SaveJob(job *common.Job) (oldJob *common.Job, err error) { 40 | var ( 41 | jobKey string 42 | jobVal []byte 43 | putResp *clientv3.PutResponse 44 | ) 45 | //确定保存的key和val 46 | jobKey = common.JOB_KEY_PREFIX + job.Name 47 | if jobVal,err = json.Marshal(job);err !=nil { 48 | logger.Error("Parse Error errno:%d, err:%s",common.ERRNO_JSON_MARSHAL_FAILED,err.Error()) 49 | return 50 | } 51 | 52 | //存入etcd 53 | if putResp,err = j.Client.Put(context.TODO(),jobKey,string(jobVal),clientv3.WithPrevKV());err !=nil { 54 | logger.Error("etcd put Error errno:%d, err:%s",common.ERRNO_ETCD_PUT_FAILED,err.Error()) 55 | return 56 | } 57 | //如果是更新则返回旧值,否则为空 58 | if putResp.PrevKv != nil { 59 | if oldJob,err = common.UnpackJob(putResp.PrevKv.Value);err !=nil { 60 | logger.Error("UnpackJob Error errno:%d, 
err:%s",common.ERRNO_JSON_UNMARSHAL_FAILED,err.Error()) 61 | //旧值解析错误打日志,不报错 62 | err = nil 63 | } 64 | } 65 | return 66 | } 67 | 68 | //删除etcd里的job 69 | func (j *JobMgr) DeleteJob(name string) (oldJob *common.Job, err error) { 70 | var ( 71 | jobKey string 72 | delResp *clientv3.DeleteResponse 73 | ) 74 | jobKey = common.JOB_KEY_PREFIX + name 75 | 76 | //etcd删除key 77 | if delResp, err = j.Client.Delete(context.TODO(),jobKey,clientv3.WithPrevKV()); err != nil { 78 | logger.Error("Delete Error errno:%d, err:%s",common.ERRNO_ETCD_DELETE_FAILED,err.Error()) 79 | return 80 | } 81 | 82 | //如果是删除一个不存在的key也没有影响,删除存在的可以就返回被删除的信息 83 | if len(delResp.PrevKvs) != 0 { 84 | if oldJob, err =common.UnpackJob(delResp.PrevKvs[0].Value);err !=nil { 85 | logger.Error("UnpackJob Error errno:%d, err:%s",common.ERRNO_JSON_UNMARSHAL_FAILED,err.Error()) 86 | //旧值解析错误打日志,不报错 87 | err = nil 88 | } 89 | } 90 | return 91 | } 92 | 93 | //列出所有的任务 94 | func (j *JobMgr) ListJob() (jobList []*common.Job,err error) { 95 | var ( 96 | getResp *clientv3.GetResponse 97 | ) 98 | if getResp, err = j.Client.Get(context.TODO(),common.JOB_KEY_PREFIX,clientv3.WithPrefix());err != nil { 99 | logger.Error("Get Error errno:%d, err:%s",common.ERRNO_ETCD_GET_FAILED,err.Error()) 100 | return 101 | } 102 | 103 | //遍历所有的返回任务 104 | jobList = make([]*common.Job,0) 105 | for _,v := range getResp.Kvs { 106 | job := &common.Job{} 107 | if job, err = common.UnpackJob(v.Value);err !=nil { 108 | //解析失败打个日志继续 109 | logger.Error("UnpackJob Error errno:%d, err:%s",common.ERRNO_JSON_UNMARSHAL_FAILED,err.Error()) 110 | err = nil 111 | continue 112 | } 113 | jobList = append(jobList,job) 114 | } 115 | return 116 | } 117 | 118 | //强制杀死任务 119 | func (j *JobMgr) KillJob(name string) (err error) { 120 | //杀死任务就是向etcd写入杀死的key,这样worker会监听到然后执行杀死操作 121 | //TODO 这样做可能会出现worker执行杀死任务失败,然后写入的key已经过期的情况(出错或者宕机)。 122 | var ( 123 | jobKey string 124 | lease *clientv3.LeaseGrantResponse 125 | ) 126 | //确定保存的key和val 127 | jobKey = 
common.JOB_KILL_PREFIX + name 128 | //存入etcd,租约1s(不续租) 129 | if lease, err = j.Client.Grant(context.TODO(),1);err != nil { 130 | logger.Error("Grant Lease Error errno:%d, err :%s", common.ERRNO_ETCD_GRANT_LEASE_FAILED,err.Error()) 131 | return 132 | } 133 | if _,err = j.Client.Put(context.TODO(),jobKey,"",clientv3.WithLease(lease.ID));err !=nil { 134 | logger.Error("etcd put Error errno:%d, err:%s",common.ERRNO_ETCD_PUT_FAILED,err.Error()) 135 | return 136 | } 137 | 138 | return 139 | } -------------------------------------------------------------------------------- /src/master/logmgr/logmgr.go: -------------------------------------------------------------------------------- 1 | package logmgr 2 | 3 | import ( 4 | "go.mongodb.org/mongo-driver/mongo" 5 | "time" 6 | "go.mongodb.org/mongo-driver/mongo/options" 7 | "context" 8 | "master/config" 9 | "helper/common" 10 | ) 11 | 12 | //log管理器 13 | type LogMgr struct { 14 | Client *mongo.Client //连接mongodb的客户端 15 | Collection *mongo.Collection //mongodb的表 16 | } 17 | 18 | var ( 19 | G_LogMgr *LogMgr 20 | ) 21 | 22 | func InitLogMgr() (err error) { 23 | //连接mogodb 24 | var ( 25 | client *mongo.Client 26 | ) 27 | con := context.TODO() 28 | //1、建立连接 29 | opt := options.Client() 30 | opt.SetConnectTimeout(time.Duration(config.Cfg.LogMgr.TimeOut)*time.Millisecond).ApplyURI(config.Cfg.LogMgr.MongodbUri) 31 | if client,err = mongo.Connect(con,opt); err != nil { 32 | return 33 | } 34 | 35 | G_LogMgr = &LogMgr{ 36 | Client:client, 37 | Collection:client.Database(config.Cfg.LogMgr.Database).Collection(config.Cfg.LogMgr.Collection), 38 | } 39 | 40 | return 41 | } 42 | 43 | func (l *LogMgr) ListLog(name string,skip int,limit int) (logArr []*common.CronLog, err error){ 44 | var( 45 | cursor *mongo.Cursor 46 | con context.Context 47 | ) 48 | 49 | //初始化返回 50 | logArr = make([]*common.CronLog,0) 51 | 52 | filter := &common.LogFilter{Name:name} 53 | logsort := &common.SortLogByStartTime{SortOrder:-1} 54 | skip64 := int64(skip) 55 | limit64 
:= int64(limit) 56 | findOpt := &options.FindOptions{ 57 | Limit:&limit64, 58 | Skip:&skip64, 59 | Sort:logsort, 60 | } 61 | if cursor, err = l.Collection.Find(con,filter,findOpt);err != nil { 62 | return 63 | } 64 | defer cursor.Close(con) 65 | for cursor.Next(con) { 66 | jobLog := &common.CronLog{} 67 | 68 | //反序列化bson 69 | if err = cursor.Decode(jobLog);err != nil { 70 | //日志不合法跳过本条 71 | continue 72 | } 73 | logArr = append(logArr,jobLog) 74 | } 75 | return 76 | } 77 | 78 | -------------------------------------------------------------------------------- /src/master/main/init.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "helper/common" 5 | "fmt" 6 | "helper/logid" 7 | logger "github.com/shengkehua/xlog4go" 8 | "server/httpserver" 9 | "runtime" 10 | "master/config" 11 | "github.com/BurntSushi/toml" 12 | "master/jobmgr" 13 | "master/logmgr" 14 | "master/workermgr" 15 | ) 16 | 17 | func initEvn() { 18 | runtime.GOMAXPROCS(runtime.NumCPU()) 19 | } 20 | 21 | func initLog() error { 22 | if err := logger.SetupLogWithConf(config.LogFile); err != nil { 23 | fmt.Println("log init fail: %s", err.Error()) 24 | return err 25 | } 26 | 27 | logid.LogId = common.NowInNs() 28 | 29 | logger.Info("init logger success.") 30 | return nil 31 | } 32 | 33 | func initHttpServer() error { 34 | logger.Info("init http") 35 | HttpInstance := httpserver.GetHttpInstance() 36 | if err := HttpInstance.Init(config.Cfg.Http.Port); err != nil { 37 | logger.Warn("init_http_server_failed") 38 | return err 39 | } 40 | 41 | for uri, handler := range httpserver.Uri2Handler { 42 | HttpInstance.AddHandler(uri, handler) 43 | } 44 | 45 | if err := HttpInstance.Start(); err != nil { 46 | logger.Fatal("start_http_server_failed") 47 | return err 48 | } 49 | 50 | logger.Info("init httpserver success.") 51 | return nil 52 | } 53 | 54 | func initConf() error { 55 | _, err := toml.DecodeFile(config.ConfFile, &config.Cfg) 56 | if err != 
nil { 57 | fmt.Println("failed to parse conf:%s", err.Error()) 58 | return err 59 | } 60 | logger.Info("config: %v", config.Cfg) 61 | logger.Info("init cfg success.") 62 | return nil 63 | } 64 | 65 | func initJobMgr() error { 66 | return jobmgr.Init() 67 | } 68 | 69 | func initLogMgr() error { 70 | return logmgr.InitLogMgr() 71 | } 72 | 73 | func initWorkerMgr() error { 74 | return workermgr.InitWorkerMgr() 75 | } -------------------------------------------------------------------------------- /src/master/main/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "helper/common" 6 | logger "github.com/shengkehua/xlog4go" 7 | "runtime/debug" 8 | ) 9 | 10 | //退出信号 11 | var G_QuitChan = make(chan int) 12 | 13 | func main(){ 14 | //初始化线程 15 | initEvn() 16 | 17 | //初始化log 18 | if err := initLog(); err != nil { 19 | fmt.Errorf("init_log_fail errno:%d errmsg:%s\n", common.INIT_LOG_FAILED, err.Error()) 20 | return 21 | } 22 | defer logger.Close() 23 | 24 | //设置recover 25 | defer func() { 26 | if err := recover(); err != nil { 27 | logger.Error("abort, unknown error, errno:%d,errmsg:%v, stack:%s", 28 | common.ERRNO_PANIC, err, string(debug.Stack())) 29 | } 30 | }() 31 | 32 | //初始化config 33 | if err := initConf(); err != nil { 34 | logger.Warn("init_conf_fail errno:%d errmsg:%s\n", common.INIT_SERVCIE_FAILED, err.Error()) 35 | return 36 | } 37 | 38 | //初始化jobmgr 39 | if err := initJobMgr();err !=nil { 40 | logger.Warn("init_conf_jobmgr errno:%d errmsg:%s\n",common.INIT_SERVCIE_FAILED,err.Error()) 41 | return 42 | } 43 | 44 | //初始化日志收集器 45 | if err := initLogMgr();err != nil { 46 | logger.Warn("init_logmgr errno:%d errmsg:%s\n",common.INIT_SERVCIE_FAILED,err.Error()) 47 | return 48 | } 49 | 50 | //初始化woker健康街节点监听 51 | if err := initWorkerMgr();err != nil { 52 | logger.Warn("init_logmgr errno:%d errmsg:%s\n",common.INIT_SERVCIE_FAILED,err.Error()) 53 | return 54 | } 55 | 56 | //启动master的http监听 
57 | if err := initHttpServer(); err != nil { 58 | logger.Warn("init_http_server errno:%d err=%s\n", common.INIT_SERVCIE_FAILED, err.Error()) 59 | return 60 | } 61 | 62 | logger.Info("all_init_ok") 63 | fmt.Println("start_ok") 64 | 65 | //监听中断信号 66 | go signal_proc() 67 | 68 | value := <-G_QuitChan 69 | 70 | logger.Info("msg:diversion_api_quit chan_recv_val:%d", value) 71 | return 72 | 73 | } 74 | -------------------------------------------------------------------------------- /src/master/main/signal.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "os" 5 | "os/signal" 6 | "syscall" 7 | logger "github.com/shengkehua/xlog4go" 8 | "server/httpserver" 9 | "time" 10 | ) 11 | 12 | func signal_proc() { 13 | c := make(chan os.Signal, 1) 14 | 15 | signal.Notify(c, syscall.SIGINT, syscall.SIGALRM, syscall.SIGTERM, syscall.SIGUSR1) 16 | 17 | // Block until a signal is received. 18 | sig := <-c 19 | 20 | logger.Warn("Signal received: %v", sig) 21 | 22 | httpserver.HttpListener.Close() 23 | for _, handler := range httpserver.Uri2Handler { 24 | handler.Close() 25 | } 26 | 27 | time.Sleep(500 * time.Millisecond) 28 | 29 | logger.Warn("send quit signal") 30 | G_QuitChan <- 1 31 | } -------------------------------------------------------------------------------- /src/master/webroot/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Golang分布式Crontab 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 |
15 | 16 |
17 |
18 | 21 |
22 |
23 | 24 | 25 |
26 |
27 | 28 | 29 |
30 |
31 | 32 | 33 |
34 |
35 |
36 |
37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 |
任务名称shell命令cron表达式任务操作
49 |
50 |
51 |
52 |
53 |
54 | 55 | 56 | 86 | 87 | 88 | 119 | 120 | 121 | 146 | 147 | 322 | 323 | 324 | 325 | -------------------------------------------------------------------------------- /src/master/workermgr/workermgr.go: -------------------------------------------------------------------------------- 1 | package workermgr 2 | 3 | import ( 4 | "go.etcd.io/etcd/clientv3" 5 | "time" 6 | "context" 7 | "go.etcd.io/etcd/mvcc/mvccpb" 8 | "helper/common" 9 | "master/config" 10 | ) 11 | 12 | type WorkerMgr struct { 13 | client *clientv3.Client 14 | kv clientv3.KV 15 | lease clientv3.Lease 16 | } 17 | 18 | var ( 19 | G_workerMgr *WorkerMgr 20 | ) 21 | 22 | // 获取在线worker列表 23 | func (w *WorkerMgr) ListWorkers() (workerArr []string, err error) { 24 | var ( 25 | getResp *clientv3.GetResponse 26 | kv *mvccpb.KeyValue 27 | workerIP string 28 | ) 29 | 30 | // 初始化数组 31 | workerArr = make([]string, 0) 32 | 33 | // 获取目录下所有Kv 34 | if getResp, err = w.kv.Get(context.TODO(), common.JOB_RGEISTER_PREFIX, clientv3.WithPrefix()); err != nil { 35 | return 36 | } 37 | // 解析每个节点的IP 38 | for _, kv = range getResp.Kvs { 39 | // kv.Key : /cron/workers/192.168.2.1 40 | workerIP = common.ExtractWorkerIP(string(kv.Key)) 41 | workerArr = append(workerArr, workerIP) 42 | } 43 | return 44 | } 45 | 46 | func InitWorkerMgr() (err error) { 47 | var ( 48 | conf clientv3.Config 49 | client *clientv3.Client 50 | kv clientv3.KV 51 | lease clientv3.Lease 52 | ) 53 | 54 | // 初始化配置 55 | conf = clientv3.Config{ 56 | Endpoints:config.Cfg.JobMgr.Endpoints, 57 | DialTimeout: time.Duration(config.Cfg.JobMgr.TimeOut) * time.Millisecond, 58 | } 59 | 60 | // 建立连接 61 | if client, err = clientv3.New(conf); err != nil { 62 | return 63 | } 64 | 65 | // 得到KV和Lease的API子集 66 | kv = clientv3.NewKV(client) 67 | lease = clientv3.NewLease(client) 68 | 69 | G_workerMgr = &WorkerMgr{ 70 | client :client, 71 | kv: kv, 72 | lease: lease, 73 | } 74 | return 75 | } -------------------------------------------------------------------------------- 
/src/server/httpserver/http_handler.go: -------------------------------------------------------------------------------- 1 | package httpserver 2 | 3 | import ( 4 | "net/http" 5 | "encoding/json" 6 | "helper/common" 7 | "master/jobmgr" 8 | "strconv" 9 | "master/logmgr" 10 | "master/workermgr" 11 | ) 12 | 13 | //保存任务 14 | //POST job = {"name":"job","command":"echo hello","cronExpr":"* * * * * *"} 15 | func JobSaveHandler(resp http.ResponseWriter, req *http.Request) (response HttpResponser) { 16 | ret := &HttpResponse{ 17 | ErrMsg:"OK", 18 | ErrNo:0, 19 | } 20 | var ( 21 | errno int 22 | err error 23 | oldJob *common.Job 24 | ) 25 | 26 | defer func(){ 27 | if err != nil { 28 | ret.ErrMsg = err.Error() 29 | } 30 | ret.ErrNo = errno 31 | }() 32 | 33 | //解析Post表单 34 | if err = req.ParseForm();err != nil { 35 | errno = common.ERRNO_PARSEPOST_FAILED 36 | return ret 37 | } 38 | //获取表单中的job对象并反序列化到结构体 39 | postJob := req.PostForm.Get(common.JOB) 40 | job := &common.Job{} 41 | if err = json.Unmarshal([]byte(postJob),job);err != nil { 42 | errno = common.ERRNO_JSON_UNMARSHAL_FAILED 43 | return ret 44 | } 45 | if oldJob,err = jobmgr.G_JobMgr.SaveJob(job);err != nil { 46 | errno = common.ERRNO_ETCD_PUT_FAILED 47 | return ret 48 | } 49 | 50 | //把ret里的信息json话返回到resp里 51 | ret.Data = oldJob 52 | if _, err = ret.ResponseJson(resp);err != nil { 53 | errno = common.ERRNO_HTTP_RESPONSE_JSON_FAILED 54 | } 55 | 56 | return ret 57 | } 58 | 59 | //删除任务 60 | func JobDeleteHandler(resp http.ResponseWriter, req *http.Request) (response HttpResponser) { 61 | ret := &HttpResponse{ 62 | ErrMsg:"OK", 63 | ErrNo:0, 64 | } 65 | var ( 66 | errno int 67 | err error 68 | oldJob *common.Job 69 | ) 70 | 71 | defer func(){ 72 | if err != nil { 73 | ret.ErrMsg = err.Error() 74 | } 75 | ret.ErrNo = errno 76 | }() 77 | 78 | postJobName := req.PostForm.Get(common.JOB_NAME) 79 | 80 | if oldJob, err = jobmgr.G_JobMgr.DeleteJob(postJobName);err !=nil { 81 | errno = common.ERRNO_ETCD_DELETE_FAILED 82 | return 
ret 83 | } 84 | 85 | //把ret里的信息json返回到resp里 86 | ret.Data = oldJob 87 | if _, err = ret.ResponseJson(resp);err != nil { 88 | errno = common.ERRNO_HTTP_RESPONSE_JSON_FAILED 89 | } 90 | return ret 91 | } 92 | 93 | func JobListHandler(resp http.ResponseWriter, req *http.Request) (response HttpResponser) { 94 | ret := &HttpResponse{ 95 | ErrMsg:"OK", 96 | ErrNo:0, 97 | } 98 | var ( 99 | errno int 100 | err error 101 | jobList []*common.Job 102 | ) 103 | 104 | defer func(){ 105 | if err != nil { 106 | ret.ErrMsg = err.Error() 107 | } 108 | ret.ErrNo = errno 109 | }() 110 | 111 | if jobList,err = jobmgr.G_JobMgr.ListJob(); err != nil { 112 | errno = common.ERRNO_ETCD_GET_FAILED 113 | return ret 114 | } 115 | 116 | //把ret里的信息返回到resp 117 | ret.Data = jobList 118 | if _, err = ret.ResponseJson(resp);err != nil { 119 | errno = common.ERRNO_HTTP_RESPONSE_JSON_FAILED 120 | } 121 | return ret 122 | } 123 | 124 | func JobKillHandler(resp http.ResponseWriter, req *http.Request) (response HttpResponser) { 125 | ret := &HttpResponse{ 126 | ErrMsg:"OK", 127 | ErrNo:0, 128 | } 129 | var ( 130 | errno int 131 | err error 132 | ) 133 | 134 | defer func(){ 135 | if err != nil { 136 | ret.ErrMsg = err.Error() 137 | } 138 | ret.ErrNo = errno 139 | }() 140 | 141 | postJobName := req.PostForm.Get(common.JOB_NAME) 142 | 143 | if err = jobmgr.G_JobMgr.KillJob(postJobName);err !=nil { 144 | errno = common.ERRNO_KILL_JOB_FAILED 145 | return ret 146 | } 147 | 148 | //把ret里的信息json返回到resp里 149 | if _, err = ret.ResponseJson(resp);err != nil { 150 | errno = common.ERRNO_HTTP_RESPONSE_JSON_FAILED 151 | } 152 | return ret 153 | } 154 | 155 | func JobLogHandler(resp http.ResponseWriter, req *http.Request) (response HttpResponser) { 156 | ret := &HttpResponse{ 157 | ErrMsg:"OK", 158 | ErrNo:0, 159 | } 160 | var ( 161 | errno int 162 | err error 163 | skipParam int 164 | limitParam int 165 | ) 166 | 167 | defer func(){ 168 | if err != nil { 169 | ret.ErrMsg = err.Error() 170 | } 171 | ret.ErrNo = errno 
172 | }() 173 | 174 | jobName := req.Form.Get(common.JOB_NAME) 175 | if skipParam,err = strconv.Atoi(req.Form.Get(common.JOB_SKIP));err != nil { 176 | //非法默认从第一条开始展示 177 | skipParam = 0 178 | } 179 | 180 | if limitParam,err = strconv.Atoi(req.Form.Get(common.JOB_LIMIT));err != nil { 181 | //非法默认10条 182 | limitParam = 10 183 | } 184 | //limitParam = 10 185 | if logArr,err := logmgr.G_LogMgr.ListLog(jobName,skipParam,limitParam);err !=nil { 186 | //日志获取失败 187 | errno = common.ERRNO_LOG_GET_FAILED 188 | return 189 | }else { 190 | ret.Data = logArr 191 | } 192 | 193 | //把ret里的信息json返回到resp里 194 | if _, err = ret.ResponseJson(resp);err != nil { 195 | errno = common.ERRNO_HTTP_RESPONSE_JSON_FAILED 196 | } 197 | return ret 198 | } 199 | 200 | func WorkerListHandler(resp http.ResponseWriter, req *http.Request) (response HttpResponser) { 201 | ret := &HttpResponse{ 202 | ErrMsg:"OK", 203 | ErrNo:0, 204 | } 205 | var ( 206 | errno int 207 | err error 208 | workerArr []string 209 | ) 210 | 211 | defer func(){ 212 | if err != nil { 213 | ret.ErrMsg = err.Error() 214 | } 215 | ret.ErrNo = errno 216 | }() 217 | 218 | if workerArr,err = workermgr.G_workerMgr.ListWorkers();err !=nil { 219 | errno = common.ERRNO_GET_LOCAL_IP_ERROR 220 | return 221 | } 222 | ret.Data = workerArr 223 | //把ret里的信息json返回到resp里 224 | if _, err = ret.ResponseJson(resp);err != nil { 225 | errno = common.ERRNO_HTTP_RESPONSE_JSON_FAILED 226 | } 227 | return ret 228 | } 229 | -------------------------------------------------------------------------------- /src/server/httpserver/http_server.go: -------------------------------------------------------------------------------- 1 | package httpserver 2 | 3 | import ( 4 | "net/http" 5 | "strings" 6 | "time" 7 | "os" 8 | logger "github.com/shengkehua/xlog4go" 9 | "runtime/debug" 10 | "net" 11 | "syscall" 12 | "sync" 13 | "io" 14 | "encoding/json" 15 | "fmt" 16 | "helper/common" 17 | "master/config" 18 | ) 19 | 20 | func init() { 21 | Uri2Handler = 
make(map[string]*HttpHandler) 22 | Uri2Handler["/job/save"] = &HttpHandler{Name: "SaveJob", Handle: JobSaveHandler} 23 | Uri2Handler["/job/delete"] = &HttpHandler{Name: "DeleteJob", Handle: JobDeleteHandler} 24 | Uri2Handler["/job/list"] = &HttpHandler{Name: "ListJob", Handle: JobListHandler} 25 | Uri2Handler["/job/kill"] = &HttpHandler{Name: "KillJob", Handle: JobKillHandler} 26 | Uri2Handler["/job/log"] = &HttpHandler{Name: "KillJob", Handle: JobLogHandler} 27 | Uri2Handler["/worker/list"] = &HttpHandler{Name: "KillJob", Handle: WorkerListHandler} 28 | } 29 | 30 | var ( 31 | HttpListener net.Listener 32 | Uri2Handler map[string]*HttpHandler 33 | onceHttp sync.Once 34 | httpInstance *HttpServer 35 | staticDir = http.Dir(config.WebrootPath) 36 | StaticHandler = http.FileServer(staticDir) 37 | ) 38 | 39 | type HttpResponser interface { 40 | //返回错误码, 用于监控 41 | ErrCode() int 42 | //返回内容给调用方 43 | ResponseJson(io.Writer) (int, error) 44 | //用于打印日志 45 | String() string 46 | //继承 error 接口 47 | Error() string 48 | } 49 | 50 | type HttpResponse struct { 51 | ErrNo int `json:"errno"` 52 | ErrMsg string `json:"errmsg"` 53 | LogId string `json:"logid,omitempty"` 54 | Data interface{} `json"data"` 55 | } 56 | 57 | func (r *HttpResponse) ErrCode() int { 58 | return r.ErrNo 59 | } 60 | 61 | func (r *HttpResponse) ResponseJson(w io.Writer) (n int, err error) { 62 | var s []byte 63 | var s1 string 64 | s, err = json.Marshal(r) 65 | if err != nil { 66 | //unlikely, Marshal failed, 返回固定的信息 67 | //不要打印r, 小心无限递归 68 | logger.Error("json.Marshal err:%v", err) 69 | s1 = fmt.Sprintf("{\"errno\":%v,\"errmsg\":\"%v\",\"logid\":\"%v\"}", common.ERRNO_JSON_MARSHAL_FAILED, err, r.LogId) 70 | } else { 71 | s1 = string(s) 72 | } 73 | n, err = io.WriteString(w, s1) 74 | if err != nil { 75 | logger.Error("io.WriteString err:%v", err) 76 | } 77 | return 78 | } 79 | 80 | func (r *HttpResponse) Error() string { 81 | return fmt.Sprintf("errno=%v,errmsg=%v", r.ErrNo, r.ErrMsg) 82 | } 83 | 84 | func (r 
*HttpResponse) String() string { 85 | resJson, _ := json.Marshal(r) 86 | return string(resJson) 87 | } 88 | 89 | func doResponse(logid string, errno int, errmsg string, writer io.Writer) (r HttpResponser) { 90 | r = &HttpResponse{ 91 | ErrNo: errno, 92 | ErrMsg: errmsg, 93 | LogId: logid, 94 | } 95 | _, err := r.ResponseJson(writer) 96 | if err != nil { 97 | logger.Error("doResponse err:%v", err) 98 | } 99 | return 100 | } 101 | 102 | type HttpHandler struct { 103 | Name string 104 | Handle func(w http.ResponseWriter, r *http.Request) HttpResponser 105 | waitGroup sync.WaitGroup 106 | } 107 | 108 | func (kh *HttpHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { 109 | var clientLogId, clientSpanId string 110 | var logId int64 111 | var info map[string]interface{} 112 | var resp HttpResponser 113 | var errCode int 114 | 115 | tBegin := time.Now() 116 | kh.waitGroup.Add(1) 117 | 118 | defer func() { 119 | kh.waitGroup.Done() 120 | //耗时 121 | latency := time.Since(tBegin) 122 | if resp != nil { 123 | errCode = resp.ErrCode() 124 | } else { 125 | //unlikely 126 | errCode = -1 127 | } 128 | //捕捉panic 129 | if err := recover(); err != nil { 130 | errCode = common.ERRNO_PANIC 131 | logger.Error("LogId:%d HandleError# recover errno:%d stack:%s", logId, errCode, string(debug.Stack())) 132 | resp = doResponse(clientLogId, common.ERRNO_PANIC, "panic", w) 133 | } 134 | 135 | if errCode != 0 { 136 | logger.Error("%v [traceid:%v LogId:%d] errno:%d resp:%s", info["name"], clientLogId, logId, errCode, resp.String()) 137 | } 138 | logger.Info("_com_request_out||funcname=%v||traceid=%v||spanid=%v||logId=%v||uri=%v||host=%v||remotAddr=%v||request=%v||response=%v||proc_time=%.2f", info["name"], clientLogId, clientSpanId, logId, info["url"], info["host"], info["remote"], info["param"], resp.String(), latency.Seconds()*1000) 139 | }() 140 | 141 | r.ParseForm() 142 | 143 | info = GetHttpRequestInfo(r) 144 | 145 | //log request in 146 | 
logger.Info("_com_request_in||funcname=%v||traceid=%v||spanid=%v||logId=%v||uri=%v||host=%v||remotAddr=%v||request=%v", info["name"], clientLogId, clientSpanId, logId, info["url"], info["host"], info["remote"], info["param"]) 147 | 148 | resp = kh.Handle(w, r) 149 | return 150 | } 151 | 152 | func (kh *HttpHandler) Close() { 153 | kh.waitGroup.Wait() 154 | } 155 | 156 | func GetHttpRequestInfo(r *http.Request) (info map[string]interface{}) { 157 | info = make(map[string]interface{}) 158 | info["clientLogId"] = r.Header.Get("didi-header-rid") 159 | info["url"] = r.URL.Path 160 | info["param"] = r.Form 161 | info["host"] = r.Host 162 | info["remote"] = r.RemoteAddr 163 | s1 := strings.Split(r.URL.Path, "/") 164 | if len(s1) > 0 { 165 | info["name"] = s1[len(s1)-1] 166 | } else { 167 | info["name"] = "NoBody" 168 | } 169 | info["now"] = time.Now() 170 | return 171 | } 172 | 173 | //HttpServer 174 | type HttpServer struct { 175 | ServerMux *http.ServeMux 176 | Uri2Handler map[string]*HttpHandler 177 | ListenPort string 178 | } 179 | 180 | //单例模式 181 | func GetHttpInstance() *HttpServer { 182 | onceHttp.Do(func() { 183 | if httpInstance == nil { 184 | httpInstance = &HttpServer{} 185 | } 186 | }) 187 | return httpInstance 188 | } 189 | 190 | func (hs *HttpServer) AddHandler(uri string, handler *HttpHandler) error { 191 | hs.Uri2Handler[uri] = handler 192 | return nil 193 | } 194 | 195 | func (hs *HttpServer) Init(port string) error { 196 | hs.Uri2Handler = make(map[string]*HttpHandler) 197 | hs.ListenPort = port 198 | hs.ServerMux = http.NewServeMux() 199 | return nil 200 | } 201 | 202 | func (hs *HttpServer) Start() error { 203 | 204 | curPid := os.Getpid() 205 | go func(pid int) { 206 | osProcess := os.Process{Pid: pid} 207 | defer func() { 208 | if errRecover := recover(); errRecover != nil { 209 | logger.Error("abort, unknown error, errno:%d,errmsg:%v, stack:%s", 210 | common.ERRNO_PANIC, errRecover, string(debug.Stack())) 211 | } 212 | }() 213 | 214 | var err error 
215 | HttpListener, err = net.Listen("tcp", ":"+hs.ListenPort) 216 | if err != nil { 217 | logger.Error("will_send_kill_cmd, tcp_listen_fail errmsg:%s", err.Error()) 218 | osProcess.Signal(syscall.SIGINT) 219 | return 220 | } 221 | defer HttpListener.Close() 222 | 223 | //handler实现ServeHTTP接口就可以 224 | for uri, handler := range hs.Uri2Handler { 225 | hs.ServerMux.Handle(uri, handler) 226 | } 227 | //静态路由 228 | hs.ServerMux.Handle("/",http.StripPrefix("/",StaticHandler)) 229 | 230 | server := http.Server{ 231 | Handler: hs.ServerMux, 232 | ReadTimeout: 5 * time.Second, 233 | WriteTimeout: 5 * time.Second, 234 | } 235 | err = server.Serve(HttpListener) 236 | if err != nil { 237 | logger.Warn("server_error errmsg:%s", err.Error()) 238 | } 239 | }(curPid) 240 | return nil 241 | } -------------------------------------------------------------------------------- /src/worker/config/config.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | var ( 4 | LogFile = "./src/worker/config/log.json" 5 | ConfFile = "./src/worker/config/worker.conf" 6 | ) 7 | 8 | type WorkerCfg struct { 9 | JobMgr JobMgrConfig 10 | LogMgr LogMgrConfig 11 | } 12 | 13 | type JobMgrConfig struct{ 14 | Endpoints []string 15 | TimeOut int 16 | } 17 | 18 | 19 | type LogMgrConfig struct{ 20 | MongodbUri string 21 | TimeOut int 22 | Database string 23 | Collection string 24 | MaxBatchSize int 25 | LogTimeOut int 26 | } 27 | 28 | var Cfg WorkerCfg -------------------------------------------------------------------------------- /src/worker/config/log.json: -------------------------------------------------------------------------------- 1 | { 2 | "LogLevel" : "trace", 3 | 4 | "FileWriter" : { 5 | "On": true, 6 | 7 | "LogPath" : "./src/worker/log/crontab.log.info", 8 | "RotateLogPath" : "./src/worker/log/crontab.log.info.%Y%M%D%H", 9 | 10 | "WfLogPath" : "./src/worker/log/crontab.log.wf", 11 | "RotateWfLogPath" : 
"./src/worker/log/crontab.log.wf.%Y%M%D%H", 12 | 13 | "PublicLogPath" : "./src/worker/log/public.log", 14 | "RotatePublicLogPath" : "./src/worker/log/public.log.%Y%M%D%H" 15 | }, 16 | 17 | "ConsoleWriter" : { 18 | "On" : false 19 | } 20 | } -------------------------------------------------------------------------------- /src/worker/config/worker.conf: -------------------------------------------------------------------------------- 1 | [jobmgr] 2 | endpoints = ["127.0.0.1:2379"] 3 | timeout = 500 4 | [logmgr] 5 | mongodburi = "mongodb://127.0.0.1:27017" 6 | timeout = 500 7 | database = "distribute_cron" 8 | collection = "log" 9 | maxbatchsize = 10 10 | logtimeout = 10 -------------------------------------------------------------------------------- /src/worker/jobmgr/executor.go: -------------------------------------------------------------------------------- 1 | package jobmgr 2 | 3 | import ( 4 | "time" 5 | "os/exec" 6 | "helper/common" 7 | "math/rand" 8 | logger "github.com/shengkehua/xlog4go" 9 | ) 10 | 11 | //执行器 12 | var ( 13 | G_Executor *Executor 14 | ) 15 | 16 | type Executor struct { 17 | ScheduleResultChan chan *ExecResult 18 | } 19 | 20 | 21 | //执行一个shell任务 22 | func (e *Executor) ExecJob(plan *SchedulePlan){ 23 | //要在这里赋值上scheduletime计划调度时间。因为plan是指针,后面go并发以后plan的nexttime会改动。 24 | var scheduleTime = plan.NextTime 25 | go func(){ 26 | var ( 27 | err error 28 | result *ExecResult 29 | ) 30 | //获取乐观锁,成功再往下执行 31 | jobLock := G_JobMgr.NewJobLock(plan.Job.Name) 32 | //随机睡眠0-10ms,防止机器时间不同步导致任务分配不均匀 33 | time.Sleep(time.Duration(rand.Intn(10)) * time.Millisecond) 34 | err = jobLock.TryLock() 35 | defer jobLock.UnLock() 36 | if err !=nil { 37 | //上锁失败不能返回,也要更新执行表吧任务从执行表里去掉 38 | result = &ExecResult{ 39 | Err:err, 40 | EndTime:time.Now(), 41 | JobPlan:plan, 42 | } 43 | }else { 44 | //上锁成功 45 | logger.Info("上锁成功!可以执行任务:%s, command:%s",plan.Job.Name,plan.Job.Command) 46 | //执行调度 47 | startTime := time.Now() 48 | cmd := 
exec.CommandContext(plan.ctx,"/bin/bash","-c",plan.Job.Command) 49 | output,err := cmd.CombinedOutput() 50 | //返回结果 51 | result = &ExecResult{ 52 | Err:err, 53 | OutPut:output, 54 | JobPlan:plan, 55 | StartTime:startTime, 56 | EndTime:time.Now(), 57 | ScheduleTime:scheduleTime, 58 | } 59 | } 60 | e.PushJobResult(result) 61 | }() 62 | } 63 | 64 | 65 | func InitExecutor() (err error){ 66 | G_Executor = &Executor{ 67 | ScheduleResultChan :make(chan *ExecResult,common.MAX_NUM_JOB_QUEUE), 68 | } 69 | return 70 | } 71 | 72 | type ExecResult struct { 73 | Err error 74 | OutPut []byte 75 | JobPlan *SchedulePlan 76 | StartTime time.Time 77 | EndTime time.Time 78 | ScheduleTime time.Time 79 | } 80 | 81 | func (e *Executor) PushJobResult(result *ExecResult) { 82 | e.ScheduleResultChan<-result 83 | } -------------------------------------------------------------------------------- /src/worker/jobmgr/job.go: -------------------------------------------------------------------------------- 1 | package jobmgr 2 | 3 | type Job struct { 4 | Name string `json:"name"` //任务名 5 | Command string `json:"command"` //shell命令 6 | CronExpr string `json:"cronExpr"` //cron表达式 7 | } 8 | -------------------------------------------------------------------------------- /src/worker/jobmgr/joblock.go: -------------------------------------------------------------------------------- 1 | package jobmgr 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | logger "github.com/shengkehua/xlog4go" 7 | "go.etcd.io/etcd/clientv3" 8 | "helper/common" 9 | ) 10 | 11 | type JobLock struct { 12 | kv clientv3.KV 13 | lease clientv3.Lease 14 | jobNmae string 15 | leaseId clientv3.LeaseID 16 | cancelFunc context.CancelFunc 17 | isLocked bool 18 | } 19 | 20 | //初始化一把锁 21 | func initJobLock(client *clientv3.Client, name string) (jobLock *JobLock) { 22 | jobLock = &JobLock{ 23 | kv: client.KV, 24 | lease: client.Lease, 25 | jobNmae: name, 26 | } 27 | return 28 | } 29 | 30 | //尝试上乐观锁 31 | func (j *JobLock) TryLock() (err error) 
{ 32 | var ( 33 | leaseResp *clientv3.LeaseGrantResponse 34 | leaseKeepAliveChan <-chan *clientv3.LeaseKeepAliveResponse 35 | txn clientv3.Txn 36 | txnResp *clientv3.TxnResponse 37 | ) 38 | ctx, ctlFunc := context.WithCancel(context.TODO()) 39 | //建立租约(1s) 40 | if leaseResp, err = j.lease.Grant(ctx, 5); err != nil { 41 | logger.Error("Lease Grant Error errno:%d, err:%s", common.ERRNO_ETCD_GRANT_LEASE_FAILED, err.Error()) 42 | return 43 | } 44 | //自动续租 45 | leaseId := leaseResp.ID 46 | defer func() { 47 | //如果有错误,释放租约,取消自动续租 48 | if err != nil { 49 | ctlFunc() 50 | j.lease.Revoke(context.TODO(), leaseId) 51 | } 52 | }() 53 | if leaseKeepAliveChan, err = j.lease.KeepAlive(ctx, leaseId); err != nil { 54 | logger.Error("Lease Grant Error errno:%d, err:%s", common.ERRNO_ETCD_GRANT_LEASE_FAILED, err.Error()) 55 | return 56 | } 57 | 58 | //处理自动续租信号 59 | go func() { 60 | for { 61 | select { 62 | case r := <-leaseKeepAliveChan: 63 | if r == nil { //续租失败 64 | return 65 | } 66 | } 67 | 68 | } 69 | }() 70 | //txn获取锁 71 | txn = j.kv.Txn(context.TODO()) 72 | 73 | // 锁路径 74 | lockKey := common.JOB_LOCK_PREFIX + j.jobNmae 75 | 76 | // 5, 事务抢锁 77 | txn.If(clientv3.Compare(clientv3.CreateRevision(lockKey), "=", 0)). 78 | Then(clientv3.OpPut(lockKey, "", clientv3.WithLease(leaseId))). 
79 | Else(clientv3.OpGet(lockKey)) 80 | 81 | // 提交事务 82 | if txnResp, err = txn.Commit(); err != nil { 83 | return 84 | } 85 | 86 | // 6, 成功返回, 失败释放租约 87 | if !txnResp.Succeeded { // 锁被占用,获取失败 88 | err = errors.New("LockKey Got By Others") 89 | return 90 | } 91 | //抢锁成功,设置leaseId和取消函数 92 | j.leaseId = leaseId 93 | j.cancelFunc = ctlFunc 94 | j.isLocked = true 95 | return 96 | } 97 | 98 | //解锁 99 | func (j *JobLock) UnLock() { 100 | if j.isLocked { 101 | j.cancelFunc() 102 | j.lease.Revoke(context.TODO(), j.leaseId) 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /src/worker/jobmgr/jobmgr.go: -------------------------------------------------------------------------------- 1 | package jobmgr 2 | 3 | import ( 4 | "go.etcd.io/etcd/clientv3" 5 | "time" 6 | "worker/config" 7 | "context" 8 | "helper/common" 9 | logger "github.com/shengkehua/xlog4go" 10 | "go.etcd.io/etcd/mvcc/mvccpb" 11 | ) 12 | 13 | var ( 14 | G_JobMgr *JobMgr 15 | ) 16 | 17 | type JobMgr struct { 18 | Client *clientv3.Client 19 | } 20 | 21 | //初始化管理器 22 | func Init() error{ 23 | config := clientv3.Config{ 24 | Endpoints:config.Cfg.JobMgr.Endpoints, 25 | DialTimeout: time.Duration(config.Cfg.JobMgr.TimeOut) * time.Millisecond, 26 | } 27 | 28 | if client,err := clientv3.New(config);err !=nil { 29 | return err 30 | }else { 31 | G_JobMgr = &JobMgr{ 32 | Client:client, 33 | } 34 | } 35 | return nil 36 | } 37 | 38 | func (j *JobMgr) WatchJobs() (err error) { 39 | var ( 40 | getResp *clientv3.GetResponse 41 | watchVersion int64 42 | watchChan clientv3.WatchChan 43 | ) 44 | if getResp,err = j.Client.Get(context.TODO(),common.JOB_KEY_PREFIX,clientv3.WithPrefix());err != nil { 45 | logger.Error("WatchJob Error errno:%d, err:%s",common.ERRNO_ETCD_GET_FAILED,err.Error()) 46 | return 47 | } 48 | 49 | //当前有哪些任务 50 | for _,v:= range getResp.Kvs { 51 | job := &common.Job{} 52 | if job,err = common.UnpackJob(v.Value);err !=nil { 53 | logger.Error("WatchJob Error 
errno:%d, err:%s",common.ERRNO_ETCD_GET_FAILED,err.Error()) 54 | continue 55 | } 56 | jobEvent := common.BuildEvent(common.JOB_EVENT_SAVE,job) 57 | //防止写满阻塞流程 58 | go G_Scheduler.Push(jobEvent) 59 | 60 | } 61 | //从该version之后向后监听key的变化 62 | go func(){ 63 | //从get版本之后开始监听 64 | watchVersion = getResp.Header.Revision + 1 65 | //启动监听 66 | watchChan = j.Client.Watch(context.TODO(),common.JOB_KEY_PREFIX,clientv3.WithRev(watchVersion),clientv3.WithPrefix()) 67 | 68 | for resp := range watchChan { 69 | for _,res := range resp.Events { 70 | switch res.Type { 71 | case mvccpb.PUT: // 写key 72 | if job,err := common.UnpackJob(res.Kv.Value);err !=nil { 73 | //写入的val非法 74 | logger.Error("UnpackJob Error errno:%d, err:%s",common.ERRNO_JSON_UNMARSHAL_FAILED,err.Error()) 75 | continue 76 | }else { 77 | jobEvent := common.BuildEvent(common.JOB_EVENT_SAVE,job) 78 | go G_Scheduler.Push(jobEvent) 79 | 80 | } 81 | 82 | case mvccpb.DELETE://删除key 83 | jobNmae := common.ExtractJobName(string(res.Kv.Key)) 84 | job := &common.Job{ 85 | Name:jobNmae, 86 | } 87 | jobEvent := common.BuildEvent(common.JOB_EVENT_DELETE,job) 88 | go G_Scheduler.Push(jobEvent) 89 | } 90 | } 91 | } 92 | }() 93 | return 94 | } 95 | 96 | 97 | func (j *JobMgr) NewJobLock(name string) (jobLock *JobLock) { 98 | return initJobLock(j.Client,name) 99 | } 100 | 101 | func (j *JobMgr) WatchKillJobs() (err error) { 102 | var ( 103 | getResp *clientv3.GetResponse 104 | watchChan clientv3.WatchChan 105 | ) 106 | if getResp,err = j.Client.Get(context.TODO(),common.JOB_KILL_PREFIX,clientv3.WithPrefix());err != nil { 107 | logger.Error("WatchJob Error errno:%d, err:%s",common.ERRNO_ETCD_GET_FAILED,err.Error()) 108 | return 109 | } 110 | 111 | //当前有哪些任务 112 | for _,v:= range getResp.Kvs { 113 | job := &common.Job{} 114 | if job,err = common.UnpackJob(v.Value);err !=nil { 115 | logger.Error("WatchJob Error errno:%d, err:%s",common.ERRNO_ETCD_GET_FAILED,err.Error()) 116 | continue 117 | } 118 | jobEvent := 
common.BuildEvent(common.JOB_EVENT_KILL,job) 119 | //防止写满阻塞流程 120 | go G_Scheduler.Push(jobEvent) 121 | 122 | } 123 | //从该version之后向后监听key的变化 124 | go func(){ 125 | //从get版本之后开始监听 126 | //启动监听 127 | watchChan = j.Client.Watch(context.TODO(),common.JOB_KILL_PREFIX,clientv3.WithPrefix()) 128 | 129 | for resp := range watchChan { 130 | for _,res := range resp.Events { 131 | switch res.Type { 132 | case mvccpb.PUT: // 写key(删除) 133 | jobName := common.ExtractKillJobName(string(res.Kv.Key)) 134 | job := &common.Job{ 135 | Name:jobName, 136 | } 137 | jobEvent := common.BuildEvent(common.JOB_EVENT_KILL,job) 138 | go G_Scheduler.Push(jobEvent) 139 | case mvccpb.DELETE://删除key,无影响 140 | } 141 | } 142 | } 143 | }() 144 | return 145 | } 146 | -------------------------------------------------------------------------------- /src/worker/jobmgr/scheduler.go: -------------------------------------------------------------------------------- 1 | package jobmgr 2 | 3 | import ( 4 | "helper/common" 5 | "github.com/gorhill/cronexpr" 6 | logger "github.com/shengkehua/xlog4go" 7 | "time" 8 | "fmt" 9 | "context" 10 | "worker/logmgr" 11 | ) 12 | 13 | type Scheduler struct { 14 | jobEventChan chan *common.JobEvent //job触发事件,包括删除、更改 15 | jobEventPlanMap map[string]*SchedulePlan //job执行计划表 16 | jobExecMap map[string]*SchedulePlan //正在执行中的job 17 | } 18 | 19 | type SchedulePlan struct { 20 | Job *common.Job //任务信息 21 | Expr *cronexpr.Expression //解析好的cronexpr表达式 22 | NextTime time.Time //下次执行时间 23 | ctx context.Context 24 | cancelFunc context.CancelFunc 25 | } 26 | 27 | var ( 28 | G_Scheduler *Scheduler 29 | ) 30 | 31 | //调度协程 32 | func (sc *Scheduler) scheduleLoop() { 33 | //定时执行调度commonJob 34 | var ( 35 | scheduleAfter = sc.trySchedule() 36 | schedulerTimer = time.NewTimer(scheduleAfter) 37 | ) 38 | 39 | for { 40 | select { 41 | case v := <- sc.jobEventChan: //任务更新 42 | //这里不能并发,因为有map的写入操作 43 | sc.handle(v) 44 | case <-schedulerTimer.C: //休眠结束 45 | case result := 
<-G_Executor.ScheduleResultChan: //任务执行完毕 46 | sc.processScheduleResult(result) 47 | } 48 | //更新或者睡眠到了(无任务1秒唤醒一次)重新计算时间 49 | scheduleAfter = sc.trySchedule() 50 | schedulerTimer.Reset(scheduleAfter) 51 | } 52 | } 53 | 54 | func (sc *Scheduler) processScheduleResult(result *ExecResult) { 55 | //执行完毕(无论是否成功)需要删除执行列表(这里没有并发了,可以安全删除map的元素) 56 | delete(sc.jobExecMap,result.JobPlan.Job.Name) 57 | //任务执行太快可能导致因为机器时间差造成的重复执行。 58 | time.Sleep(20*time.Millisecond) 59 | 60 | //任务执行成功 61 | fmt.Println("任务名:",result.JobPlan.Job.Name," 开始时间:", 62 | result.StartTime," 结束时间:",result.EndTime," 执行结果:",string(result.OutPut)) 63 | //写日志 64 | LogOne := &common.CronLog{ 65 | Name:result.JobPlan.Job.Name, 66 | Command:result.JobPlan.Job.Command, 67 | OutPut:string(result.OutPut), 68 | StartTime:result.StartTime.UnixNano()/1000/1000, 69 | EndTime:result.EndTime.UnixNano()/1000/1000, 70 | ScheduleTime:result.ScheduleTime.UnixNano()/1000/1000, 71 | } 72 | 73 | if result.Err != nil { 74 | LogOne.Err = result.Err.Error() 75 | }else{ 76 | LogOne.Err = "" 77 | } 78 | logmgr.G_LogMgr.LogChan <-LogOne 79 | } 80 | 81 | //执行并返回要最少休眠的时间 82 | func (sc *Scheduler) trySchedule() (timeAfter time.Duration){ 83 | var ( 84 | now time.Time 85 | nearestTime *time.Time 86 | ) 87 | 88 | //任务表为空时休眠1s 89 | if len(sc.jobEventPlanMap) == 0 { 90 | timeAfter = 1*time.Second 91 | return 92 | } 93 | //初始化时间 94 | now = time.Now() 95 | //遍历任务 96 | for _,v := range sc.jobEventPlanMap{ 97 | //需要执行任务 98 | if v.NextTime.Before(now)||v.NextTime.Equal(now) { 99 | //这里也不能并发操作,我这里开始犯了错误,因为内部有map的写操作。 100 | sc.tryStartJob(v) 101 | //执行完以后需要记录下次执行时间 102 | v.NextTime = v.Expr.Next(now) 103 | } 104 | //统计最近要过期的时间 105 | if nearestTime == nil || v.NextTime.Before(*nearestTime) { 106 | nearestTime = &v.NextTime 107 | } 108 | } 109 | //下次调度时间 110 | timeAfter = nearestTime.Sub(now) 111 | return 112 | } 113 | 114 | //初始化 115 | func InitScheduler() (err error) { 116 | G_Scheduler = &Scheduler{ 117 | jobEventChan:make(chan 
*common.JobEvent,common.MAX_NUM_JOB_QUEUE), 118 | jobEventPlanMap:make(map[string]*SchedulePlan,0), 119 | jobExecMap: make(map[string]*SchedulePlan,0), 120 | } 121 | 122 | go G_Scheduler.scheduleLoop() 123 | return 124 | } 125 | 126 | //push任务到调度器 127 | func (sc *Scheduler) Push(jobEvent *common.JobEvent) { 128 | sc.jobEventChan <- jobEvent 129 | } 130 | 131 | //处理任务(维护任务表) 132 | func (sc *Scheduler) handle(jobEvent *common.JobEvent) { 133 | var ( 134 | jobSchedulePlan *SchedulePlan 135 | err error 136 | ) 137 | switch jobEvent.Type { 138 | case common.JOB_EVENT_SAVE: 139 | if jobSchedulePlan,err = buidJobSchedulPlan(jobEvent.Job);err != nil { 140 | return 141 | } 142 | sc.jobEventPlanMap[jobEvent.Job.Name] = jobSchedulePlan 143 | case common.JOB_EVENT_DELETE: 144 | if _,ok := sc.jobEventPlanMap[jobEvent.Job.Name];ok{ 145 | delete(sc.jobEventPlanMap,jobEvent.Job.Name) 146 | } 147 | case common.JOB_EVENT_KILL: 148 | //强杀任务必须在执行表中 149 | if jobSchedulePlan,ok := sc.jobExecMap[jobEvent.Job.Name];ok{ 150 | //通过调用执行中的任务的cancelFunc()来中断任务执行 151 | fmt.Println("执行强杀任务") 152 | jobSchedulePlan.cancelFunc() 153 | //强杀任务后需要恢复不然永远无法执行。 154 | jobSchedulePlan.ctx,jobSchedulePlan.cancelFunc = context.WithCancel(context.TODO()) 155 | } 156 | } 157 | } 158 | 159 | //根据当前任务构建下一次调度计划 160 | func buidJobSchedulPlan(job *common.Job) (plan *SchedulePlan,err error) { 161 | var ( 162 | expr *cronexpr.Expression 163 | ) 164 | 165 | //解析Job的Cronb表达式 166 | if expr, err = cronexpr.Parse(job.CronExpr);err != nil { 167 | logger.Warn("Cron Parse Error errno:%d, err:%s",common.ERRNO_CRON_PARSE_FAILD,err.Error()) 168 | return 169 | } 170 | ctx, cancelFunc := context.WithCancel(context.TODO()) 171 | //生成调度计划 172 | plan = &SchedulePlan{ 173 | Job:job, 174 | Expr:expr, 175 | NextTime:expr.Next(time.Now()), 176 | ctx:ctx, 177 | cancelFunc:cancelFunc, 178 | } 179 | 180 | 181 | return 182 | } 183 | 184 | //任务执行的时间如果超出其应该下次执行的时间时下次不再执行 185 | func (sc *Scheduler) tryStartJob (plan *SchedulePlan) { 186 | 
//任务还在执行则跳过本次调度 187 | if _,ok := sc.jobExecMap[plan.Job.Name];ok { 188 | fmt.Println("任务还在执行中") 189 | return 190 | } 191 | 192 | //未执行则加入执行表中 193 | sc.jobExecMap[plan.Job.Name] = plan 194 | //执行cron命令 195 | G_Executor.ExecJob(plan) 196 | } 197 | -------------------------------------------------------------------------------- /src/worker/logmgr/logmgr.go: -------------------------------------------------------------------------------- 1 | package logmgr 2 | 3 | import ( 4 | "go.mongodb.org/mongo-driver/mongo" 5 | "time" 6 | "go.mongodb.org/mongo-driver/mongo/options" 7 | "context" 8 | "worker/config" 9 | "helper/common" 10 | logger "github.com/shengkehua/xlog4go" 11 | ) 12 | 13 | //log管理器 14 | type LogMgr struct { 15 | Client *mongo.Client //连接mongodb的客户端 16 | LogChan chan *common.CronLog //写入日志的chan 17 | Collection *mongo.Collection //mongodb的表 18 | } 19 | 20 | var ( 21 | G_LogMgr *LogMgr 22 | ) 23 | 24 | func InitLogMgr() (err error) { 25 | //连接mogodb 26 | var ( 27 | client *mongo.Client 28 | ) 29 | con := context.TODO() 30 | //1、建立连接 31 | opt := options.Client() 32 | opt.SetConnectTimeout(time.Duration(config.Cfg.LogMgr.TimeOut)*time.Millisecond).ApplyURI(config.Cfg.LogMgr.MongodbUri) 33 | if client,err = mongo.Connect(con,opt); err != nil { 34 | return 35 | } 36 | 37 | G_LogMgr = &LogMgr{ 38 | Client:client, 39 | LogChan:make(chan *common.CronLog,common.MAX_NUM_LOG_QUEUE), 40 | Collection:client.Database(config.Cfg.LogMgr.Database).Collection(config.Cfg.LogMgr.Collection), 41 | } 42 | 43 | //启动监听器 44 | go G_LogMgr.LogLoop() 45 | return 46 | } 47 | 48 | //监听循环 49 | func (l *LogMgr) LogLoop() { 50 | 51 | //初始化timer 52 | var ( 53 | commitTimer = time.NewTimer(time.Duration(config.Cfg.LogMgr.LogTimeOut)*time.Second) 54 | logBatch = &common.LogBach{} 55 | ) 56 | for { 57 | select { 58 | case v := <-l.LogChan: 59 | logBatch.Logs = append(logBatch.Logs,v) 60 | if len(logBatch.Logs) >= config.Cfg.LogMgr.MaxBatchSize { //达到batchsize 执行一次存储 61 | //满了以后先暂停计时器 62 | 
commitTimer.Reset(time.Duration(config.Cfg.LogMgr.LogTimeOut)*time.Second) 63 | commitTimer.Stop() 64 | l.DoLogSave(logBatch) 65 | logBatch.Logs = make([]interface{},0) 66 | } 67 | case <-commitTimer.C: 68 | commitTimer.Reset(time.Duration(config.Cfg.LogMgr.LogTimeOut)*time.Second) 69 | if len(logBatch.Logs) != 0 { 70 | l.DoLogSave(logBatch) 71 | logBatch.Logs = make([]interface{},0) 72 | } 73 | } 74 | } 75 | } 76 | 77 | func (l *LogMgr) DoLogSave(batch *common.LogBach) { 78 | //插入多个 79 | if _, err := l.Collection.InsertMany(context.TODO(),batch.Logs);err != nil { 80 | logger.Error("Log Save Error errno:%d, err:%s", common.ERRNO_LOG_SAVE_FAILED, err.Error()) 81 | } 82 | 83 | return 84 | } -------------------------------------------------------------------------------- /src/worker/main/init.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "runtime" 5 | "worker/config" 6 | "fmt" 7 | "helper/logid" 8 | "helper/common" 9 | logger "github.com/shengkehua/xlog4go" 10 | "github.com/BurntSushi/toml" 11 | "worker/jobmgr" 12 | "worker/logmgr" 13 | "worker/register" 14 | ) 15 | 16 | func initEvn() { 17 | runtime.GOMAXPROCS(runtime.NumCPU()) 18 | } 19 | 20 | func initLog() error { 21 | if err := logger.SetupLogWithConf(config.LogFile); err != nil { 22 | fmt.Println("log init fail: %s", err.Error()) 23 | return err 24 | } 25 | 26 | logid.LogId = common.NowInNs() 27 | 28 | logger.Info("init logger success.") 29 | return nil 30 | } 31 | 32 | func initConf() error { 33 | _, err := toml.DecodeFile(config.ConfFile, &config.Cfg) 34 | if err != nil { 35 | fmt.Println("failed to parse conf:%s", err.Error()) 36 | return err 37 | } 38 | logger.Info("config: %v", config.Cfg) 39 | logger.Info("init cfg success.") 40 | return nil 41 | } 42 | 43 | func initJobMgr() error { 44 | return jobmgr.Init() 45 | } 46 | 47 | func initWatcher() error { 48 | e1 := jobmgr.G_JobMgr.WatchJobs() 49 | e2 := 
jobmgr.G_JobMgr.WatchKillJobs() 50 | if e1 != nil { 51 | return e1 52 | }else { 53 | return e2 54 | } 55 | } 56 | 57 | func initScheduler() error { 58 | return jobmgr.InitScheduler() 59 | } 60 | 61 | func initExecutor() error { 62 | return jobmgr.InitExecutor() 63 | } 64 | 65 | func initLogMgr() error { 66 | return logmgr.InitLogMgr() 67 | } 68 | 69 | func initRegiter() error { 70 | return register.InitRegiter() 71 | } -------------------------------------------------------------------------------- /src/worker/main/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "helper/common" 6 | logger "github.com/shengkehua/xlog4go" 7 | "runtime/debug" 8 | ) 9 | 10 | //退出信号 11 | var G_QuitChan = make(chan int) 12 | 13 | func main() { 14 | //初始化线程 15 | initEvn() 16 | 17 | //初始化log 18 | if err := initLog(); err != nil { 19 | fmt.Errorf("init_log_fail errno:%d errmsg:%s\n", common.INIT_LOG_FAILED, err.Error()) 20 | return 21 | } 22 | defer logger.Close() 23 | 24 | //设置recover 25 | defer func() { 26 | if err := recover(); err != nil { 27 | logger.Error("abort, unknown error, errno:%d,errmsg:%v, stack:%s", 28 | common.ERRNO_PANIC, err, string(debug.Stack())) 29 | } 30 | }() 31 | 32 | //初始化config 33 | if err := initConf(); err != nil { 34 | logger.Warn("init_conf_fail errno:%d errmsg:%s\n", common.INIT_SERVCIE_FAILED, err.Error()) 35 | return 36 | } 37 | 38 | //初始化jobmgr 39 | if err := initJobMgr();err !=nil { 40 | logger.Warn("init_jobmgr errno:%d errmsg:%s\n",common.INIT_SERVCIE_FAILED,err.Error()) 41 | return 42 | } 43 | 44 | //初始化日志收集器 45 | if err := initLogMgr();err != nil { 46 | logger.Warn("init_logmgr errno:%d errmsg:%s\n",common.INIT_SERVCIE_FAILED,err.Error()) 47 | return 48 | } 49 | //初始化调度器 50 | if err := initScheduler();err != nil { 51 | logger.Warn("init_scheduler errno:%d errmsg=%s\n", common.INIT_SERVCIE_FAILED, err.Error()) 52 | return 53 | } 54 | 55 | //初始化执行器 56 | if err := 
initExecutor();err != nil { 57 | logger.Warn("init_executor errno:%d errmsg=%s\n", common.INIT_SERVCIE_FAILED, err.Error()) 58 | return 59 | } 60 | //初始化watcher,启动监听 61 | if err := initWatcher();err != nil { 62 | logger.Warn("init_conf_watcher errno:%d errmsg:%s\n",common.INIT_SERVCIE_FAILED,err.Error()) 63 | return 64 | } 65 | 66 | if err := initRegiter();err != nil { 67 | logger.Warn("init_register errno:%d errmsg:%s\n",common.INIT_SERVCIE_FAILED,err.Error()) 68 | return 69 | } 70 | 71 | logger.Info("all_init_ok") 72 | fmt.Println("start_ok") 73 | 74 | //监听中断信号 75 | go signal_proc() 76 | 77 | value := <-G_QuitChan 78 | 79 | logger.Info("msg:diversion_api_quit chan_recv_val:%d", value) 80 | return 81 | 82 | } -------------------------------------------------------------------------------- /src/worker/main/signal.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "os" 5 | "os/signal" 6 | "syscall" 7 | logger "github.com/shengkehua/xlog4go" 8 | "time" 9 | ) 10 | 11 | func signal_proc() { 12 | c := make(chan os.Signal, 1) 13 | 14 | signal.Notify(c, syscall.SIGINT, syscall.SIGALRM, syscall.SIGTERM, syscall.SIGUSR1) 15 | 16 | // Block until a signal is received. 
17 | sig := <-c 18 | 19 | logger.Warn("Signal received: %v", sig) 20 | 21 | time.Sleep(500 * time.Millisecond) 22 | 23 | logger.Warn("send quit signal") 24 | G_QuitChan <- 1 25 | } -------------------------------------------------------------------------------- /src/worker/register/register.go: -------------------------------------------------------------------------------- 1 | package register 2 | 3 | import ( 4 | "go.etcd.io/etcd/clientv3" 5 | logger "github.com/shengkehua/xlog4go" 6 | "helper/common" 7 | "time" 8 | "worker/config" 9 | "context" 10 | ) 11 | 12 | //注册器 13 | type Register struct { 14 | Client *clientv3.Client 15 | } 16 | 17 | 18 | var ( 19 | G_Register *Register 20 | ) 21 | 22 | //初始化注册器 23 | func InitRegiter() (err error) { 24 | config := clientv3.Config{ 25 | Endpoints:config.Cfg.JobMgr.Endpoints, 26 | DialTimeout: time.Duration(config.Cfg.JobMgr.TimeOut) * time.Millisecond, 27 | } 28 | 29 | if client,err := clientv3.New(config);err !=nil { 30 | return err 31 | }else { 32 | G_Register = &Register{ 33 | Client:client, 34 | } 35 | } 36 | go G_Register.RegisterWorker() 37 | return nil 38 | } 39 | 40 | //注册服务 41 | func (r *Register) RegisterWorker() { 42 | var ( 43 | leaseResp *clientv3.LeaseGrantResponse 44 | leaseKeepAliveChan <-chan *clientv3.LeaseKeepAliveResponse 45 | err error 46 | localIP string 47 | regKey string 48 | leaseId clientv3.LeaseID 49 | ) 50 | 51 | //注册失败不断充实,防止网络抖动造成的短暂问题 52 | for { 53 | ctx, ctlFunc := context.WithCancel(context.TODO()) 54 | //建立租约(5s) 55 | if leaseResp, err = r.Client.Grant(ctx, 5); err != nil { 56 | logger.Error("Lease Grant Error errno:%d, err:%s", common.ERRNO_ETCD_GRANT_LEASE_FAILED, err.Error()) 57 | goto ERR 58 | } 59 | 60 | //自动续租 61 | leaseId = leaseResp.ID 62 | if leaseKeepAliveChan, err = r.Client.KeepAlive(ctx, leaseId); err != nil { 63 | logger.Error("Lease Grant Error errno:%d, err:%s", common.ERRNO_ETCD_GRANT_LEASE_FAILED, err.Error()) 64 | goto ERR 65 | } 66 | //写入/cron/workers/IP...作为注册 67 | if 
localIP,err = common.GetLocalIP();err !=nil { 68 | logger.Error("Get LocalIP Error errno:%d, err:%s", common.ERRNO_GET_LOCAL_IP_ERROR, err.Error()) 69 | goto ERR 70 | } 71 | regKey = common.JOB_RGEISTER_PREFIX + localIP 72 | 73 | if _,err = r.Client.Put(ctx,regKey,"",clientv3.WithLease(leaseId)); err != nil { 74 | logger.Error("ETCD Put Error errno:%d, err:%s", common.ERRNO_ETCD_PUT_FAILED, err.Error()) 75 | goto ERR 76 | } 77 | //处理自动续租信号 78 | for { 79 | select { 80 | case r := <-leaseKeepAliveChan: 81 | if r == nil { //续租失败 82 | goto ERR 83 | } 84 | } 85 | } 86 | ERR: 87 | if ctlFunc != nil { 88 | time.Sleep(1 * time.Second) 89 | ctlFunc() 90 | } 91 | } 92 | 93 | return 94 | } --------------------------------------------------------------------------------