├── doc └── img │ ├── arch.png │ └── dingtalk.png ├── etc ├── template │ ├── subject.tpl │ ├── feishu.tpl │ ├── wecom.tpl │ └── dingtalk.tpl ├── script │ ├── notify │ │ ├── Makefile │ │ ├── README.md │ │ └── notify.go │ └── notify.py ├── service │ ├── n9e-server.service │ ├── n9e-webapi.service │ └── telegraf.service └── webapi.conf ├── docker ├── .dockerignore ├── n9eetc │ ├── template │ │ ├── subject.tpl │ │ ├── dingtalk.tpl │ │ ├── feishu.tpl │ │ └── wecom.tpl │ ├── metrics.yaml │ ├── script │ │ └── notify.py │ └── webapi.conf ├── prometc │ ├── targets.json │ └── prometheus.yml ├── mysqletc │ └── my.cnf ├── initsql │ └── c-init.sql ├── Dockerfile ├── build.sh ├── ibexetc │ ├── agentd.conf │ └── server.conf └── docker-compose.yaml ├── src ├── server │ ├── engine │ │ ├── cmd_windows.go │ │ ├── queue.go │ │ ├── cmd_unix.go │ │ ├── logger.go │ │ ├── engine.go │ │ ├── effective.go │ │ ├── mute.go │ │ ├── vector.go │ │ └── consume.go │ ├── memsto │ │ ├── memsto.go │ │ ├── busi_group_cache.go │ │ ├── alert_rule_cache.go │ │ ├── user_cache.go │ │ ├── alert_mute_cache.go │ │ ├── target_cache.go │ │ ├── alert_subsribe_cache.go │ │ └── user_group_cache.go │ ├── naming │ │ ├── leader.go │ │ ├── hashring.go │ │ └── heartbeat.go │ ├── poster │ │ └── post.go │ ├── common │ │ └── label_append.go │ ├── sender │ │ ├── wecom.go │ │ ├── feishu.go │ │ ├── dingtalk.go │ │ └── email.go │ ├── router │ │ ├── router_memsto.go │ │ ├── router.go │ │ └── router_prom.go │ ├── reader │ │ └── reader.go │ ├── stat │ │ └── stat.go │ └── server.go ├── webapi │ ├── config │ │ ├── init.go │ │ ├── metrics.go │ │ ├── i18n.go │ │ └── config.go │ ├── router │ │ ├── router_config.go │ │ ├── router_role.go │ │ ├── router_mute.go │ │ ├── router_chart.go │ │ ├── router_chart_share.go │ │ ├── router_chart_group.go │ │ ├── router_alert_aggr_view.go │ │ ├── router_self.go │ │ ├── router_collect_rule.go │ │ ├── router_prometheus.go │ │ ├── router_alert_subscribe.go │ │ ├── router_alert_his_event.go │ │ ├── 
router_metric_desc.go │ │ ├── router_funcs.go │ │ ├── router_user_group.go │ │ ├── router_alert_rule.go │ │ ├── router_user.go │ │ ├── router_busi_group.go │ │ └── router_login.go │ ├── stat │ │ └── stat.go │ ├── prom │ │ └── prom.go │ └── webapi.go ├── pkg │ ├── sys │ │ ├── cmd_windows.go │ │ └── cmd_unix.go │ ├── logx │ │ └── logx.go │ ├── ormx │ │ ├── ormx.go │ │ └── types.go │ ├── httpx │ │ └── httpx.go │ ├── ldapx │ │ └── ldapx.go │ └── ibex │ │ └── ibex.go ├── models │ ├── role.go │ ├── chart_share.go │ ├── chart.go │ ├── role_operation.go │ ├── common.go │ ├── user_group_member.go │ ├── task_record.go │ ├── chart_group.go │ ├── configs.go │ ├── busi_group_member.go │ ├── alert_aggr_view.go │ ├── metric_description.go │ ├── alert_mute.go │ ├── user_group.go │ └── dashboard.go ├── main.go └── storage │ └── storage.go ├── .gitattributes ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── config.yml │ └── enhancement.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ └── n9e.yml ├── README.md ├── .gitignore ├── Makefile └── go.mod /doc/img/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UlricQin/nightingale/main/doc/img/arch.png -------------------------------------------------------------------------------- /doc/img/dingtalk.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UlricQin/nightingale/main/doc/img/dingtalk.png -------------------------------------------------------------------------------- /etc/template/subject.tpl: -------------------------------------------------------------------------------- 1 | {{if .IsRecovered}}Recovered{{else}}Triggered{{end}}: {{.RuleName}} {{.TagsJSON}} -------------------------------------------------------------------------------- /docker/.dockerignore: -------------------------------------------------------------------------------- 1 | ibexetc 2 | initsql 3 | mysqletc 4 | 
n9eetc 5 | prometc 6 | build.sh 7 | docker-compose.yaml 8 | -------------------------------------------------------------------------------- /docker/n9eetc/template/subject.tpl: -------------------------------------------------------------------------------- 1 | {{if .IsRecovered}}Recovered{{else}}Triggered{{end}}: {{.RuleName}} {{.TagsJSON}} -------------------------------------------------------------------------------- /docker/prometc/targets.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "targets": [ 4 | "nwebapi:18000","nserver:19000" 5 | ] 6 | } 7 | ] 8 | -------------------------------------------------------------------------------- /src/server/engine/cmd_windows.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import "os/exec" 4 | 5 | func startCmd(c *exec.Cmd) error { 6 | return c.Start() 7 | } 8 | -------------------------------------------------------------------------------- /src/webapi/config/init.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import "github.com/toolkits/pkg/i18n" 4 | 5 | func init() { 6 | i18n.DictRegister(langDict) 7 | } 8 | -------------------------------------------------------------------------------- /src/server/engine/queue.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import "github.com/toolkits/pkg/container/list" 4 | 5 | var EventQueue = list.NewSafeListLimited(10000000) 6 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.css linguist-language=go 2 | *.less linguist-language=go 3 | *.js linguist-language=go 4 | *.tsx linguist-language=go 5 | *.html linguist-language=go 6 | 
# Builds notify.go as a Go plugin (notify.so).
#
# .PHONY must be upper-case: make treats a lower-case ".phony" as an ordinary
# target, so "all" and "plugin" would not actually be marked phony.
.PHONY: all plugin

all: plugin

# Every recipe line runs in its own shell, so an "export" on a separate line is
# lost before "go build" starts. GOPROXY is therefore set on the same line as
# the command that needs it.
plugin:
	GOPROXY=http://goproxy.cn,direct go build -buildmode=plugin -o notify.so notify.go
// startCmd starts c with Setpgid enabled, i.e. the child is placed in a new
// process group instead of inheriting the caller's. It returns whatever
// c.Start returns.
func startCmd(c *exec.Cmd) error {
	attr := new(syscall.SysProcAttr)
	attr.Setpgid = true
	c.SysProcAttr = attr

	return c.Start()
}
#!/bin/sh
# Builds the nightingale Docker image from ../n9e and ../pub, tags it and
# pushes it to the ulric2019/nightingale repository.
#
# usage: build.sh <tag>

# Stop at the first failing step so a broken build is never tagged and pushed.
set -e

if [ $# -ne 1 ]; then
	echo "usage: $0 <tag>" >&2
	# A usage error is a failure; report it with a non-zero status.
	exit 1
fi

tag=$1

echo "tag: ${tag}"

# Always remove the copied build inputs, including when a step below fails.
trap 'rm -rf n9e pub' EXIT

rm -rf n9e pub
cp ../n9e .
cp -r ../pub .

docker build -t "nightingale:${tag}" .

docker tag "nightingale:${tag}" "ulric2019/nightingale:${tag}"
docker push "ulric2019/nightingale:${tag}"
{{timestamp}} -------------------------------------------------------------------------------- /etc/service/n9e-server.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description="n9e-server" 3 | After=network.target 4 | 5 | [Service] 6 | Type=simple 7 | 8 | ExecStart=/root/gopath/src/n9e/n9e server 9 | WorkingDirectory=/root/gopath/src/n9e 10 | 11 | Restart=on-failure 12 | SuccessExitStatus=0 13 | LimitNOFILE=65536 14 | StandardOutput=syslog 15 | StandardError=syslog 16 | SyslogIdentifier=n9e-server 17 | 18 | 19 | [Install] 20 | WantedBy=multi-user.target 21 | -------------------------------------------------------------------------------- /etc/service/n9e-webapi.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description="n9e-webapi" 3 | After=network.target 4 | 5 | [Service] 6 | Type=simple 7 | 8 | ExecStart=/root/gopath/src/n9e/n9e webapi 9 | WorkingDirectory=/root/gopath/src/n9e 10 | 11 | Restart=on-failure 12 | SuccessExitStatus=0 13 | LimitNOFILE=65536 14 | StandardOutput=syslog 15 | StandardError=syslog 16 | SyslogIdentifier=n9e-webapi 17 | 18 | 19 | [Install] 20 | WantedBy=multi-user.target 21 | -------------------------------------------------------------------------------- /etc/template/dingtalk.tpl: -------------------------------------------------------------------------------- 1 | #### {{if .IsRecovered}}S{{.Severity}} - Recovered - {{.RuleName}}{{else}}S{{.Severity}} - Triggered - {{.RuleName}}{{end}} 2 | 3 | --- 4 | 5 | - **规则标题**: {{.RuleName}}{{if .RuleNote}} 6 | - **规则备注**: {{.RuleNote}}{{end}} 7 | - **监控指标**: {{.TagsJSON}} 8 | - {{if .IsRecovered}}**恢复时间**:{{timeformat .LastEvalTime}}{{else}}**触发时间**: {{timeformat .TriggerTime}} 9 | - **触发时值**: {{.TriggerValue}}{{end}} 10 | - **发送时间**: {{timestamp}} 11 | 12 | -------------------------------------------------------------------------------- /.github/workflows/n9e.yml: 
-------------------------------------------------------------------------------- 1 | name: Go 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | 11 | build: 12 | name: Build 13 | runs-on: ubuntu-latest 14 | steps: 15 | 16 | - name: Set up Go 1.17 17 | uses: actions/setup-go@v1 18 | with: 19 | go-version: 1.17 20 | id: go 21 | 22 | - name: Check out code into the Go module directory 23 | uses: actions/checkout@v2 24 | 25 | - name: Build 26 | run: make 27 | -------------------------------------------------------------------------------- /etc/service/telegraf.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description="telegraf" 3 | After=network.target 4 | 5 | [Service] 6 | Type=simple 7 | 8 | ExecStart=/opt/telegraf/usr/bin/telegraf --config etc/telegraf/telegraf.conf --output-filter opentsdb 9 | WorkingDirectory=/opt/telegraf 10 | 11 | KillMode=process 12 | KillSignal=SIGQUIT 13 | TimeoutStopSec=5 14 | Restart=always 15 | SuccessExitStatus=0 16 | LimitNOFILE=65536 17 | StandardOutput=syslog 18 | StandardError=syslog 19 | SyslogIdentifier=telegraf 20 | 21 | 22 | [Install] 23 | WantedBy=multi-user.target -------------------------------------------------------------------------------- /src/webapi/router/router_role.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/gin-gonic/gin" 7 | "github.com/toolkits/pkg/ginx" 8 | 9 | "github.com/didi/nightingale/v5/src/models" 10 | ) 11 | 12 | func rolesGets(c *gin.Context) { 13 | lst, err := models.RoleGetsAll() 14 | ginx.NewRender(c).Data(lst, err) 15 | } 16 | 17 | func permsGets(c *gin.Context) { 18 | user := c.MustGet("user").(*models.User) 19 | lst, err := models.OperationsOfRole(strings.Fields(user.Roles)) 20 | ginx.NewRender(c).Data(lst, err) 21 | } 22 | 
// WrapTimeout waits for an already started cmd to finish, for at most timeout.
//
// If the command exits in time, it returns the result of cmd.Wait and false.
// If the timeout fires first, it sends SIGKILL to the process and returns the
// error from delivering that signal (nil on success) and true.
//
// cmd.Start must have been called before WrapTimeout.
func WrapTimeout(cmd *exec.Cmd, timeout time.Duration) (error, bool) {
	// Buffered so the waiting goroutine can always hand over its result and
	// exit, even when the timeout branch wins and nobody receives from done.
	done := make(chan error, 1)
	go func() {
		done <- cmd.Wait()
	}()

	// An explicit timer (instead of time.After) is released as soon as the
	// command finishes rather than lingering for the whole timeout.
	timer := time.NewTimer(timeout)
	defer timer.Stop()

	select {
	case <-timer.C:
		return cmd.Process.Signal(syscall.SIGKILL), true
	case err := <-done:
		return err, false
	}
}
logger.Errorf("failed to get active servers: %v", err) 15 | return false, err 16 | } 17 | 18 | if len(servers) == 0 { 19 | logger.Errorf("active servers empty") 20 | return false, err 21 | } 22 | 23 | sort.Strings(servers) 24 | 25 | return config.C.Heartbeat.Endpoint == servers[0], nil 26 | } 27 | -------------------------------------------------------------------------------- /src/models/role.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "github.com/pkg/errors" 5 | ) 6 | 7 | type Role struct { 8 | Id int64 `json:"id" gorm:"primaryKey"` 9 | Name string `json:"name"` 10 | Note string `json:"note"` 11 | } 12 | 13 | func (Role) TableName() string { 14 | return "role" 15 | } 16 | 17 | func RoleGets(where string, args ...interface{}) ([]Role, error) { 18 | var objs []Role 19 | err := DB().Where(where, args...).Order("name").Find(&objs).Error 20 | if err != nil { 21 | return nil, errors.WithMessage(err, "failed to query roles") 22 | } 23 | return objs, nil 24 | } 25 | 26 | func RoleGetsAll() ([]Role, error) { 27 | return RoleGets("") 28 | } 29 | -------------------------------------------------------------------------------- /etc/script/notify/README.md: -------------------------------------------------------------------------------- 1 | 通过go plugin模式处理告警通知 2 | --- 3 | 4 | 相比于调用py脚本方式,该方式一般无需考虑依赖问题 5 | 6 | ### (1) 编写动态链接库逻辑 7 | 8 | ```go 9 | package main 10 | 11 | type inter interface { 12 | Descript() string 13 | Notify([]byte) 14 | } 15 | 16 | // 0、Descript 可用于该插件在 server 中的描述 17 | // 1、在 Notify 方法中实现要处理的自定义逻辑 18 | ``` 19 | 20 | 实现以上接口的 `struct` 实例即为合法 `plugin` 21 | 22 | ### (2) 构建链接库 23 | 24 | 参考 `notify.go` 实现方式,执行 `make` 后可以看到生成一个 `notify.so` 链接文件,放到 n9e 对应项目位置即可 25 | 26 | ### (3) 更新 n9e 配置 27 | 28 | ```text 29 | [Alerting.CallPlugin] 30 | Enable = false 31 | PluginPath = "./etc/script/notify.so" 32 | # 注意此处caller必须在notify.so中作为变量暴露 33 | Caller = "n9eCaller" 34 | ``` 35 | 36 | 
-------------------------------------------------------------------------------- /src/models/chart_share.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | type ChartShare struct { 4 | Id int64 `json:"id" gorm:"primaryKey"` 5 | Cluster string `json:"cluster"` 6 | Configs string `json:"configs"` 7 | CreateBy string `json:"create_by"` 8 | CreateAt int64 `json:"create_at"` 9 | } 10 | 11 | func (cs *ChartShare) TableName() string { 12 | return "chart_share" 13 | } 14 | 15 | func (cs *ChartShare) Add() error { 16 | return Insert(cs) 17 | } 18 | 19 | func ChartShareGetsByIds(ids []int64) ([]ChartShare, error) { 20 | var lst []ChartShare 21 | if len(ids) == 0 { 22 | return lst, nil 23 | } 24 | 25 | err := DB().Where("id in ?", ids).Order("id").Find(&lst).Error 26 | return lst, err 27 | } 28 | -------------------------------------------------------------------------------- /src/server/engine/logger.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "github.com/didi/nightingale/v5/src/models" 5 | "github.com/toolkits/pkg/logger" 6 | ) 7 | 8 | func logEvent(event *models.AlertCurEvent, location string, err ...error) { 9 | status := "triggered" 10 | if event.IsRecovered { 11 | status = "recovered" 12 | } 13 | 14 | message := "" 15 | if len(err) > 0 && err[0] != nil { 16 | message = "error_message: " + err[0].Error() 17 | } 18 | 19 | logger.Infof( 20 | "event(%s %s) %s: rule_id=%d %v%s@%d %s", 21 | event.Hash, 22 | status, 23 | location, 24 | event.RuleId, 25 | event.TagsJSON, 26 | event.TriggerValue, 27 | event.TriggerTime, 28 | message, 29 | ) 30 | } 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Introduction 2 | 3 | Nightingale is an enterprise-level cloud-native monitoring system, which can be used as 
// WrapTimeout waits for an already started cmd to finish, for at most timeout.
//
// If the command exits in time, it returns the result of cmd.Wait and false.
// If the timeout fires first, it sends SIGKILL to the command's process group
// and returns the error from that kill call (nil on success) and true.
//
// IMPORTANT: cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} must be set
// before cmd.Start(); the kill below targets the group whose id equals the
// child's pid.
func WrapTimeout(cmd *exec.Cmd, timeout time.Duration) (error, bool) {
	// Buffered so the waiting goroutine can always hand over its result and
	// exit, even when the timeout branch wins and nobody receives from done.
	done := make(chan error, 1)
	go func() {
		done <- cmd.Wait()
	}()

	// An explicit timer (instead of time.After) is released as soon as the
	// command finishes rather than lingering for the whole timeout.
	timer := time.NewTimer(timeout)
	defer timer.Stop()

	select {
	case <-timer.C:
		// A negative pid addresses the whole process group.
		return syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL), true
	case err := <-done:
		return err, false
	}
}
// PostJSON marshals v to JSON and sends it to url in an HTTP POST request with
// Content-Type "application/json". The whole exchange is bounded by timeout.
//
// It returns the raw response body, the HTTP status code and an error. The
// status code is reported as-is: a non-2xx response is not turned into an
// error. An error is returned when v cannot be marshalled, the request cannot
// be built (for example a malformed url), the request fails, or the body
// cannot be read.
func PostJSON(url string, timeout time.Duration, v interface{}) (response []byte, code int, err error) {
	payload, err := json.Marshal(v)
	if err != nil {
		return nil, 0, err
	}

	client := http.Client{
		Timeout: timeout,
	}

	req, err := http.NewRequest("POST", url, bytes.NewBuffer(payload))
	if err != nil {
		// req is nil here; touching req.Header would panic.
		return nil, 0, err
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := client.Do(req)
	if err != nil {
		return nil, 0, err
	}
	// On a nil error the body is always non-nil and must be closed.
	defer resp.Body.Close()

	code = resp.StatusCode
	response, err = ioutil.ReadAll(resp.Body)

	return response, code, err
}
*gin.Context) { 18 | var f models.AlertMute 19 | ginx.BindJSON(c, &f) 20 | 21 | username := c.MustGet("username").(string) 22 | f.CreateBy = username 23 | f.GroupId = ginx.UrlParamInt64(c, "id") 24 | 25 | ginx.NewRender(c).Message(f.Add()) 26 | } 27 | 28 | func alertMuteDel(c *gin.Context) { 29 | var f idsForm 30 | ginx.BindJSON(c, &f) 31 | f.Verify() 32 | 33 | ginx.NewRender(c).Message(models.AlertMuteDel(f.Ids)) 34 | } 35 | -------------------------------------------------------------------------------- /src/models/role_operation.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "github.com/toolkits/pkg/slice" 5 | ) 6 | 7 | type RoleOperation struct { 8 | RoleName string 9 | Operation string 10 | } 11 | 12 | func (RoleOperation) TableName() string { 13 | return "role_operation" 14 | } 15 | 16 | func RoleHasOperation(roles []string, operation string) (bool, error) { 17 | if len(roles) == 0 { 18 | return false, nil 19 | } 20 | 21 | return Exists(DB().Model(&RoleOperation{}).Where("operation = ? 
and role_name in ?", operation, roles)) 22 | } 23 | 24 | func OperationsOfRole(roles []string) ([]string, error) { 25 | session := DB().Model(&RoleOperation{}).Select("distinct(operation) as operation") 26 | 27 | if !slice.ContainsString(roles, AdminRole) { 28 | session = session.Where("role_name in ?", roles) 29 | } 30 | 31 | var ret []string 32 | err := session.Pluck("operation", &ret).Error 33 | return ret, err 34 | } 35 | -------------------------------------------------------------------------------- /docker/ibexetc/agentd.conf: -------------------------------------------------------------------------------- 1 | # debug, release 2 | RunMode = "release" 3 | 4 | # task meta storage dir 5 | MetaDir = "./meta" 6 | 7 | [HTTP] 8 | Enable = true 9 | # http listening address 10 | Host = "0.0.0.0" 11 | # http listening port 12 | Port = 2090 13 | # https cert file path 14 | CertFile = "" 15 | # https key file path 16 | KeyFile = "" 17 | # whether print access log 18 | PrintAccessLog = true 19 | # whether enable pprof 20 | PProf = false 21 | # http graceful shutdown timeout, unit: s 22 | ShutdownTimeout = 30 23 | # max content length: 64M 24 | MaxContentLength = 67108864 25 | # http server read timeout, unit: s 26 | ReadTimeout = 20 27 | # http server write timeout, unit: s 28 | WriteTimeout = 40 29 | # http server idle timeout, unit: s 30 | IdleTimeout = 120 31 | 32 | [Heartbeat] 33 | # unit: ms 34 | Interval = 1000 35 | # rpc servers 36 | Servers = ["ibex:20090"] 37 | # $ip or $hostname or specified string 38 | Host = "telegraf01" -------------------------------------------------------------------------------- /src/server/common/label_append.go: -------------------------------------------------------------------------------- 1 | package common 2 | 3 | import ( 4 | "github.com/didi/nightingale/v5/src/models" 5 | "github.com/didi/nightingale/v5/src/server/config" 6 | "github.com/didi/nightingale/v5/src/server/memsto" 7 | "github.com/prometheus/prometheus/prompb" 8 | ) 9 
| 10 | func AppendLabels(pt *prompb.TimeSeries, target *models.Target) { 11 | if target == nil { 12 | return 13 | } 14 | 15 | for key, value := range target.TagsMap { 16 | pt.Labels = append(pt.Labels, &prompb.Label{ 17 | Name: key, 18 | Value: value, 19 | }) 20 | } 21 | 22 | if target.GroupId > 0 && len(config.C.BusiGroupLabelKey) > 0 { 23 | bg := memsto.BusiGroupCache.GetByBusiGroupId(target.GroupId) 24 | if bg == nil { 25 | return 26 | } 27 | 28 | if bg.LabelEnable == 0 { 29 | return 30 | } 31 | 32 | pt.Labels = append(pt.Labels, &prompb.Label{ 33 | Name: config.C.BusiGroupLabelKey, 34 | Value: bg.LabelValue, 35 | }) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/models/common.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "github.com/toolkits/pkg/str" 5 | "gorm.io/gorm" 6 | 7 | "github.com/didi/nightingale/v5/src/storage" 8 | ) 9 | 10 | const AdminRole = "Admin" 11 | 12 | func DB() *gorm.DB { 13 | return storage.DB 14 | } 15 | 16 | func Count(tx *gorm.DB) (int64, error) { 17 | var cnt int64 18 | err := tx.Count(&cnt).Error 19 | return cnt, err 20 | } 21 | 22 | func Exists(tx *gorm.DB) (bool, error) { 23 | num, err := Count(tx) 24 | return num > 0, err 25 | } 26 | 27 | func Insert(obj interface{}) error { 28 | return DB().Create(obj).Error 29 | } 30 | 31 | // CryptoPass crypto password use salt 32 | func CryptoPass(raw string) (string, error) { 33 | salt, err := ConfigsGet("salt") 34 | if err != nil { 35 | return "", err 36 | } 37 | 38 | return str.MD5(salt + "<-*Uk30^96eY*->" + raw), nil 39 | } 40 | 41 | type Statistics struct { 42 | Total int64 `gorm:"total"` 43 | LastUpdated int64 `gorm:"last_updated"` 44 | } 45 | -------------------------------------------------------------------------------- /src/server/engine/effective.go: -------------------------------------------------------------------------------- 1 | package engine 2 
| 3 | import ( 4 | "strconv" 5 | "strings" 6 | "time" 7 | 8 | "github.com/didi/nightingale/v5/src/models" 9 | ) 10 | 11 | func isNoneffective(timestamp int64, alertRule *models.AlertRule) bool { 12 | if alertRule.Disabled == 1 { 13 | return true 14 | } 15 | 16 | tm := time.Unix(timestamp, 0) 17 | triggerTime := tm.Format("15:04") 18 | triggerWeek := strconv.Itoa(int(tm.Weekday())) 19 | 20 | if alertRule.EnableStime <= alertRule.EnableEtime { 21 | if triggerTime < alertRule.EnableStime || triggerTime > alertRule.EnableEtime { 22 | return true 23 | } 24 | } else { 25 | if triggerTime < alertRule.EnableStime && triggerTime > alertRule.EnableEtime { 26 | return true 27 | } 28 | } 29 | 30 | alertRule.EnableDaysOfWeek = strings.Replace(alertRule.EnableDaysOfWeek, "7", "0", 1) 31 | 32 | if !strings.Contains(alertRule.EnableDaysOfWeek, triggerWeek) { 33 | return true 34 | } 35 | 36 | return false 37 | } 38 | -------------------------------------------------------------------------------- /etc/script/notify/notify.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "time" 6 | 7 | "github.com/tidwall/gjson" 8 | ) 9 | 10 | // the caller can be called for alerting notify by complete this interface 11 | type inter interface { 12 | Descript() string 13 | Notify([]byte) 14 | } 15 | 16 | // N9E complete 17 | type N9EPlugin struct { 18 | Name string 19 | Description string 20 | BuildAt string 21 | } 22 | 23 | func (n *N9EPlugin) Descript() string { 24 | return fmt.Sprintf("%s: %s", n.Name, n.Description) 25 | } 26 | 27 | func (n *N9EPlugin) Notify(bs []byte) { 28 | var channels = []string{ 29 | "dingtalk_robot_token", 30 | "wecom_robot_token", 31 | "feishu_robot_token", 32 | } 33 | for _, ch := range channels { 34 | if ret := gjson.GetBytes(bs, ch); ret.Exists() { 35 | fmt.Printf("do something...") 36 | } 37 | } 38 | } 39 | 40 | // will be loaded for alertingCall 41 | var n9eCaller = N9EPlugin{ 42 | 
Name: "n9e", 43 | Description: "演示告警通过动态链接库方式通知", 44 | BuildAt: time.Now().Local().Format("2006/01/02 15:04:05"), 45 | } 46 | -------------------------------------------------------------------------------- /src/pkg/logx/logx.go: -------------------------------------------------------------------------------- 1 | package logx 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/pkg/errors" 7 | "github.com/toolkits/pkg/logger" 8 | ) 9 | 10 | type Config struct { 11 | Dir string 12 | Level string 13 | Output string 14 | KeepHours uint 15 | RotateNum int 16 | RotateSize uint64 17 | } 18 | 19 | func Init(c Config) (func(), error) { 20 | logger.SetSeverity(c.Level) 21 | 22 | if c.Output == "stderr" { 23 | logger.LogToStderr() 24 | } else if c.Output == "file" { 25 | lb, err := logger.NewFileBackend(c.Dir) 26 | if err != nil { 27 | return nil, errors.WithMessage(err, "NewFileBackend failed") 28 | } 29 | 30 | if c.KeepHours != 0 { 31 | lb.SetRotateByHour(true) 32 | lb.SetKeepHours(c.KeepHours) 33 | } else if c.RotateNum != 0 { 34 | lb.Rotate(c.RotateNum, c.RotateSize*1024*1024) 35 | } else { 36 | return nil, errors.New("KeepHours and Rotatenum both are 0") 37 | } 38 | 39 | logger.SetLogging(c.Level, lb) 40 | } 41 | 42 | return func() { 43 | fmt.Println("logger exiting") 44 | logger.Close() 45 | }, nil 46 | } 47 | -------------------------------------------------------------------------------- /src/webapi/router/router_chart.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "github.com/gin-gonic/gin" 5 | "github.com/toolkits/pkg/ginx" 6 | 7 | "github.com/didi/nightingale/v5/src/models" 8 | ) 9 | 10 | func chartGets(c *gin.Context) { 11 | lst, err := models.ChartsOf(ginx.QueryInt64(c, "cgid")) 12 | ginx.NewRender(c).Data(lst, err) 13 | } 14 | 15 | func chartAdd(c *gin.Context) { 16 | var chart models.Chart 17 | ginx.BindJSON(c, &chart) 18 | 19 | // group_id / configs / weight 20 | chart.Id = 0 21 | err := 
chart.Add() 22 | ginx.NewRender(c).Data(chart, err) 23 | } 24 | 25 | func chartPut(c *gin.Context) { 26 | var arr []models.Chart 27 | ginx.BindJSON(c, &arr) 28 | 29 | for i := 0; i < len(arr); i++ { 30 | ginx.Dangerous(arr[i].Update("configs", "weight", "group_id")) 31 | } 32 | 33 | ginx.NewRender(c).Message(nil) 34 | } 35 | 36 | func chartDel(c *gin.Context) { 37 | var f idsForm 38 | ginx.BindJSON(c, &f) 39 | 40 | for i := 0; i < len(f.Ids); i++ { 41 | cg := models.Chart{Id: f.Ids[i]} 42 | ginx.Dangerous(cg.Del()) 43 | } 44 | 45 | ginx.NewRender(c).Message(nil) 46 | } 47 | -------------------------------------------------------------------------------- /src/webapi/router/router_chart_share.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/gin-gonic/gin" 7 | "github.com/toolkits/pkg/ginx" 8 | "github.com/toolkits/pkg/str" 9 | 10 | "github.com/didi/nightingale/v5/src/models" 11 | ) 12 | 13 | func chartShareGets(c *gin.Context) { 14 | ids := ginx.QueryStr(c, "ids", "") 15 | lst, err := models.ChartShareGetsByIds(str.IdsInt64(ids, ",")) 16 | ginx.NewRender(c).Data(lst, err) 17 | } 18 | 19 | type chartShareForm struct { 20 | Configs string `json:"configs"` 21 | } 22 | 23 | func chartShareAdd(c *gin.Context) { 24 | username := c.MustGet("username").(string) 25 | cluster := MustGetCluster(c) 26 | 27 | var forms []chartShareForm 28 | ginx.BindJSON(c, &forms) 29 | 30 | ids := []int64{} 31 | now := time.Now().Unix() 32 | 33 | for _, f := range forms { 34 | chart := models.ChartShare{ 35 | Cluster: cluster, 36 | Configs: f.Configs, 37 | CreateBy: username, 38 | CreateAt: now, 39 | } 40 | ginx.Dangerous(chart.Add()) 41 | ids = append(ids, chart.Id) 42 | } 43 | 44 | ginx.NewRender(c).Data(ids, nil) 45 | } 46 | -------------------------------------------------------------------------------- /src/server/sender/wecom.go: 
-------------------------------------------------------------------------------- 1 | package sender 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/didi/nightingale/v5/src/server/poster" 7 | "github.com/toolkits/pkg/logger" 8 | ) 9 | 10 | type WecomMessage struct { 11 | Text string 12 | Tokens []string 13 | } 14 | 15 | type wecomMarkdown struct { 16 | Content string `json:"content"` 17 | } 18 | 19 | type wecom struct { 20 | Msgtype string `json:"msgtype"` 21 | Markdown wecomMarkdown `json:"markdown"` 22 | } 23 | 24 | func SendWecom(message WecomMessage) { 25 | for i := 0; i < len(message.Tokens); i++ { 26 | url := "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=" + message.Tokens[i] 27 | body := wecom{ 28 | Msgtype: "markdown", 29 | Markdown: wecomMarkdown{ 30 | Content: message.Text, 31 | }, 32 | } 33 | 34 | res, code, err := poster.PostJSON(url, time.Second*5, body) 35 | if err != nil { 36 | logger.Errorf("wecom_sender: result=fail url=%s code=%d error=%v response=%s", url, code, err, string(res)) 37 | } else { 38 | logger.Infof("wecom_sender: result=succ url=%s code=%d response=%s", url, code, string(res)) 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/webapi/router/router_chart_group.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "github.com/gin-gonic/gin" 5 | "github.com/toolkits/pkg/ginx" 6 | 7 | "github.com/didi/nightingale/v5/src/models" 8 | ) 9 | 10 | func chartGroupGets(c *gin.Context) { 11 | objs, err := models.ChartGroupsOf(ginx.QueryInt64(c, "did")) 12 | ginx.NewRender(c).Data(objs, err) 13 | } 14 | 15 | func chartGroupAdd(c *gin.Context) { 16 | var cg models.ChartGroup 17 | ginx.BindJSON(c, &cg) 18 | 19 | // dashboard_id / name / weight 20 | cg.Id = 0 21 | err := cg.Add() 22 | ginx.NewRender(c).Data(cg, err) 23 | } 24 | 25 | func chartGroupPut(c *gin.Context) { 26 | var arr []models.ChartGroup 27 | 
ginx.BindJSON(c, &arr) 28 | 29 | for i := 0; i < len(arr); i++ { 30 | ginx.Dangerous(arr[i].Update("name", "weight", "dashboard_id")) 31 | } 32 | 33 | ginx.NewRender(c).Message(nil) 34 | } 35 | 36 | func chartGroupDel(c *gin.Context) { 37 | var f idsForm 38 | ginx.BindJSON(c, &f) 39 | 40 | for i := 0; i < len(f.Ids); i++ { 41 | cg := models.ChartGroup{Id: f.Ids[i]} 42 | ginx.Dangerous(cg.Del()) 43 | } 44 | 45 | ginx.NewRender(c).Message(nil) 46 | } 47 | -------------------------------------------------------------------------------- /docker/prometc/prometheus.yml: -------------------------------------------------------------------------------- 1 | # my global config 2 | global: 3 | scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute. 4 | evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute. 5 | # scrape_timeout is set to the global default (10s). 6 | 7 | # Alertmanager configuration 8 | alerting: 9 | alertmanagers: 10 | - static_configs: 11 | - targets: 12 | # - alertmanager:9093 13 | 14 | # Load rules once and periodically evaluate them according to the global 'evaluation_interval'. 15 | rule_files: 16 | # - "first_rules.yml" 17 | # - "second_rules.yml" 18 | 19 | # A scrape configuration containing exactly one endpoint to scrape: 20 | # Here it's Prometheus itself. 21 | scrape_configs: 22 | # The job name is added as a label `job=` to any timeseries scraped from this config. 23 | - job_name: 'prometheus' 24 | 25 | # metrics_path defaults to '/metrics' 26 | # scheme defaults to 'http'. 
27 | 28 | static_configs: 29 | - targets: ['localhost:9090'] 30 | 31 | - job_name: 'n9e' 32 | file_sd_configs: 33 | - files: 34 | - targets.json 35 | -------------------------------------------------------------------------------- /src/webapi/stat/stat.go: -------------------------------------------------------------------------------- 1 | package stat 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/prometheus/client_golang/prometheus" 7 | ) 8 | 9 | const Service = "n9e-webapi" 10 | 11 | var ( 12 | labels = []string{"service", "code", "path", "method"} 13 | 14 | uptime = prometheus.NewCounterVec( 15 | prometheus.CounterOpts{ 16 | Name: "uptime", 17 | Help: "HTTP service uptime.", 18 | }, []string{"service"}, 19 | ) 20 | 21 | RequestCounter = prometheus.NewCounterVec( 22 | prometheus.CounterOpts{ 23 | Name: "http_request_count_total", 24 | Help: "Total number of HTTP requests made.", 25 | }, labels, 26 | ) 27 | 28 | RequestDuration = prometheus.NewHistogramVec( 29 | prometheus.HistogramOpts{ 30 | Buckets: []float64{.01, .1, 1, 10}, 31 | Name: "http_request_duration_seconds", 32 | Help: "HTTP request latencies in seconds.", 33 | }, labels, 34 | ) 35 | ) 36 | 37 | func Init() { 38 | // Register the summary and the histogram with Prometheus's default registry. 39 | prometheus.MustRegister( 40 | uptime, 41 | RequestCounter, 42 | RequestDuration, 43 | ) 44 | 45 | go recordUptime() 46 | } 47 | 48 | // recordUptime increases service uptime per second. 
# Every target below is a command, not a file to be produced, so all of them
# are declared phony; otherwise a file or directory with the same name would
# stop the target from running.
.PHONY: all build run_webapi run_server pack

# UTC build timestamp. %H is the 24-hour clock; the previous %I (12-hour)
# produced the same stamp for e.g. 03:00 and 15:00.
NOW = $(shell date -u '+%Y%m%d%H%M%S')

RELEASE_VERSION = 5.5.0

APP = n9e
SERVER_BIN = $(APP)
# RELEASE_ROOT = release
# RELEASE_SERVER = release/${APP}
# GIT_COUNT = $(shell git rev-list --all --count)
# GIT_HASH = $(shell git rev-parse --short HEAD)
# RELEASE_TAG = $(RELEASE_VERSION).$(GIT_COUNT).$(GIT_HASH)

all: build

# Build the single n9e binary with the release version baked in.
build:
	go build -ldflags "-w -s -X main.VERSION=$(RELEASE_VERSION)" -o $(SERVER_BIN) ./src

# start:
# @go run -ldflags "-X main.VERSION=$(RELEASE_TAG)" ./cmd/${APP}/main.go web -c ./configs/config.toml -m ./configs/model.conf --menu ./configs/menu.yaml

# Start the webapi component in the background, logging to webapi.log.
run_webapi:
	nohup ./n9e webapi > webapi.log 2>&1 &

# Start the server component in the background, logging to server.log.
run_server:
	nohup ./n9e server > server.log 2>&1 &

# swagger:
# @swag init --parseDependency --generalInfo ./cmd/${APP}/main.go --output ./internal/app/swagger

# wire:
# @wire gen ./internal/app

# test:
# cd ./internal/app/test && go test -v

# clean:
# rm -rf data release $(SERVER_BIN) internal/app/test/data cmd/${APP}/data

# Build, then bundle the binary with the docker, etc and pub assets.
pack: build
	rm -rf $(APP)-$(RELEASE_VERSION).tar.gz
	tar -zcvf $(APP)-$(RELEASE_VERSION).tar.gz docker etc $(SERVER_BIN) pub/font pub/index.html pub/assets pub/image
// Config holds the settings New uses to open a GORM database handle and to
// size its connection pool.
type Config struct {
	// Debug switches the returned handle to GORM debug mode.
	Debug bool
	// DBType selects the dialector; New accepts "mysql" and "postgres",
	// compared case-insensitively.
	DBType string
	// DSN is passed unchanged to the selected driver's Open function.
	DSN string
	// MaxLifetime is the maximum connection lifetime, in seconds.
	MaxLifetime int
	// MaxOpenConns is handed to SetMaxOpenConns on the underlying sql.DB.
	MaxOpenConns int
	// MaxIdleConns is handed to SetMaxIdleConns on the underlying sql.DB.
	MaxIdleConns int
	// TablePrefix is the table-name prefix of the naming strategy, which
	// also uses singular table names.
	TablePrefix string
}
err 57 | } 58 | 59 | sqlDB.SetMaxIdleConns(c.MaxIdleConns) 60 | sqlDB.SetMaxOpenConns(c.MaxOpenConns) 61 | sqlDB.SetConnMaxLifetime(time.Duration(c.MaxLifetime) * time.Second) 62 | 63 | return db, nil 64 | } 65 | -------------------------------------------------------------------------------- /src/server/naming/hashring.go: -------------------------------------------------------------------------------- 1 | package naming 2 | 3 | import ( 4 | "sync" 5 | 6 | "github.com/toolkits/pkg/consistent" 7 | "github.com/toolkits/pkg/logger" 8 | ) 9 | 10 | const NodeReplicas = 500 11 | 12 | type ConsistentHashRing struct { 13 | sync.RWMutex 14 | ring *consistent.Consistent 15 | } 16 | 17 | // for alert_rule sharding 18 | var HashRing = NewConsistentHashRing(int32(NodeReplicas), []string{}) 19 | 20 | func (chr *ConsistentHashRing) GetNode(pk string) (string, error) { 21 | chr.RLock() 22 | defer chr.RUnlock() 23 | 24 | return chr.ring.Get(pk) 25 | } 26 | 27 | func (chr *ConsistentHashRing) Set(r *consistent.Consistent) { 28 | chr.Lock() 29 | defer chr.Unlock() 30 | chr.ring = r 31 | } 32 | 33 | func (chr *ConsistentHashRing) GetRing() *consistent.Consistent { 34 | chr.RLock() 35 | defer chr.RUnlock() 36 | 37 | return chr.ring 38 | } 39 | 40 | func NewConsistentHashRing(replicas int32, nodes []string) *ConsistentHashRing { 41 | ret := &ConsistentHashRing{ring: consistent.New()} 42 | ret.ring.NumberOfReplicas = int(replicas) 43 | for i := 0; i < len(nodes); i++ { 44 | ret.ring.Add(nodes[i]) 45 | } 46 | return ret 47 | } 48 | 49 | func RebuildConsistentHashRing(nodes []string) { 50 | r := consistent.New() 51 | r.NumberOfReplicas = NodeReplicas 52 | for i := 0; i < len(nodes); i++ { 53 | r.Add(nodes[i]) 54 | } 55 | 56 | HashRing.Set(r) 57 | 58 | logger.Infof("hash ring rebuild %+v", r.Members()) 59 | } 60 | -------------------------------------------------------------------------------- /src/webapi/router/router_alert_aggr_view.go: 
-------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "net/http" 5 | 6 | "github.com/didi/nightingale/v5/src/models" 7 | "github.com/gin-gonic/gin" 8 | "github.com/toolkits/pkg/ginx" 9 | ) 10 | 11 | func alertAggrViewGets(c *gin.Context) { 12 | lst, err := models.AlertAggrViewGets(c.MustGet("userid")) 13 | ginx.NewRender(c).Data(lst, err) 14 | } 15 | 16 | // name and rule is necessary 17 | func alertAggrViewAdd(c *gin.Context) { 18 | var f models.AlertAggrView 19 | ginx.BindJSON(c, &f) 20 | 21 | f.Id = 0 22 | f.CreateBy = c.MustGet("username").(string) 23 | f.UserId = c.MustGet("userid").(int64) 24 | 25 | ginx.NewRender(c).Message(f.Add()) 26 | } 27 | 28 | func alertAggrViewDel(c *gin.Context) { 29 | var f idsForm 30 | ginx.BindJSON(c, &f) 31 | 32 | ginx.NewRender(c).Message(models.AlertAggrViewDel(f.Ids, c.MustGet("userid"))) 33 | } 34 | 35 | // id / name / rule is necessary 36 | func alertAggrViewPut(c *gin.Context) { 37 | var f models.AlertAggrView 38 | ginx.BindJSON(c, &f) 39 | 40 | view, err := models.AlertAggrViewGet("id = ?", f.Id) 41 | ginx.Dangerous(err) 42 | 43 | if view == nil { 44 | ginx.NewRender(c).Message("no such item(id: %d)", f.Id) 45 | return 46 | } 47 | 48 | userid := c.MustGet("userid").(int64) 49 | if view.UserId != userid { 50 | ginx.NewRender(c, http.StatusForbidden).Message("forbidden") 51 | return 52 | } 53 | 54 | ginx.NewRender(c).Message(view.Update(f.Name, f.Rule)) 55 | } 56 | -------------------------------------------------------------------------------- /docker/n9eetc/metrics.yaml: -------------------------------------------------------------------------------- 1 | cpu_usage_idle: CPU空闲率(单位:%) 2 | cpu_usage_active: CPU使用率(单位:%) 3 | cpu_usage_system: CPU内核态时间占比(单位:%) 4 | cpu_usage_user: CPU用户态时间占比(单位:%) 5 | cpu_usage_nice: 低优先级用户态CPU时间占比,也就是进程nice值被调整为1-19之间的CPU时间。这里注意,nice可取值范围是-20到19,数值越大,优先级反而越低(单位:%) 6 | cpu_usage_iowait: CPU等待I/O的时间占比(单位:%) 7 | cpu_usage_irq: 
CPU处理硬中断的时间占比(单位:%) 8 | cpu_usage_softirq: CPU处理软中断的时间占比(单位:%) 9 | cpu_usage_steal: 在虚拟机环境下有该指标,表示CPU被其他虚拟机争用的时间占比,超过20就表示争抢严重(单位:%) 10 | cpu_usage_guest: 通过虚拟化运行其他操作系统的时间,也就是运行虚拟机的CPU时间占比(单位:%) 11 | cpu_usage_guest_nice: 以低优先级运行虚拟机的时间占比(单位:%) 12 | 13 | disk_free: 硬盘分区剩余量(单位:byte) 14 | disk_used: 硬盘分区使用量(单位:byte) 15 | disk_used_percent: 硬盘分区使用率(单位:%) 16 | disk_total: 硬盘分区总量(单位:byte) 17 | disk_inodes_free: 硬盘分区inode剩余量 18 | disk_inodes_used: 硬盘分区inode使用量 19 | disk_inodes_total: 硬盘分区inode总量 20 | 21 | diskio_io_time: 从设备视角来看I/O请求总时间,队列中有I/O请求就计数(单位:毫秒),counter类型,需要用函数求rate才有使用价值 22 | diskio_iops_in_progress: 已经分配给设备驱动且尚未完成的IO请求,不包含在队列中但尚未分配给设备驱动的IO请求,gauge类型 23 | diskio_merged_reads: 相邻读请求merge读的次数,counter类型 24 | diskio_merged_writes: 相邻写请求merge写的次数,counter类型 25 | diskio_read_bytes: 读取的byte数量,counter类型,需要用函数求rate才有使用价值 26 | diskio_read_time: 读请求总时间(单位:毫秒),counter类型,需要用函数求rate才有使用价值 27 | diskio_reads: 读请求次数,counter类型,需要用函数求rate才有使用价值 28 | diskio_weighted_io_time: 从I/O请求视角来看I/O等待总时间,如果同时有多个I/O请求,时间会叠加(单位:毫秒) 29 | diskio_write_bytes: 写入的byte数量,counter类型,需要用函数求rate才有使用价值 30 | diskio_write_time: 写请求总时间(单位:毫秒),counter类型,需要用函数求rate才有使用价值 31 | diskio_writes: 写请求次数,counter类型,需要用函数求rate才有使用价值 32 | -------------------------------------------------------------------------------- /src/webapi/router/router_self.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "github.com/gin-gonic/gin" 5 | "github.com/toolkits/pkg/ginx" 6 | 7 | "github.com/didi/nightingale/v5/src/models" 8 | "github.com/didi/nightingale/v5/src/pkg/ormx" 9 | ) 10 | 11 | func selfProfileGet(c *gin.Context) { 12 | user := c.MustGet("user").(*models.User) 13 | if user.IsAdmin() { 14 | user.Admin = true 15 | } 16 | ginx.NewRender(c).Data(user, nil) 17 | } 18 | 19 | type selfProfileForm struct { 20 | Nickname string `json:"nickname"` 21 | Phone string `json:"phone"` 22 | Email string `json:"email"` 23 | Portrait string `json:"portrait"` 24 | 
Contacts ormx.JSONObj `json:"contacts"` 25 | } 26 | 27 | func selfProfilePut(c *gin.Context) { 28 | var f selfProfileForm 29 | ginx.BindJSON(c, &f) 30 | 31 | user := c.MustGet("user").(*models.User) 32 | user.Nickname = f.Nickname 33 | user.Phone = f.Phone 34 | user.Email = f.Email 35 | user.Portrait = f.Portrait 36 | user.Contacts = f.Contacts 37 | user.UpdateBy = user.Username 38 | 39 | ginx.NewRender(c).Message(user.UpdateAllFields()) 40 | } 41 | 42 | type selfPasswordForm struct { 43 | OldPass string `json:"oldpass" binding:"required"` 44 | NewPass string `json:"newpass" binding:"required"` 45 | } 46 | 47 | func selfPasswordPut(c *gin.Context) { 48 | var f selfPasswordForm 49 | ginx.BindJSON(c, &f) 50 | user := c.MustGet("user").(*models.User) 51 | ginx.NewRender(c).Message(user.ChangePassword(f.OldPass, f.NewPass)) 52 | } 53 | -------------------------------------------------------------------------------- /src/server/engine/mute.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "github.com/didi/nightingale/v5/src/models" 5 | "github.com/didi/nightingale/v5/src/server/memsto" 6 | ) 7 | 8 | // 如果传入了clock这个可选参数,就表示使用这个clock表示的时间,否则就从event的字段中取TriggerTime 9 | func isMuted(event *models.AlertCurEvent, clock ...int64) bool { 10 | mutes, has := memsto.AlertMuteCache.Gets(event.GroupId) 11 | if !has || len(mutes) == 0 { 12 | return false 13 | } 14 | 15 | for i := 0; i < len(mutes); i++ { 16 | if matchMute(event, mutes[i], clock...) 
{ 17 | return true 18 | } 19 | } 20 | 21 | return false 22 | } 23 | 24 | func matchMute(event *models.AlertCurEvent, mute *models.AlertMute, clock ...int64) bool { 25 | ts := event.TriggerTime 26 | if len(clock) > 0 { 27 | ts = clock[0] 28 | } 29 | 30 | if ts < mute.Btime || ts > mute.Etime { 31 | return false 32 | } 33 | 34 | return matchTags(event.TagsMap, mute.ITags) 35 | } 36 | 37 | func matchTags(eventTagsMap map[string]string, itags []models.TagFilter) bool { 38 | for i := 0; i < len(itags); i++ { 39 | filter := itags[i] 40 | value, exists := eventTagsMap[filter.Key] 41 | if !exists { 42 | return false 43 | } 44 | 45 | if filter.Func == "==" { 46 | // == 47 | if filter.Value != value { 48 | return false 49 | } 50 | } else if filter.Func == "in" { 51 | // in 52 | if _, has := filter.Vset[value]; !has { 53 | return false 54 | } 55 | } else { 56 | // =~ 57 | if !filter.Regexp.MatchString(value) { 58 | return false 59 | } 60 | } 61 | } 62 | 63 | return true 64 | } 65 | -------------------------------------------------------------------------------- /src/models/user_group_member.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | type UserGroupMember struct { 4 | GroupId int64 5 | UserId int64 6 | } 7 | 8 | func (UserGroupMember) TableName() string { 9 | return "user_group_member" 10 | } 11 | 12 | func MyGroupIds(userId int64) ([]int64, error) { 13 | var ids []int64 14 | err := DB().Model(&UserGroupMember{}).Where("user_id=?", userId).Pluck("group_id", &ids).Error 15 | return ids, err 16 | } 17 | 18 | func MemberIds(groupId int64) ([]int64, error) { 19 | var ids []int64 20 | err := DB().Model(&UserGroupMember{}).Where("group_id=?", groupId).Pluck("user_id", &ids).Error 21 | return ids, err 22 | } 23 | 24 | func UserGroupMemberCount(where string, args ...interface{}) (int64, error) { 25 | return Count(DB().Model(&UserGroupMember{}).Where(where, args...)) 26 | } 27 | 28 | func UserGroupMemberAdd(groupId, 
userId int64) error { 29 | num, err := UserGroupMemberCount("user_id=? and group_id=?", userId, groupId) 30 | if err != nil { 31 | return err 32 | } 33 | 34 | if num > 0 { 35 | // already exists 36 | return nil 37 | } 38 | 39 | obj := UserGroupMember{ 40 | GroupId: groupId, 41 | UserId: userId, 42 | } 43 | 44 | return Insert(obj) 45 | } 46 | 47 | func UserGroupMemberDel(groupId int64, userIds []int64) error { 48 | if len(userIds) == 0 { 49 | return nil 50 | } 51 | 52 | return DB().Where("group_id = ? and user_id in ?", groupId, userIds).Delete(&UserGroupMember{}).Error 53 | } 54 | 55 | func UserGroupMemberGetAll() ([]UserGroupMember, error) { 56 | var lst []UserGroupMember 57 | err := DB().Find(&lst).Error 58 | return lst, err 59 | } 60 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/didi/nightingale/v5 2 | 3 | go 1.14 4 | 5 | require ( 6 | github.com/dgrijalva/jwt-go v3.2.0+incompatible 7 | github.com/fatih/camelcase v1.0.0 // indirect 8 | github.com/fatih/structs v1.1.0 // indirect 9 | github.com/gin-contrib/pprof v1.3.0 10 | github.com/gin-gonic/gin v1.7.4 11 | github.com/go-ldap/ldap/v3 v3.4.1 12 | github.com/go-redis/redis/v8 v8.11.3 13 | github.com/gogo/protobuf v1.1.1 14 | github.com/golang-jwt/jwt v3.2.2+incompatible 15 | github.com/golang/protobuf v1.5.2 16 | github.com/golang/snappy v0.0.4 17 | github.com/google/uuid v1.3.0 18 | github.com/json-iterator/go v1.1.12 19 | github.com/koding/multiconfig v0.0.0-20171124222453-69c27309b2d7 20 | github.com/mattn/go-isatty v0.0.12 21 | github.com/orcaman/concurrent-map v0.0.0-20210501183033-44dafcb38ecc 22 | github.com/pkg/errors v0.9.1 23 | github.com/prometheus/client_golang v1.11.0 24 | github.com/prometheus/common v0.26.0 25 | github.com/prometheus/prometheus v2.5.0+incompatible 26 | github.com/tidwall/gjson v1.14.0 27 | github.com/toolkits/pkg v1.2.9 28 | 
github.com/urfave/cli/v2 v2.3.0 29 | golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d // indirect 30 | golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e // indirect 31 | google.golang.org/genproto v0.0.0-20211007155348-82e027067bd4 // indirect 32 | google.golang.org/grpc v1.41.0 // indirect 33 | gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc // indirect 34 | gopkg.in/gomail.v2 v2.0.0-20160411212932-81ebce5c23df 35 | gorm.io/driver/mysql v1.1.2 36 | gorm.io/driver/postgres v1.1.1 37 | gorm.io/gorm v1.21.15 38 | ) 39 | -------------------------------------------------------------------------------- /src/server/sender/dingtalk.go: -------------------------------------------------------------------------------- 1 | package sender 2 | 3 | import ( 4 | "strings" 5 | "time" 6 | 7 | "github.com/didi/nightingale/v5/src/server/poster" 8 | "github.com/toolkits/pkg/logger" 9 | ) 10 | 11 | type DingtalkMessage struct { 12 | Title string 13 | Text string 14 | AtMobiles []string 15 | Tokens []string 16 | } 17 | 18 | type dingtalkMarkdown struct { 19 | Title string `json:"title"` 20 | Text string `json:"text"` 21 | } 22 | 23 | type dingtalkAt struct { 24 | AtMobiles []string `json:"atMobiles"` 25 | IsAtAll bool `json:"isAtAll"` 26 | } 27 | 28 | type dingtalk struct { 29 | Msgtype string `json:"msgtype"` 30 | Markdown dingtalkMarkdown `json:"markdown"` 31 | At dingtalkAt `json:"at"` 32 | } 33 | 34 | func SendDingtalk(message DingtalkMessage) { 35 | ats := make([]string, len(message.AtMobiles)) 36 | for i := 0; i < len(message.AtMobiles); i++ { 37 | ats[i] = "@" + message.AtMobiles[i] 38 | } 39 | 40 | for i := 0; i < len(message.Tokens); i++ { 41 | url := "https://oapi.dingtalk.com/robot/send?access_token=" + message.Tokens[i] 42 | body := dingtalk{ 43 | Msgtype: "markdown", 44 | Markdown: dingtalkMarkdown{ 45 | Title: message.Title, 46 | Text: message.Text + " " + strings.Join(ats, " "), 47 | }, 48 | At: dingtalkAt{ 49 | AtMobiles: message.AtMobiles, 
50 | IsAtAll: false, 51 | }, 52 | } 53 | 54 | res, code, err := poster.PostJSON(url, time.Second*5, body) 55 | if err != nil { 56 | logger.Errorf("dingtalk_sender: result=fail url=%s code=%d error=%v response=%s", url, code, err, string(res)) 57 | } else { 58 | logger.Infof("dingtalk_sender: result=succ url=%s code=%d response=%s", url, code, string(res)) 59 | } 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/pkg/httpx/httpx.go: -------------------------------------------------------------------------------- 1 | package httpx 2 | 3 | import ( 4 | "context" 5 | "crypto/tls" 6 | "fmt" 7 | "net/http" 8 | "time" 9 | ) 10 | 11 | type Config struct { 12 | Host string 13 | Port int 14 | CertFile string 15 | KeyFile string 16 | PProf bool 17 | PrintAccessLog bool 18 | ShutdownTimeout int 19 | MaxContentLength int64 20 | ReadTimeout int 21 | WriteTimeout int 22 | IdleTimeout int 23 | } 24 | 25 | func Init(cfg Config, handler http.Handler) func() { 26 | addr := fmt.Sprintf("%s:%d", cfg.Host, cfg.Port) 27 | srv := &http.Server{ 28 | Addr: addr, 29 | Handler: handler, 30 | ReadTimeout: time.Duration(cfg.ReadTimeout) * time.Second, 31 | WriteTimeout: time.Duration(cfg.WriteTimeout) * time.Second, 32 | IdleTimeout: time.Duration(cfg.IdleTimeout) * time.Second, 33 | } 34 | 35 | go func() { 36 | fmt.Println("http server listening on:", addr) 37 | 38 | var err error 39 | if cfg.CertFile != "" && cfg.KeyFile != "" { 40 | srv.TLSConfig = &tls.Config{MinVersion: tls.VersionTLS12} 41 | err = srv.ListenAndServeTLS(cfg.CertFile, cfg.KeyFile) 42 | } else { 43 | err = srv.ListenAndServe() 44 | } 45 | if err != nil && err != http.ErrServerClosed { 46 | panic(err) 47 | } 48 | }() 49 | 50 | return func() { 51 | ctx, cancel := context.WithTimeout(context.Background(), time.Second*time.Duration(cfg.ShutdownTimeout)) 52 | defer cancel() 53 | 54 | srv.SetKeepAlivesEnabled(false) 55 | if err := srv.Shutdown(ctx); err != nil { 56 | 
// Options holds the settings Init uses to build the Prometheus API client and
// its HTTP transport.
type Options struct {
	// Url is passed to the API client as its Address.
	Url string
	// BasicAuthUser and BasicAuthPass are not read by Init.
	// NOTE(review): presumably applied by the API wrapper when issuing
	// requests — confirm against the caller.
	BasicAuthUser string
	BasicAuthPass string

	// All timeouts below are converted by Init with time.Millisecond,
	// i.e. they are expressed in milliseconds.
	Timeout               int64 // transport ResponseHeaderTimeout
	DialTimeout           int64 // dialer Timeout
	TLSHandshakeTimeout   int64 // transport TLSHandshakeTimeout
	ExpectContinueTimeout int64 // transport ExpectContinueTimeout
	IdleConnTimeout       int64 // transport IdleConnTimeout
	KeepAlive             int64 // dialer KeepAlive

	// Connection pool limits, copied verbatim onto the http.Transport.
	MaxConnsPerHost     int
	MaxIdleConns        int
	MaxIdleConnsPerHost int
}
} 63 | 64 | return nil 65 | } 66 | -------------------------------------------------------------------------------- /src/server/engine/vector.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "math" 5 | 6 | "github.com/prometheus/common/model" 7 | ) 8 | 9 | type Vector struct { 10 | Key string `json:"key"` 11 | Labels model.Metric `json:"labels"` 12 | Timestamp int64 `json:"timestamp"` 13 | Value float64 `json:"value"` 14 | } 15 | 16 | func ConvertVectors(value model.Value) (lst []Vector) { 17 | switch value.Type() { 18 | case model.ValVector: 19 | items, ok := value.(model.Vector) 20 | if !ok { 21 | return 22 | } 23 | 24 | for _, item := range items { 25 | if math.IsNaN(float64(item.Value)) { 26 | continue 27 | } 28 | 29 | lst = append(lst, Vector{ 30 | Key: item.Metric.String(), 31 | Timestamp: item.Timestamp.Unix(), 32 | Value: float64(item.Value), 33 | Labels: item.Metric, 34 | }) 35 | } 36 | case model.ValMatrix: 37 | items, ok := value.(model.Matrix) 38 | if !ok { 39 | return 40 | } 41 | 42 | for _, item := range items { 43 | if len(item.Values) == 0 { 44 | return 45 | } 46 | 47 | last := item.Values[len(item.Values)-1] 48 | 49 | if math.IsNaN(float64(last.Value)) { 50 | continue 51 | } 52 | 53 | lst = append(lst, Vector{ 54 | Key: item.Metric.String(), 55 | Labels: item.Metric, 56 | Timestamp: last.Timestamp.Unix(), 57 | Value: float64(last.Value), 58 | }) 59 | } 60 | case model.ValScalar: 61 | item, ok := value.(*model.Scalar) 62 | if !ok { 63 | return 64 | } 65 | 66 | if math.IsNaN(float64(item.Value)) { 67 | return 68 | } 69 | 70 | lst = append(lst, Vector{ 71 | Key: "{}", 72 | Timestamp: item.Timestamp.Unix(), 73 | Value: float64(item.Value), 74 | Labels: model.Metric{}, 75 | }) 76 | default: 77 | return 78 | } 79 | 80 | return 81 | } 82 | -------------------------------------------------------------------------------- /src/server/stat/stat.go: 
// Metric collectors exposed by the server; all of them share the
// "n9e"/"server" namespace and subsystem declared above.
var (
	// Execution duration of each periodic task.
	GaugeCronDuration = prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Namespace: namespace,
		Subsystem: subsystem,
		Name:      "cron_duration",
		Help:      "Cron method use duration, unit: ms.",
	}, []string{"cluster", "name"})

	// Number of records synchronized when syncing data from the database.
	GaugeSyncNumber = prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Namespace: namespace,
		Subsystem: subsystem,
		Name:      "cron_sync_number",
		Help:      "Cron sync number.",
	}, []string{"cluster", "name"})

	// Total amount of monitoring samples received through the receiving interfaces.
	CounterSampleTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
		Namespace: namespace,
		Subsystem: subsystem,
		Name:      "samples_received_total",
		Help:      "Total number samples received.",
	}, []string{"cluster", "channel"})

	// Total number of alerts generated.
	CounterAlertsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
		Namespace: namespace,
		Subsystem: subsystem,
		Name:      "alerts_total",
		Help:      "Total number alert events.",
	}, []string{"cluster"})

	// Length of the in-memory alert event queue.
	GaugeAlertQueueSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Namespace: namespace,
		Subsystem: subsystem,
		Name:      "alert_queue_size",
		Help:      "The size of alert queue.",
	}, []string{"cluster"})
)
56 | prometheus.MustRegister( 57 | GaugeCronDuration, 58 | GaugeSyncNumber, 59 | CounterSampleTotal, 60 | CounterAlertsTotal, 61 | GaugeAlertQueueSize, 62 | ) 63 | } 64 | -------------------------------------------------------------------------------- /src/models/task_record.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | type TaskRecord struct { 4 | Id int64 `json:"id" gorm:"primaryKey"` 5 | GroupId int64 `json:"group_id"` 6 | IbexAddress string `json:"ibex_address"` 7 | IbexAuthUser string `json:"ibex_auth_user"` 8 | IbexAuthPass string `json:"ibex_auth_pass"` 9 | Title string `json:"title"` 10 | Account string `json:"account"` 11 | Batch int `json:"batch"` 12 | Tolerance int `json:"tolerance"` 13 | Timeout int `json:"timeout"` 14 | Pause string `json:"pause"` 15 | Script string `json:"script"` 16 | Args string `json:"args"` 17 | CreateAt int64 `json:"create_at"` 18 | CreateBy string `json:"create_by"` 19 | } 20 | 21 | func (r *TaskRecord) TableName() string { 22 | return "task_record" 23 | } 24 | 25 | // create task 26 | func (r *TaskRecord) Add() error { 27 | return Insert(r) 28 | } 29 | 30 | // list task, filter by group_id, create_by 31 | func TaskRecordTotal(bgid, beginTime int64, createBy, query string) (int64, error) { 32 | session := DB().Model(new(TaskRecord)).Where("create_at > ? and group_id = ?", beginTime, bgid) 33 | 34 | if createBy != "" { 35 | session = session.Where("create_by = ?", createBy) 36 | } 37 | 38 | if query != "" { 39 | session = session.Where("title like ?", "%"+query+"%") 40 | } 41 | 42 | return Count(session) 43 | } 44 | 45 | func TaskRecordGets(bgid, beginTime int64, createBy, query string, limit, offset int) ([]*TaskRecord, error) { 46 | session := DB().Where("create_at > ? 
def main():
    """Read an alert payload (JSON) from stdin and dispatch it to the senders.

    The payload is first dumped, pretty-printed, to the file ".payload" in the
    current directory. Then, for every channel name listed under
    payload["event"]["notify_channels"], the classmethod Sender.send_<channel>
    is called with the whole payload. Channels without a matching sender are
    reported on stdout and skipped. A payload without an event or without
    notify channels is treated as "nothing to send".
    """
    payload = json.load(sys.stdin)
    with open(".payload", 'w') as f:
        f.write(json.dumps(payload, indent=4))

    event = payload.get('event') or {}
    channels = event.get('notify_channels') or []
    for ch in channels:
        send_func_name = "send_{}".format(ch.strip())
        send_func = getattr(Sender, send_func_name, None)
        if send_func is None:
            # The name must be substituted with .format(); passing it as a
            # second print() argument leaves the "{}" placeholder unformatted.
            print("function: {} not found".format(send_func_name))
            continue
        send_func(payload)
tx.Where("id=?", cg.Id).Delete(&ChartGroup{}).Error; err != nil { 55 | return err 56 | } 57 | 58 | return nil 59 | }) 60 | } 61 | 62 | func NewDefaultChartGroup(dashId int64) error { 63 | return Insert(&ChartGroup{ 64 | DashboardId: dashId, 65 | Name: "Default chart group", 66 | Weight: 0, 67 | }) 68 | } 69 | 70 | func ChartGroupIdsOf(dashId int64) ([]int64, error) { 71 | var ids []int64 72 | err := DB().Model(&ChartGroup{}).Where("dashboard_id = ?", dashId).Pluck("id", &ids).Error 73 | return ids, err 74 | } 75 | 76 | func ChartGroupsOf(dashId int64) ([]ChartGroup, error) { 77 | var objs []ChartGroup 78 | err := DB().Where("dashboard_id = ?", dashId).Order("weight").Find(&objs).Error 79 | return objs, err 80 | } 81 | -------------------------------------------------------------------------------- /docker/n9eetc/script/notify.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: UTF-8 -*- 3 | import sys 4 | import json 5 | 6 | class Sender(object): 7 | @classmethod 8 | def send_email(cls, payload): 9 | # already done in go code 10 | pass 11 | 12 | @classmethod 13 | def send_wecom(cls, payload): 14 | # already done in go code 15 | pass 16 | 17 | @classmethod 18 | def send_dingtalk(cls, payload): 19 | # already done in go code 20 | pass 21 | 22 | @classmethod 23 | def send_feishu(cls, payload): 24 | # already done in go code 25 | pass 26 | 27 | @classmethod 28 | def send_sms(cls, payload): 29 | users = payload.get('event').get("notify_users_obj") 30 | phones = {} 31 | for u in users: 32 | if u.get("phone"): 33 | phones[u.get("phone")] = 1 34 | if phones: 35 | print("send_sms not implemented, phones: {}".format(phones.keys())) 36 | 37 | @classmethod 38 | def send_voice(cls, payload): 39 | users = payload.get('event').get("notify_users_obj") 40 | phones = {} 41 | for u in users: 42 | if u.get("phone"): 43 | phones[u.get("phone")] = 1 44 | if phones: 45 | print("send_voice not implemented, 
def main():
    """Read an alert payload (JSON) from stdin and dispatch it to the senders.

    The payload is first dumped, pretty-printed, to the file ".payload" in the
    current directory. Then, for every channel name listed under
    payload["event"]["notify_channels"], the classmethod Sender.send_<channel>
    is called with the whole payload. Channels without a matching sender are
    reported on stdout and skipped. A payload without an event or without
    notify channels is treated as "nothing to send".
    """
    payload = json.load(sys.stdin)
    with open(".payload", 'w') as f:
        f.write(json.dumps(payload, indent=4))

    event = payload.get('event') or {}
    channels = event.get('notify_channels') or []
    for ch in channels:
        send_func_name = "send_{}".format(ch.strip())
        send_func = getattr(Sender, send_func_name, None)
        if send_func is None:
            # The name must be substituted with .format(); passing it as a
            # second print() argument leaves the "{}" placeholder unformatted.
            print("function: {} not found".format(send_func_name))
            continue
        send_func(payload)
"0.0.0.0:20090" 49 | 50 | [Heartbeat] 51 | # auto detect if blank 52 | IP = "" 53 | # unit: ms 54 | Interval = 1000 55 | 56 | [Output] 57 | # database | remote 58 | ComeFrom = "database" 59 | AgtdPort = 2090 60 | 61 | [Gorm] 62 | # enable debug mode or not 63 | Debug = false 64 | # mysql postgres 65 | DBType = "mysql" 66 | # unit: s 67 | MaxLifetime = 7200 68 | # max open connections 69 | MaxOpenConns = 150 70 | # max idle connections 71 | MaxIdleConns = 50 72 | # table prefix 73 | TablePrefix = "" 74 | 75 | [MySQL] 76 | # mysql address host:port 77 | Address = "mysql:3306" 78 | # mysql username 79 | User = "root" 80 | # mysql password 81 | Password = "1234" 82 | # database name 83 | DBName = "ibex" 84 | # connection params 85 | Parameters = "charset=utf8mb4&parseTime=True&loc=Local&allowNativePasswords=true" 86 | 87 | [Postgres] 88 | # pg address host:port 89 | Address = "postgres:5432" 90 | # pg user 91 | User = "root" 92 | # pg password 93 | Password = "1234" 94 | # database name 95 | DBName = "ibex" 96 | # ssl mode 97 | SSLMode = "disable" 98 | -------------------------------------------------------------------------------- /src/webapi/prom/prom.go: -------------------------------------------------------------------------------- 1 | package prom 2 | 3 | import ( 4 | "net" 5 | "net/http" 6 | "time" 7 | ) 8 | 9 | type Options struct { 10 | Name string 11 | Prom string 12 | 13 | BasicAuthUser string 14 | BasicAuthPass string 15 | 16 | Timeout int64 17 | DialTimeout int64 18 | TLSHandshakeTimeout int64 19 | ExpectContinueTimeout int64 20 | IdleConnTimeout int64 21 | KeepAlive int64 22 | 23 | MaxConnsPerHost int 24 | MaxIdleConns int 25 | MaxIdleConnsPerHost int 26 | } 27 | 28 | type ClusterType struct { 29 | Opts Options 30 | Transport *http.Transport 31 | } 32 | 33 | type ClustersType struct { 34 | M map[string]ClusterType 35 | } 36 | 37 | func NewClusters() ClustersType { 38 | return ClustersType{ 39 | M: make(map[string]ClusterType), 40 | } 41 | } 42 | 43 | func 
(cs *ClustersType) Put(name string, cluster ClusterType) { 44 | cs.M[name] = cluster 45 | } 46 | 47 | func (cs *ClustersType) Get(name string) (ClusterType, bool) { 48 | c, has := cs.M[name] 49 | return c, has 50 | } 51 | 52 | var Clusters = NewClusters() 53 | 54 | func Init(opts []Options) error { 55 | for i := 0; i < len(opts); i++ { 56 | cluster := ClusterType{ 57 | Opts: opts[i], 58 | Transport: &http.Transport{ 59 | // TLSClientConfig: tlsConfig, 60 | Proxy: http.ProxyFromEnvironment, 61 | DialContext: (&net.Dialer{ 62 | Timeout: time.Duration(opts[i].DialTimeout) * time.Millisecond, 63 | KeepAlive: time.Duration(opts[i].KeepAlive) * time.Millisecond, 64 | }).DialContext, 65 | ResponseHeaderTimeout: time.Duration(opts[i].Timeout) * time.Millisecond, 66 | TLSHandshakeTimeout: time.Duration(opts[i].TLSHandshakeTimeout) * time.Millisecond, 67 | ExpectContinueTimeout: time.Duration(opts[i].ExpectContinueTimeout) * time.Millisecond, 68 | MaxConnsPerHost: opts[i].MaxConnsPerHost, 69 | MaxIdleConns: opts[i].MaxIdleConns, 70 | MaxIdleConnsPerHost: opts[i].MaxIdleConnsPerHost, 71 | IdleConnTimeout: time.Duration(opts[i].IdleConnTimeout) * time.Millisecond, 72 | }, 73 | } 74 | Clusters.Put(opts[i].Name, cluster) 75 | } 76 | 77 | return nil 78 | } 79 | -------------------------------------------------------------------------------- /src/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/toolkits/pkg/runner" 8 | "github.com/urfave/cli/v2" 9 | 10 | "github.com/didi/nightingale/v5/src/server" 11 | "github.com/didi/nightingale/v5/src/webapi" 12 | ) 13 | 14 | // VERSION go build -ldflags "-X main.VERSION=x.x.x" 15 | var VERSION = "not specified" 16 | 17 | func main() { 18 | app := cli.NewApp() 19 | app.Name = "n9e" 20 | app.Version = VERSION 21 | app.Usage = "Nightingale, enterprise prometheus management" 22 | app.Commands = []*cli.Command{ 23 | newWebapiCmd(), 24 | 
newServerCmd(), 25 | } 26 | app.Run(os.Args) 27 | } 28 | 29 | func newWebapiCmd() *cli.Command { 30 | return &cli.Command{ 31 | Name: "webapi", 32 | Usage: "Run webapi", 33 | Flags: []cli.Flag{ 34 | &cli.StringFlag{ 35 | Name: "conf", 36 | Aliases: []string{"c"}, 37 | Usage: "specify configuration file(.json,.yaml,.toml)", 38 | }, 39 | }, 40 | Action: func(c *cli.Context) error { 41 | printEnv() 42 | 43 | var opts []webapi.WebapiOption 44 | if c.String("conf") != "" { 45 | opts = append(opts, webapi.SetConfigFile(c.String("conf"))) 46 | } 47 | opts = append(opts, webapi.SetVersion(VERSION)) 48 | 49 | webapi.Run(opts...) 50 | return nil 51 | }, 52 | } 53 | } 54 | 55 | func newServerCmd() *cli.Command { 56 | return &cli.Command{ 57 | Name: "server", 58 | Usage: "Run server", 59 | Flags: []cli.Flag{ 60 | &cli.StringFlag{ 61 | Name: "conf", 62 | Aliases: []string{"c"}, 63 | Usage: "specify configuration file(.json,.yaml,.toml)", 64 | }, 65 | }, 66 | Action: func(c *cli.Context) error { 67 | printEnv() 68 | 69 | var opts []server.ServerOption 70 | if c.String("conf") != "" { 71 | opts = append(opts, server.SetConfigFile(c.String("conf"))) 72 | } 73 | opts = append(opts, server.SetVersion(VERSION)) 74 | 75 | server.Run(opts...) 
76 | return nil 77 | }, 78 | } 79 | } 80 | 81 | func printEnv() { 82 | runner.Init() 83 | fmt.Println("runner.cwd:", runner.Cwd) 84 | fmt.Println("runner.hostname:", runner.Hostname) 85 | fmt.Println("runner.fd_limits:", runner.FdLimits()) 86 | fmt.Println("runner.vm_limits:", runner.VMLimits()) 87 | } 88 | -------------------------------------------------------------------------------- /src/models/configs.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os" 7 | "time" 8 | 9 | "github.com/pkg/errors" 10 | "github.com/toolkits/pkg/runner" 11 | "github.com/toolkits/pkg/str" 12 | ) 13 | 14 | type Configs struct { 15 | Id int64 `gorm:"primaryKey"` 16 | Ckey string 17 | Cval string 18 | } 19 | 20 | func (Configs) TableName() string { 21 | return "configs" 22 | } 23 | 24 | // InitSalt generate random salt 25 | func InitSalt() { 26 | val, err := ConfigsGet("salt") 27 | if err != nil { 28 | log.Fatalln("cannot query salt", err) 29 | } 30 | 31 | if val != "" { 32 | return 33 | } 34 | 35 | content := fmt.Sprintf("%s%d%d%s", runner.Hostname, os.Getpid(), time.Now().UnixNano(), str.RandLetters(6)) 36 | salt := str.MD5(content) 37 | err = ConfigsSet("salt", salt) 38 | if err != nil { 39 | log.Fatalln("init salt in mysql", err) 40 | } 41 | } 42 | 43 | func ConfigsGet(ckey string) (string, error) { 44 | var lst []string 45 | err := DB().Model(&Configs{}).Where("ckey=?", ckey).Pluck("cval", &lst).Error 46 | if err != nil { 47 | return "", errors.WithMessage(err, "failed to query configs") 48 | } 49 | 50 | if len(lst) > 0 { 51 | return lst[0], nil 52 | } 53 | 54 | return "", nil 55 | } 56 | 57 | func ConfigsSet(ckey, cval string) error { 58 | num, err := Count(DB().Model(&Configs{}).Where("ckey=?", ckey)) 59 | if err != nil { 60 | return errors.WithMessage(err, "failed to count configs") 61 | } 62 | 63 | if num == 0 { 64 | // insert 65 | err = DB().Create(&Configs{ 66 | Ckey: ckey, 67 | 
// JSONObj is a raw JSON document stored in a database column. An empty value
// is written to the database as NULL and rendered to JSON as `{}`.
type JSONObj json.RawMessage

// JSONArr is a raw JSON array stored in a database column. An empty value is
// written to the database as NULL and rendered to JSON as `[]`.
type JSONArr json.RawMessage

// jsonColumnBytes extracts the raw bytes from a value handed over by the
// database driver. NULL (nil) yields nil bytes; []byte and string are both
// accepted because drivers differ in how they deliver text/JSON columns.
func jsonColumnBytes(value interface{}) ([]byte, error) {
	switch v := value.(type) {
	case nil:
		return nil, nil
	case []byte:
		return v, nil
	case string:
		return []byte(v), nil
	}
	return nil, errors.New(fmt.Sprint("Failed to unmarshal JSONB value:", value))
}

// Scan implements sql.Scanner. It accepts NULL, []byte and string sources.
// NULL and empty input reset the receiver to the empty value, so a value that
// Value stored as NULL can be read back. Any other input must be valid JSON;
// it is copied into the receiver.
func (j *JSONObj) Scan(value interface{}) error {
	data, err := jsonColumnBytes(value)
	if err != nil {
		return err
	}
	if len(data) == 0 {
		*j = nil
		return nil
	}

	result := json.RawMessage{}
	err = json.Unmarshal(data, &result)
	*j = JSONObj(result)
	return err
}

// Value implements driver.Valuer. An empty value is stored as NULL, anything
// else as its raw JSON bytes.
func (j JSONObj) Value() (driver.Value, error) {
	if len(j) == 0 {
		return nil, nil
	}
	return json.RawMessage(j).MarshalJSON()
}

// MarshalJSON implements json.Marshaler. It uses a value receiver so that the
// method is also picked up for non-addressable values (for example a struct
// passed to json.Marshal by value); with a pointer receiver such values would
// be encoded as a base64 string instead. Empty content, or content that starts
// with a double quote (a JSON string rather than an object), is rendered as `{}`.
func (j JSONObj) MarshalJSON() ([]byte, error) {
	ret := []byte(j)
	if len(ret) == 0 {
		return []byte(`{}`), nil
	}
	// not valid json
	if ret[0] == '"' {
		return []byte(`{}`), nil
	}
	return ret, nil
}

// UnmarshalJSON implements json.Unmarshaler. The data is copied because the
// decoder may reuse its buffer after the call returns.
func (j *JSONObj) UnmarshalJSON(data []byte) error {
	if j == nil {
		return errors.New("ormx.JSONObj: UnmarshalJSON on nil pointer")
	}
	*j = append((*j)[:0], data...)
	return nil
}

// Scan implements sql.Scanner. It accepts NULL, []byte and string sources,
// exactly like JSONObj.Scan. NULL and empty input reset the receiver to the
// empty value; any other input must be valid JSON and is copied into the
// receiver.
func (j *JSONArr) Scan(value interface{}) error {
	data, err := jsonColumnBytes(value)
	if err != nil {
		return err
	}
	if len(data) == 0 {
		*j = nil
		return nil
	}

	result := json.RawMessage{}
	err = json.Unmarshal(data, &result)
	*j = JSONArr(result)
	return err
}

// Value implements driver.Valuer. An empty value is stored as NULL, anything
// else as its raw JSON bytes.
func (j JSONArr) Value() (driver.Value, error) {
	if len(j) == 0 {
		return nil, nil
	}
	return json.RawMessage(j).MarshalJSON()
}

// MarshalJSON implements json.Marshaler with a value receiver (see
// JSONObj.MarshalJSON for the reason). Empty content, or content that starts
// with a double quote (a JSON string rather than an array), is rendered as `[]`.
func (j JSONArr) MarshalJSON() ([]byte, error) {
	ret := []byte(j)
	if len(ret) == 0 {
		return []byte(`[]`), nil
	}
	// not valid json
	if ret[0] == '"' {
		return []byte(`[]`), nil
	}
	return ret, nil
}

// UnmarshalJSON implements json.Unmarshaler. The data is copied because the
// decoder may reuse its buffer after the call returns.
func (j *JSONArr) UnmarshalJSON(data []byte) error {
	if j == nil {
		return errors.New("ormx.JSONArr: UnmarshalJSON on nil pointer")
	}
	*j = append((*j)[:0], data...)
	return nil
}
// } 26 | 27 | // username := c.MustGet("username").(string) 28 | // bgid := ginx.UrlParamInt64(c, "id") 29 | 30 | // // collect rule name -> error string 31 | // reterr := make(map[string]string) 32 | // for i := 0; i < count; i++ { 33 | // lst[i].Id = 0 34 | // lst[i].GroupId = bgid 35 | // lst[i].CreateBy = username 36 | // lst[i].UpdateBy = username 37 | // lst[i].FE2DB() 38 | 39 | // if err := lst[i].Add(); err != nil { 40 | // reterr[lst[i].Name] = err.Error() 41 | // } else { 42 | // reterr[lst[i].Name] = "" 43 | // } 44 | // } 45 | 46 | // ginx.NewRender(c).Data(reterr, nil) 47 | // } 48 | 49 | // func collectRuleDel(c *gin.Context) { 50 | // var f idsForm 51 | // ginx.BindJSON(c, &f) 52 | // f.Verify() 53 | 54 | // // param(busiGroupId) for protect 55 | // ginx.NewRender(c).Message(models.CollectRuleDels(f.Ids, ginx.UrlParamInt64(c, "id"))) 56 | // } 57 | 58 | // func collectRuleGet(c *gin.Context) { 59 | // crid := ginx.UrlParamInt64(c, "crid") 60 | // cr, err := models.CollectRuleGetById(crid) 61 | // ginx.NewRender(c).Data(cr, err) 62 | // } 63 | 64 | // func collectRulePut(c *gin.Context) { 65 | // var f models.CollectRule 66 | // ginx.BindJSON(c, &f) 67 | 68 | // crid := ginx.UrlParamInt64(c, "crid") 69 | // cr, err := models.CollectRuleGetById(crid) 70 | // ginx.Dangerous(err) 71 | 72 | // if cr == nil { 73 | // ginx.NewRender(c, http.StatusNotFound).Message("No such CollectRule") 74 | // return 75 | // } 76 | 77 | // f.UpdateBy = c.MustGet("username").(string) 78 | // ginx.NewRender(c).Message(cr.Update(f)) 79 | // } 80 | -------------------------------------------------------------------------------- /src/server/router/router.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "strings" 7 | 8 | "github.com/gin-contrib/pprof" 9 | "github.com/gin-gonic/gin" 10 | "github.com/prometheus/client_golang/prometheus/promhttp" 11 | "github.com/toolkits/pkg/ginx" 12 | 
13 | "github.com/didi/nightingale/v5/src/pkg/aop" 14 | "github.com/didi/nightingale/v5/src/server/config" 15 | "github.com/didi/nightingale/v5/src/server/naming" 16 | ) 17 | 18 | func New(version string) *gin.Engine { 19 | gin.SetMode(config.C.RunMode) 20 | 21 | loggerMid := aop.Logger() 22 | recoveryMid := aop.Recovery() 23 | 24 | if strings.ToLower(config.C.RunMode) == "release" { 25 | aop.DisableConsoleColor() 26 | } 27 | 28 | r := gin.New() 29 | 30 | r.Use(recoveryMid) 31 | 32 | // whether print access log 33 | if config.C.HTTP.PrintAccessLog { 34 | r.Use(loggerMid) 35 | } 36 | 37 | configRoute(r, version) 38 | 39 | return r 40 | } 41 | 42 | func configRoute(r *gin.Engine, version string) { 43 | if config.C.HTTP.PProf { 44 | pprof.Register(r, "/api/debug/pprof") 45 | } 46 | 47 | r.GET("/ping", func(c *gin.Context) { 48 | c.String(200, "pong") 49 | }) 50 | 51 | r.GET("/pid", func(c *gin.Context) { 52 | c.String(200, fmt.Sprintf("%d", os.Getpid())) 53 | }) 54 | 55 | r.GET("/addr", func(c *gin.Context) { 56 | c.String(200, c.Request.RemoteAddr) 57 | }) 58 | 59 | r.GET("/version", func(c *gin.Context) { 60 | c.String(200, version) 61 | }) 62 | 63 | r.GET("/servers/active", func(c *gin.Context) { 64 | lst, err := naming.ActiveServers(c.Request.Context(), config.C.ClusterName) 65 | ginx.NewRender(c).Data(lst, err) 66 | }) 67 | 68 | // use apiKey not basic auth 69 | r.POST("/datadog/api/v1/series", datadogSeries) 70 | 71 | if len(config.C.BasicAuth) > 0 { 72 | auth := gin.BasicAuth(config.C.BasicAuth) 73 | r.Use(auth) 74 | } 75 | 76 | r.POST("/opentsdb/put", handleOpenTSDB) 77 | r.POST("/openfalcon/push", falconPush) 78 | r.POST("/prometheus/v1/write", remoteWrite) 79 | r.POST("/prometheus/v1/query", queryPromql) 80 | 81 | r.GET("/memory/alert-rule", alertRuleGet) 82 | r.GET("/memory/idents", identsGets) 83 | r.GET("/memory/alert-mutes", mutesGets) 84 | r.GET("/memory/alert-subscribes", subscribesGets) 85 | r.GET("/memory/target", targetGet) 86 | r.GET("/memory/user", 
userGet) 87 | r.GET("/memory/user-group", userGroupGet) 88 | 89 | r.GET("/metrics", gin.WrapH(promhttp.Handler())) 90 | } 91 | -------------------------------------------------------------------------------- /src/webapi/router/router_prometheus.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "net/http" 5 | "net/http/httputil" 6 | "net/url" 7 | "strings" 8 | 9 | "github.com/gin-gonic/gin" 10 | "github.com/toolkits/pkg/ginx" 11 | 12 | "github.com/didi/nightingale/v5/src/webapi/config" 13 | "github.com/didi/nightingale/v5/src/webapi/prom" 14 | ) 15 | 16 | func prometheusProxy(c *gin.Context) { 17 | xcluster := c.GetHeader("X-Cluster") 18 | if xcluster == "" { 19 | c.String(http.StatusBadRequest, "X-Cluster missed") 20 | return 21 | } 22 | 23 | cluster, exists := prom.Clusters.Get(xcluster) 24 | if !exists { 25 | c.String(http.StatusBadRequest, "No such cluster: %s", xcluster) 26 | return 27 | } 28 | 29 | target, err := url.Parse(cluster.Opts.Prom) 30 | if err != nil { 31 | c.String(http.StatusInternalServerError, "invalid prometheus url: %s", cluster.Opts.Prom) 32 | return 33 | } 34 | 35 | director := func(req *http.Request) { 36 | req.URL.Scheme = target.Scheme 37 | req.URL.Host = target.Host 38 | req.Host = target.Host 39 | 40 | req.Header.Set("Host", target.Host) 41 | 42 | // fe request e.g. 
/api/n9e/prometheus/api/v1/query 43 | index := strings.Index(req.URL.Path, "/prometheus") 44 | if index == -1 { 45 | panic("url path invalid") 46 | } 47 | 48 | req.URL.Path = strings.TrimRight(target.Path, "/") + req.URL.Path[index+11:] 49 | 50 | if target.RawQuery == "" || req.URL.RawQuery == "" { 51 | req.URL.RawQuery = target.RawQuery + req.URL.RawQuery 52 | } else { 53 | req.URL.RawQuery = target.RawQuery + "&" + req.URL.RawQuery 54 | } 55 | 56 | if _, ok := req.Header["User-Agent"]; !ok { 57 | req.Header.Set("User-Agent", "") 58 | } 59 | 60 | if cluster.Opts.BasicAuthUser != "" { 61 | req.SetBasicAuth(cluster.Opts.BasicAuthUser, cluster.Opts.BasicAuthPass) 62 | } 63 | } 64 | 65 | errFunc := func(w http.ResponseWriter, r *http.Request, err error) { 66 | http.Error(w, err.Error(), http.StatusBadGateway) 67 | } 68 | 69 | proxy := &httputil.ReverseProxy{ 70 | Director: director, 71 | Transport: cluster.Transport, 72 | ErrorHandler: errFunc, 73 | } 74 | 75 | proxy.ServeHTTP(c.Writer, c.Request) 76 | } 77 | 78 | func clustersGets(c *gin.Context) { 79 | count := len(config.C.Clusters) 80 | names := make([]string, 0, count) 81 | for i := 0; i < count; i++ { 82 | names = append(names, config.C.Clusters[i].Name) 83 | } 84 | ginx.NewRender(c).Data(names, nil) 85 | } 86 | -------------------------------------------------------------------------------- /src/webapi/router/router_alert_subscribe.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "net/http" 5 | "time" 6 | 7 | "github.com/gin-gonic/gin" 8 | "github.com/toolkits/pkg/ginx" 9 | 10 | "github.com/didi/nightingale/v5/src/models" 11 | ) 12 | 13 | // Return all, front-end search and paging 14 | func alertSubscribeGets(c *gin.Context) { 15 | bgid := ginx.UrlParamInt64(c, "id") 16 | lst, err := models.AlertSubscribeGets(bgid) 17 | if err == nil { 18 | ugcache := make(map[int64]*models.UserGroup) 19 | for i := 0; i < len(lst); i++ { 20 | 
ginx.Dangerous(lst[i].FillUserGroups(ugcache)) 21 | } 22 | 23 | rulecache := make(map[int64]string) 24 | for i := 0; i < len(lst); i++ { 25 | ginx.Dangerous(lst[i].FillRuleName(rulecache)) 26 | } 27 | } 28 | ginx.NewRender(c).Data(lst, err) 29 | } 30 | 31 | func alertSubscribeGet(c *gin.Context) { 32 | subid := ginx.UrlParamInt64(c, "sid") 33 | 34 | sub, err := models.AlertSubscribeGet("id=?", subid) 35 | ginx.Dangerous(err) 36 | 37 | if sub == nil { 38 | ginx.NewRender(c, 404).Message("No such alert subscribe") 39 | return 40 | } 41 | 42 | ugcache := make(map[int64]*models.UserGroup) 43 | ginx.Dangerous(sub.FillUserGroups(ugcache)) 44 | 45 | rulecache := make(map[int64]string) 46 | ginx.Dangerous(sub.FillRuleName(rulecache)) 47 | 48 | ginx.NewRender(c).Data(sub, nil) 49 | } 50 | 51 | func alertSubscribeAdd(c *gin.Context) { 52 | var f models.AlertSubscribe 53 | ginx.BindJSON(c, &f) 54 | 55 | username := c.MustGet("username").(string) 56 | f.CreateBy = username 57 | f.UpdateBy = username 58 | f.GroupId = ginx.UrlParamInt64(c, "id") 59 | 60 | if f.GroupId <= 0 { 61 | ginx.Bomb(http.StatusBadRequest, "group_id invalid") 62 | } 63 | 64 | ginx.NewRender(c).Message(f.Add()) 65 | } 66 | 67 | func alertSubscribePut(c *gin.Context) { 68 | var fs []models.AlertSubscribe 69 | ginx.BindJSON(c, &fs) 70 | 71 | timestamp := time.Now().Unix() 72 | username := c.MustGet("username").(string) 73 | for i := 0; i < len(fs); i++ { 74 | fs[i].UpdateBy = username 75 | fs[i].UpdateAt = timestamp 76 | ginx.Dangerous(fs[i].Update( 77 | "rule_id", 78 | "tags", 79 | "redefine_severity", 80 | "new_severity", 81 | "redefine_channels", 82 | "new_channels", 83 | "user_group_ids", 84 | "update_at", 85 | "update_by", 86 | )) 87 | } 88 | 89 | ginx.NewRender(c).Message(nil) 90 | } 91 | 92 | func alertSubscribeDel(c *gin.Context) { 93 | var f idsForm 94 | ginx.BindJSON(c, &f) 95 | f.Verify() 96 | 97 | ginx.NewRender(c).Message(models.AlertSubscribeDel(f.Ids)) 98 | } 99 | 
-------------------------------------------------------------------------------- /src/server/naming/heartbeat.go: -------------------------------------------------------------------------------- 1 | package naming 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "sort" 7 | "strconv" 8 | "strings" 9 | "time" 10 | 11 | "github.com/toolkits/pkg/logger" 12 | 13 | "github.com/didi/nightingale/v5/src/server/config" 14 | "github.com/didi/nightingale/v5/src/storage" 15 | ) 16 | 17 | // local servers 18 | var localss string 19 | 20 | func Heartbeat(ctx context.Context) error { 21 | if err := heartbeat(ctx); err != nil { 22 | fmt.Println("failed to heartbeat:", err) 23 | return err 24 | } 25 | 26 | go loopHeartbeat(ctx) 27 | return nil 28 | } 29 | 30 | func loopHeartbeat(ctx context.Context) { 31 | interval := time.Duration(config.C.Heartbeat.Interval) * time.Millisecond 32 | for { 33 | time.Sleep(interval) 34 | if err := heartbeat(ctx); err != nil { 35 | logger.Warning(err) 36 | } 37 | } 38 | } 39 | 40 | // hash struct: 41 | // /server/heartbeat/Default -> { 42 | // 10.2.3.4:19000 => $timestamp 43 | // 10.2.3.5:19000 => $timestamp 44 | // } 45 | func redisKey(cluster string) string { 46 | return fmt.Sprintf("/server/heartbeat/%s", cluster) 47 | } 48 | 49 | func heartbeat(ctx context.Context) error { 50 | now := time.Now().Unix() 51 | key := redisKey(config.C.ClusterName) 52 | err := storage.Redis.HSet(ctx, key, config.C.Heartbeat.Endpoint, now).Err() 53 | if err != nil { 54 | return err 55 | } 56 | 57 | servers, err := ActiveServers(ctx, config.C.ClusterName) 58 | if err != nil { 59 | return err 60 | } 61 | 62 | sort.Strings(servers) 63 | newss := strings.Join(servers, " ") 64 | if newss != localss { 65 | RebuildConsistentHashRing(servers) 66 | localss = newss 67 | } 68 | 69 | return nil 70 | } 71 | 72 | func clearDeadServer(ctx context.Context, cluster, endpoint string) { 73 | key := redisKey(cluster) 74 | err := storage.Redis.HDel(ctx, key, endpoint).Err() 75 | if err != nil 
{ 76 | logger.Warningf("failed to hdel %s %s, error: %v", key, endpoint, err) 77 | } 78 | } 79 | 80 | func ActiveServers(ctx context.Context, cluster string) ([]string, error) { 81 | ret, err := storage.Redis.HGetAll(ctx, redisKey(cluster)).Result() 82 | if err != nil { 83 | return nil, err 84 | } 85 | 86 | now := time.Now().Unix() 87 | dur := int64(20) 88 | 89 | actives := make([]string, 0, len(ret)) 90 | for endpoint, clockstr := range ret { 91 | clock, err := strconv.ParseInt(clockstr, 10, 64) 92 | if err != nil { 93 | continue 94 | } 95 | 96 | if now-clock > dur { 97 | clearDeadServer(ctx, cluster, endpoint) 98 | continue 99 | } 100 | 101 | actives = append(actives, endpoint) 102 | } 103 | 104 | return actives, nil 105 | } 106 | -------------------------------------------------------------------------------- /src/webapi/router/router_alert_his_event.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/gin-gonic/gin" 7 | "github.com/toolkits/pkg/ginx" 8 | 9 | "github.com/didi/nightingale/v5/src/models" 10 | ) 11 | 12 | func getTimeRange(c *gin.Context) (stime, etime int64) { 13 | stime = ginx.QueryInt64(c, "stime", 0) 14 | etime = ginx.QueryInt64(c, "etime", 0) 15 | hours := ginx.QueryInt64(c, "hours", 0) 16 | now := time.Now().Unix() 17 | if hours != 0 { 18 | stime = now - 3600*hours 19 | etime = now + 3600*24 20 | } 21 | 22 | if stime != 0 && etime == 0 { 23 | etime = now + 3600*24 24 | } 25 | return 26 | } 27 | 28 | func alertHisEventsList(c *gin.Context) { 29 | stime, etime := getTimeRange(c) 30 | 31 | severity := ginx.QueryInt(c, "severity", -1) 32 | recovered := ginx.QueryInt(c, "is_recovered", -1) 33 | query := ginx.QueryStr(c, "query", "") 34 | limit := ginx.QueryInt(c, "limit", 20) 35 | busiGroupId := ginx.QueryInt64(c, "bgid", 0) 36 | clusters := queryClusters(c) 37 | 38 | total, err := models.AlertHisEventTotal(busiGroupId, stime, etime, severity, 
recovered, clusters, query) 39 | ginx.Dangerous(err) 40 | 41 | list, err := models.AlertHisEventGets(busiGroupId, stime, etime, severity, recovered, clusters, query, limit, ginx.Offset(c, limit)) 42 | ginx.Dangerous(err) 43 | 44 | cache := make(map[int64]*models.UserGroup) 45 | for i := 0; i < len(list); i++ { 46 | list[i].FillNotifyGroups(cache) 47 | } 48 | 49 | ginx.NewRender(c).Data(gin.H{ 50 | "list": list, 51 | "total": total, 52 | }, nil) 53 | } 54 | 55 | func alertHisEventGets(c *gin.Context) { 56 | stime, etime := getTimeRange(c) 57 | 58 | severity := ginx.QueryInt(c, "severity", -1) 59 | recovered := ginx.QueryInt(c, "is_recovered", -1) 60 | query := ginx.QueryStr(c, "query", "") 61 | limit := ginx.QueryInt(c, "limit", 20) 62 | busiGroupId := ginx.UrlParamInt64(c, "id") 63 | clusters := queryClusters(c) 64 | 65 | total, err := models.AlertHisEventTotal(busiGroupId, stime, etime, severity, recovered, clusters, query) 66 | ginx.Dangerous(err) 67 | 68 | list, err := models.AlertHisEventGets(busiGroupId, stime, etime, severity, recovered, clusters, query, limit, ginx.Offset(c, limit)) 69 | ginx.Dangerous(err) 70 | 71 | cache := make(map[int64]*models.UserGroup) 72 | for i := 0; i < len(list); i++ { 73 | list[i].FillNotifyGroups(cache) 74 | } 75 | 76 | ginx.NewRender(c).Data(gin.H{ 77 | "list": list, 78 | "total": total, 79 | }, nil) 80 | } 81 | 82 | func alertHisEventGet(c *gin.Context) { 83 | eid := ginx.UrlParamInt64(c, "eid") 84 | event, err := models.AlertHisEventGetById(eid) 85 | ginx.Dangerous(err) 86 | 87 | if event == nil { 88 | ginx.Bomb(404, "No such alert event") 89 | } 90 | 91 | ginx.NewRender(c).Data(event, err) 92 | } 93 | -------------------------------------------------------------------------------- /src/storage/storage.go: -------------------------------------------------------------------------------- 1 | package storage 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "os" 8 | "strings" 9 | 10 | "github.com/go-redis/redis/v8" 11 | 
"gorm.io/gorm" 12 | 13 | "github.com/didi/nightingale/v5/src/pkg/ormx" 14 | ) 15 | 16 | type RedisConfig struct { 17 | Address string 18 | Password string 19 | DB int 20 | } 21 | 22 | type DBConfig struct { 23 | Gorm Gorm 24 | MySQL MySQL 25 | Postgres Postgres 26 | } 27 | 28 | type Gorm struct { 29 | Debug bool 30 | DBType string 31 | MaxLifetime int 32 | MaxOpenConns int 33 | MaxIdleConns int 34 | TablePrefix string 35 | EnableAutoMigrate bool 36 | } 37 | 38 | type MySQL struct { 39 | Address string 40 | User string 41 | Password string 42 | DBName string 43 | Parameters string 44 | } 45 | 46 | func (a MySQL) DSN() string { 47 | return fmt.Sprintf("%s:%s@tcp(%s)/%s?%s", 48 | a.User, a.Password, a.Address, a.DBName, a.Parameters) 49 | } 50 | 51 | type Postgres struct { 52 | Address string 53 | User string 54 | Password string 55 | DBName string 56 | SSLMode string 57 | } 58 | 59 | func (a Postgres) DSN() string { 60 | arr := strings.Split(a.Address, ":") 61 | if len(arr) != 2 { 62 | panic("pg address(" + a.Address + ") invalid") 63 | } 64 | 65 | return fmt.Sprintf("host=%s port=%s user=%s dbname=%s password=%s sslmode=%s", 66 | arr[0], arr[1], a.User, a.DBName, a.Password, a.SSLMode) 67 | } 68 | 69 | var DB *gorm.DB 70 | 71 | func InitDB(cfg DBConfig) error { 72 | db, err := newGormDB(cfg) 73 | if err == nil { 74 | DB = db 75 | } 76 | return err 77 | } 78 | 79 | func newGormDB(cfg DBConfig) (*gorm.DB, error) { 80 | var dsn string 81 | switch cfg.Gorm.DBType { 82 | case "mysql": 83 | dsn = cfg.MySQL.DSN() 84 | case "postgres": 85 | dsn = cfg.Postgres.DSN() 86 | default: 87 | return nil, errors.New("unknown DBType") 88 | } 89 | 90 | return ormx.New(ormx.Config{ 91 | Debug: cfg.Gorm.Debug, 92 | DBType: cfg.Gorm.DBType, 93 | DSN: dsn, 94 | MaxIdleConns: cfg.Gorm.MaxIdleConns, 95 | MaxLifetime: cfg.Gorm.MaxLifetime, 96 | MaxOpenConns: cfg.Gorm.MaxOpenConns, 97 | TablePrefix: cfg.Gorm.TablePrefix, 98 | }) 99 | } 100 | 101 | var Redis *redis.Client 102 | 103 | func 
InitRedis(cfg RedisConfig) (func(), error) { 104 | Redis = redis.NewClient(&redis.Options{ 105 | Addr: cfg.Address, 106 | Password: cfg.Password, 107 | DB: cfg.DB, 108 | }) 109 | 110 | err := Redis.Ping(context.Background()).Err() 111 | if err != nil { 112 | fmt.Println("ping redis failed:", err) 113 | os.Exit(1) 114 | } 115 | 116 | return func() { 117 | fmt.Println("redis exiting") 118 | Redis.Close() 119 | }, nil 120 | } 121 | -------------------------------------------------------------------------------- /src/models/busi_group_member.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | type BusiGroupMember struct { 4 | BusiGroupId int64 `json:"busi_group_id"` 5 | UserGroupId int64 `json:"user_group_id"` 6 | PermFlag string `json:"perm_flag"` 7 | } 8 | 9 | func (BusiGroupMember) TableName() string { 10 | return "busi_group_member" 11 | } 12 | 13 | func BusiGroupIds(userGroupIds []int64, permFlag ...string) ([]int64, error) { 14 | if len(userGroupIds) == 0 { 15 | return []int64{}, nil 16 | } 17 | 18 | session := DB().Model(&BusiGroupMember{}).Where("user_group_id in ?", userGroupIds) 19 | if len(permFlag) > 0 { 20 | session = session.Where("perm_flag=?", permFlag[0]) 21 | } 22 | 23 | var ids []int64 24 | err := session.Pluck("busi_group_id", &ids).Error 25 | return ids, err 26 | } 27 | 28 | func UserGroupIdsOfBusiGroup(busiGroupId int64, permFlag ...string) ([]int64, error) { 29 | session := DB().Model(&BusiGroupMember{}).Where("busi_group_id = ?", busiGroupId) 30 | if len(permFlag) > 0 { 31 | session = session.Where("perm_flag=?", permFlag[0]) 32 | } 33 | 34 | var ids []int64 35 | err := session.Pluck("user_group_id", &ids).Error 36 | return ids, err 37 | } 38 | 39 | func BusiGroupMemberCount(where string, args ...interface{}) (int64, error) { 40 | return Count(DB().Model(&BusiGroupMember{}).Where(where, args...)) 41 | } 42 | 43 | func BusiGroupMemberAdd(member BusiGroupMember) error { 44 | obj, err := 
BusiGroupMemberGet("busi_group_id = ? and user_group_id = ?", member.BusiGroupId, member.UserGroupId) 45 | if err != nil { 46 | return err 47 | } 48 | 49 | if obj == nil { 50 | // insert 51 | return Insert(&BusiGroupMember{ 52 | BusiGroupId: member.BusiGroupId, 53 | UserGroupId: member.UserGroupId, 54 | PermFlag: member.PermFlag, 55 | }) 56 | } else { 57 | // update 58 | if obj.PermFlag == member.PermFlag { 59 | return nil 60 | } 61 | 62 | return DB().Model(&BusiGroupMember{}).Where("busi_group_id = ? and user_group_id = ?", member.BusiGroupId, member.UserGroupId).Update("perm_flag", member.PermFlag).Error 63 | } 64 | } 65 | 66 | func BusiGroupMemberGet(where string, args ...interface{}) (*BusiGroupMember, error) { 67 | var lst []*BusiGroupMember 68 | err := DB().Where(where, args...).Find(&lst).Error 69 | if err != nil { 70 | return nil, err 71 | } 72 | 73 | if len(lst) == 0 { 74 | return nil, nil 75 | } 76 | 77 | return lst[0], nil 78 | } 79 | 80 | func BusiGroupMemberDel(where string, args ...interface{}) error { 81 | return DB().Where(where, args...).Delete(&BusiGroupMember{}).Error 82 | } 83 | 84 | func BusiGroupMemberGets(where string, args ...interface{}) ([]BusiGroupMember, error) { 85 | var lst []BusiGroupMember 86 | err := DB().Where(where, args...).Order("perm_flag").Find(&lst).Error 87 | return lst, err 88 | } 89 | 90 | func BusiGroupMemberGetsByBusiGroupId(busiGroupId int64) ([]BusiGroupMember, error) { 91 | return BusiGroupMemberGets("busi_group_id=?", busiGroupId) 92 | } 93 | -------------------------------------------------------------------------------- /src/server/memsto/busi_group_cache.go: -------------------------------------------------------------------------------- 1 | package memsto 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | "time" 7 | 8 | "github.com/pkg/errors" 9 | "github.com/toolkits/pkg/logger" 10 | 11 | "github.com/didi/nightingale/v5/src/models" 12 | "github.com/didi/nightingale/v5/src/server/config" 13 | promstat 
"github.com/didi/nightingale/v5/src/server/stat" 14 | ) 15 | 16 | type BusiGroupCacheType struct { 17 | statTotal int64 18 | statLastUpdated int64 19 | 20 | sync.RWMutex 21 | ugs map[int64]*models.BusiGroup // key: id 22 | } 23 | 24 | var BusiGroupCache = BusiGroupCacheType{ 25 | statTotal: -1, 26 | statLastUpdated: -1, 27 | ugs: make(map[int64]*models.BusiGroup), 28 | } 29 | 30 | func (c *BusiGroupCacheType) StatChanged(total, lastUpdated int64) bool { 31 | if c.statTotal == total && c.statLastUpdated == lastUpdated { 32 | return false 33 | } 34 | 35 | return true 36 | } 37 | 38 | func (c *BusiGroupCacheType) Set(ugs map[int64]*models.BusiGroup, total, lastUpdated int64) { 39 | c.Lock() 40 | c.ugs = ugs 41 | c.Unlock() 42 | 43 | // only one goroutine used, so no need lock 44 | c.statTotal = total 45 | c.statLastUpdated = lastUpdated 46 | } 47 | 48 | func (c *BusiGroupCacheType) GetByBusiGroupId(id int64) *models.BusiGroup { 49 | c.RLock() 50 | defer c.RUnlock() 51 | return c.ugs[id] 52 | } 53 | 54 | func SyncBusiGroups() { 55 | err := syncBusiGroups() 56 | if err != nil { 57 | fmt.Println("failed to sync busi groups:", err) 58 | exit(1) 59 | } 60 | 61 | go loopSyncBusiGroups() 62 | } 63 | 64 | func loopSyncBusiGroups() { 65 | duration := time.Duration(9000) * time.Millisecond 66 | for { 67 | time.Sleep(duration) 68 | if err := syncBusiGroups(); err != nil { 69 | logger.Warning("failed to sync busi groups:", err) 70 | } 71 | } 72 | } 73 | 74 | func syncBusiGroups() error { 75 | start := time.Now() 76 | 77 | stat, err := models.BusiGroupStatistics() 78 | if err != nil { 79 | return errors.WithMessage(err, "failed to exec BusiGroupStatistics") 80 | } 81 | 82 | if !BusiGroupCache.StatChanged(stat.Total, stat.LastUpdated) { 83 | promstat.GaugeCronDuration.WithLabelValues(config.C.ClusterName, "sync_busi_groups").Set(0) 84 | promstat.GaugeSyncNumber.WithLabelValues(config.C.ClusterName, "sync_busi_groups").Set(0) 85 | logger.Debug("busi_group not changed") 86 | return 
nil 87 | } 88 | 89 | m, err := models.BusiGroupGetMap() 90 | if err != nil { 91 | return errors.WithMessage(err, "failed to exec BusiGroupGetMap") 92 | } 93 | 94 | BusiGroupCache.Set(m, stat.Total, stat.LastUpdated) 95 | 96 | ms := time.Since(start).Milliseconds() 97 | promstat.GaugeCronDuration.WithLabelValues(config.C.ClusterName, "sync_busi_groups").Set(float64(ms)) 98 | promstat.GaugeSyncNumber.WithLabelValues(config.C.ClusterName, "sync_busi_groups").Set(float64(len(m))) 99 | logger.Infof("timer: sync busi groups done, cost: %dms, number: %d", ms, len(m)) 100 | 101 | return nil 102 | } 103 | -------------------------------------------------------------------------------- /src/webapi/config/i18n.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | var ( 4 | dict = map[string]string{ 5 | "just a test": "这只是一个测试", 6 | "just a test: %s": "这只是一个测试: %s", 7 | "InternalServerError": "系统内部错误,请联系管理员", 8 | "Username or password invalid": "登录失败,请检查用户名和密码", 9 | "Username is blank": "用户名不能为空", 10 | "Username has invalid characters": "用户名含有非法字符", 11 | "Nickname has invalid characters": "昵称含有非法字符", 12 | "Phone invalid": "手机号格式非法", 13 | "Email invalid": "邮箱格式非法", 14 | "Incorrect old password": "旧密码错误", 15 | "Username already exists": "用户名已存在", 16 | "No such user": "用户不存在", 17 | "Note has invalid characters": "备注含有非法字符", 18 | "UserGroup already exists": "用户组已存在,不能重复创建", 19 | "No such UserGroup": "用户组不存在", 20 | "No such BusiGroup": "业务组不存在", 21 | "BusiGroup already exists": "业务分组已存在,不能重复创建", 22 | "Some UserGroup id not exists": "有些用户组ID不存在", 23 | "Some alert mutes still in the BusiGroup": "业务组下仍然存在告警屏蔽配置,不能删除", 24 | "Some dashboards still in the BusiGroup": "业务组下仍然存在监控大盘配置,不能删除", 25 | "Some collect rules still in the BusiGroup": "业务组下仍然存在采集规则配置,不能删除", 26 | "Some alert rules still in the BusiGroup": "业务组下仍然存在告警规则配置,不能删除", 27 | "Some alert subscribes still in the BusiGroup": "业务组下仍然存在订阅规则配置,不能删除", 28 | "Some targets still 
in the BusiGroup": "业务组下仍然存在监控对象,不能删除", 29 | "Some recovery scripts still in the BusiGroup": "业务组下仍然存在自愈脚本,不能删除", 30 | "Name is blank": "名称不能为空", 31 | "Name has invalid characters": "名称含有非法字符", 32 | "Dashboard already exists": "监控大盘已存在", 33 | "No such dashboard": "监控大盘不存在", 34 | "AlertRule already exists": "告警规则已存在,不能重复创建", 35 | "No such AlertRule": "告警规则不存在", 36 | "CollectRule already exists": "采集规则已存在,不能重复创建", 37 | "No such metric description": "该指标释义不存在,可能已被删除", 38 | "No such TargetQuery": "查询条件不存在,可能已被删除", 39 | "No permission. Only admins can assign BG": "没有权限!只有管理员才能分配业务组", 40 | "No permission to operate the targets: %s": "没有权限操作这些监控对象:%s", 41 | "No permission. You are not admin of BG(%s)": "没有权限操作,您并非业务组(%s)的管理员", 42 | "The business group must retain at least one team": "业务组下要保留至少一个团队", 43 | "At least one team have rw permission": "业务组下至少要有一个具备读写权限的团队", 44 | "duplicate tagkey(%s)": "标签KEY(%s)重复了", 45 | "Failed to create BusiGroup(%s)": "创建业务(%s)组失败", 46 | } 47 | langDict = map[string]map[string]string{ 48 | "zh": dict, 49 | } 50 | ) 51 | -------------------------------------------------------------------------------- /src/pkg/ldapx/ldapx.go: -------------------------------------------------------------------------------- 1 | package ldapx 2 | 3 | import ( 4 | "crypto/tls" 5 | "fmt" 6 | 7 | "github.com/go-ldap/ldap/v3" 8 | ) 9 | 10 | type LdapSection struct { 11 | Enable bool 12 | Host string 13 | Port int 14 | BaseDn string 15 | BindUser string 16 | BindPass string 17 | AuthFilter string 18 | Attributes LdapAttributes 19 | CoverAttributes bool 20 | TLS bool 21 | StartTLS bool 22 | DefaultRoles []string 23 | } 24 | 25 | type LdapAttributes struct { 26 | Nickname string `yaml:"nickname"` 27 | Phone string `yaml:"phone"` 28 | Email string `yaml:"email"` 29 | } 30 | 31 | var LDAP LdapSection 32 | 33 | func Init(ldap LdapSection) { 34 | LDAP = ldap 35 | } 36 | 37 | func genLdapAttributeSearchList() []string { 38 | var ldapAttributes []string 39 | attrs := 
LDAP.Attributes 40 | if attrs.Nickname != "" { 41 | ldapAttributes = append(ldapAttributes, attrs.Nickname) 42 | } 43 | if attrs.Email != "" { 44 | ldapAttributes = append(ldapAttributes, attrs.Email) 45 | } 46 | if attrs.Phone != "" { 47 | ldapAttributes = append(ldapAttributes, attrs.Phone) 48 | } 49 | return ldapAttributes 50 | } 51 | 52 | func LdapReq(user, pass string) (*ldap.SearchResult, error) { 53 | var conn *ldap.Conn 54 | var err error 55 | lc := LDAP 56 | addr := fmt.Sprintf("%s:%d", lc.Host, lc.Port) 57 | 58 | if lc.TLS { 59 | conn, err = ldap.DialTLS("tcp", addr, &tls.Config{InsecureSkipVerify: true}) 60 | } else { 61 | conn, err = ldap.Dial("tcp", addr) 62 | } 63 | 64 | if err != nil { 65 | return nil, fmt.Errorf("ldap.error: cannot dial ldap(%s): %v", addr, err) 66 | } 67 | 68 | defer conn.Close() 69 | 70 | if !lc.TLS && lc.StartTLS { 71 | if err := conn.StartTLS(&tls.Config{InsecureSkipVerify: true}); err != nil { 72 | return nil, fmt.Errorf("ldap.error: conn startTLS fail: %v", err) 73 | } 74 | } 75 | 76 | // if bindUser is empty, anonymousSearch mode 77 | if lc.BindUser != "" { 78 | // BindSearch mode 79 | if err := conn.Bind(lc.BindUser, lc.BindPass); err != nil { 80 | return nil, fmt.Errorf("ldap.error: bind ldap fail: %v, use user(%s) to bind", err, lc.BindUser) 81 | } 82 | } 83 | 84 | searchRequest := ldap.NewSearchRequest( 85 | lc.BaseDn, // The base dn to search 86 | ldap.ScopeWholeSubtree, ldap.NeverDerefAliases, 0, 0, false, 87 | fmt.Sprintf(lc.AuthFilter, user), // The filter to apply 88 | genLdapAttributeSearchList(), // A list attributes to retrieve 89 | nil, 90 | ) 91 | 92 | sr, err := conn.Search(searchRequest) 93 | if err != nil { 94 | return nil, fmt.Errorf("ldap.error: ldap search fail: %v", err) 95 | } 96 | 97 | if len(sr.Entries) == 0 { 98 | return nil, fmt.Errorf("Username or password invalid") 99 | } 100 | 101 | if len(sr.Entries) > 1 { 102 | return nil, fmt.Errorf("ldap.error: search user(%s), multi entries found", user) 103 
| } 104 | 105 | if err := conn.Bind(sr.Entries[0].DN, pass); err != nil { 106 | return nil, fmt.Errorf("Username or password invalid") 107 | } 108 | 109 | return sr, nil 110 | } 111 | -------------------------------------------------------------------------------- /src/webapi/webapi.go: -------------------------------------------------------------------------------- 1 | package webapi 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "os/signal" 7 | "path/filepath" 8 | "syscall" 9 | 10 | "github.com/toolkits/pkg/i18n" 11 | 12 | "github.com/didi/nightingale/v5/src/models" 13 | "github.com/didi/nightingale/v5/src/pkg/httpx" 14 | "github.com/didi/nightingale/v5/src/pkg/ldapx" 15 | "github.com/didi/nightingale/v5/src/pkg/logx" 16 | "github.com/didi/nightingale/v5/src/storage" 17 | "github.com/didi/nightingale/v5/src/webapi/config" 18 | "github.com/didi/nightingale/v5/src/webapi/prom" 19 | "github.com/didi/nightingale/v5/src/webapi/router" 20 | "github.com/didi/nightingale/v5/src/webapi/stat" 21 | ) 22 | 23 | type Webapi struct { 24 | ConfigFile string 25 | Version string 26 | } 27 | 28 | type WebapiOption func(*Webapi) 29 | 30 | func SetConfigFile(f string) WebapiOption { 31 | return func(s *Webapi) { 32 | s.ConfigFile = f 33 | } 34 | } 35 | 36 | func SetVersion(v string) WebapiOption { 37 | return func(s *Webapi) { 38 | s.Version = v 39 | } 40 | } 41 | 42 | // Run run webapi 43 | func Run(opts ...WebapiOption) { 44 | code := 1 45 | sc := make(chan os.Signal, 1) 46 | signal.Notify(sc, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT) 47 | 48 | webapi := Webapi{ 49 | ConfigFile: filepath.Join("etc", "webapi.conf"), 50 | Version: "not specified", 51 | } 52 | 53 | for _, opt := range opts { 54 | opt(&webapi) 55 | } 56 | 57 | cleanFunc, err := webapi.initialize() 58 | if err != nil { 59 | fmt.Println("webapi init fail:", err) 60 | os.Exit(code) 61 | } 62 | 63 | EXIT: 64 | for { 65 | sig := <-sc 66 | fmt.Println("received signal:", sig.String()) 67 | switch sig { 68 | 
case syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGINT: 69 | code = 0 70 | break EXIT 71 | case syscall.SIGHUP: 72 | // reload configuration? 73 | default: 74 | break EXIT 75 | } 76 | } 77 | 78 | cleanFunc() 79 | fmt.Println("webapi exited") 80 | os.Exit(code) 81 | } 82 | 83 | func (a Webapi) initialize() (func(), error) { 84 | // parse config file 85 | config.MustLoad(a.ConfigFile) 86 | 87 | // init i18n 88 | i18n.Init(config.C.I18N) 89 | 90 | // init ldap 91 | ldapx.Init(config.C.LDAP) 92 | 93 | // init logger 94 | loggerClean, err := logx.Init(config.C.Log) 95 | if err != nil { 96 | return nil, err 97 | } 98 | 99 | // init database 100 | if err = storage.InitDB(storage.DBConfig{ 101 | Gorm: config.C.Gorm, 102 | MySQL: config.C.MySQL, 103 | Postgres: config.C.Postgres, 104 | }); err != nil { 105 | return nil, err 106 | } 107 | 108 | // init redis 109 | redisClean, err := storage.InitRedis(config.C.Redis) 110 | if err != nil { 111 | return nil, err 112 | } 113 | 114 | models.InitSalt() 115 | models.InitRoot() 116 | 117 | // init prometheus proxy config 118 | if err = prom.Init(config.C.Clusters); err != nil { 119 | return nil, err 120 | } 121 | 122 | stat.Init() 123 | 124 | // init http server 125 | r := router.New(a.Version) 126 | httpClean := httpx.Init(config.C.HTTP, r) 127 | 128 | // release all the resources 129 | return func() { 130 | loggerClean() 131 | httpClean() 132 | redisClean() 133 | }, nil 134 | } 135 | -------------------------------------------------------------------------------- /src/server/sender/email.go: -------------------------------------------------------------------------------- 1 | package sender 2 | 3 | import ( 4 | "crypto/tls" 5 | "time" 6 | 7 | "github.com/didi/nightingale/v5/src/server/config" 8 | "github.com/toolkits/pkg/logger" 9 | "gopkg.in/gomail.v2" 10 | ) 11 | 12 | var mailch chan *gomail.Message 13 | 14 | func SendEmail(subject, content string, tos []string) { 15 | conf := config.C.SMTP 16 | 17 | d := 
// StartEmailSender creates the package-level mail queue (mailch) and then
// consumes it in an endless loop, delivering each queued message over SMTP.
// It only returns when mailch is closed, so it blocks the calling goroutine.
//
// A single SMTP connection is reused across messages. It is closed after
// conf.Batch messages have been handled on it, or after 30 seconds without a
// new message, and re-dialed lazily when the next message arrives.
func StartEmailSender() {
	mailch = make(chan *gomail.Message, 100000)

	conf := config.C.SMTP

	d := gomail.NewDialer(conf.Host, conf.Port, conf.User, conf.Pass)
	if conf.InsecureSkipVerify {
		d.TLSConfig = &tls.Config{InsecureSkipVerify: true}
	}

	var s gomail.SendCloser // current SMTP connection; only valid while open is true
	var open bool           // whether s is currently connected
	var size int            // messages handled on the current connection (failed ones included)
	for {
		select {
		case m, ok := <-mailch:
			if !ok {
				// queue closed: stop the sender
				return
			}

			if !open {
				// dialSmtp retries until a connection is established
				s = dialSmtp(d)
				open = true
			}

			if err := gomail.Send(s, m); err != nil {
				logger.Errorf("email_sender: failed to send: %s", err)

				// close and retry
				if err := s.Close(); err != nil {
					logger.Warningf("email_sender: failed to close smtp connection: %s", err)
				}

				s = dialSmtp(d)
				open = true

				// Retry once on the fresh connection; a second failure is only
				// logged and the message is not re-queued.
				if err := gomail.Send(s, m); err != nil {
					logger.Errorf("email_sender: failed to retry send: %s", err)
				}
			} else {
				logger.Infof("email_sender: result=succ subject=%v to=%v", m.GetHeader("Subject"), m.GetHeader("To"))
			}

			size++

			// Recycle the connection once the configured batch size is reached.
			if size >= conf.Batch {
				if err := s.Close(); err != nil {
					logger.Warningf("email_sender: failed to close smtp connection: %s", err)
				}
				open = false
				size = 0
			}

		// Close the connection to the SMTP server if no email was sent in
		// the last 30 seconds.
		case <-time.After(30 * time.Second):
			if open {
				if err := s.Close(); err != nil {
					logger.Warningf("email_sender: failed to close smtp connection: %s", err)
				}
				open = false
			}
		}
	}
}
strings.Fields(v.Rule) 53 | for i := 0; i < len(arr); i++ { 54 | pair := strings.Split(arr[i], ":") 55 | if len(pair) != 2 { 56 | return errors.New("rule invalid") 57 | } 58 | 59 | if !(pair[0] == "field" || pair[0] == "tagkey") { 60 | return errors.New("rule invalid") 61 | } 62 | 63 | if pair[0] == "field" { 64 | // 只支持有限的field 65 | if !slice.ContainsString(validFields, pair[1]) { 66 | return fmt.Errorf("unsupported field: %s", pair[1]) 67 | } 68 | } 69 | } 70 | 71 | return nil 72 | } 73 | 74 | func (v *AlertAggrView) Add() error { 75 | if err := v.Verify(); err != nil { 76 | return err 77 | } 78 | 79 | now := time.Now().Unix() 80 | v.CreateAt = now 81 | v.UpdateAt = now 82 | v.Cate = 1 83 | return Insert(v) 84 | } 85 | 86 | func (v *AlertAggrView) Update(name, rule string) error { 87 | if err := v.Verify(); err != nil { 88 | return err 89 | } 90 | 91 | v.UpdateAt = time.Now().Unix() 92 | v.Name = name 93 | v.Rule = rule 94 | 95 | return DB().Model(v).Select("name", "rule", "update_at").Updates(v).Error 96 | } 97 | 98 | // AlertAggrViewDel: userid for safe delete 99 | func AlertAggrViewDel(ids []int64, userId interface{}) error { 100 | if len(ids) == 0 { 101 | return nil 102 | } 103 | 104 | return DB().Where("id in ? and user_id = ?", ids, userId).Delete(new(AlertAggrView)).Error 105 | } 106 | 107 | func AlertAggrViewGets(userId interface{}) ([]AlertAggrView, error) { 108 | var lst []AlertAggrView 109 | err := DB().Where("user_id = ? 
or cate = 0", userId).Find(&lst).Error 110 | if err == nil && len(lst) > 0 { 111 | sort.Slice(lst, func(i, j int) bool { 112 | return lst[i].Name < lst[j].Name 113 | }) 114 | } 115 | return lst, err 116 | } 117 | 118 | func AlertAggrViewGet(where string, args ...interface{}) (*AlertAggrView, error) { 119 | var lst []*AlertAggrView 120 | err := DB().Where(where, args...).Find(&lst).Error 121 | if err != nil { 122 | return nil, err 123 | } 124 | 125 | if len(lst) == 0 { 126 | return nil, nil 127 | } 128 | 129 | return lst[0], nil 130 | } 131 | -------------------------------------------------------------------------------- /src/webapi/router/router_metric_desc.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "path" 5 | 6 | "github.com/gin-gonic/gin" 7 | "github.com/toolkits/pkg/file" 8 | "github.com/toolkits/pkg/ginx" 9 | "github.com/toolkits/pkg/runner" 10 | 11 | "github.com/didi/nightingale/v5/src/webapi/config" 12 | ) 13 | 14 | func metricsDescGetFile(c *gin.Context) { 15 | fp := config.C.MetricsYamlFile 16 | if fp == "" { 17 | fp = path.Join(runner.Cwd, "etc", "metrics.yaml") 18 | } 19 | 20 | if !file.IsExist(fp) { 21 | c.String(404, "%s not found", fp) 22 | return 23 | } 24 | 25 | ret := make(map[string]string) 26 | err := file.ReadYaml(fp, &ret) 27 | if err != nil { 28 | c.String(500, err.Error()) 29 | return 30 | } 31 | 32 | c.JSON(200, ret) 33 | } 34 | 35 | // 前端传过来一个metric数组,后端去查询有没有对应的释义,返回map 36 | func metricsDescGetMap(c *gin.Context) { 37 | var arr []string 38 | ginx.BindJSON(c, &arr) 39 | 40 | ret := make(map[string]string) 41 | for i := 0; i < len(arr); i++ { 42 | desc, has := config.Metrics.Get(arr[i]) 43 | if !has { 44 | ret[arr[i]] = "" 45 | } else { 46 | ret[arr[i]] = desc.(string) 47 | } 48 | } 49 | 50 | ginx.NewRender(c).Data(ret, nil) 51 | } 52 | 53 | // 页面功能暂时先不要了,直接通过配置文件来维护 54 | // func metricDescriptionGets(c *gin.Context) { 55 | // limit := ginx.QueryInt(c, "limit", 
20) 56 | // query := ginx.QueryStr(c, "query", "") 57 | 58 | // total, err := models.MetricDescriptionTotal(query) 59 | // ginx.Dangerous(err) 60 | 61 | // list, err := models.MetricDescriptionGets(query, limit, ginx.Offset(c, limit)) 62 | // ginx.Dangerous(err) 63 | 64 | // ginx.NewRender(c).Data(gin.H{ 65 | // "list": list, 66 | // "total": total, 67 | // }, nil) 68 | // } 69 | 70 | // type metricDescriptionAddForm struct { 71 | // Data string `json:"data"` 72 | // } 73 | 74 | // func metricDescriptionAdd(c *gin.Context) { 75 | // var f metricDescriptionAddForm 76 | // ginx.BindJSON(c, &f) 77 | 78 | // var metricDescriptions []models.MetricDescription 79 | 80 | // lines := strings.Split(f.Data, "\n") 81 | // for _, md := range lines { 82 | // arr := strings.SplitN(md, ":", 2) 83 | // if len(arr) != 2 { 84 | // ginx.Bomb(200, "metric description %s is illegal", md) 85 | // } 86 | // m := models.MetricDescription{ 87 | // Metric: arr[0], 88 | // Description: arr[1], 89 | // } 90 | // metricDescriptions = append(metricDescriptions, m) 91 | // } 92 | 93 | // if len(metricDescriptions) == 0 { 94 | // ginx.Bomb(http.StatusBadRequest, "Decoded metric description empty") 95 | // } 96 | 97 | // ginx.NewRender(c).Message(models.MetricDescriptionUpdate(metricDescriptions)) 98 | // } 99 | 100 | // func metricDescriptionDel(c *gin.Context) { 101 | // var f idsForm 102 | // ginx.BindJSON(c, &f) 103 | // f.Verify() 104 | // ginx.NewRender(c).Message(models.MetricDescriptionDel(f.Ids)) 105 | // } 106 | 107 | // type metricDescriptionForm struct { 108 | // Description string `json:"description"` 109 | // } 110 | 111 | // func metricDescriptionPut(c *gin.Context) { 112 | // var f metricDescriptionForm 113 | // ginx.BindJSON(c, &f) 114 | 115 | // md, err := models.MetricDescriptionGet("id=?", ginx.UrlParamInt64(c, "id")) 116 | // ginx.Dangerous(err) 117 | 118 | // if md == nil { 119 | // ginx.Bomb(200, "No such metric description") 120 | // } 121 | 122 | // 
ginx.NewRender(c).Message(md.Update(f.Description, time.Now().Unix())) 123 | // } 124 | -------------------------------------------------------------------------------- /src/server/memsto/alert_rule_cache.go: -------------------------------------------------------------------------------- 1 | package memsto 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | "time" 7 | 8 | "github.com/pkg/errors" 9 | "github.com/toolkits/pkg/logger" 10 | 11 | "github.com/didi/nightingale/v5/src/models" 12 | "github.com/didi/nightingale/v5/src/server/config" 13 | promstat "github.com/didi/nightingale/v5/src/server/stat" 14 | ) 15 | 16 | type AlertRuleCacheType struct { 17 | statTotal int64 18 | statLastUpdated int64 19 | 20 | sync.RWMutex 21 | rules map[int64]*models.AlertRule // key: rule id 22 | } 23 | 24 | var AlertRuleCache = AlertRuleCacheType{ 25 | statTotal: -1, 26 | statLastUpdated: -1, 27 | rules: make(map[int64]*models.AlertRule), 28 | } 29 | 30 | func (arc *AlertRuleCacheType) StatChanged(total, lastUpdated int64) bool { 31 | if arc.statTotal == total && arc.statLastUpdated == lastUpdated { 32 | return false 33 | } 34 | 35 | return true 36 | } 37 | 38 | func (arc *AlertRuleCacheType) Set(m map[int64]*models.AlertRule, total, lastUpdated int64) { 39 | arc.Lock() 40 | arc.rules = m 41 | arc.Unlock() 42 | 43 | // only one goroutine used, so no need lock 44 | arc.statTotal = total 45 | arc.statLastUpdated = lastUpdated 46 | } 47 | 48 | func (arc *AlertRuleCacheType) Get(ruleId int64) *models.AlertRule { 49 | arc.RLock() 50 | defer arc.RUnlock() 51 | return arc.rules[ruleId] 52 | } 53 | 54 | func (arc *AlertRuleCacheType) GetRuleIds() []int64 { 55 | arc.RLock() 56 | defer arc.RUnlock() 57 | 58 | count := len(arc.rules) 59 | list := make([]int64, 0, count) 60 | for ruleId := range arc.rules { 61 | list = append(list, ruleId) 62 | } 63 | 64 | return list 65 | } 66 | 67 | func SyncAlertRules() { 68 | err := syncAlertRules() 69 | if err != nil { 70 | fmt.Println("failed to sync alert 
rules:", err) 71 | exit(1) 72 | } 73 | 74 | go loopSyncAlertRules() 75 | } 76 | 77 | func loopSyncAlertRules() { 78 | duration := time.Duration(9000) * time.Millisecond 79 | for { 80 | time.Sleep(duration) 81 | if err := syncAlertRules(); err != nil { 82 | logger.Warning("failed to sync alert rules:", err) 83 | } 84 | } 85 | } 86 | 87 | func syncAlertRules() error { 88 | start := time.Now() 89 | 90 | stat, err := models.AlertRuleStatistics(config.C.ClusterName) 91 | if err != nil { 92 | return errors.WithMessage(err, "failed to exec AlertRuleStatistics") 93 | } 94 | 95 | if !AlertRuleCache.StatChanged(stat.Total, stat.LastUpdated) { 96 | promstat.GaugeCronDuration.WithLabelValues(config.C.ClusterName, "sync_alert_rules").Set(0) 97 | promstat.GaugeSyncNumber.WithLabelValues(config.C.ClusterName, "sync_alert_rules").Set(0) 98 | logger.Debug("alert rules not changed") 99 | return nil 100 | } 101 | 102 | lst, err := models.AlertRuleGetsByCluster(config.C.ClusterName) 103 | if err != nil { 104 | return errors.WithMessage(err, "failed to exec AlertRuleGetsByCluster") 105 | } 106 | 107 | m := make(map[int64]*models.AlertRule) 108 | for i := 0; i < len(lst); i++ { 109 | m[lst[i].Id] = lst[i] 110 | } 111 | 112 | AlertRuleCache.Set(m, stat.Total, stat.LastUpdated) 113 | 114 | ms := time.Since(start).Milliseconds() 115 | promstat.GaugeCronDuration.WithLabelValues(config.C.ClusterName, "sync_alert_rules").Set(float64(ms)) 116 | promstat.GaugeSyncNumber.WithLabelValues(config.C.ClusterName, "sync_alert_rules").Set(float64(len(m))) 117 | logger.Infof("timer: sync rules done, cost: %dms, number: %d", ms, len(m)) 118 | 119 | return nil 120 | } 121 | -------------------------------------------------------------------------------- /src/server/memsto/user_cache.go: -------------------------------------------------------------------------------- 1 | package memsto 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | "time" 7 | 8 | "github.com/pkg/errors" 9 | "github.com/toolkits/pkg/logger" 10 | 
11 | "github.com/didi/nightingale/v5/src/models" 12 | "github.com/didi/nightingale/v5/src/server/config" 13 | promstat "github.com/didi/nightingale/v5/src/server/stat" 14 | ) 15 | 16 | type UserCacheType struct { 17 | statTotal int64 18 | statLastUpdated int64 19 | 20 | sync.RWMutex 21 | users map[int64]*models.User // key: id 22 | } 23 | 24 | var UserCache = UserCacheType{ 25 | statTotal: -1, 26 | statLastUpdated: -1, 27 | users: make(map[int64]*models.User), 28 | } 29 | 30 | func (uc *UserCacheType) StatChanged(total, lastUpdated int64) bool { 31 | if uc.statTotal == total && uc.statLastUpdated == lastUpdated { 32 | return false 33 | } 34 | 35 | return true 36 | } 37 | 38 | func (uc *UserCacheType) Set(m map[int64]*models.User, total, lastUpdated int64) { 39 | uc.Lock() 40 | uc.users = m 41 | uc.Unlock() 42 | 43 | // only one goroutine used, so no need lock 44 | uc.statTotal = total 45 | uc.statLastUpdated = lastUpdated 46 | } 47 | 48 | func (uc *UserCacheType) GetByUserId(id int64) *models.User { 49 | uc.RLock() 50 | defer uc.RUnlock() 51 | return uc.users[id] 52 | } 53 | 54 | func (uc *UserCacheType) GetByUserIds(ids []int64) []*models.User { 55 | set := make(map[int64]struct{}) 56 | 57 | uc.RLock() 58 | defer uc.RUnlock() 59 | 60 | var users []*models.User 61 | for _, id := range ids { 62 | if uc.users[id] == nil { 63 | continue 64 | } 65 | 66 | if _, has := set[id]; has { 67 | continue 68 | } 69 | 70 | users = append(users, uc.users[id]) 71 | set[id] = struct{}{} 72 | } 73 | 74 | if users == nil { 75 | users = []*models.User{} 76 | } 77 | 78 | return users 79 | } 80 | 81 | func SyncUsers() { 82 | err := syncUsers() 83 | if err != nil { 84 | fmt.Println("failed to sync users:", err) 85 | exit(1) 86 | } 87 | 88 | go loopSyncUsers() 89 | } 90 | 91 | func loopSyncUsers() { 92 | duration := time.Duration(9000) * time.Millisecond 93 | for { 94 | time.Sleep(duration) 95 | if err := syncUsers(); err != nil { 96 | logger.Warning("failed to sync users:", err) 97 | } 98 
| } 99 | } 100 | 101 | func syncUsers() error { 102 | start := time.Now() 103 | 104 | stat, err := models.UserStatistics() 105 | if err != nil { 106 | return errors.WithMessage(err, "failed to exec UserStatistics") 107 | } 108 | 109 | if !UserCache.StatChanged(stat.Total, stat.LastUpdated) { 110 | promstat.GaugeCronDuration.WithLabelValues(config.C.ClusterName, "sync_users").Set(0) 111 | promstat.GaugeSyncNumber.WithLabelValues(config.C.ClusterName, "sync_users").Set(0) 112 | logger.Debug("users not changed") 113 | return nil 114 | } 115 | 116 | lst, err := models.UserGetAll() 117 | if err != nil { 118 | return errors.WithMessage(err, "failed to exec UserGetAll") 119 | } 120 | 121 | m := make(map[int64]*models.User) 122 | for i := 0; i < len(lst); i++ { 123 | m[lst[i].Id] = lst[i] 124 | } 125 | 126 | UserCache.Set(m, stat.Total, stat.LastUpdated) 127 | 128 | ms := time.Since(start).Milliseconds() 129 | promstat.GaugeCronDuration.WithLabelValues(config.C.ClusterName, "sync_users").Set(float64(ms)) 130 | promstat.GaugeSyncNumber.WithLabelValues(config.C.ClusterName, "sync_users").Set(float64(len(m))) 131 | logger.Infof("timer: sync users done, cost: %dms, number: %d", ms, len(m)) 132 | 133 | return nil 134 | } 135 | -------------------------------------------------------------------------------- /src/webapi/router/router_funcs.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "fmt" 5 | "net/http" 6 | "strings" 7 | 8 | "github.com/gin-gonic/gin" 9 | "github.com/toolkits/pkg/ginx" 10 | "github.com/toolkits/pkg/str" 11 | 12 | "github.com/didi/nightingale/v5/src/models" 13 | "github.com/didi/nightingale/v5/src/pkg/ibex" 14 | "github.com/didi/nightingale/v5/src/webapi/config" 15 | ) 16 | 17 | const defaultLimit = 300 18 | 19 | func queryClusters(c *gin.Context) []string { 20 | clusters := ginx.QueryStr(c, "clusters", "") 21 | clusters = strings.ReplaceAll(clusters, ",", " ") 22 | return 
strings.Fields(clusters) 23 | } 24 | 25 | func Cluster(c *gin.Context) string { 26 | return c.GetHeader("X-Cluster") 27 | } 28 | 29 | func MustGetCluster(c *gin.Context) string { 30 | cluster := Cluster(c) 31 | if cluster == "" { 32 | ginx.Bomb(http.StatusBadRequest, "Header(X-Cluster) missed") 33 | } 34 | return cluster 35 | } 36 | 37 | type idsForm struct { 38 | Ids []int64 `json:"ids"` 39 | } 40 | 41 | func (f idsForm) Verify() { 42 | if len(f.Ids) == 0 { 43 | ginx.Bomb(http.StatusBadRequest, "ids empty") 44 | } 45 | } 46 | 47 | func User(id int64) *models.User { 48 | obj, err := models.UserGetById(id) 49 | ginx.Dangerous(err) 50 | 51 | if obj == nil { 52 | ginx.Bomb(http.StatusNotFound, "No such user") 53 | } 54 | 55 | return obj 56 | } 57 | 58 | func UserGroup(id int64) *models.UserGroup { 59 | obj, err := models.UserGroupGetById(id) 60 | ginx.Dangerous(err) 61 | 62 | if obj == nil { 63 | ginx.Bomb(http.StatusNotFound, "No such UserGroup") 64 | } 65 | 66 | return obj 67 | } 68 | 69 | func BusiGroup(id int64) *models.BusiGroup { 70 | obj, err := models.BusiGroupGetById(id) 71 | ginx.Dangerous(err) 72 | 73 | if obj == nil { 74 | ginx.Bomb(http.StatusNotFound, "No such BusiGroup") 75 | } 76 | 77 | return obj 78 | } 79 | 80 | func Dashboard(id int64) *models.Dashboard { 81 | obj, err := models.DashboardGet("id=?", id) 82 | ginx.Dangerous(err) 83 | 84 | if obj == nil { 85 | ginx.Bomb(http.StatusNotFound, "No such dashboard") 86 | } 87 | 88 | return obj 89 | } 90 | 91 | type DoneIdsReply struct { 92 | Err string `json:"err"` 93 | Dat struct { 94 | List []int64 `json:"list"` 95 | } `json:"dat"` 96 | } 97 | 98 | func TaskDoneIds(ids []int64) ([]int64, error) { 99 | var res DoneIdsReply 100 | err := ibex.New( 101 | config.C.Ibex.Address, 102 | config.C.Ibex.BasicAuthUser, 103 | config.C.Ibex.BasicAuthPass, 104 | config.C.Ibex.Timeout, 105 | ). 106 | Path("/ibex/v1/tasks/done-ids"). 107 | QueryString("ids", str.IdsString(ids, ",")). 108 | Out(&res). 
109 | GET() 110 | 111 | if err != nil { 112 | return nil, err 113 | } 114 | 115 | if res.Err != "" { 116 | return nil, fmt.Errorf("response.err: %v", res.Err) 117 | } 118 | 119 | return res.Dat.List, nil 120 | } 121 | 122 | type TaskCreateReply struct { 123 | Err string `json:"err"` 124 | Dat int64 `json:"dat"` // task.id 125 | } 126 | 127 | // return task.id, error 128 | func TaskCreate(v interface{}) (int64, error) { 129 | var res TaskCreateReply 130 | err := ibex.New( 131 | config.C.Ibex.Address, 132 | config.C.Ibex.BasicAuthUser, 133 | config.C.Ibex.BasicAuthPass, 134 | config.C.Ibex.Timeout, 135 | ). 136 | Path("/ibex/v1/tasks"). 137 | In(v). 138 | Out(&res). 139 | POST() 140 | 141 | if err != nil { 142 | return 0, err 143 | } 144 | 145 | if res.Err != "" { 146 | return 0, fmt.Errorf("response.err: %v", res.Err) 147 | } 148 | 149 | return res.Dat, nil 150 | } 151 | -------------------------------------------------------------------------------- /src/webapi/router/router_user_group.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "net/http" 5 | "time" 6 | 7 | "github.com/gin-gonic/gin" 8 | "github.com/toolkits/pkg/ginx" 9 | 10 | "github.com/didi/nightingale/v5/src/models" 11 | ) 12 | 13 | func checkBusiGroupPerm(c *gin.Context) { 14 | me := c.MustGet("user").(*models.User) 15 | bg := BusiGroup(ginx.UrlParamInt64(c, "id")) 16 | 17 | can, err := me.CanDoBusiGroup(bg, ginx.UrlParamStr(c, "perm")) 18 | ginx.NewRender(c).Data(can, err) 19 | } 20 | 21 | func userGroupGets(c *gin.Context) { 22 | limit := ginx.QueryInt(c, "limit", 1500) 23 | query := ginx.QueryStr(c, "query", "") 24 | 25 | me := c.MustGet("user").(*models.User) 26 | lst, err := me.UserGroups(limit, query) 27 | 28 | ginx.NewRender(c).Data(lst, err) 29 | } 30 | 31 | type userGroupForm struct { 32 | Name string `json:"name" binding:"required"` 33 | Note string `json:"note"` 34 | } 35 | 36 | func userGroupAdd(c *gin.Context) { 37 | 
var f userGroupForm 38 | ginx.BindJSON(c, &f) 39 | 40 | me := c.MustGet("user").(*models.User) 41 | 42 | ug := models.UserGroup{ 43 | Name: f.Name, 44 | Note: f.Note, 45 | CreateBy: me.Username, 46 | UpdateBy: me.Username, 47 | } 48 | 49 | err := ug.Add() 50 | if err == nil { 51 | // Even failure is not a big deal 52 | models.UserGroupMemberAdd(ug.Id, me.Id) 53 | } 54 | 55 | ginx.NewRender(c).Data(ug.Id, err) 56 | } 57 | 58 | func userGroupPut(c *gin.Context) { 59 | var f userGroupForm 60 | ginx.BindJSON(c, &f) 61 | 62 | me := c.MustGet("user").(*models.User) 63 | ug := c.MustGet("user_group").(*models.UserGroup) 64 | 65 | if ug.Name != f.Name { 66 | // name changed, check duplication 67 | num, err := models.UserGroupCount("name=? and id<>?", f.Name, ug.Id) 68 | ginx.Dangerous(err) 69 | 70 | if num > 0 { 71 | ginx.Bomb(http.StatusOK, "UserGroup already exists") 72 | } 73 | } 74 | 75 | ug.Name = f.Name 76 | ug.Note = f.Note 77 | ug.UpdateBy = me.Username 78 | ug.UpdateAt = time.Now().Unix() 79 | 80 | ginx.NewRender(c).Message(ug.Update("Name", "Note", "UpdateAt", "UpdateBy")) 81 | } 82 | 83 | // Return all members, front-end search and paging 84 | func userGroupGet(c *gin.Context) { 85 | ug := UserGroup(ginx.UrlParamInt64(c, "id")) 86 | 87 | ids, err := models.MemberIds(ug.Id) 88 | ginx.Dangerous(err) 89 | 90 | users, err := models.UserGetsByIds(ids) 91 | 92 | ginx.NewRender(c).Data(gin.H{ 93 | "users": users, 94 | "user_group": ug, 95 | }, err) 96 | } 97 | 98 | func userGroupDel(c *gin.Context) { 99 | ug := c.MustGet("user_group").(*models.UserGroup) 100 | ginx.NewRender(c).Message(ug.Del()) 101 | } 102 | 103 | func userGroupMemberAdd(c *gin.Context) { 104 | var f idsForm 105 | ginx.BindJSON(c, &f) 106 | f.Verify() 107 | 108 | me := c.MustGet("user").(*models.User) 109 | ug := c.MustGet("user_group").(*models.UserGroup) 110 | 111 | err := ug.AddMembers(f.Ids) 112 | if err == nil { 113 | ug.UpdateAt = time.Now().Unix() 114 | ug.UpdateBy = me.Username 115 | 
ug.Update("UpdateAt", "UpdateBy") 116 | } 117 | 118 | ginx.NewRender(c).Message(err) 119 | } 120 | 121 | func userGroupMemberDel(c *gin.Context) { 122 | var f idsForm 123 | ginx.BindJSON(c, &f) 124 | f.Verify() 125 | 126 | me := c.MustGet("user").(*models.User) 127 | ug := c.MustGet("user_group").(*models.UserGroup) 128 | 129 | err := ug.DelMembers(f.Ids) 130 | if err == nil { 131 | ug.UpdateAt = time.Now().Unix() 132 | ug.UpdateBy = me.Username 133 | ug.Update("UpdateAt", "UpdateBy") 134 | } 135 | 136 | ginx.NewRender(c).Message(err) 137 | } 138 | -------------------------------------------------------------------------------- /src/server/router/router_prom.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "io" 5 | "io/ioutil" 6 | "net/http" 7 | "time" 8 | 9 | "github.com/gin-gonic/gin" 10 | "github.com/gogo/protobuf/proto" 11 | "github.com/golang/snappy" 12 | "github.com/prometheus/prometheus/prompb" 13 | "github.com/toolkits/pkg/ginx" 14 | 15 | "github.com/didi/nightingale/v5/src/server/common" 16 | "github.com/didi/nightingale/v5/src/server/config" 17 | "github.com/didi/nightingale/v5/src/server/engine" 18 | "github.com/didi/nightingale/v5/src/server/idents" 19 | "github.com/didi/nightingale/v5/src/server/memsto" 20 | "github.com/didi/nightingale/v5/src/server/reader" 21 | promstat "github.com/didi/nightingale/v5/src/server/stat" 22 | "github.com/didi/nightingale/v5/src/server/writer" 23 | ) 24 | 25 | type promqlForm struct { 26 | PromQL string `json:"promql"` 27 | } 28 | 29 | func queryPromql(c *gin.Context) { 30 | var f promqlForm 31 | ginx.BindJSON(c, &f) 32 | 33 | value, warnings, err := reader.Reader.Client.Query(c.Request.Context(), f.PromQL, time.Now()) 34 | if err != nil { 35 | c.String(500, "promql:%s error:%v", f.PromQL, err) 36 | return 37 | } 38 | 39 | if len(warnings) > 0 { 40 | c.String(500, "promql:%s warnings:%v", f.PromQL, warnings) 41 | return 42 | } 43 | 44 | 
// remoteWrite handles a Prometheus remote-write request. It decodes the
// snappy-compressed body, determines an "ident" for every time series,
// records the seen idents, appends labels from the cached target when one is
// found, and pushes all series to the writer queue.
//
// Responses: 400 when the body cannot be decoded, 200 with an empty body when
// there is nothing to write or the push succeeded, 500 when the writer queue
// rejects the batch.
func remoteWrite(c *gin.Context) {
	req, err := DecodeWriteRequest(c.Request.Body)
	if err != nil {
		c.String(http.StatusBadRequest, err.Error())
		return
	}

	count := len(req.Timeseries)

	if count == 0 {
		c.String(200, "")
		return
	}

	var (
		now   = time.Now().Unix()
		ids   = make(map[string]interface{}) // ident -> unix time of this request
		lst   = make([]interface{}, count)   // series handed to the writer queue
		ident string
	)

	for i := 0; i < count; i++ {
		ident = ""

		// find ident label
		// (no break: when the label occurs more than once the last value wins)
		for j := 0; j < len(req.Timeseries[i].Labels); j++ {
			if req.Timeseries[i].Labels[j].Name == "ident" {
				ident = req.Timeseries[i].Labels[j].Value
			}
		}

		if ident == "" {
			// not found, try agent_hostname
			for j := 0; j < len(req.Timeseries[i].Labels); j++ {
				// agent_hostname for grafana-agent
				if req.Timeseries[i].Labels[j].Name == "agent_hostname" {
					// the label is renamed in place, so the stored series carries "ident"
					req.Timeseries[i].Labels[j].Name = "ident"
					ident = req.Timeseries[i].Labels[j].Value
				}
			}
		}

		if len(ident) > 0 {
			// register host
			ids[ident] = now

			// fill tags
			target, has := memsto.TargetCache.Get(ident)
			if has {
				common.AppendLabels(req.Timeseries[i], target)
			}
		}

		lst[i] = req.Timeseries[i]
	}

	promstat.CounterSampleTotal.WithLabelValues(config.C.ClusterName, "prometheus").Add(float64(count))
	idents.Idents.MSet(ids)
	if writer.Writers.PushQueue(lst) {
		c.String(200, "")
	} else {
		c.String(http.StatusInternalServerError, "writer queue full")
	}
}
114 | func DecodeWriteRequest(r io.Reader) (*prompb.WriteRequest, error) { 115 | compressed, err := ioutil.ReadAll(r) 116 | if err != nil { 117 | return nil, err 118 | } 119 | 120 | reqBuf, err := snappy.Decode(nil, compressed) 121 | if err != nil { 122 | return nil, err 123 | } 124 | 125 | var req prompb.WriteRequest 126 | if err := proto.Unmarshal(reqBuf, &req); err != nil { 127 | return nil, err 128 | } 129 | 130 | return &req, nil 131 | } 132 | -------------------------------------------------------------------------------- /src/webapi/config/config.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "strings" 7 | "sync" 8 | 9 | "github.com/gin-gonic/gin" 10 | "github.com/koding/multiconfig" 11 | 12 | "github.com/didi/nightingale/v5/src/pkg/httpx" 13 | "github.com/didi/nightingale/v5/src/pkg/ldapx" 14 | "github.com/didi/nightingale/v5/src/pkg/logx" 15 | "github.com/didi/nightingale/v5/src/storage" 16 | "github.com/didi/nightingale/v5/src/webapi/prom" 17 | ) 18 | 19 | var ( 20 | C = new(Config) 21 | once sync.Once 22 | ) 23 | 24 | func MustLoad(fpaths ...string) { 25 | once.Do(func() { 26 | loaders := []multiconfig.Loader{ 27 | &multiconfig.TagLoader{}, 28 | &multiconfig.EnvironmentLoader{}, 29 | } 30 | 31 | for _, fpath := range fpaths { 32 | handled := false 33 | 34 | if strings.HasSuffix(fpath, "toml") { 35 | loaders = append(loaders, &multiconfig.TOMLLoader{Path: fpath}) 36 | handled = true 37 | } 38 | if strings.HasSuffix(fpath, "conf") { 39 | loaders = append(loaders, &multiconfig.TOMLLoader{Path: fpath}) 40 | handled = true 41 | } 42 | if strings.HasSuffix(fpath, "json") { 43 | loaders = append(loaders, &multiconfig.JSONLoader{Path: fpath}) 44 | handled = true 45 | } 46 | if strings.HasSuffix(fpath, "yaml") { 47 | loaders = append(loaders, &multiconfig.YAMLLoader{Path: fpath}) 48 | handled = true 49 | } 50 | 51 | if !handled { 52 | fmt.Println("config file invalid, 
// LabelAndKey pairs a display label with the key it stands for.
type LabelAndKey struct {
	Label string `json:"label"`
	Key   string `json:"key"`
}

// LabelAndKeyHasKey reports whether any element of keys has the given Key.
// Labels are not considered; an empty or nil slice yields false.
func LabelAndKeyHasKey(keys []LabelAndKey, key string) bool {
	for _, item := range keys {
		if item.Key == key {
			return true
		}
	}
	return false
}
/src/webapi/router/router_alert_rule.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "net/http" 5 | "time" 6 | 7 | "github.com/gin-gonic/gin" 8 | "github.com/toolkits/pkg/ginx" 9 | "github.com/toolkits/pkg/i18n" 10 | 11 | "github.com/didi/nightingale/v5/src/models" 12 | ) 13 | 14 | // Return all, front-end search and paging 15 | func alertRuleGets(c *gin.Context) { 16 | busiGroupId := ginx.UrlParamInt64(c, "id") 17 | ars, err := models.AlertRuleGets(busiGroupId) 18 | if err == nil { 19 | cache := make(map[int64]*models.UserGroup) 20 | for i := 0; i < len(ars); i++ { 21 | ars[i].FillNotifyGroups(cache) 22 | } 23 | } 24 | ginx.NewRender(c).Data(ars, err) 25 | } 26 | 27 | // single or import 28 | func alertRuleAdd(c *gin.Context) { 29 | var lst []models.AlertRule 30 | ginx.BindJSON(c, &lst) 31 | 32 | count := len(lst) 33 | if count == 0 { 34 | ginx.Bomb(http.StatusBadRequest, "input json is empty") 35 | } 36 | 37 | username := c.MustGet("username").(string) 38 | bgid := ginx.UrlParamInt64(c, "id") 39 | 40 | // alert rule name -> error string 41 | reterr := make(map[string]string) 42 | for i := 0; i < count; i++ { 43 | lst[i].Id = 0 44 | lst[i].GroupId = bgid 45 | lst[i].CreateBy = username 46 | lst[i].UpdateBy = username 47 | lst[i].FE2DB() 48 | 49 | if err := lst[i].Add(); err != nil { 50 | reterr[lst[i].Name] = i18n.Sprintf(c.GetHeader("X-Language"), err.Error()) 51 | } else { 52 | reterr[lst[i].Name] = "" 53 | } 54 | } 55 | 56 | ginx.NewRender(c).Data(reterr, nil) 57 | } 58 | 59 | func alertRuleDel(c *gin.Context) { 60 | var f idsForm 61 | ginx.BindJSON(c, &f) 62 | f.Verify() 63 | 64 | // param(busiGroupId) for protect 65 | ginx.NewRender(c).Message(models.AlertRuleDels(f.Ids, ginx.UrlParamInt64(c, "id"))) 66 | } 67 | 68 | func alertRulePut(c *gin.Context) { 69 | var f models.AlertRule 70 | ginx.BindJSON(c, &f) 71 | 72 | arid := ginx.UrlParamInt64(c, "arid") 73 | ar, err := 
models.AlertRuleGetById(arid) 74 | ginx.Dangerous(err) 75 | 76 | if ar == nil { 77 | ginx.NewRender(c, http.StatusNotFound).Message("No such AlertRule") 78 | return 79 | } 80 | 81 | bgrwCheck(c, ar.GroupId) 82 | 83 | f.UpdateBy = c.MustGet("username").(string) 84 | ginx.NewRender(c).Message(ar.Update(f)) 85 | } 86 | 87 | type alertRuleFieldForm struct { 88 | Ids []int64 `json:"ids"` 89 | Fields map[string]interface{} `json:"fields"` 90 | } 91 | 92 | // update one field: cluster note severity disabled prom_eval_interval prom_for_duration notify_channels notify_groups notify_recovered notify_repeat_step callbacks runbook_url append_tags 93 | func alertRulePutFields(c *gin.Context) { 94 | var f alertRuleFieldForm 95 | ginx.BindJSON(c, &f) 96 | 97 | if len(f.Fields) == 0 { 98 | ginx.Bomb(http.StatusBadRequest, "fields empty") 99 | } 100 | 101 | f.Fields["update_by"] = c.MustGet("username").(string) 102 | f.Fields["update_at"] = time.Now().Unix() 103 | 104 | for i := 0; i < len(f.Ids); i++ { 105 | ar, err := models.AlertRuleGetById(f.Ids[i]) 106 | ginx.Dangerous(err) 107 | 108 | if ar == nil { 109 | continue 110 | } 111 | 112 | ginx.Dangerous(ar.UpdateFieldsMap(f.Fields)) 113 | } 114 | 115 | ginx.NewRender(c).Message(nil) 116 | } 117 | 118 | func alertRuleGet(c *gin.Context) { 119 | arid := ginx.UrlParamInt64(c, "arid") 120 | 121 | ar, err := models.AlertRuleGetById(arid) 122 | ginx.Dangerous(err) 123 | 124 | if ar == nil { 125 | ginx.NewRender(c, http.StatusNotFound).Message("No such AlertRule") 126 | return 127 | } 128 | 129 | err = ar.FillNotifyGroups(make(map[int64]*models.UserGroup)) 130 | ginx.NewRender(c).Data(ar, err) 131 | } 132 | -------------------------------------------------------------------------------- /src/models/metric_description.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "strings" 5 | "time" 6 | ) 7 | 8 | type MetricDescription struct { 9 | Id int64 `json:"id"` 10 | 
Metric string `json:"metric"` 11 | Description string `json:"description"` 12 | UpdateAt int64 `json:"update_at"` 13 | } 14 | 15 | func (md *MetricDescription) TableName() string { 16 | return "metric_description" 17 | } 18 | 19 | func MetricDescriptionUpdate(mds []MetricDescription) error { 20 | now := time.Now().Unix() 21 | 22 | for i := 0; i < len(mds); i++ { 23 | mds[i].Metric = strings.TrimSpace(mds[i].Metric) 24 | md, err := MetricDescriptionGet("metric = ?", mds[i].Metric) 25 | if err != nil { 26 | return err 27 | } 28 | 29 | if md == nil { 30 | // insert 31 | mds[i].UpdateAt = now 32 | err = Insert(&mds[i]) 33 | if err != nil { 34 | return err 35 | } 36 | } else { 37 | // update 38 | err = md.Update(mds[i].Description, now) 39 | if err != nil { 40 | return err 41 | } 42 | } 43 | } 44 | return nil 45 | } 46 | 47 | func (md *MetricDescription) Update(desn string, now int64) error { 48 | md.Description = desn 49 | md.UpdateAt = now 50 | return DB().Model(md).Select("Description", "UpdateAt").Updates(md).Error 51 | } 52 | 53 | func MetricDescriptionGet(where string, args ...interface{}) (*MetricDescription, error) { 54 | var lst []*MetricDescription 55 | err := DB().Where(where, args...).Find(&lst).Error 56 | if err != nil { 57 | return nil, err 58 | } 59 | 60 | if len(lst) == 0 { 61 | return nil, nil 62 | } 63 | 64 | return lst[0], nil 65 | } 66 | 67 | func MetricDescriptionTotal(query string) (int64, error) { 68 | session := DB().Model(&MetricDescription{}) 69 | 70 | if query != "" { 71 | q := "%" + query + "%" 72 | session = session.Where("metric like ? or description like ?", q, q) 73 | } 74 | 75 | return Count(session) 76 | } 77 | 78 | func MetricDescriptionGets(query string, limit, offset int) ([]MetricDescription, error) { 79 | session := DB().Order("metric").Limit(limit).Offset(offset) 80 | if query != "" { 81 | q := "%" + query + "%" 82 | session = session.Where("metric like ? 
or description like ?", q, q) 83 | } 84 | 85 | var objs []MetricDescription 86 | err := session.Find(&objs).Error 87 | return objs, err 88 | } 89 | 90 | func MetricDescGetAll() ([]MetricDescription, error) { 91 | var objs []MetricDescription 92 | err := DB().Find(&objs).Error 93 | return objs, err 94 | } 95 | 96 | func MetricDescStatistics() (*Statistics, error) { 97 | session := DB().Model(&MetricDescription{}).Select("count(*) as total", "max(update_at) as last_updated") 98 | 99 | var stats []*Statistics 100 | err := session.Find(&stats).Error 101 | if err != nil { 102 | return nil, err 103 | } 104 | 105 | return stats[0], nil 106 | } 107 | 108 | func MetricDescriptionMapper(metrics []string) (map[string]string, error) { 109 | if len(metrics) == 0 { 110 | return map[string]string{}, nil 111 | } 112 | 113 | var objs []MetricDescription 114 | err := DB().Where("metric in ?", metrics).Find(&objs).Error 115 | if err != nil { 116 | return nil, err 117 | } 118 | 119 | count := len(objs) 120 | if count == 0 { 121 | return map[string]string{}, nil 122 | } 123 | 124 | mapper := make(map[string]string, count) 125 | for i := 0; i < count; i++ { 126 | mapper[objs[i].Metric] = objs[i].Description 127 | } 128 | 129 | return mapper, nil 130 | } 131 | 132 | func MetricDescriptionDel(ids []int64) error { 133 | if len(ids) == 0 { 134 | return nil 135 | } 136 | 137 | return DB().Where("id in ?", ids).Delete(new(MetricDescription)).Error 138 | } 139 | -------------------------------------------------------------------------------- /src/webapi/router/router_user.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "net/http" 5 | "strings" 6 | 7 | "github.com/gin-gonic/gin" 8 | "github.com/toolkits/pkg/ginx" 9 | 10 | "github.com/didi/nightingale/v5/src/models" 11 | "github.com/didi/nightingale/v5/src/pkg/ormx" 12 | ) 13 | 14 | func userGets(c *gin.Context) { 15 | limit := ginx.QueryInt(c, "limit", 20) 16 | query 
:= ginx.QueryStr(c, "query", "") 17 | 18 | total, err := models.UserTotal(query) 19 | ginx.Dangerous(err) 20 | 21 | list, err := models.UserGets(query, limit, ginx.Offset(c, limit)) 22 | ginx.Dangerous(err) 23 | 24 | user := c.MustGet("user").(*models.User) 25 | 26 | ginx.NewRender(c).Data(gin.H{ 27 | "list": list, 28 | "total": total, 29 | "admin": user.IsAdmin(), 30 | }, nil) 31 | } 32 | 33 | type userAddForm struct { 34 | Username string `json:"username" binding:"required"` 35 | Password string `json:"password" binding:"required"` 36 | Nickname string `json:"nickname"` 37 | Phone string `json:"phone"` 38 | Email string `json:"email"` 39 | Portrait string `json:"portrait"` 40 | Roles []string `json:"roles" binding:"required"` 41 | Contacts ormx.JSONObj `json:"contacts"` 42 | } 43 | 44 | func userAddPost(c *gin.Context) { 45 | var f userAddForm 46 | ginx.BindJSON(c, &f) 47 | 48 | password, err := models.CryptoPass(f.Password) 49 | ginx.Dangerous(err) 50 | 51 | if len(f.Roles) == 0 { 52 | ginx.Bomb(http.StatusBadRequest, "roles empty") 53 | } 54 | 55 | user := c.MustGet("user").(*models.User) 56 | 57 | u := models.User{ 58 | Username: f.Username, 59 | Password: password, 60 | Nickname: f.Nickname, 61 | Phone: f.Phone, 62 | Email: f.Email, 63 | Portrait: f.Portrait, 64 | Roles: strings.Join(f.Roles, " "), 65 | Contacts: f.Contacts, 66 | CreateBy: user.Username, 67 | UpdateBy: user.Username, 68 | } 69 | 70 | ginx.NewRender(c).Message(u.Add()) 71 | } 72 | 73 | func userProfileGet(c *gin.Context) { 74 | user := User(ginx.UrlParamInt64(c, "id")) 75 | ginx.NewRender(c).Data(user, nil) 76 | } 77 | 78 | type userProfileForm struct { 79 | Nickname string `json:"nickname"` 80 | Phone string `json:"phone"` 81 | Email string `json:"email"` 82 | Roles []string `json:"roles"` 83 | Contacts ormx.JSONObj `json:"contacts"` 84 | } 85 | 86 | func userProfilePut(c *gin.Context) { 87 | var f userProfileForm 88 | ginx.BindJSON(c, &f) 89 | 90 | if len(f.Roles) == 0 { 91 | 
ginx.Bomb(http.StatusBadRequest, "roles empty") 92 | } 93 | 94 | target := User(ginx.UrlParamInt64(c, "id")) 95 | target.Nickname = f.Nickname 96 | target.Phone = f.Phone 97 | target.Email = f.Email 98 | target.Roles = strings.Join(f.Roles, " ") 99 | target.Contacts = f.Contacts 100 | target.UpdateBy = c.MustGet("username").(string) 101 | 102 | ginx.NewRender(c).Message(target.UpdateAllFields()) 103 | } 104 | 105 | type userPasswordForm struct { 106 | Password string `json:"password" binding:"required"` 107 | } 108 | 109 | func userPasswordPut(c *gin.Context) { 110 | var f userPasswordForm 111 | ginx.BindJSON(c, &f) 112 | 113 | target := User(ginx.UrlParamInt64(c, "id")) 114 | 115 | cryptoPass, err := models.CryptoPass(f.Password) 116 | ginx.Dangerous(err) 117 | 118 | ginx.NewRender(c).Message(target.UpdatePassword(cryptoPass, c.MustGet("username").(string))) 119 | } 120 | 121 | func userDel(c *gin.Context) { 122 | id := ginx.UrlParamInt64(c, "id") 123 | target, err := models.UserGetById(id) 124 | ginx.Dangerous(err) 125 | 126 | if target == nil { 127 | ginx.NewRender(c).Message(nil) 128 | return 129 | } 130 | 131 | ginx.NewRender(c).Message(target.Del()) 132 | } 133 | -------------------------------------------------------------------------------- /src/webapi/router/router_busi_group.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "net/http" 5 | 6 | "github.com/gin-gonic/gin" 7 | "github.com/toolkits/pkg/ginx" 8 | "github.com/toolkits/pkg/logger" 9 | "github.com/toolkits/pkg/str" 10 | 11 | "github.com/didi/nightingale/v5/src/models" 12 | ) 13 | 14 | type busiGroupForm struct { 15 | Name string `json:"name" binding:"required"` 16 | LabelEnable int `json:"label_enable"` 17 | LabelValue string `json:"label_value"` 18 | Members []models.BusiGroupMember `json:"members"` 19 | } 20 | 21 | func busiGroupAdd(c *gin.Context) { 22 | var f busiGroupForm 23 | ginx.BindJSON(c, &f) 24 | 25 | if 
len(f.Members) == 0 { 26 | ginx.Bomb(http.StatusBadRequest, "members empty") 27 | } 28 | 29 | rwhas := false 30 | for i := 0; i < len(f.Members); i++ { 31 | if f.Members[i].PermFlag == "rw" { 32 | rwhas = true 33 | break 34 | } 35 | } 36 | 37 | if !rwhas { 38 | ginx.Bomb(http.StatusBadRequest, "At least one team have rw permission") 39 | } 40 | 41 | username := c.MustGet("username").(string) 42 | ginx.Dangerous(models.BusiGroupAdd(f.Name, f.LabelEnable, f.LabelValue, f.Members, username)) 43 | 44 | // 如果创建成功,拿着name去查,应该可以查到 45 | newbg, err := models.BusiGroupGet("name=?", f.Name) 46 | ginx.Dangerous(err) 47 | 48 | if newbg == nil { 49 | ginx.NewRender(c).Message("Failed to create BusiGroup(%s)", f.Name) 50 | return 51 | } 52 | 53 | ginx.NewRender(c).Data(newbg.Id, nil) 54 | } 55 | 56 | func busiGroupPut(c *gin.Context) { 57 | var f busiGroupForm 58 | ginx.BindJSON(c, &f) 59 | 60 | username := c.MustGet("username").(string) 61 | targetbg := c.MustGet("busi_group").(*models.BusiGroup) 62 | ginx.NewRender(c).Message(targetbg.Update(f.Name, f.LabelEnable, f.LabelValue, username)) 63 | } 64 | 65 | func busiGroupMemberAdd(c *gin.Context) { 66 | var members []models.BusiGroupMember 67 | ginx.BindJSON(c, &members) 68 | 69 | username := c.MustGet("username").(string) 70 | targetbg := c.MustGet("busi_group").(*models.BusiGroup) 71 | 72 | ginx.NewRender(c).Message(targetbg.AddMembers(members, username)) 73 | } 74 | 75 | func busiGroupMemberDel(c *gin.Context) { 76 | var members []models.BusiGroupMember 77 | ginx.BindJSON(c, &members) 78 | 79 | username := c.MustGet("username").(string) 80 | targetbg := c.MustGet("busi_group").(*models.BusiGroup) 81 | 82 | ginx.NewRender(c).Message(targetbg.DelMembers(members, username)) 83 | } 84 | 85 | func busiGroupDel(c *gin.Context) { 86 | username := c.MustGet("username").(string) 87 | targetbg := c.MustGet("busi_group").(*models.BusiGroup) 88 | 89 | err := targetbg.Del() 90 | if err != nil { 91 | logger.Infof("busi_group_delete fail: 
operator=%s, group_name=%s error=%v", username, targetbg.Name, err) 92 | } else { 93 | logger.Infof("busi_group_delete succ: operator=%s, group_name=%s", username, targetbg.Name) 94 | } 95 | 96 | ginx.NewRender(c).Message(err) 97 | } 98 | 99 | // 我是超管、或者我是业务组成员 100 | func busiGroupGets(c *gin.Context) { 101 | limit := ginx.QueryInt(c, "limit", defaultLimit) 102 | query := ginx.QueryStr(c, "query", "") 103 | all := ginx.QueryBool(c, "all", false) 104 | 105 | me := c.MustGet("user").(*models.User) 106 | lst, err := me.BusiGroups(limit, query, all) 107 | 108 | ginx.NewRender(c).Data(lst, err) 109 | } 110 | 111 | // 这个接口只有在活跃告警页面才调用,获取各个BG的活跃告警数量 112 | func busiGroupAlertingsGets(c *gin.Context) { 113 | ids := ginx.QueryStr(c, "ids", "") 114 | ret, err := models.AlertNumbers(str.IdsInt64(ids)) 115 | ginx.NewRender(c).Data(ret, err) 116 | } 117 | 118 | func busiGroupGet(c *gin.Context) { 119 | bg := BusiGroup(ginx.UrlParamInt64(c, "id")) 120 | ginx.Dangerous(bg.FillUserGroups()) 121 | ginx.NewRender(c).Data(bg, nil) 122 | } 123 | -------------------------------------------------------------------------------- /src/server/memsto/alert_mute_cache.go: -------------------------------------------------------------------------------- 1 | package memsto 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | "time" 7 | 8 | "github.com/pkg/errors" 9 | "github.com/toolkits/pkg/logger" 10 | 11 | "github.com/didi/nightingale/v5/src/models" 12 | "github.com/didi/nightingale/v5/src/server/config" 13 | promstat "github.com/didi/nightingale/v5/src/server/stat" 14 | ) 15 | 16 | type AlertMuteCacheType struct { 17 | statTotal int64 18 | statLastUpdated int64 19 | 20 | sync.RWMutex 21 | mutes map[int64][]*models.AlertMute // key: busi_group_id 22 | } 23 | 24 | var AlertMuteCache = AlertMuteCacheType{ 25 | statTotal: -1, 26 | statLastUpdated: -1, 27 | mutes: make(map[int64][]*models.AlertMute), 28 | } 29 | 30 | func (amc *AlertMuteCacheType) StatChanged(total, lastUpdated int64) bool { 31 | if 
amc.statTotal == total && amc.statLastUpdated == lastUpdated { 32 | return false 33 | } 34 | 35 | return true 36 | } 37 | 38 | func (amc *AlertMuteCacheType) Set(ms map[int64][]*models.AlertMute, total, lastUpdated int64) { 39 | amc.Lock() 40 | amc.mutes = ms 41 | amc.Unlock() 42 | 43 | // only one goroutine used, so no need lock 44 | amc.statTotal = total 45 | amc.statLastUpdated = lastUpdated 46 | } 47 | 48 | func (amc *AlertMuteCacheType) Gets(bgid int64) ([]*models.AlertMute, bool) { 49 | amc.RLock() 50 | defer amc.RUnlock() 51 | lst, has := amc.mutes[bgid] 52 | return lst, has 53 | } 54 | 55 | func (amc *AlertMuteCacheType) GetAllStructs() map[int64][]models.AlertMute { 56 | amc.RLock() 57 | defer amc.RUnlock() 58 | 59 | ret := make(map[int64][]models.AlertMute) 60 | for bgid := range amc.mutes { 61 | lst := amc.mutes[bgid] 62 | for i := 0; i < len(lst); i++ { 63 | ret[bgid] = append(ret[bgid], *lst[i]) 64 | } 65 | } 66 | 67 | return ret 68 | } 69 | 70 | func SyncAlertMutes() { 71 | err := syncAlertMutes() 72 | if err != nil { 73 | fmt.Println("failed to sync alert mutes:", err) 74 | exit(1) 75 | } 76 | 77 | go loopSyncAlertMutes() 78 | } 79 | 80 | func loopSyncAlertMutes() { 81 | duration := time.Duration(9000) * time.Millisecond 82 | for { 83 | time.Sleep(duration) 84 | if err := syncAlertMutes(); err != nil { 85 | logger.Warning("failed to sync alert mutes:", err) 86 | } 87 | } 88 | } 89 | 90 | func syncAlertMutes() error { 91 | start := time.Now() 92 | 93 | stat, err := models.AlertMuteStatistics(config.C.ClusterName) 94 | if err != nil { 95 | return errors.WithMessage(err, "failed to exec AlertMuteStatistics") 96 | } 97 | 98 | if !AlertMuteCache.StatChanged(stat.Total, stat.LastUpdated) { 99 | promstat.GaugeCronDuration.WithLabelValues(config.C.ClusterName, "sync_alert_mutes").Set(0) 100 | promstat.GaugeSyncNumber.WithLabelValues(config.C.ClusterName, "sync_alert_mutes").Set(0) 101 | logger.Debug("alert mutes not changed") 102 | return nil 103 | } 104 | 
// TagFilter is one tag-matching condition; AlertMute keeps its parsed
// conditions as a slice of TagFilter in ITags. Regexp and Vset are derived
// from Value by AlertMute.Parse, depending on Func.
type TagFilter struct {
	Key    string              `json:"key"`   // tag key
	Func   string              `json:"func"`  // == | =~ | in
	Value  string              `json:"value"` // tag value
	Regexp *regexp.Regexp      // Value compiled to a regexp when Func is "=~"
	Vset   map[string]struct{} // whitespace-separated fields of Value as a set when Func is "in"
}
`json:"create_at"` 32 | ITags []TagFilter `json:"-" gorm:"-"` // inner tags 33 | } 34 | 35 | func (m *AlertMute) TableName() string { 36 | return "alert_mute" 37 | } 38 | 39 | func AlertMuteGets(groupId int64) (lst []AlertMute, err error) { 40 | err = DB().Where("group_id=?", groupId).Order("id desc").Find(&lst).Error 41 | return 42 | } 43 | 44 | func (m *AlertMute) Verify() error { 45 | if m.GroupId <= 0 { 46 | return errors.New("group_id invalid") 47 | } 48 | 49 | if m.Cluster == "" { 50 | return errors.New("cluster invalid") 51 | } 52 | 53 | if m.Etime <= m.Btime { 54 | return fmt.Errorf("Oops... etime(%d) <= btime(%d)", m.Etime, m.Btime) 55 | } 56 | 57 | if err := m.Parse(); err != nil { 58 | return err 59 | } 60 | 61 | if len(m.ITags) == 0 { 62 | return errors.New("tags is blank") 63 | } 64 | 65 | return nil 66 | } 67 | 68 | func (m *AlertMute) Parse() error { 69 | err := json.Unmarshal(m.Tags, &m.ITags) 70 | if err != nil { 71 | return err 72 | } 73 | 74 | for i := 0; i < len(m.ITags); i++ { 75 | if m.ITags[i].Func == "=~" { 76 | m.ITags[i].Regexp, err = regexp.Compile(m.ITags[i].Value) 77 | if err != nil { 78 | return err 79 | } 80 | } else if m.ITags[i].Func == "in" { 81 | arr := strings.Fields(m.ITags[i].Value) 82 | m.ITags[i].Vset = make(map[string]struct{}) 83 | for j := 0; j < len(arr); j++ { 84 | m.ITags[i].Vset[arr[j]] = struct{}{} 85 | } 86 | } 87 | } 88 | 89 | return nil 90 | } 91 | 92 | func (m *AlertMute) Add() error { 93 | if err := m.Verify(); err != nil { 94 | return err 95 | } 96 | m.CreateAt = time.Now().Unix() 97 | return Insert(m) 98 | } 99 | 100 | func AlertMuteDel(ids []int64) error { 101 | if len(ids) == 0 { 102 | return nil 103 | } 104 | return DB().Where("id in ?", ids).Delete(new(AlertMute)).Error 105 | } 106 | 107 | func AlertMuteStatistics(cluster string) (*Statistics, error) { 108 | session := DB().Model(&AlertMute{}).Select("count(*) as total", "max(create_at) as last_updated") 109 | if cluster != "" { 110 | session = 
session.Where("cluster = ?", cluster) 111 | } 112 | 113 | var stats []*Statistics 114 | err := session.Find(&stats).Error 115 | if err != nil { 116 | return nil, err 117 | } 118 | 119 | return stats[0], nil 120 | } 121 | 122 | func AlertMuteGetsByCluster(cluster string) ([]*AlertMute, error) { 123 | // clean expired first 124 | buf := int64(30) 125 | err := DB().Where("etime < ?", time.Now().Unix()+buf).Delete(new(AlertMute)).Error 126 | if err != nil { 127 | return nil, err 128 | } 129 | 130 | // get my cluster's mutes 131 | session := DB().Model(&AlertMute{}) 132 | if cluster != "" { 133 | session = session.Where("cluster = ?", cluster) 134 | } 135 | 136 | var lst []*AlertMute 137 | err = session.Find(&lst).Error 138 | return lst, err 139 | } 140 | -------------------------------------------------------------------------------- /src/models/user_group.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/pkg/errors" 7 | "github.com/toolkits/pkg/str" 8 | "gorm.io/gorm" 9 | ) 10 | 11 | type UserGroup struct { 12 | Id int64 `json:"id" gorm:"primaryKey"` 13 | Name string `json:"name"` 14 | Note string `json:"note"` 15 | CreateAt int64 `json:"create_at"` 16 | CreateBy string `json:"create_by"` 17 | UpdateAt int64 `json:"update_at"` 18 | UpdateBy string `json:"update_by"` 19 | UserIds []int64 `json:"-" gorm:"-"` 20 | } 21 | 22 | func (ug *UserGroup) TableName() string { 23 | return "user_group" 24 | } 25 | 26 | func (ug *UserGroup) Verify() error { 27 | if str.Dangerous(ug.Name) { 28 | return errors.New("Name has invalid characters") 29 | } 30 | 31 | if str.Dangerous(ug.Note) { 32 | return errors.New("Note has invalid characters") 33 | } 34 | 35 | return nil 36 | } 37 | 38 | func (ug *UserGroup) Update(selectField interface{}, selectFields ...interface{}) error { 39 | if err := ug.Verify(); err != nil { 40 | return err 41 | } 42 | 43 | return 
DB().Model(ug).Select(selectField, selectFields...).Updates(ug).Error 44 | } 45 | 46 | func UserGroupCount(where string, args ...interface{}) (num int64, err error) { 47 | return Count(DB().Model(&UserGroup{}).Where(where, args...)) 48 | } 49 | 50 | func (ug *UserGroup) Add() error { 51 | if err := ug.Verify(); err != nil { 52 | return err 53 | } 54 | 55 | num, err := UserGroupCount("name=?", ug.Name) 56 | if err != nil { 57 | return errors.WithMessage(err, "failed to count user-groups") 58 | } 59 | 60 | if num > 0 { 61 | return errors.New("UserGroup already exists") 62 | } 63 | 64 | now := time.Now().Unix() 65 | ug.CreateAt = now 66 | ug.UpdateAt = now 67 | return Insert(ug) 68 | } 69 | 70 | func (ug *UserGroup) Del() error { 71 | return DB().Transaction(func(tx *gorm.DB) error { 72 | if err := tx.Where("group_id=?", ug.Id).Delete(&UserGroupMember{}).Error; err != nil { 73 | return err 74 | } 75 | 76 | if err := tx.Where("id=?", ug.Id).Delete(&UserGroup{}).Error; err != nil { 77 | return err 78 | } 79 | 80 | return nil 81 | }) 82 | } 83 | 84 | func UserGroupGet(where string, args ...interface{}) (*UserGroup, error) { 85 | var lst []*UserGroup 86 | err := DB().Where(where, args...).Find(&lst).Error 87 | if err != nil { 88 | return nil, err 89 | } 90 | 91 | if len(lst) == 0 { 92 | return nil, nil 93 | } 94 | 95 | return lst[0], nil 96 | } 97 | 98 | func UserGroupGetById(id int64) (*UserGroup, error) { 99 | return UserGroupGet("id = ?", id) 100 | } 101 | 102 | func UserGroupGetByIds(ids []int64) ([]UserGroup, error) { 103 | var lst []UserGroup 104 | if len(ids) == 0 { 105 | return lst, nil 106 | } 107 | 108 | err := DB().Where("id in ?", ids).Order("name").Find(&lst).Error 109 | return lst, err 110 | } 111 | 112 | func UserGroupGetAll() ([]*UserGroup, error) { 113 | var lst []*UserGroup 114 | err := DB().Find(&lst).Error 115 | return lst, err 116 | } 117 | 118 | func (ug *UserGroup) AddMembers(userIds []int64) error { 119 | count := len(userIds) 120 | for i := 0; i < 
count; i++ { 121 | user, err := UserGetById(userIds[i]) 122 | if err != nil { 123 | return err 124 | } 125 | if user == nil { 126 | continue 127 | } 128 | err = UserGroupMemberAdd(ug.Id, user.Id) 129 | if err != nil { 130 | return err 131 | } 132 | } 133 | return nil 134 | } 135 | 136 | func (ug *UserGroup) DelMembers(userIds []int64) error { 137 | return UserGroupMemberDel(ug.Id, userIds) 138 | } 139 | 140 | func UserGroupStatistics() (*Statistics, error) { 141 | session := DB().Model(&UserGroup{}).Select("count(*) as total", "max(update_at) as last_updated") 142 | 143 | var stats []*Statistics 144 | err := session.Find(&stats).Error 145 | if err != nil { 146 | return nil, err 147 | } 148 | 149 | return stats[0], nil 150 | } 151 | -------------------------------------------------------------------------------- /src/server/memsto/target_cache.go: -------------------------------------------------------------------------------- 1 | package memsto 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | "sync" 7 | "time" 8 | 9 | "github.com/pkg/errors" 10 | "github.com/toolkits/pkg/logger" 11 | 12 | "github.com/didi/nightingale/v5/src/models" 13 | "github.com/didi/nightingale/v5/src/server/config" 14 | promstat "github.com/didi/nightingale/v5/src/server/stat" 15 | ) 16 | 17 | // 1. append note to alert_event 18 | // 2. 
append tags to series 19 | type TargetCacheType struct { 20 | statTotal int64 21 | statLastUpdated int64 22 | 23 | sync.RWMutex 24 | targets map[string]*models.Target // key: ident 25 | } 26 | 27 | // init TargetCache 28 | var TargetCache = TargetCacheType{ 29 | statTotal: -1, 30 | statLastUpdated: -1, 31 | targets: make(map[string]*models.Target), 32 | } 33 | 34 | func (tc *TargetCacheType) StatChanged(total, lastUpdated int64) bool { 35 | if tc.statTotal == total && tc.statLastUpdated == lastUpdated { 36 | return false 37 | } 38 | 39 | return true 40 | } 41 | 42 | func (tc *TargetCacheType) Set(m map[string]*models.Target, total, lastUpdated int64) { 43 | tc.Lock() 44 | tc.targets = m 45 | tc.Unlock() 46 | 47 | // only one goroutine used, so no need lock 48 | tc.statTotal = total 49 | tc.statLastUpdated = lastUpdated 50 | } 51 | 52 | func (tc *TargetCacheType) Get(ident string) (*models.Target, bool) { 53 | tc.RLock() 54 | defer tc.RUnlock() 55 | val, has := tc.targets[ident] 56 | return val, has 57 | } 58 | 59 | func (tc *TargetCacheType) GetDeads(actives map[string]struct{}) map[string]*models.Target { 60 | ret := make(map[string]*models.Target) 61 | 62 | tc.RLock() 63 | defer tc.RUnlock() 64 | 65 | for ident, target := range tc.targets { 66 | if _, has := actives[ident]; !has { 67 | ret[ident] = target 68 | } 69 | } 70 | 71 | return ret 72 | } 73 | 74 | func SyncTargets() { 75 | err := syncTargets() 76 | if err != nil { 77 | fmt.Println("failed to sync targets:", err) 78 | exit(1) 79 | } 80 | 81 | go loopSyncTargets() 82 | } 83 | 84 | func loopSyncTargets() { 85 | duration := time.Duration(9000) * time.Millisecond 86 | for { 87 | time.Sleep(duration) 88 | if err := syncTargets(); err != nil { 89 | logger.Warning("failed to sync targets:", err) 90 | } 91 | } 92 | } 93 | 94 | func syncTargets() error { 95 | start := time.Now() 96 | 97 | stat, err := models.TargetStatistics(config.C.ClusterName) 98 | if err != nil { 99 | return errors.WithMessage(err, "failed to 
exec TargetStatistics") 100 | } 101 | 102 | if !TargetCache.StatChanged(stat.Total, stat.LastUpdated) { 103 | promstat.GaugeCronDuration.WithLabelValues(config.C.ClusterName, "sync_targets").Set(0) 104 | promstat.GaugeSyncNumber.WithLabelValues(config.C.ClusterName, "sync_targets").Set(0) 105 | logger.Debug("targets not changed") 106 | return nil 107 | } 108 | 109 | lst, err := models.TargetGetsByCluster(config.C.ClusterName) 110 | if err != nil { 111 | return errors.WithMessage(err, "failed to exec TargetGetsByCluster") 112 | } 113 | 114 | m := make(map[string]*models.Target) 115 | for i := 0; i < len(lst); i++ { 116 | lst[i].TagsJSON = strings.Fields(lst[i].Tags) 117 | lst[i].TagsMap = make(map[string]string) 118 | for _, item := range lst[i].TagsJSON { 119 | arr := strings.Split(item, "=") 120 | if len(arr) != 2 { 121 | continue 122 | } 123 | lst[i].TagsMap[arr[0]] = arr[1] 124 | } 125 | 126 | m[lst[i].Ident] = lst[i] 127 | } 128 | 129 | TargetCache.Set(m, stat.Total, stat.LastUpdated) 130 | 131 | ms := time.Since(start).Milliseconds() 132 | promstat.GaugeCronDuration.WithLabelValues(config.C.ClusterName, "sync_targets").Set(float64(ms)) 133 | promstat.GaugeSyncNumber.WithLabelValues(config.C.ClusterName, "sync_targets").Set(float64(len(lst))) 134 | logger.Infof("timer: sync targets done, cost: %dms, number: %d", ms, len(lst)) 135 | 136 | return nil 137 | } 138 | -------------------------------------------------------------------------------- /src/server/memsto/alert_subsribe_cache.go: -------------------------------------------------------------------------------- 1 | package memsto 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | "time" 7 | 8 | "github.com/pkg/errors" 9 | "github.com/toolkits/pkg/logger" 10 | 11 | "github.com/didi/nightingale/v5/src/models" 12 | "github.com/didi/nightingale/v5/src/server/config" 13 | promstat "github.com/didi/nightingale/v5/src/server/stat" 14 | ) 15 | 16 | type AlertSubscribeCacheType struct { 17 | statTotal int64 18 | 
statLastUpdated int64 19 | 20 | sync.RWMutex 21 | subs map[int64][]*models.AlertSubscribe 22 | } 23 | 24 | var AlertSubscribeCache = AlertSubscribeCacheType{ 25 | statTotal: -1, 26 | statLastUpdated: -1, 27 | subs: make(map[int64][]*models.AlertSubscribe), 28 | } 29 | 30 | func (c *AlertSubscribeCacheType) StatChanged(total, lastUpdated int64) bool { 31 | if c.statTotal == total && c.statLastUpdated == lastUpdated { 32 | return false 33 | } 34 | 35 | return true 36 | } 37 | 38 | func (c *AlertSubscribeCacheType) Set(m map[int64][]*models.AlertSubscribe, total, lastUpdated int64) { 39 | c.Lock() 40 | c.subs = m 41 | c.Unlock() 42 | 43 | // only one goroutine used, so no need lock 44 | c.statTotal = total 45 | c.statLastUpdated = lastUpdated 46 | } 47 | 48 | func (c *AlertSubscribeCacheType) Get(ruleId int64) ([]*models.AlertSubscribe, bool) { 49 | c.RLock() 50 | defer c.RUnlock() 51 | 52 | lst, has := c.subs[ruleId] 53 | return lst, has 54 | } 55 | 56 | func (c *AlertSubscribeCacheType) GetStructs(ruleId int64) []models.AlertSubscribe { 57 | c.RLock() 58 | defer c.RUnlock() 59 | 60 | lst, has := c.subs[ruleId] 61 | if !has { 62 | return []models.AlertSubscribe{} 63 | } 64 | 65 | ret := make([]models.AlertSubscribe, len(lst)) 66 | for i := 0; i < len(lst); i++ { 67 | ret[i] = *lst[i] 68 | } 69 | 70 | return ret 71 | } 72 | 73 | func SyncAlertSubscribes() { 74 | err := syncAlertSubscribes() 75 | if err != nil { 76 | fmt.Println("failed to sync alert subscribes:", err) 77 | exit(1) 78 | } 79 | 80 | go loopSyncAlertSubscribes() 81 | } 82 | 83 | func loopSyncAlertSubscribes() { 84 | duration := time.Duration(9000) * time.Millisecond 85 | for { 86 | time.Sleep(duration) 87 | if err := syncAlertSubscribes(); err != nil { 88 | logger.Warning("failed to sync alert subscribes:", err) 89 | } 90 | } 91 | } 92 | 93 | func syncAlertSubscribes() error { 94 | start := time.Now() 95 | 96 | stat, err := models.AlertSubscribeStatistics(config.C.ClusterName) 97 | if err != nil { 98 | 
// Ibex is a small fluent HTTP client for the ibex API. A request is described
// with the chainable setters (Path, Header, QueryString, In, Out) and sent
// with one of the verb helpers (GET, POST, PUT, DELETE, PATCH).
type Ibex struct {
	address  string
	authUser string
	authPass string
	timeout  time.Duration
	method   string
	urlPath  string
	inValue  interface{}
	outPtr   interface{}
	headers  map[string]string
	queries  map[string][]string
}

// New creates a client for addr. "http://" is prepended when addr does not
// already start with "http". timeout is in milliseconds. Basic auth is only
// sent when user is non-empty.
func New(addr, user, pass string, timeout int64) *Ibex {
	if !strings.HasPrefix(addr, "http") {
		addr = "http://" + addr
	}

	return &Ibex{
		address:  addr,
		authUser: user,
		authPass: pass,
		timeout:  time.Duration(timeout) * time.Millisecond,
		headers:  make(map[string]string),
		queries:  make(map[string][]string),
	}
}

// In sets the value that is JSON-encoded and sent as the request body.
func (i *Ibex) In(v interface{}) *Ibex {
	i.inValue = v
	return i
}

// Out sets the pointer the JSON response body is decoded into.
func (i *Ibex) Out(ptr interface{}) *Ibex {
	i.outPtr = ptr
	return i
}

// Path sets the URL path (it may already contain a query string).
func (i *Ibex) Path(p string) *Ibex {
	i.urlPath = p
	return i
}

// Method sets the HTTP method; it is upper-cased.
func (i *Ibex) Method(m string) *Ibex {
	i.method = strings.ToUpper(m)
	return i
}

// Header sets a request header, replacing any previous value for key.
func (i *Ibex) Header(key, value string) *Ibex {
	i.headers[key] = value
	return i
}

// QueryString adds a query parameter; repeated keys accumulate values.
func (i *Ibex) QueryString(key, value string) *Ibex {
	i.queries[key] = append(i.queries[key], value)
	return i
}

// buildUrl folds the pending query parameters into urlPath. The parameters
// are consumed, so sending the same client twice does not append the query
// string a second time.
func (i *Ibex) buildUrl() {
	var buf bytes.Buffer
	for k, vs := range i.queries {
		for _, v := range vs {
			if buf.Len() > 0 {
				buf.WriteByte('&')
			}
			buf.WriteString(url.QueryEscape(k))
			buf.WriteByte('=')
			buf.WriteString(url.QueryEscape(v))
		}
	}
	i.queries = make(map[string][]string)

	if buf.Len() == 0 {
		return
	}

	sep := "?"
	if strings.Contains(i.urlPath, "?") {
		sep = "&"
	}
	i.urlPath += sep + buf.String()
}

// do sends the request and decodes the JSON response into the Out target.
// It returns an error when the request cannot be built or sent, when the
// status code is not 200, or when the response cannot be read or decoded.
// When no Out target was set the response body is read and discarded.
func (i *Ibex) do() error {
	i.buildUrl()

	var (
		req *http.Request
		err error
	)
	target := i.address + i.urlPath

	if i.inValue != nil {
		// A distinct name keeps the outer err from being shadowed, so a
		// failure of NewRequest below is not lost.
		bs, marshalErr := json.Marshal(i.inValue)
		if marshalErr != nil {
			return marshalErr
		}
		req, err = http.NewRequest(i.method, target, bytes.NewBuffer(bs))
	} else {
		req, err = http.NewRequest(i.method, target, nil)
	}
	if err != nil {
		return err
	}

	for key, value := range i.headers {
		req.Header.Set(key, value)
	}

	if i.authUser != "" {
		req.SetBasicAuth(i.authUser, i.authPass)
	}

	if i.method != http.MethodGet {
		req.Header.Set("Content-Type", "application/json")
	}

	client := http.Client{
		Timeout: i.timeout,
	}

	res, err := client.Do(req)
	if err != nil {
		return err
	}
	// Close on every return path, including the non-200 one.
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		return fmt.Errorf("url(%s) response code: %v", i.urlPath, res.StatusCode)
	}

	payload, err := ioutil.ReadAll(res.Body)
	if err != nil {
		return err
	}

	if i.outPtr == nil {
		return nil
	}

	return json.Unmarshal(payload, i.outPtr)
}

// GET sends the request with method GET.
func (i *Ibex) GET() error {
	i.Method(http.MethodGet)
	return i.do()
}

// POST sends the request with method POST.
func (i *Ibex) POST() error {
	i.Method(http.MethodPost)
	return i.do()
}

// PUT sends the request with method PUT.
func (i *Ibex) PUT() error {
	i.Method(http.MethodPut)
	return i.do()
}

// DELETE sends the request with method DELETE.
func (i *Ibex) DELETE() error {
	i.Method(http.MethodDelete)
	return i.do()
}

// PATCH sends the request with method PATCH.
func (i *Ibex) PATCH() error {
	i.Method(http.MethodPatch)
	return i.do()
}
-------------------------------------------------------------------------------- 1 | package memsto 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | "time" 7 | 8 | "github.com/pkg/errors" 9 | "github.com/toolkits/pkg/logger" 10 | 11 | "github.com/didi/nightingale/v5/src/models" 12 | "github.com/didi/nightingale/v5/src/server/config" 13 | promstat "github.com/didi/nightingale/v5/src/server/stat" 14 | ) 15 | 16 | type UserGroupCacheType struct { 17 | statTotal int64 18 | statLastUpdated int64 19 | 20 | sync.RWMutex 21 | ugs map[int64]*models.UserGroup // key: id 22 | } 23 | 24 | var UserGroupCache = UserGroupCacheType{ 25 | statTotal: -1, 26 | statLastUpdated: -1, 27 | ugs: make(map[int64]*models.UserGroup), 28 | } 29 | 30 | func (ugc *UserGroupCacheType) StatChanged(total, lastUpdated int64) bool { 31 | if ugc.statTotal == total && ugc.statLastUpdated == lastUpdated { 32 | return false 33 | } 34 | 35 | return true 36 | } 37 | 38 | func (ugc *UserGroupCacheType) Set(ugs map[int64]*models.UserGroup, total, lastUpdated int64) { 39 | ugc.Lock() 40 | ugc.ugs = ugs 41 | ugc.Unlock() 42 | 43 | // only one goroutine used, so no need lock 44 | ugc.statTotal = total 45 | ugc.statLastUpdated = lastUpdated 46 | } 47 | 48 | func (ugc *UserGroupCacheType) GetByUserGroupId(id int64) *models.UserGroup { 49 | ugc.RLock() 50 | defer ugc.RUnlock() 51 | return ugc.ugs[id] 52 | } 53 | 54 | func (ugc *UserGroupCacheType) GetByUserGroupIds(ids []int64) []*models.UserGroup { 55 | set := make(map[int64]struct{}) 56 | 57 | ugc.RLock() 58 | defer ugc.RUnlock() 59 | 60 | var ugs []*models.UserGroup 61 | for _, id := range ids { 62 | if ugc.ugs[id] == nil { 63 | continue 64 | } 65 | 66 | if _, has := set[id]; has { 67 | continue 68 | } 69 | 70 | ugs = append(ugs, ugc.ugs[id]) 71 | set[id] = struct{}{} 72 | } 73 | 74 | if ugs == nil { 75 | return []*models.UserGroup{} 76 | } 77 | 78 | return ugs 79 | } 80 | 81 | func SyncUserGroups() { 82 | err := syncUserGroups() 83 | if err != nil { 84 | 
fmt.Println("failed to sync user groups:", err) 85 | exit(1) 86 | } 87 | 88 | go loopSyncUserGroups() 89 | } 90 | 91 | func loopSyncUserGroups() { 92 | duration := time.Duration(9000) * time.Millisecond 93 | for { 94 | time.Sleep(duration) 95 | if err := syncUserGroups(); err != nil { 96 | logger.Warning("failed to sync user groups:", err) 97 | } 98 | } 99 | } 100 | 101 | func syncUserGroups() error { 102 | start := time.Now() 103 | 104 | stat, err := models.UserGroupStatistics() 105 | if err != nil { 106 | return errors.WithMessage(err, "failed to exec UserGroupStatistics") 107 | } 108 | 109 | if !UserGroupCache.StatChanged(stat.Total, stat.LastUpdated) { 110 | promstat.GaugeCronDuration.WithLabelValues(config.C.ClusterName, "sync_user_groups").Set(0) 111 | promstat.GaugeSyncNumber.WithLabelValues(config.C.ClusterName, "sync_user_groups").Set(0) 112 | logger.Debug("user_group not changed") 113 | return nil 114 | } 115 | 116 | lst, err := models.UserGroupGetAll() 117 | if err != nil { 118 | return errors.WithMessage(err, "failed to exec UserGroupGetAll") 119 | } 120 | 121 | m := make(map[int64]*models.UserGroup) 122 | for i := 0; i < len(lst); i++ { 123 | m[lst[i].Id] = lst[i] 124 | } 125 | 126 | // fill user ids 127 | members, err := models.UserGroupMemberGetAll() 128 | if err != nil { 129 | return errors.WithMessage(err, "failed to exec UserGroupMemberGetAll") 130 | } 131 | 132 | for i := 0; i < len(members); i++ { 133 | ug, has := m[members[i].GroupId] 134 | if !has { 135 | continue 136 | } 137 | 138 | if ug == nil { 139 | continue 140 | } 141 | 142 | ug.UserIds = append(ug.UserIds, members[i].UserId) 143 | } 144 | 145 | UserGroupCache.Set(m, stat.Total, stat.LastUpdated) 146 | 147 | ms := time.Since(start).Milliseconds() 148 | promstat.GaugeCronDuration.WithLabelValues(config.C.ClusterName, "sync_user_groups").Set(float64(ms)) 149 | promstat.GaugeSyncNumber.WithLabelValues(config.C.ClusterName, "sync_user_groups").Set(float64(len(m))) 150 | logger.Infof("timer: 
sync user groups done, cost: %dms, number: %d", ms, len(m)) 151 | 152 | return nil 153 | } 154 | -------------------------------------------------------------------------------- /src/webapi/router/router_login.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "fmt" 5 | "net/http" 6 | "strings" 7 | 8 | "github.com/dgrijalva/jwt-go" 9 | "github.com/gin-gonic/gin" 10 | "github.com/toolkits/pkg/ginx" 11 | 12 | "github.com/didi/nightingale/v5/src/models" 13 | "github.com/didi/nightingale/v5/src/webapi/config" 14 | ) 15 | 16 | type loginForm struct { 17 | Username string `json:"username" binding:"required"` 18 | Password string `json:"password" binding:"required"` 19 | } 20 | 21 | func loginPost(c *gin.Context) { 22 | var f loginForm 23 | ginx.BindJSON(c, &f) 24 | 25 | user, err := models.PassLogin(f.Username, f.Password) 26 | if err != nil { 27 | // pass validate fail, try ldap 28 | if config.C.LDAP.Enable { 29 | user, err = models.LdapLogin(f.Username, f.Password) 30 | if err != nil { 31 | ginx.NewRender(c).Message(err) 32 | return 33 | } 34 | } else { 35 | ginx.NewRender(c).Message(err) 36 | return 37 | } 38 | } 39 | 40 | if user == nil { 41 | // Theoretically impossible 42 | ginx.NewRender(c).Message("Username or password invalid") 43 | return 44 | } 45 | 46 | userIdentity := fmt.Sprintf("%d-%s", user.Id, user.Username) 47 | 48 | ts, err := createTokens(config.C.JWTAuth.SigningKey, userIdentity) 49 | ginx.Dangerous(err) 50 | ginx.Dangerous(createAuth(c.Request.Context(), userIdentity, ts)) 51 | 52 | ginx.NewRender(c).Data(gin.H{ 53 | "user": user, 54 | "access_token": ts.AccessToken, 55 | "refresh_token": ts.RefreshToken, 56 | }, nil) 57 | } 58 | 59 | func logoutPost(c *gin.Context) { 60 | metadata, err := extractTokenMetadata(c.Request) 61 | if err != nil { 62 | ginx.NewRender(c, http.StatusBadRequest).Message("failed to parse jwt token") 63 | return 64 | } 65 | 66 | delErr := 
deleteTokens(c.Request.Context(), metadata) 67 | if delErr != nil { 68 | ginx.NewRender(c).Message(InternalServerError) 69 | return 70 | } 71 | 72 | ginx.NewRender(c).Message("") 73 | } 74 | 75 | type refreshForm struct { 76 | RefreshToken string `json:"refresh_token" binding:"required"` 77 | } 78 | 79 | func refreshPost(c *gin.Context) { 80 | var f refreshForm 81 | ginx.BindJSON(c, &f) 82 | 83 | // verify the token 84 | token, err := jwt.Parse(f.RefreshToken, func(token *jwt.Token) (interface{}, error) { 85 | if _, ok := token.Method.(*jwt.SigningMethodHMAC); !ok { 86 | return nil, fmt.Errorf("unexpected jwt signing method: %v", token.Header["alg"]) 87 | } 88 | return []byte(config.C.JWTAuth.SigningKey), nil 89 | }) 90 | 91 | // if there is an error, the token must have expired 92 | if err != nil { 93 | // redirect to login page 94 | ginx.NewRender(c, http.StatusUnauthorized).Message("refresh token expired") 95 | return 96 | } 97 | 98 | // Since token is valid, get the uuid: 99 | claims, ok := token.Claims.(jwt.MapClaims) //the token claims should conform to MapClaims 100 | if ok && token.Valid { 101 | refreshUuid, ok := claims["refresh_uuid"].(string) //convert the interface to string 102 | if !ok { 103 | // Theoretically impossible 104 | ginx.NewRender(c, http.StatusUnauthorized).Message("failed to parse refresh_uuid from jwt") 105 | return 106 | } 107 | 108 | userIdentity, ok := claims["user_identity"].(string) 109 | if !ok { 110 | // Theoretically impossible 111 | ginx.NewRender(c, http.StatusUnauthorized).Message("failed to parse user_identity from jwt") 112 | return 113 | } 114 | 115 | // Delete the previous Refresh Token 116 | err = deleteAuth(c.Request.Context(), refreshUuid) 117 | if err != nil { 118 | ginx.NewRender(c, http.StatusUnauthorized).Message(InternalServerError) 119 | return 120 | } 121 | 122 | // Delete previous Access Token 123 | deleteAuth(c.Request.Context(), strings.Split(refreshUuid, "++")[0]) 124 | 125 | // Create new pairs of refresh 
// Server carries the start-up settings of the server process.
type Server struct {
	ConfigFile string
	Version    string
}

// ServerOption mutates a Server before it is initialized.
type ServerOption func(*Server)

// SetConfigFile returns an option that sets the configuration file path.
func SetConfigFile(f string) ServerOption {
	return func(srv *Server) { srv.ConfigFile = f }
}

// SetVersion returns an option that sets the reported version string.
func SetVersion(v string) ServerOption {
	return func(srv *Server) { srv.Version = v }
}
syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT) 51 | 52 | server := Server{ 53 | ConfigFile: filepath.Join("etc", "server.conf"), 54 | Version: "not specified", 55 | } 56 | 57 | for _, opt := range opts { 58 | opt(&server) 59 | } 60 | 61 | cleanFunc, err := server.initialize() 62 | if err != nil { 63 | fmt.Println("server init fail:", err) 64 | os.Exit(code) 65 | } 66 | 67 | EXIT: 68 | for { 69 | sig := <-sc 70 | fmt.Println("received signal:", sig.String()) 71 | switch sig { 72 | case syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGINT: 73 | code = 0 74 | break EXIT 75 | case syscall.SIGHUP: 76 | // reload configuration? 77 | default: 78 | break EXIT 79 | } 80 | } 81 | 82 | cleanFunc() 83 | fmt.Println("server exited") 84 | os.Exit(code) 85 | } 86 | 87 | func (s Server) initialize() (func(), error) { 88 | fns := Functions{} 89 | ctx, cancel := context.WithCancel(context.Background()) 90 | fns.Add(cancel) 91 | 92 | // parse config file 93 | config.MustLoad(s.ConfigFile) 94 | 95 | // init i18n 96 | i18n.Init() 97 | 98 | // init logger 99 | loggerClean, err := logx.Init(config.C.Log) 100 | if err != nil { 101 | return fns.Ret(), err 102 | } else { 103 | fns.Add(loggerClean) 104 | } 105 | 106 | // init database 107 | if err = storage.InitDB(storage.DBConfig{ 108 | Gorm: config.C.Gorm, 109 | MySQL: config.C.MySQL, 110 | Postgres: config.C.Postgres, 111 | }); err != nil { 112 | return fns.Ret(), err 113 | } 114 | 115 | // init redis 116 | redisClean, err := storage.InitRedis(config.C.Redis) 117 | if err != nil { 118 | return fns.Ret(), err 119 | } else { 120 | fns.Add(redisClean) 121 | } 122 | 123 | // init prometheus remote writers 124 | if err = writer.Init(config.C.Writers, config.C.WriterOpt); err != nil { 125 | return fns.Ret(), err 126 | } 127 | 128 | // init prometheus remote reader 129 | if err = reader.Init(config.C.Reader); err != nil { 130 | return fns.Ret(), err 131 | } 132 | 133 | // sync rules/users/mutes/targets to memory cache 134 | memsto.Sync() 135 | 
// Functions collects clean-up callbacks that are run together on shutdown.
type Functions struct {
	List []func()
}

// Add registers one more callback.
func (fs *Functions) Add(f func()) {
	fs.List = append(fs.List, f)
}

// Ret returns a function that runs every registered callback in
// registration order. The list is read when the returned function is called,
// so callbacks added after Ret are run as well.
func (fs *Functions) Ret() func() {
	return fs.runAll
}

// runAll invokes the callbacks one by one, re-reading the list length on
// every step.
func (fs *Functions) runAll() {
	for idx := 0; idx < len(fs.List); idx++ {
		fs.List[idx]()
	}
}
nightingale 50 | command: 51 | - "--config.file=/etc/prometheus/prometheus.yml" 52 | - "--storage.tsdb.path=/prometheus" 53 | - "--web.console.libraries=/usr/share/prometheus/console_libraries" 54 | - "--web.console.templates=/usr/share/prometheus/consoles" 55 | - "--enable-feature=remote-write-receiver" 56 | - "--query.lookback-delta=2m" 57 | 58 | ibex: 59 | image: ulric2019/ibex:0.2 60 | container_name: ibex 61 | hostname: ibex 62 | restart: always 63 | environment: 64 | GIN_MODE: release 65 | TZ: Asia/Shanghai 66 | ports: 67 | - "10090:10090" 68 | - "20090:20090" 69 | volumes: 70 | - ./ibexetc:/app/etc 71 | networks: 72 | - nightingale 73 | depends_on: 74 | - mysql 75 | links: 76 | - mysql:mysql 77 | command: 78 | - "/app/ibex" 79 | - "server" 80 | 81 | nwebapi: 82 | image: ulric2019/nightingale:5.4.1 83 | container_name: nwebapi 84 | hostname: nwebapi 85 | restart: always 86 | environment: 87 | GIN_MODE: release 88 | TZ: Asia/Shanghai 89 | volumes: 90 | - ./n9eetc:/app/etc 91 | ports: 92 | - "18000:18000" 93 | networks: 94 | - nightingale 95 | depends_on: 96 | - mysql 97 | - redis 98 | - prometheus 99 | - ibex 100 | links: 101 | - mysql:mysql 102 | - redis:redis 103 | - prometheus:prometheus 104 | - ibex:ibex 105 | command: 106 | - "/app/n9e" 107 | - "webapi" 108 | 109 | nserver: 110 | image: ulric2019/nightingale:5.4.1 111 | container_name: nserver 112 | hostname: nserver 113 | restart: always 114 | environment: 115 | GIN_MODE: release 116 | TZ: Asia/Shanghai 117 | volumes: 118 | - ./n9eetc:/app/etc 119 | ports: 120 | - "19000:19000" 121 | networks: 122 | - nightingale 123 | depends_on: 124 | - mysql 125 | - redis 126 | - prometheus 127 | - ibex 128 | links: 129 | - mysql:mysql 130 | - redis:redis 131 | - prometheus:prometheus 132 | - ibex:ibex 133 | command: 134 | - "/app/n9e" 135 | - "server" 136 | 137 | telegraf: 138 | image: "telegraf:1.20.3" 139 | container_name: "telegraf" 140 | hostname: "telegraf01" 141 | restart: always 142 | environment: 143 | TZ: 
Asia/Shanghai 144 | volumes: 145 | - ./telegrafetc/telegraf.conf:/etc/telegraf/telegraf.conf 146 | ports: 147 | - "8125:8125/udp" 148 | - "8092:8092/udp" 149 | - "8094:8094/tcp" 150 | networks: 151 | - nightingale 152 | depends_on: 153 | - nserver 154 | links: 155 | - nserver:nserver 156 | 157 | agentd: 158 | image: ulric2019/ibex:0.2 159 | container_name: agentd 160 | hostname: agentd 161 | restart: always 162 | environment: 163 | GIN_MODE: release 164 | TZ: Asia/Shanghai 165 | volumes: 166 | - ./ibexetc:/app/etc 167 | networks: 168 | - nightingale 169 | depends_on: 170 | - ibex 171 | links: 172 | - ibex:ibex 173 | command: 174 | - "/app/ibex" 175 | - "agentd" 176 | -------------------------------------------------------------------------------- /src/server/engine/consume.go: -------------------------------------------------------------------------------- 1 | package engine 2 | 3 | import ( 4 | "context" 5 | "strconv" 6 | "time" 7 | 8 | "github.com/toolkits/pkg/concurrent/semaphore" 9 | "github.com/toolkits/pkg/logger" 10 | 11 | "github.com/didi/nightingale/v5/src/models" 12 | "github.com/didi/nightingale/v5/src/server/config" 13 | "github.com/didi/nightingale/v5/src/server/memsto" 14 | ) 15 | 16 | func loopConsume(ctx context.Context) { 17 | sema := semaphore.NewSemaphore(config.C.Alerting.NotifyConcurrency) 18 | duration := time.Duration(100) * time.Millisecond 19 | for { 20 | events := EventQueue.PopBackBy(100) 21 | if len(events) == 0 { 22 | time.Sleep(duration) 23 | continue 24 | } 25 | consume(events, sema) 26 | } 27 | } 28 | 29 | func consume(events []interface{}, sema *semaphore.Semaphore) { 30 | for i := range events { 31 | if events[i] == nil { 32 | continue 33 | } 34 | 35 | event := events[i].(*models.AlertCurEvent) 36 | sema.Acquire() 37 | go func(event *models.AlertCurEvent) { 38 | defer sema.Release() 39 | consumeOne(event) 40 | }(event) 41 | } 42 | } 43 | 44 | func consumeOne(event *models.AlertCurEvent) { 45 | logEvent(event, "consume") 46 | 
persist(event) 47 | if event.NotifyRecovered == 1 { 48 | fillUsers(event) 49 | callback(event) 50 | notify(event) 51 | } 52 | } 53 | 54 | func persist(event *models.AlertCurEvent) { 55 | has, err := models.AlertCurEventExists("hash=?", event.Hash) 56 | if err != nil { 57 | logger.Errorf("event_persist_check_exists_fail: %v rule_id=%d hash=%s", err, event.RuleId, event.Hash) 58 | return 59 | } 60 | 61 | his := event.ToHis() 62 | 63 | // 不管是告警还是恢复,全量告警里都要记录 64 | if err := his.Add(); err != nil { 65 | logger.Errorf( 66 | "event_persist_his_fail: %v rule_id=%d hash=%s tags=%v timestamp=%d value=%s", 67 | err, 68 | event.RuleId, 69 | event.Hash, 70 | event.TagsJSON, 71 | event.TriggerTime, 72 | event.TriggerValue, 73 | ) 74 | } 75 | 76 | if has { 77 | // 活跃告警表中有记录,删之 78 | err = models.AlertCurEventDelByHash(event.Hash) 79 | if err != nil { 80 | logger.Errorf("event_del_cur_fail: %v hash=%s", err, event.Hash) 81 | return 82 | } 83 | 84 | if !event.IsRecovered { 85 | // 恢复事件,从活跃告警列表彻底删掉,告警事件,要重新加进来新的event 86 | // use his id as cur id 87 | event.Id = his.Id 88 | if event.Id > 0 { 89 | if err := event.Add(); err != nil { 90 | logger.Errorf( 91 | "event_persist_cur_fail: %v rule_id=%d hash=%s tags=%v timestamp=%d value=%s", 92 | err, 93 | event.RuleId, 94 | event.Hash, 95 | event.TagsJSON, 96 | event.TriggerTime, 97 | event.TriggerValue, 98 | ) 99 | } 100 | } 101 | } 102 | 103 | return 104 | } 105 | 106 | if event.IsRecovered { 107 | // alert_cur_event表里没有数据,表示之前没告警,结果现在报了恢复,神奇....理论上不应该出现的 108 | return 109 | } 110 | 111 | // use his id as cur id 112 | event.Id = his.Id 113 | if event.Id > 0 { 114 | if err := event.Add(); err != nil { 115 | logger.Errorf( 116 | "event_persist_cur_fail: %v rule_id=%d hash=%s tags=%v timestamp=%d value=%s", 117 | err, 118 | event.RuleId, 119 | event.Hash, 120 | event.TagsJSON, 121 | event.TriggerTime, 122 | event.TriggerValue, 123 | ) 124 | } 125 | } 126 | } 127 | 128 | // for alerting 129 | func fillUsers(e *models.AlertCurEvent) { 130 | gids 
// mapKeys returns the keys of an int64 set in unspecified order. The result
// is never nil.
func mapKeys(m map[int64]struct{}) []int64 {
	keys := make([]int64, len(m))
	pos := 0
	for key := range m {
		keys[pos] = key
		pos++
	}
	return keys
}

// StringSetKeys returns the keys of a string set in unspecified order. The
// result is never nil.
func StringSetKeys(m map[string]struct{}) []string {
	keys := make([]string, len(m))
	pos := 0
	for key := range m {
		keys[pos] = key
		pos++
	}
	return keys
}
str.Dangerous(d.Name) { 35 | return errors.New("Name has invalid characters") 36 | } 37 | 38 | return nil 39 | } 40 | 41 | func (d *Dashboard) Add() error { 42 | if err := d.Verify(); err != nil { 43 | return err 44 | } 45 | 46 | exists, err := DashboardExists("group_id=? and name=?", d.GroupId, d.Name) 47 | if err != nil { 48 | return errors.WithMessage(err, "failed to count dashboard") 49 | } 50 | 51 | if exists { 52 | return errors.New("Dashboard already exists") 53 | } 54 | 55 | now := time.Now().Unix() 56 | d.CreateAt = now 57 | d.UpdateAt = now 58 | 59 | return Insert(d) 60 | } 61 | 62 | func (d *Dashboard) Update(selectField interface{}, selectFields ...interface{}) error { 63 | if err := d.Verify(); err != nil { 64 | return err 65 | } 66 | 67 | return DB().Model(d).Select(selectField, selectFields...).Updates(d).Error 68 | } 69 | 70 | func (d *Dashboard) Del() error { 71 | cgids, err := ChartGroupIdsOf(d.Id) 72 | if err != nil { 73 | return err 74 | } 75 | 76 | if len(cgids) == 0 { 77 | return DB().Transaction(func(tx *gorm.DB) error { 78 | if err := tx.Where("id=?", d.Id).Delete(&Dashboard{}).Error; err != nil { 79 | return err 80 | } 81 | return nil 82 | }) 83 | } 84 | 85 | return DB().Transaction(func(tx *gorm.DB) error { 86 | if err := tx.Where("group_id in ?", cgids).Delete(&Chart{}).Error; err != nil { 87 | return err 88 | } 89 | 90 | if err := tx.Where("dashboard_id=?", d.Id).Delete(&ChartGroup{}).Error; err != nil { 91 | return err 92 | } 93 | 94 | if err := tx.Where("id=?", d.Id).Delete(&Dashboard{}).Error; err != nil { 95 | return err 96 | } 97 | 98 | return nil 99 | }) 100 | } 101 | 102 | func DashboardGet(where string, args ...interface{}) (*Dashboard, error) { 103 | var lst []*Dashboard 104 | err := DB().Where(where, args...).Find(&lst).Error 105 | if err != nil { 106 | return nil, err 107 | } 108 | 109 | if len(lst) == 0 { 110 | return nil, nil 111 | } 112 | 113 | lst[0].TagsLst = strings.Fields(lst[0].Tags) 114 | 115 | return lst[0], nil 116 
| } 117 | 118 | func DashboardCount(where string, args ...interface{}) (num int64, err error) { 119 | return Count(DB().Model(&Dashboard{}).Where(where, args...)) 120 | } 121 | 122 | func DashboardExists(where string, args ...interface{}) (bool, error) { 123 | num, err := DashboardCount(where, args...) 124 | return num > 0, err 125 | } 126 | 127 | func DashboardGets(groupId int64, query string) ([]Dashboard, error) { 128 | session := DB().Where("group_id=?", groupId).Order("name") 129 | 130 | arr := strings.Fields(query) 131 | if len(arr) > 0 { 132 | for i := 0; i < len(arr); i++ { 133 | if strings.HasPrefix(arr[i], "-") { 134 | q := "%" + arr[i][1:] + "%" 135 | session = session.Where("name not like ? and tags not like ?", q, q) 136 | } else { 137 | q := "%" + arr[i] + "%" 138 | session = session.Where("(name like ? or tags like ?)", q, q) 139 | } 140 | } 141 | } 142 | 143 | var objs []Dashboard 144 | err := session.Select("id", "group_id", "name", "tags", "create_at", "create_by", "update_at", "update_by").Find(&objs).Error 145 | if err == nil { 146 | for i := 0; i < len(objs); i++ { 147 | objs[i].TagsLst = strings.Fields(objs[i].Tags) 148 | } 149 | } 150 | 151 | return objs, err 152 | } 153 | 154 | func DashboardGetsByIds(ids []int64) ([]Dashboard, error) { 155 | if len(ids) == 0 { 156 | return []Dashboard{}, nil 157 | } 158 | 159 | var lst []Dashboard 160 | err := DB().Where("id in ?", ids).Order("name").Find(&lst).Error 161 | return lst, err 162 | } 163 | -------------------------------------------------------------------------------- /docker/n9eetc/webapi.conf: -------------------------------------------------------------------------------- 1 | # debug, release 2 | RunMode = "release" 3 | 4 | # # custom i18n dict config 5 | # I18N = "./etc/i18n.json" 6 | 7 | # metrics descriptions 8 | MetricsYamlFile = "./etc/metrics.yaml" 9 | 10 | BuiltinAlertsDir = "./etc/alerts" 11 | BuiltinDashboardsDir = "./etc/dashboards" 12 | 13 | [[NotifyChannels]] 14 | Label = "邮箱" 
15 | # do not change Key 16 | Key = "email" 17 | 18 | [[NotifyChannels]] 19 | Label = "钉钉机器人" 20 | # do not change Key 21 | Key = "dingtalk" 22 | 23 | [[NotifyChannels]] 24 | Label = "企微机器人" 25 | # do not change Key 26 | Key = "wecom" 27 | 28 | [[NotifyChannels]] 29 | Label = "飞书机器人" 30 | # do not change Key 31 | Key = "feishu" 32 | 33 | [[ContactKeys]] 34 | Label = "Wecom Robot Token" 35 | # do not change Key 36 | Key = "wecom_robot_token" 37 | 38 | [[ContactKeys]] 39 | Label = "Dingtalk Robot Token" 40 | # do not change Key 41 | Key = "dingtalk_robot_token" 42 | 43 | [[ContactKeys]] 44 | Label = "Feishu Robot Token" 45 | # do not change Key 46 | Key = "feishu_robot_token" 47 | 48 | [Log] 49 | # log write dir 50 | Dir = "logs" 51 | # log level: DEBUG INFO WARNING ERROR 52 | Level = "DEBUG" 53 | # stdout, stderr, file 54 | Output = "stdout" 55 | # # rotate by time 56 | # KeepHours = 4 57 | # # rotate by size 58 | # RotateNum = 3 59 | # # unit: MB 60 | # RotateSize = 256 61 | 62 | [HTTP] 63 | # http listening address 64 | Host = "0.0.0.0" 65 | # http listening port 66 | Port = 18000 67 | # https cert file path 68 | CertFile = "" 69 | # https key file path 70 | KeyFile = "" 71 | # whether print access log 72 | PrintAccessLog = true 73 | # whether enable pprof 74 | PProf = false 75 | # http graceful shutdown timeout, unit: s 76 | ShutdownTimeout = 30 77 | # max content length: 64M 78 | MaxContentLength = 67108864 79 | # http server read timeout, unit: s 80 | ReadTimeout = 20 81 | # http server write timeout, unit: s 82 | WriteTimeout = 40 83 | # http server idle timeout, unit: s 84 | IdleTimeout = 120 85 | 86 | [JWTAuth] 87 | # signing key 88 | SigningKey = "5b94a0fd640fe2765af826acfe42d151" 89 | # unit: min 90 | AccessExpired = 1500 91 | # unit: min 92 | RefreshExpired = 10080 93 | RedisKeyPrefix = "/jwt/" 94 | 95 | [BasicAuth] 96 | user001 = "ccc26da7b9aba533cbb263a36c07dcc5" 97 | 98 | [AnonymousAccess] 99 | PromQuerier = false 100 | AlertDetail = false 101 | 102 | 
[LDAP] 103 | Enable = false 104 | Host = "ldap.example.org" 105 | Port = 389 106 | BaseDn = "dc=example,dc=org" 107 | # AD: manager@example.org 108 | BindUser = "cn=manager,dc=example,dc=org" 109 | BindPass = "*******" 110 | # openldap format e.g. (&(uid=%s)) 111 | # AD format e.g. (&(sAMAccountName=%s)) 112 | AuthFilter = "(&(uid=%s))" 113 | CoverAttributes = true 114 | TLS = false 115 | StartTLS = true 116 | # ldap user default roles 117 | DefaultRoles = ["Standard"] 118 | 119 | [LDAP.Attributes] 120 | Nickname = "cn" 121 | Phone = "mobile" 122 | Email = "mail" 123 | 124 | [Redis] 125 | # address, ip:port 126 | Address = "redis:6379" 127 | # requirepass 128 | Password = "" 129 | # # db 130 | # DB = 0 131 | 132 | [Gorm] 133 | # enable debug mode or not 134 | Debug = true 135 | # mysql postgres 136 | DBType = "mysql" 137 | # unit: s 138 | MaxLifetime = 7200 139 | # max open connections 140 | MaxOpenConns = 150 141 | # max idle connections 142 | MaxIdleConns = 50 143 | # table prefix 144 | TablePrefix = "" 145 | # enable auto migrate or not 146 | EnableAutoMigrate = false 147 | 148 | [MySQL] 149 | # mysql address host:port 150 | Address = "mysql:3306" 151 | # mysql username 152 | User = "root" 153 | # mysql password 154 | Password = "1234" 155 | # database name 156 | DBName = "n9e_v5" 157 | # connection params 158 | Parameters = "charset=utf8mb4&parseTime=True&loc=Local&allowNativePasswords=true" 159 | 160 | [Postgres] 161 | # pg address host:port 162 | Address = "postgres:5432" 163 | # pg user 164 | User = "root" 165 | # pg password 166 | Password = "1234" 167 | # database name 168 | DBName = "n9e_v5" 169 | # ssl mode 170 | SSLMode = "disable" 171 | 172 | [[Clusters]] 173 | # Prometheus cluster name 174 | Name = "Default" 175 | # Prometheus APIs base url 176 | Prom = "http://prometheus:9090" 177 | # Basic auth username 178 | BasicAuthUser = "" 179 | # Basic auth password 180 | BasicAuthPass = "" 181 | # timeout settings, unit: ms 182 | Timeout = 30000 183 | 
DialTimeout = 10000 184 | TLSHandshakeTimeout = 30000 185 | ExpectContinueTimeout = 1000 186 | IdleConnTimeout = 90000 187 | # time duration, unit: ms 188 | KeepAlive = 30000 189 | MaxConnsPerHost = 0 190 | MaxIdleConns = 100 191 | MaxIdleConnsPerHost = 100 192 | 193 | [Ibex] 194 | Address = "http://ibex:10090" 195 | # basic auth 196 | BasicAuthUser = "ibex" 197 | BasicAuthPass = "ibex" 198 | # unit: ms 199 | Timeout = 3000 -------------------------------------------------------------------------------- /etc/webapi.conf: -------------------------------------------------------------------------------- 1 | # debug, release 2 | RunMode = "release" 3 | 4 | # # custom i18n dict config 5 | # I18N = "./etc/i18n.json" 6 | 7 | # metrics descriptions 8 | MetricsYamlFile = "./etc/metrics.yaml" 9 | 10 | BuiltinAlertsDir = "./etc/alerts" 11 | BuiltinDashboardsDir = "./etc/dashboards" 12 | 13 | [[NotifyChannels]] 14 | Label = "邮箱" 15 | # do not change Key 16 | Key = "email" 17 | 18 | [[NotifyChannels]] 19 | Label = "钉钉机器人" 20 | # do not change Key 21 | Key = "dingtalk" 22 | 23 | [[NotifyChannels]] 24 | Label = "企微机器人" 25 | # do not change Key 26 | Key = "wecom" 27 | 28 | [[NotifyChannels]] 29 | Label = "飞书机器人" 30 | # do not change Key 31 | Key = "feishu" 32 | 33 | [[ContactKeys]] 34 | Label = "Wecom Robot Token" 35 | # do not change Key 36 | Key = "wecom_robot_token" 37 | 38 | [[ContactKeys]] 39 | Label = "Dingtalk Robot Token" 40 | # do not change Key 41 | Key = "dingtalk_robot_token" 42 | 43 | [[ContactKeys]] 44 | Label = "Feishu Robot Token" 45 | # do not change Key 46 | Key = "feishu_robot_token" 47 | 48 | [Log] 49 | # log write dir 50 | Dir = "logs" 51 | # log level: DEBUG INFO WARNING ERROR 52 | Level = "DEBUG" 53 | # stdout, stderr, file 54 | Output = "stdout" 55 | # # rotate by time 56 | # KeepHours = 4 57 | # # rotate by size 58 | # RotateNum = 3 59 | # # unit: MB 60 | # RotateSize = 256 61 | 62 | [HTTP] 63 | # http listening address 64 | Host = "0.0.0.0" 65 | # http 
listening port 66 | Port = 18000 67 | # https cert file path 68 | CertFile = "" 69 | # https key file path 70 | KeyFile = "" 71 | # whether to print access log 72 | PrintAccessLog = true 73 | # whether to enable pprof 74 | PProf = false 75 | # http graceful shutdown timeout, unit: s 76 | ShutdownTimeout = 30 77 | # max content length: 64M 78 | MaxContentLength = 67108864 79 | # http server read timeout, unit: s 80 | ReadTimeout = 20 81 | # http server write timeout, unit: s 82 | WriteTimeout = 40 83 | # http server idle timeout, unit: s 84 | IdleTimeout = 120 85 | 86 | [JWTAuth] 87 | # signing key 88 | SigningKey = "5b94a0fd640fe2765af826acfe42d151" 89 | # unit: min 90 | AccessExpired = 1500 91 | # unit: min 92 | RefreshExpired = 10080 93 | RedisKeyPrefix = "/jwt/" 94 | 95 | [BasicAuth] 96 | user001 = "ccc26da7b9aba533cbb263a36c07dcc5" 97 | 98 | [AnonymousAccess] 99 | PromQuerier = false 100 | AlertDetail = false 101 | 102 | [LDAP] 103 | Enable = false 104 | Host = "ldap.example.org" 105 | Port = 389 106 | BaseDn = "dc=example,dc=org" 107 | # AD: manager@example.org 108 | BindUser = "cn=manager,dc=example,dc=org" 109 | BindPass = "*******" 110 | # openldap format e.g. (&(uid=%s)) 111 | # AD format e.g. 
(&(sAMAccountName=%s)) 112 | AuthFilter = "(&(uid=%s))" 113 | CoverAttributes = true 114 | TLS = false 115 | StartTLS = true 116 | # ldap user default roles 117 | DefaultRoles = ["Standard"] 118 | 119 | [LDAP.Attributes] 120 | Nickname = "cn" 121 | Phone = "mobile" 122 | Email = "mail" 123 | 124 | [Redis] 125 | # address, ip:port 126 | Address = "127.0.0.1:6379" 127 | # requirepass 128 | Password = "" 129 | # # db 130 | # DB = 0 131 | 132 | [Gorm] 133 | # enable debug mode or not 134 | Debug = true 135 | # mysql postgres 136 | DBType = "mysql" 137 | # unit: s 138 | MaxLifetime = 7200 139 | # max open connections 140 | MaxOpenConns = 150 141 | # max idle connections 142 | MaxIdleConns = 50 143 | # table prefix 144 | TablePrefix = "" 145 | # enable auto migrate or not 146 | EnableAutoMigrate = false 147 | 148 | [MySQL] 149 | # mysql address host:port 150 | Address = "127.0.0.1:3306" 151 | # mysql username 152 | User = "root" 153 | # mysql password 154 | Password = "1234" 155 | # database name 156 | DBName = "n9e_v5" 157 | # connection params 158 | Parameters = "charset=utf8mb4&parseTime=True&loc=Local&allowNativePasswords=true" 159 | 160 | [Postgres] 161 | # pg address host:port 162 | Address = "127.0.0.1:5432" 163 | # pg user 164 | User = "root" 165 | # pg password 166 | Password = "1234" 167 | # database name 168 | DBName = "n9e_v5" 169 | # ssl mode 170 | SSLMode = "disable" 171 | 172 | [[Clusters]] 173 | # Prometheus cluster name 174 | Name = "Default" 175 | # Prometheus APIs base url 176 | Prom = "http://127.0.0.1:9090" 177 | # Basic auth username 178 | BasicAuthUser = "" 179 | # Basic auth password 180 | BasicAuthPass = "" 181 | # timeout settings, unit: ms 182 | Timeout = 30000 183 | DialTimeout = 10000 184 | TLSHandshakeTimeout = 30000 185 | ExpectContinueTimeout = 1000 186 | IdleConnTimeout = 90000 187 | # time duration, unit: ms 188 | KeepAlive = 30000 189 | MaxConnsPerHost = 0 190 | MaxIdleConns = 100 191 | MaxIdleConnsPerHost = 100 192 | 193 | [Ibex] 194 | 
Address = "http://127.0.0.1:10090" 195 | # basic auth 196 | BasicAuthUser = "ibex" 197 | BasicAuthPass = "ibex" 198 | # unit: ms 199 | Timeout = 3000 --------------------------------------------------------------------------------