├── doc ├── img │ ├── wx.jpg │ ├── arch.png │ ├── intro.gif │ ├── wecom.png │ ├── ccf-n9e.png │ ├── ccf-logo.png │ ├── dingtalk.png │ ├── install-vm.png │ ├── n9e-vx-new.png │ ├── redis-dash.png │ ├── alert-events.png │ ├── arch-product.png │ ├── arch-system.png │ ├── mysql-alerts.png │ ├── Nightingale_L_V.png │ ├── n9e-arch-latest.png │ ├── vm-cluster-arch.png │ ├── n9e-node-dashboard.png │ ├── nightingale_logo_h.png │ ├── nightingale_logo_v.png │ └── n9e-screenshot-gif-v6.gif ├── pmc.md ├── committers.md ├── contributors.md ├── end-users.md └── active-contributors.md ├── docker ├── compose-bridge │ ├── etc-categraf │ │ ├── input.kernel │ │ │ └── kernel.toml │ │ ├── input.netstat │ │ │ └── netstat.toml │ │ ├── input.prometheus │ │ │ └── prometheus.toml │ │ ├── input.cpu │ │ │ └── cpu.toml │ │ ├── input.mem │ │ │ └── mem.toml │ │ ├── input.system │ │ │ └── system.toml │ │ ├── input.processes │ │ │ └── processes.toml │ │ ├── input.net │ │ │ └── net.toml │ │ ├── input.diskio │ │ │ └── diskio.toml │ │ └── input.disk │ │ │ └── disk.toml │ └── etc-mysql │ │ └── my.cnf ├── compose-postgres │ ├── categraf │ │ └── conf │ │ │ ├── input.kernel │ │ │ └── kernel.toml │ │ │ ├── input.netstat │ │ │ └── netstat.toml │ │ │ ├── input.cpu │ │ │ └── cpu.toml │ │ │ ├── input.mem │ │ │ └── mem.toml │ │ │ ├── input.system │ │ │ └── system.toml │ │ │ ├── input.processes │ │ │ └── processes.toml │ │ │ ├── input.net │ │ │ └── net.toml │ │ │ ├── input.diskio │ │ │ └── diskio.toml │ │ │ ├── prometheus.toml │ │ │ └── input.disk │ │ │ └── disk.toml │ ├── prometc_vm │ │ ├── targets.json │ │ └── prometheus.yml │ └── n9eetc_pg │ │ └── template │ │ ├── subject.tpl │ │ ├── feishu.tpl │ │ ├── mm.tpl │ │ ├── telegram.tpl │ │ ├── wecom.tpl │ │ └── dingtalk.tpl ├── .dockerignore ├── compose-host-network │ ├── etc-categraf │ │ ├── input.kernel │ │ │ └── kernel.toml │ │ ├── input.netstat │ │ │ └── netstat.toml │ │ ├── input.cpu │ │ │ └── cpu.toml │ │ ├── input.mem │ │ │ └── mem.toml │ │ ├── input.system │ │ │ └── system.toml │ │ ├── input.processes │ │ │ └── processes.toml │ │ ├── input.net │ │ │ └── net.toml │ │ ├── input.diskio │ │ │ └── diskio.toml │ │ └── input.disk │ │ │ └── disk.toml │ ├── etc-mysql │ │ └── my.cnf │ └── etc-prometheus │ │ └── prometheus.yml ├── compose-host-network-metric-log │ ├── etc-categraf │ │ ├── input.kernel │ │ │ └── kernel.toml │ │ ├── input.netstat │ │ │ └── netstat.toml │ │ ├── input.cpu │ │ │ └── cpu.toml │ │ ├── input.mem │ │ │ └── mem.toml │ │ ├── input.system │ │ │ └── system.toml │ │ ├── input.processes │ │ │ └── processes.toml │ │ ├── input.net │ │ │ └── net.toml │ │ ├── input.diskio │ │ │ └── diskio.toml │ │ └── input.disk │ │ │ └── disk.toml │ ├── etc-mysql │ │ └── my.cnf │ ├── etc-logstash │ │ └── logstash.yaml │ └── etc-prometheus │ │ └── prometheus.yml ├── initsql │ └── c-init.sql ├── Dockerfile.goreleaser.arm64 ├── Dockerfile.goreleaser └── build.sh ├── integrations ├── JMX │ └── icon │ │ └── jmx.png ├── NSQ │ ├── icon │ │ └── nsq.png │ ├── collect │ │ └── nsq │ │ │ └── nsq.toml │ └── markdown │ │ └── README.md ├── Ceph │ ├── icon │ │ └── ceph.png │ └── markdown │ │ ├── alerts.png │ │ ├── ceph.png │ │ ├── ceph-alerts.png │ │ ├── ceph-dash.png │ │ └── README.md ├── Exec │ ├── icon │ │ └── exec.png │ └── collect │ │ └── exec │ │ └── exec.toml ├── IPMI │ ├── icon │ │ └── ipmi.png │ ├── collect │ │ └── ipmi │ │ │ └── conf.toml │ └── markdown │ │ └── README.md ├── IPVS │ ├── icon │ │ └── ipvs.png │ └── collect │ │ └── ipvs │ │ └── ipvs.toml ├── PHP │ ├── icon │ │ └── phpfpm.png │ ├── 
markdown │ │ └── README.md │ └── collect │ │ └── phpfpm │ │ └── phpfpm.toml ├── Ping │ ├── icon │ │ └── ping.png │ ├── collect │ │ └── ping │ │ │ └── ping.toml │ └── markdown │ │ └── README.md ├── SNMP │ ├── icon │ │ └── snmp.png │ └── dashboards │ │ └── placeholder.json ├── TiDB │ └── icon │ │ └── tidb.png ├── Canal │ └── icon │ │ └── canal.png ├── Kafka │ ├── icon │ │ └── kafka.png │ └── markdown │ │ ├── alerts..png │ │ ├── dash-kafka.png │ │ ├── dashboards.png │ │ └── alerts-kafka.png ├── Linux │ ├── icon │ │ └── linux.png │ ├── collect │ │ ├── arp_packet │ │ │ └── arp_packet.toml │ │ ├── processes │ │ │ └── processes.toml │ │ └── netstat │ │ │ └── netstat.toml │ └── markdown │ │ └── README.md ├── MinIO │ ├── icon │ │ └── minio.png │ └── markdown │ │ ├── minio.png │ │ ├── alerts.png │ │ ├── dash-minio.png │ │ ├── alerts-minio.png │ │ └── README.md ├── Mtail │ ├── icon │ │ └── mtail.png │ ├── markdown │ │ ├── timestamp.png │ │ └── timezone.png │ └── collect │ │ └── mtail │ │ └── mtail.toml ├── MySQL │ ├── icon │ │ └── mysql.png │ └── collect │ │ └── mysql │ │ └── mysql.toml ├── Nginx │ ├── icon │ │ └── nginx.png │ └── collect │ │ ├── nginx │ │ └── nginx.toml │ │ └── nginx_upstream_check │ │ └── nginx_upstream_check.toml ├── Redis │ ├── icon │ │ └── redis.png │ ├── collect │ │ ├── redis_sentinel │ │ │ └── redis_sentinel.toml │ │ └── redis │ │ │ └── redis.toml │ └── markdown │ │ └── README.md ├── SMART │ └── icon │ │ └── smart.png ├── Whois │ ├── icon │ │ └── whois.png │ ├── markdown │ │ └── README.md │ └── collect │ │ └── whois │ │ └── whois.toml ├── XSKYApi │ ├── icon │ │ └── xsky.png │ ├── collect │ │ └── xskyapi │ │ │ └── xskyapi.toml │ └── markdown │ │ └── README.md ├── AliYun │ ├── icon │ │ └── aliyun.png │ ├── markdown │ │ ├── ecs.png │ │ ├── rds.png │ │ ├── slb.png │ │ ├── waf.png │ │ └── redis.png │ └── collect │ │ └── aliyun │ │ └── cloud.toml ├── AutoMQ │ ├── icon │ │ └── automq.png │ ├── collect │ │ └── prometheus │ │ │ └── 采集OTEL-COLLECTOR的样例.toml │ └── markdown │ │ └── overview.md ├── Consul │ ├── icon │ │ └── consul.png │ └── collect │ │ └── consul │ │ └── consul.toml ├── Dns_Query │ ├── icon │ │ └── dns.png │ └── collect │ │ └── dns_query │ │ └── dns_query.toml ├── Docker │ ├── icon │ │ └── docker.png │ └── markdown │ │ └── README.md ├── Gitlab │ ├── icon │ │ └── gitlab.png │ └── markdown │ │ └── README.md ├── GoogleCloud │ ├── icon │ │ └── gcp.png │ ├── collect │ │ └── googlecloud │ │ │ └── gcp.toml │ └── markdown │ │ └── README.md ├── HAProxy │ ├── icon │ │ └── haproxy.png │ ├── markdown │ │ └── README.md │ └── collect │ │ └── haproxy │ │ └── haproxy.toml ├── Jenkins │ ├── icon │ │ └── jenkins.png │ ├── collect │ │ └── jenkins │ │ │ └── jenkins.toml │ └── markdown │ │ └── README.md ├── MongoDB │ └── icon │ │ └── mongodb.png ├── N9E │ ├── icon │ │ └── nightingale.png │ └── markdown │ │ └── README.md ├── NVIDIA │ ├── icon │ │ └── nvidia.png │ ├── collect │ │ └── nvidia_smi │ │ │ └── nvidia_smi.toml │ └── markdown │ │ └── README.md ├── Oracle │ ├── icon │ │ └── oracle.png │ └── markdown │ │ └── README.md ├── Process │ └── icon │ │ └── process.png ├── Systemd │ ├── icon │ │ └── systemd.png │ ├── collect │ │ └── systemd │ │ │ └── systemd.toml │ └── markdown │ │ └── README.md ├── Tomcat │ ├── icon │ │ └── tomcat.png │ ├── collect │ │ └── tomcat │ │ │ └── tomcat.toml │ └── markdown │ │ └── README.md ├── Windows │ ├── icon │ │ └── windows.png │ └── markdown │ │ ├── windows.png │ │ └── README.md ├── vSphere │ └── icon │ │ └── vsphere.png ├── Logstash │ ├── icon │ │ └── 
logstash.png │ ├── markdown │ │ └── README.md │ └── collect │ │ └── logstash │ │ └── logstash.toml ├── Procstat │ ├── icon │ │ └── process.png │ └── collect │ │ └── procstat │ │ └── procstat.toml ├── RabbitMQ │ ├── icon │ │ └── rabbitmq.png │ └── markdown │ │ ├── rabbitmq.png │ │ └── README.md ├── TDEngine │ ├── icon │ │ └── tdengine.png │ └── markdown │ │ └── README.md ├── cAdvisor │ ├── icon │ │ └── cadvisor.png │ └── collect │ │ └── cadvisor │ │ └── cadvisor.toml ├── Filecount │ ├── icon │ │ └── filecount.png │ └── collect │ │ └── filecount │ │ └── filecount.toml ├── NFSClient │ ├── icon │ │ └── nfsclient.png │ ├── markdown │ │ └── README.md │ └── collect │ │ └── nfsclient │ │ └── nfsclient.toml ├── SQLServer │ ├── icon │ │ └── sqlserver.png │ └── markdown │ │ └── README.md ├── ZooKeeper │ ├── icon │ │ └── zookeeper.png │ ├── collect │ │ └── zookeeper │ │ │ └── zookeeper.toml │ └── markdown │ │ └── README.md ├── AMD_ROCm_SMI │ ├── icon │ │ └── rocm_smi.png │ └── collect │ │ └── amd_rocm_smi │ │ └── rocm.toml ├── ClickHouse │ └── icon │ │ └── clickhouse.png ├── CloudWatch │ └── icon │ │ └── cloudwatch.png ├── Jolokia_Agent │ ├── icon │ │ └── jolokia.png │ ├── markdown │ │ └── README.md │ └── collect │ │ └── jolokia_agent │ │ ├── zookeeper.toml │ │ ├── bitbucket.toml │ │ └── java.toml ├── Kubernetes │ ├── icon │ │ └── kubernetes.png │ └── markdown │ │ └── README.md ├── PostgreSQL │ ├── icon │ │ └── postgresql.png │ └── markdown │ │ ├── alerts.png │ │ ├── dash-pg.png │ │ ├── alerts-pg.png │ │ └── postgresql.png ├── Prometheus │ ├── icon │ │ └── prometheus.png │ ├── markdown │ │ └── README.md │ └── collect │ │ └── prometheus │ │ └── prometheus.toml ├── SpringBoot │ ├── icon │ │ └── springboot.png │ └── markdown │ │ ├── actuator.jpeg │ │ ├── actuator_2.0.png │ │ └── README.md ├── Appdynamics │ ├── icon │ │ └── appdynamics.png │ ├── collect │ │ └── appdynamics │ │ │ └── app.toml │ └── markdown │ │ └── README.md ├── Net_Response │ ├── icon │ │ └── net_response.png │ ├── metrics │ │ └── categraf.json │ ├── collect │ │ └── net_response │ │ │ └── net_response.toml │ └── alerts │ │ └── net_response_by_categraf.json ├── ElasticSearch │ ├── icon │ │ └── elasticsearch.png │ └── markdown │ │ ├── es-dashboard.jpeg │ │ └── README.md ├── HTTP_Response │ └── icon │ │ └── http_response.png ├── Switch_Legacy │ ├── icon │ │ └── switch_legacy.png │ └── markdown │ │ └── README.md ├── VictoriaMetrics │ ├── markdown │ │ ├── alerts.png │ │ ├── dash-vm.png │ │ ├── alerts-vm.png │ │ ├── dashboard.png │ │ └── README.md │ └── icon │ │ └── VictoriaMetrics.png └── Netstat_Filter │ ├── icon │ └── netstat_filter.png │ ├── collect │ └── netstat_filter │ │ └── netstat_filter.toml │ └── markdown │ └── README.md ├── alert ├── sender │ ├── plugin_cmd_windows.go │ ├── plugin_cmd_unix.go │ └── lark.go ├── queue │ └── queue.go ├── naming │ └── leader.go ├── dispatch │ ├── log.go │ └── notify_channel.go ├── common │ └── key.go ├── aconf │ └── conf.go └── process │ └── alert_cur_event.go ├── .gitattributes ├── dumper └── dumper.go ├── pkg ├── prom │ └── client_option.go ├── osx │ └── osx.go ├── cfg │ ├── scan.go │ └── cfg.go ├── hash │ ├── hash_md5.go │ ├── hash.go │ └── hash_fnv.go ├── ctx │ └── ctx.go ├── fasttime │ └── fasttime.go ├── logx │ └── logx.go ├── version │ └── version.go ├── choice │ └── choice.go ├── parser │ └── calc.go ├── poster │ └── post_test.go ├── i18nx │ └── i18n.go └── tplx │ └── conv.go ├── cli ├── cli.go └── upgrade │ ├── readme.md │ └── config.go ├── .github ├── ISSUE_TEMPLATE │ ├── config.yml │ ├── 
enhancement.md │ └── question.yml ├── PULL_REQUEST_TEMPLATE.md └── workflows │ └── n9e.yml ├── memsto ├── memsto.go └── stat.go ├── front └── statik │ └── statik.go ├── storage └── storage.go ├── pushgw ├── router │ ├── router_target.go │ ├── ident_stats.go │ └── stat.go └── writer │ └── stats.go ├── center ├── cconf │ ├── plugin.go │ ├── conf.go │ ├── metric.go │ └── event_example.go ├── router │ ├── router_dashboard.go │ ├── router_chart_share.go │ ├── router_crypto.go │ ├── router_server.go │ ├── router_role_operation.go │ ├── router_builtin_componet.go │ └── router_config.go ├── sso │ └── sync.go └── cstats │ └── stats.go ├── models ├── chart_share.go ├── chart.go ├── builtin_cate.go ├── notify_config.go ├── sso_config.go ├── migrate │ └── migrate_es_index_pattern.go ├── board_payload.go └── role_operation.go ├── fe.sh ├── cmd ├── cli │ └── main.go ├── edge │ └── main.go ├── alert │ └── main.go ├── pushgw │ └── main.go └── center │ └── main.go ├── .gitignore ├── conf └── crypto.go └── Makefile /doc/img/wx.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/doc/img/wx.jpg -------------------------------------------------------------------------------- /doc/img/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/doc/img/arch.png -------------------------------------------------------------------------------- /doc/img/intro.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/doc/img/intro.gif -------------------------------------------------------------------------------- /doc/img/wecom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/doc/img/wecom.png -------------------------------------------------------------------------------- /doc/img/ccf-n9e.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/doc/img/ccf-n9e.png -------------------------------------------------------------------------------- /doc/img/ccf-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/doc/img/ccf-logo.png -------------------------------------------------------------------------------- /doc/img/dingtalk.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/doc/img/dingtalk.png -------------------------------------------------------------------------------- /doc/img/install-vm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/doc/img/install-vm.png -------------------------------------------------------------------------------- /doc/img/n9e-vx-new.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/doc/img/n9e-vx-new.png -------------------------------------------------------------------------------- /doc/img/redis-dash.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/doc/img/redis-dash.png 
-------------------------------------------------------------------------------- /doc/img/alert-events.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/doc/img/alert-events.png -------------------------------------------------------------------------------- /doc/img/arch-product.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/doc/img/arch-product.png -------------------------------------------------------------------------------- /doc/img/arch-system.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/doc/img/arch-system.png -------------------------------------------------------------------------------- /doc/img/mysql-alerts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/doc/img/mysql-alerts.png -------------------------------------------------------------------------------- /doc/img/Nightingale_L_V.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/doc/img/Nightingale_L_V.png -------------------------------------------------------------------------------- /doc/img/n9e-arch-latest.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/doc/img/n9e-arch-latest.png -------------------------------------------------------------------------------- /doc/img/vm-cluster-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/doc/img/vm-cluster-arch.png -------------------------------------------------------------------------------- /docker/compose-bridge/etc-categraf/input.kernel/kernel.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | -------------------------------------------------------------------------------- /docker/compose-bridge/etc-categraf/input.netstat/netstat.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | -------------------------------------------------------------------------------- /docker/compose-postgres/categraf/conf/input.kernel/kernel.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | -------------------------------------------------------------------------------- /docker/.dockerignore: -------------------------------------------------------------------------------- 1 | compose-host-network 2 | compose-postgres 3 | compose-bridge 4 | initsql 5 | build.sh 6 | -------------------------------------------------------------------------------- /docker/compose-host-network/etc-categraf/input.kernel/kernel.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | -------------------------------------------------------------------------------- /docker/compose-host-network/etc-categraf/input.netstat/netstat.toml: -------------------------------------------------------------------------------- 1 | # # collect 
interval 2 | # interval = 15 3 | -------------------------------------------------------------------------------- /docker/compose-postgres/categraf/conf/input.netstat/netstat.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | -------------------------------------------------------------------------------- /integrations/JMX/icon/jmx.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/JMX/icon/jmx.png -------------------------------------------------------------------------------- /integrations/NSQ/icon/nsq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/NSQ/icon/nsq.png -------------------------------------------------------------------------------- /doc/img/n9e-node-dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/doc/img/n9e-node-dashboard.png -------------------------------------------------------------------------------- /doc/img/nightingale_logo_h.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/doc/img/nightingale_logo_h.png -------------------------------------------------------------------------------- /doc/img/nightingale_logo_v.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/doc/img/nightingale_logo_v.png -------------------------------------------------------------------------------- /integrations/Ceph/icon/ceph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Ceph/icon/ceph.png -------------------------------------------------------------------------------- /integrations/Exec/icon/exec.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Exec/icon/exec.png -------------------------------------------------------------------------------- /integrations/IPMI/icon/ipmi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/IPMI/icon/ipmi.png -------------------------------------------------------------------------------- /integrations/IPVS/icon/ipvs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/IPVS/icon/ipvs.png -------------------------------------------------------------------------------- /integrations/PHP/icon/phpfpm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/PHP/icon/phpfpm.png -------------------------------------------------------------------------------- /integrations/Ping/icon/ping.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Ping/icon/ping.png -------------------------------------------------------------------------------- 
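The kernel and netstat inputs shown a little earlier ship with every option commented out, so in practice only the `interval` semantics matter. The following standalone Go sketch (not categraf code; all names are illustrative) shows what an `interval = 15` setting amounts to: decode the TOML and fall back to a default when the key is left commented out.

```go
package main

import (
	"fmt"
	"time"

	"github.com/BurntSushi/toml" // used here only as an example TOML decoder
)

type inputConfig struct {
	Interval int `toml:"interval"` // seconds between collections; 0 means "not set"
}

func main() {
	// The raw config mirrors the shipped files: the interval line is commented out.
	const raw = "# interval = 15\n"

	var cfg inputConfig
	if _, err := toml.Decode(raw, &cfg); err != nil {
		panic(err)
	}

	interval := time.Duration(cfg.Interval) * time.Second
	if interval == 0 {
		interval = 15 * time.Second // assumed global default, for illustration only
	}
	fmt.Println("collecting every", interval)
}
```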
/integrations/SNMP/icon/snmp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/SNMP/icon/snmp.png -------------------------------------------------------------------------------- /integrations/TiDB/icon/tidb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/TiDB/icon/tidb.png -------------------------------------------------------------------------------- /doc/img/n9e-screenshot-gif-v6.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/doc/img/n9e-screenshot-gif-v6.gif -------------------------------------------------------------------------------- /docker/compose-host-network-metric-log/etc-categraf/input.kernel/kernel.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | -------------------------------------------------------------------------------- /docker/compose-host-network-metric-log/etc-categraf/input.netstat/netstat.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | -------------------------------------------------------------------------------- /integrations/Canal/icon/canal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Canal/icon/canal.png -------------------------------------------------------------------------------- /integrations/IPVS/collect/ipvs/ipvs.toml: -------------------------------------------------------------------------------- 1 | # Collect virtual and real server stats from Linux IPVS 2 | # no configuration 3 | -------------------------------------------------------------------------------- /integrations/Kafka/icon/kafka.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Kafka/icon/kafka.png -------------------------------------------------------------------------------- /integrations/Linux/icon/linux.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Linux/icon/linux.png -------------------------------------------------------------------------------- /integrations/MinIO/icon/minio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/MinIO/icon/minio.png -------------------------------------------------------------------------------- /integrations/Mtail/icon/mtail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Mtail/icon/mtail.png -------------------------------------------------------------------------------- /integrations/MySQL/icon/mysql.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/MySQL/icon/mysql.png -------------------------------------------------------------------------------- /integrations/Nginx/icon/nginx.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Nginx/icon/nginx.png -------------------------------------------------------------------------------- /integrations/Redis/icon/redis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Redis/icon/redis.png -------------------------------------------------------------------------------- /integrations/SMART/icon/smart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/SMART/icon/smart.png -------------------------------------------------------------------------------- /integrations/Whois/icon/whois.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Whois/icon/whois.png -------------------------------------------------------------------------------- /integrations/XSKYApi/icon/xsky.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/XSKYApi/icon/xsky.png -------------------------------------------------------------------------------- /docker/compose-postgres/prometc_vm/targets.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "targets": [ 4 | "n9e:17000" 5 | ] 6 | } 7 | ] 8 | -------------------------------------------------------------------------------- /integrations/AliYun/icon/aliyun.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/AliYun/icon/aliyun.png -------------------------------------------------------------------------------- /integrations/AliYun/markdown/ecs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/AliYun/markdown/ecs.png -------------------------------------------------------------------------------- /integrations/AliYun/markdown/rds.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/AliYun/markdown/rds.png -------------------------------------------------------------------------------- /integrations/AliYun/markdown/slb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/AliYun/markdown/slb.png -------------------------------------------------------------------------------- /integrations/AliYun/markdown/waf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/AliYun/markdown/waf.png -------------------------------------------------------------------------------- /integrations/AutoMQ/icon/automq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/AutoMQ/icon/automq.png -------------------------------------------------------------------------------- /integrations/Ceph/markdown/alerts.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Ceph/markdown/alerts.png -------------------------------------------------------------------------------- /integrations/Ceph/markdown/ceph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Ceph/markdown/ceph.png -------------------------------------------------------------------------------- /integrations/Consul/icon/consul.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Consul/icon/consul.png -------------------------------------------------------------------------------- /integrations/Dns_Query/icon/dns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Dns_Query/icon/dns.png -------------------------------------------------------------------------------- /integrations/Docker/icon/docker.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Docker/icon/docker.png -------------------------------------------------------------------------------- /integrations/Gitlab/icon/gitlab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Gitlab/icon/gitlab.png -------------------------------------------------------------------------------- /integrations/GoogleCloud/icon/gcp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/GoogleCloud/icon/gcp.png -------------------------------------------------------------------------------- /integrations/HAProxy/icon/haproxy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/HAProxy/icon/haproxy.png -------------------------------------------------------------------------------- /integrations/Jenkins/icon/jenkins.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Jenkins/icon/jenkins.png -------------------------------------------------------------------------------- /integrations/MinIO/markdown/minio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/MinIO/markdown/minio.png -------------------------------------------------------------------------------- /integrations/MongoDB/icon/mongodb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/MongoDB/icon/mongodb.png -------------------------------------------------------------------------------- /integrations/N9E/icon/nightingale.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/N9E/icon/nightingale.png -------------------------------------------------------------------------------- /integrations/NVIDIA/icon/nvidia.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/NVIDIA/icon/nvidia.png -------------------------------------------------------------------------------- /integrations/Oracle/icon/oracle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Oracle/icon/oracle.png -------------------------------------------------------------------------------- /integrations/Process/icon/process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Process/icon/process.png -------------------------------------------------------------------------------- /integrations/Systemd/icon/systemd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Systemd/icon/systemd.png -------------------------------------------------------------------------------- /integrations/Tomcat/icon/tomcat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Tomcat/icon/tomcat.png -------------------------------------------------------------------------------- /integrations/Windows/icon/windows.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Windows/icon/windows.png -------------------------------------------------------------------------------- /integrations/vSphere/icon/vsphere.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/vSphere/icon/vsphere.png -------------------------------------------------------------------------------- /integrations/AliYun/markdown/redis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/AliYun/markdown/redis.png -------------------------------------------------------------------------------- /integrations/Kafka/markdown/alerts..png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Kafka/markdown/alerts..png -------------------------------------------------------------------------------- /integrations/Logstash/icon/logstash.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Logstash/icon/logstash.png -------------------------------------------------------------------------------- /integrations/MinIO/markdown/alerts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/MinIO/markdown/alerts.png -------------------------------------------------------------------------------- /integrations/Procstat/icon/process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Procstat/icon/process.png -------------------------------------------------------------------------------- 
/integrations/RabbitMQ/icon/rabbitmq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/RabbitMQ/icon/rabbitmq.png -------------------------------------------------------------------------------- /integrations/TDEngine/icon/tdengine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/TDEngine/icon/tdengine.png -------------------------------------------------------------------------------- /integrations/cAdvisor/icon/cadvisor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/cAdvisor/icon/cadvisor.png -------------------------------------------------------------------------------- /docker/compose-postgres/n9eetc_pg/template/subject.tpl: -------------------------------------------------------------------------------- 1 | {{if .IsRecovered}}Recovered{{else}}Triggered{{end}}: {{.RuleName}} {{.TagsJSON}} -------------------------------------------------------------------------------- /integrations/Ceph/markdown/ceph-alerts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Ceph/markdown/ceph-alerts.png -------------------------------------------------------------------------------- /integrations/Ceph/markdown/ceph-dash.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Ceph/markdown/ceph-dash.png -------------------------------------------------------------------------------- /integrations/Filecount/icon/filecount.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Filecount/icon/filecount.png -------------------------------------------------------------------------------- /integrations/Kafka/markdown/dash-kafka.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Kafka/markdown/dash-kafka.png -------------------------------------------------------------------------------- /integrations/Kafka/markdown/dashboards.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Kafka/markdown/dashboards.png -------------------------------------------------------------------------------- /integrations/MinIO/markdown/dash-minio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/MinIO/markdown/dash-minio.png -------------------------------------------------------------------------------- /integrations/Mtail/markdown/timestamp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Mtail/markdown/timestamp.png -------------------------------------------------------------------------------- /integrations/Mtail/markdown/timezone.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Mtail/markdown/timezone.png -------------------------------------------------------------------------------- /integrations/NFSClient/icon/nfsclient.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/NFSClient/icon/nfsclient.png -------------------------------------------------------------------------------- /integrations/SQLServer/icon/sqlserver.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/SQLServer/icon/sqlserver.png -------------------------------------------------------------------------------- /integrations/Windows/markdown/windows.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Windows/markdown/windows.png -------------------------------------------------------------------------------- /integrations/ZooKeeper/icon/zookeeper.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/ZooKeeper/icon/zookeeper.png -------------------------------------------------------------------------------- /docker/compose-bridge/etc-categraf/input.prometheus/prometheus.toml: -------------------------------------------------------------------------------- 1 | [[instances]] 2 | urls = [ 3 | "http://nightingale:17000/metrics" 4 | ] -------------------------------------------------------------------------------- /integrations/AMD_ROCm_SMI/icon/rocm_smi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/AMD_ROCm_SMI/icon/rocm_smi.png -------------------------------------------------------------------------------- /integrations/ClickHouse/icon/clickhouse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/ClickHouse/icon/clickhouse.png -------------------------------------------------------------------------------- /integrations/CloudWatch/icon/cloudwatch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/CloudWatch/icon/cloudwatch.png -------------------------------------------------------------------------------- /integrations/Jolokia_Agent/icon/jolokia.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Jolokia_Agent/icon/jolokia.png -------------------------------------------------------------------------------- /integrations/Kafka/markdown/alerts-kafka.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Kafka/markdown/alerts-kafka.png -------------------------------------------------------------------------------- /integrations/Kubernetes/icon/kubernetes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Kubernetes/icon/kubernetes.png 
-------------------------------------------------------------------------------- /integrations/Linux/collect/arp_packet/arp_packet.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | [[instances]] 5 | #eth_device="ens192" -------------------------------------------------------------------------------- /integrations/MinIO/markdown/alerts-minio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/MinIO/markdown/alerts-minio.png -------------------------------------------------------------------------------- /integrations/PostgreSQL/icon/postgresql.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/PostgreSQL/icon/postgresql.png -------------------------------------------------------------------------------- /integrations/PostgreSQL/markdown/alerts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/PostgreSQL/markdown/alerts.png -------------------------------------------------------------------------------- /integrations/PostgreSQL/markdown/dash-pg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/PostgreSQL/markdown/dash-pg.png -------------------------------------------------------------------------------- /integrations/Prometheus/icon/prometheus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Prometheus/icon/prometheus.png -------------------------------------------------------------------------------- /integrations/RabbitMQ/markdown/rabbitmq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/RabbitMQ/markdown/rabbitmq.png -------------------------------------------------------------------------------- /integrations/SpringBoot/icon/springboot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/SpringBoot/icon/springboot.png -------------------------------------------------------------------------------- /integrations/Appdynamics/icon/appdynamics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Appdynamics/icon/appdynamics.png -------------------------------------------------------------------------------- /integrations/Net_Response/icon/net_response.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Net_Response/icon/net_response.png -------------------------------------------------------------------------------- /integrations/PostgreSQL/markdown/alerts-pg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/PostgreSQL/markdown/alerts-pg.png -------------------------------------------------------------------------------- 
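The arp_packet input above takes an `eth_device` name, commented out by default. A quick way to see which interface names are available on a host is the standard library; this helper is a standalone sketch, not part of categraf.

```go
package main

import (
	"fmt"
	"net"
)

func main() {
	ifaces, err := net.Interfaces()
	if err != nil {
		panic(err)
	}
	for _, ifc := range ifaces {
		// Any of these names can be used as eth_device in arp_packet.toml.
		fmt.Printf("%-10s mtu=%d flags=%s\n", ifc.Name, ifc.MTU, ifc.Flags)
	}
}
```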
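The compose-bridge prometheus input earlier in this listing scrapes `http://nightingale:17000/metrics`, and compose-postgres points its `targets.json` at `n9e:17000` for the same reason. For readers unfamiliar with that endpoint, here is a self-contained Go sketch of a service exposing Prometheus metrics on port 17000; it is illustrative only, not n9e's actual HTTP setup.

```go
package main

import (
	"log"
	"net/http"

	"github.com/prometheus/client_golang/prometheus/promhttp"
)

func main() {
	// Serve the default Prometheus registry at /metrics, the path the
	// categraf prometheus input and the file_sd targets above scrape.
	http.Handle("/metrics", promhttp.Handler())
	log.Fatal(http.ListenAndServe(":17000", nil))
}
```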
/integrations/PostgreSQL/markdown/postgresql.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/PostgreSQL/markdown/postgresql.png -------------------------------------------------------------------------------- /integrations/SpringBoot/markdown/actuator.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/SpringBoot/markdown/actuator.jpeg -------------------------------------------------------------------------------- /alert/sender/plugin_cmd_windows.go: -------------------------------------------------------------------------------- 1 | package sender 2 | 3 | import "os/exec" 4 | 5 | func startCmd(c *exec.Cmd) error { 6 | return c.Start() 7 | } 8 | -------------------------------------------------------------------------------- /integrations/ElasticSearch/icon/elasticsearch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/ElasticSearch/icon/elasticsearch.png -------------------------------------------------------------------------------- /integrations/HTTP_Response/icon/http_response.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/HTTP_Response/icon/http_response.png -------------------------------------------------------------------------------- /integrations/SpringBoot/markdown/actuator_2.0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/SpringBoot/markdown/actuator_2.0.png -------------------------------------------------------------------------------- /integrations/Switch_Legacy/icon/switch_legacy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Switch_Legacy/icon/switch_legacy.png -------------------------------------------------------------------------------- /integrations/VictoriaMetrics/markdown/alerts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/VictoriaMetrics/markdown/alerts.png -------------------------------------------------------------------------------- /integrations/VictoriaMetrics/markdown/dash-vm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/VictoriaMetrics/markdown/dash-vm.png -------------------------------------------------------------------------------- /integrations/Netstat_Filter/icon/netstat_filter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/Netstat_Filter/icon/netstat_filter.png -------------------------------------------------------------------------------- /integrations/VictoriaMetrics/markdown/alerts-vm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/VictoriaMetrics/markdown/alerts-vm.png -------------------------------------------------------------------------------- 
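alert/sender/plugin_cmd_windows.go above (and its non-Windows counterpart later in this listing) expose a platform-specific `startCmd` helper. The sketch below shows one way such a helper might be driven from the same `sender` package; `runNotifyScript` and its timeout handling are illustrative assumptions, not code from the repository.

```go
package sender

import (
	"fmt"
	"os/exec"
	"strings"
	"time"
)

// runNotifyScript launches an external notification script through the
// platform-specific startCmd helper and waits for it with a timeout.
func runNotifyScript(path, payload string, timeout time.Duration) error {
	cmd := exec.Command(path)
	cmd.Stdin = strings.NewReader(payload) // hand the event payload to the script on stdin

	if err := startCmd(cmd); err != nil { // Setpgid is applied on non-Windows builds
		return fmt.Errorf("start %s: %w", path, err)
	}

	done := make(chan error, 1)
	go func() { done <- cmd.Wait() }()

	select {
	case err := <-done:
		return err
	case <-time.After(timeout):
		_ = cmd.Process.Kill() // a real implementation might signal the whole process group
		return fmt.Errorf("%s timed out after %s", path, timeout)
	}
}
```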
/integrations/VictoriaMetrics/markdown/dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/VictoriaMetrics/markdown/dashboard.png -------------------------------------------------------------------------------- /integrations/ElasticSearch/markdown/es-dashboard.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/ElasticSearch/markdown/es-dashboard.jpeg -------------------------------------------------------------------------------- /integrations/VictoriaMetrics/icon/VictoriaMetrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/nightingale/HEAD/integrations/VictoriaMetrics/icon/VictoriaMetrics.png -------------------------------------------------------------------------------- /doc/pmc.md: -------------------------------------------------------------------------------- 1 | ### PMC Chair 2 | - [laiwei](https://github.com/laiwei) 3 | 4 | ### PMC Co-Chair 5 | - [UlricQin](https://github.com/UlricQin) 6 | 7 | ### PMC Member 8 | -------------------------------------------------------------------------------- /docker/compose-bridge/etc-categraf/input.cpu/cpu.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # # whether collect per cpu 5 | # collect_per_cpu = false 6 | -------------------------------------------------------------------------------- /docker/compose-postgres/categraf/conf/input.cpu/cpu.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # # whether collect per cpu 5 | # collect_per_cpu = false 6 | -------------------------------------------------------------------------------- /docker/compose-host-network/etc-categraf/input.cpu/cpu.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # # whether collect per cpu 5 | # collect_per_cpu = false 6 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.css linguist-language=go 2 | *.less linguist-language=go 3 | *.js linguist-language=go 4 | *.tsx linguist-language=go 5 | *.html linguist-language=go 6 | -------------------------------------------------------------------------------- /doc/committers.md: -------------------------------------------------------------------------------- 1 | ## Committers 2 | 3 | - [YeningQin](https://github.com/710leo) 4 | - [FeiKong](https://github.com/kongfei605) 5 | - [XiaqingDai](https://github.com/jsers) 6 | -------------------------------------------------------------------------------- /docker/compose-host-network-metric-log/etc-categraf/input.cpu/cpu.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # # whether collect per cpu 5 | # collect_per_cpu = false 6 | -------------------------------------------------------------------------------- /docker/compose-bridge/etc-categraf/input.mem/mem.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # # 
whether collect platform specified metrics 5 | collect_platform_fields = true 6 | -------------------------------------------------------------------------------- /docker/compose-bridge/etc-categraf/input.system/system.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # # whether collect metric: system_n_users 5 | # collect_user_number = false 6 | -------------------------------------------------------------------------------- /docker/compose-bridge/etc-mysql/my.cnf: -------------------------------------------------------------------------------- 1 | [mysqld] 2 | pid-file = /var/run/mysqld/mysqld.pid 3 | socket = /var/run/mysqld/mysqld.sock 4 | datadir = /var/lib/mysql 5 | bind-address = 0.0.0.0 -------------------------------------------------------------------------------- /docker/compose-postgres/categraf/conf/input.mem/mem.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # # whether collect platform specified metrics 5 | collect_platform_fields = true 6 | -------------------------------------------------------------------------------- /docker/compose-postgres/categraf/conf/input.system/system.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # # whether collect metric: system_n_users 5 | # collect_user_number = false 6 | -------------------------------------------------------------------------------- /dumper/dumper.go: -------------------------------------------------------------------------------- 1 | package dumper 2 | 3 | import "github.com/gin-gonic/gin" 4 | 5 | // package level functions 6 | func ConfigRouter(r *gin.Engine) { 7 | syncDumper.ConfigRouter(r) 8 | } 9 | -------------------------------------------------------------------------------- /pkg/prom/client_option.go: -------------------------------------------------------------------------------- 1 | package prom 2 | 3 | type ClientOptions struct { 4 | Url string 5 | BasicAuthUser string 6 | BasicAuthPass string 7 | Headers []string 8 | } 9 | -------------------------------------------------------------------------------- /doc/contributors.md: -------------------------------------------------------------------------------- 1 | ## Contributors 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /docker/compose-host-network/etc-categraf/input.mem/mem.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # # whether collect platform specified metrics 5 | collect_platform_fields = true 6 | -------------------------------------------------------------------------------- /docker/compose-host-network/etc-categraf/input.system/system.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # # whether collect metric: system_n_users 5 | # collect_user_number = false 6 | -------------------------------------------------------------------------------- /docker/compose-host-network/etc-mysql/my.cnf: -------------------------------------------------------------------------------- 1 | [mysqld] 2 | pid-file = /var/run/mysqld/mysqld.pid 3 | socket = /var/run/mysqld/mysqld.sock 4 | datadir = /var/lib/mysql 5 | bind-address = 127.0.0.1 
-------------------------------------------------------------------------------- /cli/cli.go: -------------------------------------------------------------------------------- 1 | package cli 2 | 3 | import ( 4 | "github.com/ccfos/nightingale/v6/cli/upgrade" 5 | ) 6 | 7 | func Upgrade(configFile string) error { 8 | return upgrade.Upgrade(configFile) 9 | } 10 | -------------------------------------------------------------------------------- /docker/compose-host-network-metric-log/etc-categraf/input.mem/mem.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # # whether collect platform specified metrics 5 | collect_platform_fields = true 6 | -------------------------------------------------------------------------------- /docker/compose-host-network-metric-log/etc-categraf/input.system/system.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # # whether collect metric: system_n_users 5 | # collect_user_number = false 6 | -------------------------------------------------------------------------------- /docker/initsql/c-init.sql: -------------------------------------------------------------------------------- 1 | GRANT ALL ON *.* TO 'root'@'127.0.0.1' IDENTIFIED BY '1234'; 2 | GRANT ALL ON *.* TO 'root'@'localhost' IDENTIFIED BY '1234'; 3 | GRANT ALL ON *.* TO 'root'@'%' IDENTIFIED BY '1234'; -------------------------------------------------------------------------------- /docker/compose-host-network-metric-log/etc-mysql/my.cnf: -------------------------------------------------------------------------------- 1 | [mysqld] 2 | pid-file = /var/run/mysqld/mysqld.pid 3 | socket = /var/run/mysqld/mysqld.sock 4 | datadir = /var/lib/mysql 5 | bind-address = 127.0.0.1 -------------------------------------------------------------------------------- /integrations/NFSClient/markdown/README.md: -------------------------------------------------------------------------------- 1 | # NFS Client 2 | 3 | forked from telegraf/inputs.nfsclient 4 | 5 | ## 停用该插件 6 | 7 | - 方法一:把 `input.nfsclient` 目录改个别的名字,不用 `input.` 打头 8 | - 方法二:nfsclient.toml 中的配置留空 -------------------------------------------------------------------------------- /integrations/Netstat_Filter/collect/netstat_filter/netstat_filter.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | [[instances]] 4 | # laddr_ip = "" 5 | # laddr_port = 0 6 | # raddr_ip = "" 7 | # raddr_port = 0 8 | -------------------------------------------------------------------------------- /integrations/Jolokia_Agent/markdown/README.md: -------------------------------------------------------------------------------- 1 | # Jolokia Agent 2 | 3 | forked from telegraf/inputs.jolokia2_agent 4 | 5 | ## 停用该插件 6 | 7 | - 方法一:把 `input.jolokia_agent_misc` 目录改个别的名字,不用 `input.` 打头 8 | - 方法二:xx.toml 中的配置留空 -------------------------------------------------------------------------------- /integrations/Linux/collect/processes/processes.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # # force use ps command to gather 5 | # force_ps = false 6 | 7 | # # force use /proc to gather 8 | # force_proc = false -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: 
-------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: Nightingale docs 4 | url: https://n9e.github.io/ 5 | about: You may want to read through the document before asking questions. -------------------------------------------------------------------------------- /memsto/memsto.go: -------------------------------------------------------------------------------- 1 | package memsto 2 | 3 | import ( 4 | "os" 5 | 6 | "github.com/toolkits/pkg/logger" 7 | ) 8 | 9 | // TODO 优化 exit 处理方式 10 | func exit(code int) { 11 | logger.Close() 12 | os.Exit(code) 13 | } 14 | -------------------------------------------------------------------------------- /integrations/AMD_ROCm_SMI/collect/amd_rocm_smi/rocm.toml: -------------------------------------------------------------------------------- 1 | # Query statistics from AMD Graphics cards using rocm-smi binary 2 | # bin_path = "/opt/rocm/bin/rocm-smi" 3 | 4 | ## Optional: timeout for GPU polling 5 | # timeout = "5s" -------------------------------------------------------------------------------- /docker/compose-bridge/etc-categraf/input.processes/processes.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # # force use ps command to gather 5 | # force_ps = false 6 | 7 | # # force use /proc to gather 8 | # force_proc = false -------------------------------------------------------------------------------- /docker/compose-host-network/etc-categraf/input.processes/processes.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # # force use ps command to gather 5 | # force_ps = false 6 | 7 | # # force use /proc to gather 8 | # force_proc = false -------------------------------------------------------------------------------- /docker/compose-postgres/categraf/conf/input.processes/processes.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # # force use ps command to gather 5 | # force_ps = false 6 | 7 | # # force use /proc to gather 8 | # force_proc = false -------------------------------------------------------------------------------- /docker/compose-host-network-metric-log/etc-categraf/input.processes/processes.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # # force use ps command to gather 5 | # force_ps = false 6 | 7 | # # force use /proc to gather 8 | # force_proc = false -------------------------------------------------------------------------------- /docker/Dockerfile.goreleaser.arm64: -------------------------------------------------------------------------------- 1 | FROM --platform=$TARGETPLATFORM python:3-slim 2 | 3 | 4 | WORKDIR /app 5 | ADD n9e /app/ 6 | ADD etc /app/etc/ 7 | ADD integrations /app/integrations/ 8 | 9 | EXPOSE 17000 10 | 11 | CMD ["/app/n9e", "-h"] 12 | -------------------------------------------------------------------------------- /integrations/AutoMQ/collect/prometheus/采集OTEL-COLLECTOR的样例.toml: -------------------------------------------------------------------------------- 1 | interval = 15 2 | 3 | [[instances]] 4 | urls = [ 5 | "http://:/metrics" 6 | ] 7 | 8 | url_label_key = "otel_collector" 9 | url_label_value = "{{.Host}}" 
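`alert/sender/plugin_cmd_unix.go` above places each notification-plugin command in its own process group via `Setpgid: true`. The payoff is that a caller can later signal the negative process-group ID and take down the plugin together with any children it spawned. The following is a minimal sketch of that pattern, assuming the caller enforces its own timeout; it is not the repository's actual sender code:

```go
//go:build !windows

package main

import (
	"fmt"
	"os/exec"
	"syscall"
	"time"
)

// startCmd mirrors alert/sender/plugin_cmd_unix.go: the child gets its own
// process group so it can be signalled together with its descendants.
func startCmd(c *exec.Cmd) error {
	c.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
	return c.Start()
}

func main() {
	cmd := exec.Command("sh", "-c", "sleep 30")
	if err := startCmd(cmd); err != nil {
		fmt.Println("start failed:", err)
		return
	}

	done := make(chan error, 1)
	go func() { done <- cmd.Wait() }()

	select {
	case err := <-done:
		fmt.Println("plugin finished:", err)
	case <-time.After(2 * time.Second):
		// The negative PID addresses the whole process group created by
		// Setpgid, so the shell and the sleep it spawned are both killed.
		_ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
		fmt.Println("plugin timed out, process group killed:", <-done)
	}
}
```

Signalling `-pid` rather than `pid` is the design point: without the dedicated process group, a plugin that forks (for example a shell script calling curl) could leave orphaned children behind after a timeout.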
-------------------------------------------------------------------------------- /docker/Dockerfile.goreleaser: -------------------------------------------------------------------------------- 1 | FROM --platform=$TARGETPLATFORM python:3-slim 2 | 3 | 4 | WORKDIR /app 5 | ADD n9e /app/ 6 | ADD etc /app/etc/ 7 | ADD integrations /app/integrations/ 8 | RUN pip install requests 9 | 10 | EXPOSE 17000 11 | 12 | CMD ["/app/n9e", "-h"] 13 | -------------------------------------------------------------------------------- /doc/end-users.md: -------------------------------------------------------------------------------- 1 | ## End Users 2 | 3 | - [中移动](https://github.com/ccfos/nightingale/issues/897#issuecomment-1086573166) 4 | - [inke](https://github.com/ccfos/nightingale/issues/897#issuecomment-1099840636) 5 | - [方正证券](https://github.com/ccfos/nightingale/issues/897#issuecomment-1110492461) 6 | -------------------------------------------------------------------------------- /docker/compose-bridge/etc-categraf/input.net/net.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # # whether collect protocol stats on Linux 5 | # collect_protocol_stats = false 6 | 7 | # # setting interfaces will tell categraf to gather these explicit interfaces 8 | # interfaces = ["eth0"] -------------------------------------------------------------------------------- /alert/sender/plugin_cmd_unix.go: -------------------------------------------------------------------------------- 1 | //go:build !windows 2 | // +build !windows 3 | 4 | package sender 5 | 6 | import ( 7 | "os/exec" 8 | "syscall" 9 | ) 10 | 11 | func startCmd(c *exec.Cmd) error { 12 | c.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} 13 | return c.Start() 14 | } 15 | -------------------------------------------------------------------------------- /docker/compose-postgres/categraf/conf/input.net/net.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # # whether collect protocol stats on Linux 5 | # collect_protocol_stats = false 6 | 7 | # # setting interfaces will tell categraf to gather these explicit interfaces 8 | # interfaces = ["eth0"] -------------------------------------------------------------------------------- /doc/active-contributors.md: -------------------------------------------------------------------------------- 1 | ## Active Contributors 2 | 3 | - [xiaoziv](https://github.com/xiaoziv) 4 | - [tanxiao1990](https://github.com/tanxiao1990) 5 | - [bbaobelief](https://github.com/bbaobelief) 6 | - [freedomkk-qfeng](https://github.com/freedomkk-qfeng) 7 | - [lsy1990](https://github.com/lsy1990) 8 | -------------------------------------------------------------------------------- /docker/compose-bridge/etc-categraf/input.diskio/diskio.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # # By default, categraf will gather stats for all devices including disk partitions. 5 | # # Setting devices will restrict the stats to the specified devices. 
6 | # devices = ["sda", "sdb", "vd*"] -------------------------------------------------------------------------------- /docker/compose-host-network/etc-categraf/input.net/net.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # # whether collect protocol stats on Linux 5 | # collect_protocol_stats = false 6 | 7 | # # setting interfaces will tell categraf to gather these explicit interfaces 8 | # interfaces = ["eth0"] -------------------------------------------------------------------------------- /integrations/Logstash/markdown/README.md: -------------------------------------------------------------------------------- 1 | # logstash 2 | 3 | logstash 监控采集插件,由telegraf改造而来。 4 | 5 | ## Configuration 6 | 7 | 请参考配置[示例](https://github.com/flashcatcloud/categraf/blob/main/conf/input.logstash/logstash.toml) 8 | 9 | ## 监控大盘和告警规则 10 | 11 | 同级目录下的 logstash-dash 是示例的监控面板, 可以直接导入夜莺使用。 -------------------------------------------------------------------------------- /docker/compose-host-network/etc-categraf/input.diskio/diskio.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # # By default, categraf will gather stats for all devices including disk partitions. 5 | # # Setting devices will restrict the stats to the specified devices. 6 | # devices = ["sda", "sdb", "vd*"] -------------------------------------------------------------------------------- /docker/compose-postgres/categraf/conf/input.diskio/diskio.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # # By default, categraf will gather stats for all devices including disk partitions. 5 | # # Setting devices will restrict the stats to the specified devices. 6 | # devices = ["sda", "sdb", "vd*"] -------------------------------------------------------------------------------- /pkg/osx/osx.go: -------------------------------------------------------------------------------- 1 | package osx 2 | 3 | import "os" 4 | 5 | // getEnv returns the value of an environment variable, or returns the provided fallback value 6 | func GetEnv(key, fallback string) string { 7 | if value, ok := os.LookupEnv(key); ok { 8 | return value 9 | } 10 | return fallback 11 | } 12 | -------------------------------------------------------------------------------- /docker/compose-host-network-metric-log/etc-categraf/input.net/net.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # # whether collect protocol stats on Linux 5 | # collect_protocol_stats = false 6 | 7 | # # setting interfaces will tell categraf to gather these explicit interfaces 8 | # interfaces = ["eth0"] -------------------------------------------------------------------------------- /front/statik/statik.go: -------------------------------------------------------------------------------- 1 | // Code generated by statik. DO NOT EDIT. 
2 | 3 | package statik 4 | 5 | import ( 6 | "github.com/rakyll/statik/fs" 7 | ) 8 | 9 | func init() { 10 | data := "PK\x05\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" 11 | fs.Register(data) 12 | } 13 | -------------------------------------------------------------------------------- /docker/compose-host-network-metric-log/etc-categraf/input.diskio/diskio.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # # By default, categraf will gather stats for all devices including disk partitions. 5 | # # Setting devices will restrict the stats to the specified devices. 6 | # devices = ["sda", "sdb", "vd*"] -------------------------------------------------------------------------------- /storage/storage.go: -------------------------------------------------------------------------------- 1 | package storage 2 | 3 | import ( 4 | "github.com/ccfos/nightingale/v6/pkg/ormx" 5 | 6 | "gorm.io/gorm" 7 | ) 8 | 9 | func New(cfg ormx.DBConfig) (*gorm.DB, error) { 10 | db, err := ormx.New(cfg) 11 | if err != nil { 12 | return nil, err 13 | } 14 | 15 | return db, nil 16 | } 17 | -------------------------------------------------------------------------------- /integrations/Jenkins/collect/jenkins/jenkins.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | [[instances]] 5 | # Address (host:port) of jenkins server. 6 | # jenkins_url = "http://my-jenkins-instance:8080" 7 | 8 | #jenkins_username = "admin" 9 | #jenkins_password = "" 10 | 11 | #response_timeout = "5s" 12 | 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/enhancement.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Enhancement Request 3 | about: Suggest an enhancement to the nightingale project 4 | labels: kind/feature 5 | 6 | --- 7 | 8 | 9 | **What would you like to be added**: 10 | 11 | **Why is this needed**: -------------------------------------------------------------------------------- /integrations/Linux/collect/netstat/netstat.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | disable_summary_stats = false 5 | ## if machine has many network connections, use this plugin may exhaust your cpu resource, diable connection stat to avoid this 6 | disable_connection_stats = true 7 | 8 | tcp_ext = false 9 | ip_ext = false 10 | -------------------------------------------------------------------------------- /integrations/Systemd/collect/systemd/systemd.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | enable=false # 设置为true 打开采集 5 | #unit_include=".+" 6 | #unit_exclude="" 7 | enable_start_time_metrics=true #是否采集service unit的启动时间信息 单位秒 8 | enable_task_metrics=true # 是否采集service unit task的metrics 9 | enable_restarts_metrics=true #是否采集service unit重启的次数信息 10 | -------------------------------------------------------------------------------- /docker/compose-postgres/n9eetc_pg/template/feishu.tpl: -------------------------------------------------------------------------------- 1 | 级别状态: S{{.Severity}} {{if .IsRecovered}}Recovered{{else}}Triggered{{end}} 2 | 规则名称: {{.RuleName}}{{if .RuleNote}} 3 | 规则备注: {{.RuleNote}}{{end}} 4 | 监控指标: {{.TagsJSON}} 5 | {{if 
.IsRecovered}}恢复时间:{{timeformat .LastEvalTime}}{{else}}触发时间: {{timeformat .TriggerTime}} 6 | 触发时值: {{.TriggerValue}}{{end}} 7 | 发送时间: {{timestamp}} -------------------------------------------------------------------------------- /docker/compose-postgres/n9eetc_pg/template/mm.tpl: -------------------------------------------------------------------------------- 1 | 级别状态: S{{.Severity}} {{if .IsRecovered}}Recovered{{else}}Triggered{{end}} 2 | 规则名称: {{.RuleName}}{{if .RuleNote}} 3 | 规则备注: {{.RuleNote}}{{end}} 4 | 监控指标: {{.TagsJSON}} 5 | {{if .IsRecovered}}恢复时间:{{timeformat .LastEvalTime}}{{else}}触发时间: {{timeformat .TriggerTime}} 6 | 触发时值: {{.TriggerValue}}{{end}} 7 | 发送时间: {{timestamp}} -------------------------------------------------------------------------------- /docker/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | if [ $# -ne 1 ]; then 3 | echo "$0 " 4 | exit 0 5 | fi 6 | 7 | tag=$1 8 | 9 | echo "tag: ${tag}" 10 | 11 | rm -rf n9e pub 12 | cp ../n9e . 13 | 14 | docker build -t nightingale:${tag} . 15 | 16 | docker tag nightingale:${tag} ulric2019/nightingale:${tag} 17 | docker push ulric2019/nightingale:${tag} 18 | 19 | rm -rf n9e pub 20 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | **What type of PR is this?** 2 | 3 | **What this PR does / why we need it**: 4 | 7 | 8 | **Which issue(s) this PR fixes**: 9 | 12 | Fixes # 13 | 14 | **Special notes for your reviewer**: -------------------------------------------------------------------------------- /docker/compose-postgres/categraf/conf/prometheus.toml: -------------------------------------------------------------------------------- 1 | [prometheus] 2 | enable=true 3 | scrape_config_file="/etc/prometheus/prometheus.yml" 4 | ## log level, debug warn info error 5 | log_level="info" 6 | ## wal file storage path ,default ./data-agent 7 | # wal_storage_path="/path/to/storage" 8 | ## wal reserve time duration, default value is 2 hour 9 | # wal_min_duration=2 10 | 11 | -------------------------------------------------------------------------------- /integrations/GoogleCloud/collect/googlecloud/gcp.toml: -------------------------------------------------------------------------------- 1 | #interval=60 2 | #[[instances]] 3 | #project_id="your-project-id" 4 | #credentials_file="/path/to/your/key.json" 5 | #delay="2m" 6 | #period="1m" 7 | #filter="metric.type=\"compute.googleapis.com/instance/cpu/utilization\" AND resource.labels.zone=\"asia-northeast1-a\"" 8 | #timeout="5s" 9 | #cache_ttl="1h" 10 | #gce_host_tag="xxx" 11 | #request_inflight=30 12 | -------------------------------------------------------------------------------- /alert/queue/queue.go: -------------------------------------------------------------------------------- 1 | package queue 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/ccfos/nightingale/v6/alert/astats" 7 | "github.com/toolkits/pkg/container/list" 8 | ) 9 | 10 | var EventQueue = list.NewSafeListLimited(10000000) 11 | 12 | func ReportQueueSize(stats *astats.Stats) { 13 | for { 14 | time.Sleep(time.Second) 15 | 16 | stats.GaugeAlertQueueSize.Set(float64(EventQueue.Len())) 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /integrations/Jenkins/markdown/README.md: -------------------------------------------------------------------------------- 1 | ## Jenkins 2 | 3 | 
Jenkins 采集插件, 采集 Jenkins 数据 4 | 5 | ## Configuration 6 | 7 | ```toml 8 | # # collect interval 9 | # interval = 15 10 | 11 | [[instances]] 12 | # Address (host:port) of jenkins server. 13 | # jenkins_url = "http://my-jenkins-instance:8080" 14 | 15 | #jenkins_username = "admin" 16 | #jenkins_password = "" 17 | 18 | #response_timeout = "5s" 19 | 20 | 21 | ``` -------------------------------------------------------------------------------- /docker/compose-bridge/etc-categraf/input.disk/disk.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # # By default stats will be gathered for all mount points. 5 | # # Set mount_points will restrict the stats to only the specified mount points. 6 | mount_points = ["/"] 7 | 8 | # Ignore mount points by filesystem type. 9 | ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs", "nsfs"] 10 | 11 | -------------------------------------------------------------------------------- /integrations/Mtail/collect/mtail/mtail.toml: -------------------------------------------------------------------------------- 1 | [[instances]] 2 | # progs = "/path/to/prog1" # prog dir1 3 | # logs = ["/path/to/a.log", "path/to/b.log"] 4 | # override_timezone = "Asia/Shanghai" 5 | # emit_metric_timestamp = "true" #string type 6 | 7 | # [[instances]] 8 | # progs = "/path/to/prog2" # prog dir2 9 | # logs = ["/path/to/logdir/"] 10 | # override_timezone = "Asia/Shanghai" 11 | # emit_metric_timestamp = "true" # string type 12 | -------------------------------------------------------------------------------- /integrations/TDEngine/markdown/README.md: -------------------------------------------------------------------------------- 1 | # TDEngine 2 | 3 | TDEngine 也可以暴露 Prometheus 的监控数据,具体启用方法如下: 4 | 5 | TODO 6 | 7 | ## 采集配置 8 | 9 | 既然暴露了 Prometheus 协议的监控数据,那通过 categraf prometheus 插件直接采集即可。配置文件是 `conf/input.prometheus/prometheus.toml`。配置样例如下: 10 | 11 | ```toml 12 | [[instances]] 13 | urls = [ 14 | "http://192.168.11.177:8080/xxxx" 15 | ] 16 | ``` 17 | 18 | ## 仪表盘 19 | 20 | 夜莺内置了一个 TDEngine 仪表盘,由网友贡献,克隆到自己的业务组下即可使用,欢迎大家一起来提 PR 完善。 -------------------------------------------------------------------------------- /docker/compose-postgres/categraf/conf/input.disk/disk.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # # By default stats will be gathered for all mount points. 5 | # # Set mount_points will restrict the stats to only the specified mount points. 6 | # mount_points = ["/"] 7 | 8 | # Ignore mount points by filesystem type. 9 | ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"] 10 | 11 | ignore_mount_points = ["/boot"] 12 | -------------------------------------------------------------------------------- /integrations/SQLServer/markdown/README.md: -------------------------------------------------------------------------------- 1 | # sqlserver 2 | 3 | forked from telegraf/sqlserver. 
这个插件的作用是获取sqlserver的监控指标,这里去掉了Azure相关部分监控,只保留了本地部署sqlserver情况。 4 | 5 | # 使用 6 | 按照下面方法创建监控账号,用于读取监控数据 7 | USE master; 8 | 9 | CREATE LOGIN [categraf] WITH PASSWORD = N'mystrongpassword'; 10 | 11 | GRANT VIEW SERVER STATE TO [categraf]; 12 | 13 | GRANT VIEW ANY DEFINITION TO [categraf]; 14 | Data Source=10.19.1.1;Initial Catalog=hc;User ID=sa;Password=mystrongpassword; -------------------------------------------------------------------------------- /docker/compose-host-network/etc-categraf/input.disk/disk.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # # By default stats will be gathered for all mount points. 5 | # # Set mount_points will restrict the stats to only the specified mount points. 6 | # mount_points = ["/"] 7 | 8 | # Ignore mount points by filesystem type. 9 | ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"] 10 | 11 | ignore_mount_points = ["/boot"] 12 | -------------------------------------------------------------------------------- /docker/compose-postgres/n9eetc_pg/template/telegram.tpl: -------------------------------------------------------------------------------- 1 | **级别状态**: {{if .IsRecovered}}S{{.Severity}} Recovered{{else}}S{{.Severity}} Triggered{{end}} 2 | **规则标题**: {{.RuleName}}{{if .RuleNote}} 3 | **规则备注**: {{.RuleNote}}{{end}} 4 | **监控指标**: {{.TagsJSON}} 5 | {{if .IsRecovered}}**恢复时间**:{{timeformat .LastEvalTime}}{{else}}**触发时间**: {{timeformat .TriggerTime}} 6 | **触发时值**: {{.TriggerValue}}{{end}} 7 | **发送时间**: {{timestamp}} -------------------------------------------------------------------------------- /docker/compose-postgres/n9eetc_pg/template/wecom.tpl: -------------------------------------------------------------------------------- 1 | **级别状态**: {{if .IsRecovered}}S{{.Severity}} Recovered{{else}}S{{.Severity}} Triggered{{end}} 2 | **规则标题**: {{.RuleName}}{{if .RuleNote}} 3 | **规则备注**: {{.RuleNote}}{{end}} 4 | **监控指标**: {{.TagsJSON}} 5 | {{if .IsRecovered}}**恢复时间**:{{timeformat .LastEvalTime}}{{else}}**触发时间**: {{timeformat .TriggerTime}} 6 | **触发时值**: {{.TriggerValue}}{{end}} 7 | **发送时间**: {{timestamp}} -------------------------------------------------------------------------------- /docker/compose-host-network-metric-log/etc-categraf/input.disk/disk.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # # By default stats will be gathered for all mount points. 5 | # # Set mount_points will restrict the stats to only the specified mount points. 6 | # mount_points = ["/"] 7 | 8 | # Ignore mount points by filesystem type. 
9 | ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"] 10 | 11 | ignore_mount_points = ["/boot"] 12 | -------------------------------------------------------------------------------- /integrations/Systemd/markdown/README.md: -------------------------------------------------------------------------------- 1 | # systemd 插件 2 | 自 [node_exporter](https://github.com/prometheus/node_exporter/blob/master/collector/systemd_linux.go) fork 并改动 3 | 4 | ## Configuration 5 | ```toml 6 | enable=false # 设置为true 打开采集 7 | #unit_include=".+" 8 | #unit_exclude="" 9 | enable_start_time_metrics=true #是否采集service unit的启动时间信息 单位秒 10 | enable_task_metrics=true # 是否采集service unit task的metrics 11 | enable_restarts_metrics=true #是否采集service unit重启的次数信息 12 | ``` 13 | -------------------------------------------------------------------------------- /pkg/cfg/scan.go: -------------------------------------------------------------------------------- 1 | package cfg 2 | 3 | import ( 4 | "io/ioutil" 5 | ) 6 | 7 | type scanner struct { 8 | data []byte 9 | err error 10 | } 11 | 12 | func NewFileScanner() *scanner { 13 | return &scanner{} 14 | } 15 | 16 | func (s *scanner) Err() error { 17 | return s.err 18 | } 19 | 20 | func (s *scanner) Data() []byte { 21 | return s.data 22 | } 23 | 24 | func (s *scanner) Read(file string) { 25 | if s.err == nil { 26 | s.data, s.err = ioutil.ReadFile(file) 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /docker/compose-host-network-metric-log/etc-logstash/logstash.yaml: -------------------------------------------------------------------------------- 1 | input { 2 | kafka { 3 | bootstrap_servers => "127.0.0.1:9092" 4 | topics => ["flashcatcloud"] 5 | codec => json 6 | type => n9e 7 | } 8 | } 9 | 10 | filter { 11 | grok { 12 | match => {"message" => "%{LOGLEVEL:status}"} 13 | overwrite => ["status"] 14 | } 15 | } 16 | 17 | output { 18 | elasticsearch { 19 | hosts => ["127.0.0.1:9200"] 20 | index => "n9e-%{+YYYY.MM.DD}" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /pushgw/router/router_target.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "github.com/ccfos/nightingale/v6/pushgw/idents" 5 | "github.com/gin-gonic/gin" 6 | "github.com/toolkits/pkg/ginx" 7 | ) 8 | 9 | func (rt *Router) targetUpdate(c *gin.Context) { 10 | var f idents.TargetUpdate 11 | ginx.BindJSON(c, &f) 12 | 13 | m := make(map[string]struct{}) 14 | for _, ident := range f.Lst { 15 | m[ident] = struct{}{} 16 | } 17 | 18 | rt.IdentSet.MSet(m) 19 | ginx.NewRender(c).Message(nil) 20 | } 21 | -------------------------------------------------------------------------------- /integrations/NSQ/collect/nsq/nsq.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # [[instances]] 5 | ## The Nsq API URI used to collect statistical information. 
6 | # targets = ["http://localhost:4151"] 7 | 8 | # headers={Authorization="", X-Forwarded-For="", Host=""} 9 | 10 | # timeout="5s" 11 | 12 | # # basic auth 13 | # username="" 14 | # password="" 15 | 16 | ## append some labels for series 17 | # labels = { product="nsq" } 18 | 19 | ## interval = global.interval * interval_times 20 | # interval_times = 1 21 | -------------------------------------------------------------------------------- /integrations/SNMP/dashboards/placeholder.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": 0, 3 | "group_id": 0, 4 | "name": "占位的,等待老炮 PR", 5 | "ident": "", 6 | "tags": "", 7 | "create_at": 0, 8 | "create_by": "", 9 | "update_at": 0, 10 | "update_by": "", 11 | "configs": { 12 | "panels": [], 13 | "var": [], 14 | "version": "3.0.0" 15 | }, 16 | "public": 0, 17 | "public_cate": 0, 18 | "bgids": null, 19 | "built_in": 0, 20 | "hide": 0, 21 | "uuid": 1717556328370090000 22 | } -------------------------------------------------------------------------------- /integrations/Exec/collect/exec/exec.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | [[instances]] 5 | # # commands, support glob 6 | commands = [ 7 | # "/opt/categraf/scripts/*.sh" 8 | ] 9 | 10 | # # timeout for each command to complete 11 | # timeout = 5 12 | 13 | # # interval = global.interval * interval_times 14 | # interval_times = 1 15 | 16 | # # choices: influx prometheus falcon 17 | # # influx stdout example: mesurement,labelkey1=labelval1,labelkey2=labelval2 field1=1.2,field2=2.3 18 | # data_format = "influx" 19 | -------------------------------------------------------------------------------- /integrations/HAProxy/markdown/README.md: -------------------------------------------------------------------------------- 1 | # HAProxy 2 | 3 | forked from [haproxy_exporter](https://github.com/prometheus/haproxy_exporter) 4 | 5 | Note: since HAProxy 2.0.0, the official source includes a Prometheus exporter module that can be built into your binary with a single flag during build time and offers an exporter-free Prometheus endpoint. 6 | 7 | 8 | haproxy configurations for `/stats`: 9 | 10 | ``` 11 | frontend stats 12 | bind *:8404 13 | stats enable 14 | stats uri /stats 15 | stats refresh 10s 16 | ``` -------------------------------------------------------------------------------- /integrations/Whois/markdown/README.md: -------------------------------------------------------------------------------- 1 | # whois 2 | 3 | 域名探测插件,用于探测域名的注册时间和到期时间,值为UTC0时间戳 4 | 5 | 6 | ## Configuration 7 | 8 | 最核心的配置就是 domain 配置,配置目标地址,比如想要监控一个地址: 9 | 默认保持注释状态,注释状态下,插件默认不启用 10 | 11 | ```toml 12 | # [[instances]] 13 | ## Used to collect domain name information. 
14 | # domain = "baidu.com" 15 | ``` 16 | 请注意这里配置的是域名不是URL 17 | 18 | ## 指标解释 19 | 20 | whois_domain_createddate 域名创建时间戳 21 | whois_domain_updateddate 域名更新时间戳 22 | whois_domain_expirationdate 域名到期时间戳 23 | 24 | ## 注意事项 25 | 请不要将interval设置过短,会导致频繁请求timeout,没太大必要性,请尽量放长请求周期 -------------------------------------------------------------------------------- /integrations/Windows/markdown/README.md: -------------------------------------------------------------------------------- 1 | # Windows 2 | 3 | categraf 不但支持 linux 监控数据采集,也支持 windows 监控数据采集,而且指标命名也是一样的,这样告警规则、仪表盘其实都可以复用。不需要对 windows 做额外处理。 4 | 5 | ## 安装 6 | 7 | categraf 在 windows 下安装请参考这个 [文档](https://flashcat.cloud/docs/content/flashcat-monitor/categraf/2-installation/)。 8 | 9 | ## 仪表盘 10 | 11 | linux、windows 仪表盘其实是可以复用的,只是两种操作系统个别指标不同。比如有些指标是 linux 特有的,有些指标是 windows 特有的。如果你想要分开查看,夜莺也内置了 windows 的仪表盘,克隆到自己的业务组下即可使用。 12 | 13 | ## 告警规则 14 | 15 | 夜莺虽然也内置了 windows 的告警规则,但因为 linux、windows 大部分指标都是一样的,就不建议为 windows 单独管理一份告警规则了。 16 | -------------------------------------------------------------------------------- /center/cconf/plugin.go: -------------------------------------------------------------------------------- 1 | package cconf 2 | 3 | var Plugins = []Plugin{ 4 | { 5 | Id: 1, 6 | Category: "timeseries", 7 | Type: "prometheus", 8 | TypeName: "Prometheus Like", 9 | }, 10 | { 11 | Id: 2, 12 | Category: "logging", 13 | Type: "elasticsearch", 14 | TypeName: "Elasticsearch", 15 | }, 16 | { 17 | Id: 3, 18 | Category: "loki", 19 | Type: "loki", 20 | TypeName: "Loki", 21 | }, 22 | { 23 | Id: 4, 24 | Category: "timeseries", 25 | Type: "tdengine", 26 | TypeName: "TDengine", 27 | }, 28 | } 29 | -------------------------------------------------------------------------------- /center/router/router_dashboard.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | type ChartPure struct { 4 | Configs string `json:"configs"` 5 | Weight int `json:"weight"` 6 | } 7 | 8 | type ChartGroupPure struct { 9 | Name string `json:"name"` 10 | Weight int `json:"weight"` 11 | Charts []ChartPure `json:"charts"` 12 | } 13 | 14 | type DashboardPure struct { 15 | Name string `json:"name"` 16 | Tags string `json:"tags"` 17 | Configs string `json:"configs"` 18 | ChartGroups []ChartGroupPure `json:"chart_groups"` 19 | } 20 | -------------------------------------------------------------------------------- /integrations/NVIDIA/collect/nvidia_smi/nvidia_smi.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # exec local command 5 | # e.g. nvidia_smi_command = "nvidia-smi" 6 | nvidia_smi_command = "" 7 | 8 | # exec remote command 9 | # nvidia_smi_command = "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null SSH_USER@SSH_HOST nvidia-smi" 10 | 11 | # Comma-separated list of the query fields. 12 | # You can find out possible fields by running `nvidia-smi --help-query-gpus`. 13 | # The value `AUTO` will automatically detect the fields to query. 
14 | query_field_names = "AUTO" -------------------------------------------------------------------------------- /alert/naming/leader.go: -------------------------------------------------------------------------------- 1 | package naming 2 | 3 | import ( 4 | "sort" 5 | 6 | "github.com/toolkits/pkg/logger" 7 | ) 8 | 9 | func (n *Naming) IamLeader() bool { 10 | if !n.ctx.IsCenter { 11 | return false 12 | } 13 | 14 | servers, err := n.ActiveServersByEngineName() 15 | if err != nil { 16 | logger.Errorf("failed to get active servers: %v", err) 17 | return false 18 | } 19 | 20 | if len(servers) == 0 { 21 | logger.Errorf("active servers empty") 22 | return false 23 | } 24 | 25 | sort.Strings(servers) 26 | 27 | return n.heartbeatConfig.Endpoint == servers[0] 28 | } 29 | -------------------------------------------------------------------------------- /integrations/XSKYApi/collect/xskyapi/xskyapi.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | # 4 | [[instances]] 5 | # # append some labels for series 6 | # labels = { region="cloud", product="n9e" } 7 | 8 | # # interval = global.interval * interval_times 9 | # interval_times = 1 10 | 11 | ## must be one of oss/gfs/eus 12 | dss_type = "oss" 13 | 14 | ## URL of each server in the service's cluster 15 | servers = [ 16 | #"http://x.x.x.x:xx" 17 | ] 18 | 19 | ## Set response_timeout (default 5 seconds) 20 | response_timeout = "5s" 21 | 22 | xms_auth_tokens = [ 23 | #"xxxxxxxxxxxxxxx" 24 | ] 25 | 26 | -------------------------------------------------------------------------------- /integrations/N9E/markdown/README.md: -------------------------------------------------------------------------------- 1 | # N9E 2 | 3 | 夜莺V5版本分两个组件,n9e-webapi 和 n9e-server,都通过 `/metrics` 接口暴露了 Prometheus 协议的监控数据。夜莺V6版本默认只有一个组件,就是 n9e,也通过 `/metrics` 接口暴露了 Prometheus 协议的监控数据。如果使用边缘机房部署方案,会用到 n9e-edge,n9e-edge 也通过 `/metrics` 接口暴露了 Prometheus 协议的监控数据。 4 | 5 | 所以,通过 categraf 的 prometheus 插件即可采集夜莺的监控数据。 6 | 7 | ## 采集配置 8 | 9 | categraf 的 `conf/input.prometheus/prometheus.toml` 10 | 11 | ```toml 12 | [[instances]] 13 | urls = [ 14 | "http://IP:17000/metrics" 15 | ] 16 | labels = {job="n9e"} 17 | ``` 18 | 19 | ## Dashboard 20 | 21 | 夜莺内置了两个 N9E 仪表盘,n9e_server 是给 V5 版本用的,n9e_v6 是给 V6 版本用的。 22 | 23 | -------------------------------------------------------------------------------- /integrations/ZooKeeper/collect/zookeeper/zookeeper.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | [[instances]] 5 | # cluster_name = "dev-zk-cluster" 6 | # addresses = "127.0.0.1:2181" 7 | # timeout = 10 8 | 9 | # important! 
use global unique string to specify instance 10 | # labels = { instance="n9e-10.2.3.4:2181" } 11 | 12 | ## Optional TLS Config 13 | # use_tls = false 14 | # tls_min_version = "1.2" 15 | # tls_ca = "/etc/categraf/ca.pem" 16 | # tls_cert = "/etc/categraf/cert.pem" 17 | # tls_key = "/etc/categraf/key.pem" 18 | ## Use TLS but skip chain & host verification 19 | # insecure_skip_verify = true -------------------------------------------------------------------------------- /integrations/Jolokia_Agent/collect/jolokia_agent/zookeeper.toml: -------------------------------------------------------------------------------- 1 | [[instances]] 2 | urls = ["http://localhost:8080/jolokia"] 3 | name_prefix = "zk_" 4 | 5 | [[instances.metric]] 6 | name = "quorum" 7 | mbean = "org.apache.ZooKeeperService:name0=*" 8 | tag_keys = ["name0"] 9 | 10 | [[instances.metric]] 11 | name = "leader" 12 | mbean = "org.apache.ZooKeeperService:name0=*,name1=*,name2=Leader" 13 | tag_keys = ["name1"] 14 | 15 | [[instances.metric]] 16 | name = "follower" 17 | mbean = "org.apache.ZooKeeperService:name0=*,name1=*,name2=Follower" 18 | tag_keys = ["name1"] 19 | -------------------------------------------------------------------------------- /integrations/PHP/markdown/README.md: -------------------------------------------------------------------------------- 1 | # PHP-FPM 2 | 3 | *PHP-FPM* (PHP FastCGI Process Manager) 监控采集插件,由telegraf的phpfpm改造而来。 4 | 5 | 该插件需要更改phpfpm的配置文件,开启 *pm.status_path*配置项 6 | ``` 7 | pm.status_path = /status 8 | ``` 9 | 10 | 11 | ## Configuration 12 | 13 | 请参考配置[示例](https://github.com/flashcatcloud/categraf/blob/main/conf/input.phpfpm/phpfpm.toml)文件 14 | 15 | ### 注意事项: 16 | 1. 如下配置 仅生效于HTTP的url 17 | - response_timeout 18 | - username & password 19 | - headers 20 | - TLS config 21 | 2. 如果使用 Unix socket,需要保证 categraf 和 socket path 在同一个主机上,且 categraf 运行用户拥有读取该 path 的权限。 22 | ## 监控大盘和告警规则 23 | 24 | 待更新... -------------------------------------------------------------------------------- /pushgw/router/ident_stats.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/ccfos/nightingale/v6/memsto" 7 | ) 8 | 9 | var IdentStats *memsto.IdentCountCacheType 10 | 11 | func init() { 12 | IdentStats = memsto.NewIdentCountCache() 13 | } 14 | 15 | func (rt *Router) ReportIdentStats() (interface{}, bool) { 16 | for { 17 | time.Sleep(60 * time.Second) 18 | m := IdentStats.GetsAndFlush() 19 | for k, v := range m { 20 | count := v.Count 21 | if count > rt.Pushgw.IdentStatsThreshold { 22 | CounterSampleReceivedByIdent.WithLabelValues(k).Add(float64(count)) 23 | } 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /cli/upgrade/readme.md: -------------------------------------------------------------------------------- 1 | # v5 升级 v6 手册 2 | 0. 操作之前,记得备注下数据库! 3 | 4 | 1. 需要先将你正在使用的夜莺数据源表结构更新到和 v5.15.0 一致,[release](https://github.com/ccfos/nightingale/releases) 页面有每个版本表结构的更新说明,可以根据你正在使用的版本,按照说明,逐个执行的更新表结构的语句 5 | 6 | 2. 解压 n9e 安装包,导入 upgrade.sql 到 n9e_v5 数据库 7 | ``` 8 | mysql -h 127.0.0.1 -u root -p1234 < cli/upgrade/upgrade.sql 9 | ``` 10 | 11 | 3. 执行 n9e-cli 完成数据库表结构升级, webapi.conf 为 v5 版本 n9e-webapi 正在使用的配置文件 12 | ``` 13 | ./n9e-cli --upgrade --config webapi.conf 14 | ``` 15 | 16 | 4. 修改 n9e 配置文件中的数据库为 n9e_v5,启动 n9e 进程 17 | ``` 18 | nohup ./n9e &> n9e.log & 19 | ``` 20 | 21 | 5. 
n9e 监听的端口为 17000,需要将之前的 web 端口和数据上报的端口,都调整为 17000 -------------------------------------------------------------------------------- /integrations/Whois/collect/whois/whois.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | #interval = 3600 3 | 4 | #[[instances]] 5 | ## Used to collect domain name information. 6 | #domain = "baidu.com" 7 | 8 | ## append some labels for series 9 | #labels = { region="n9e", product="test1" } 10 | 11 | ## interval = global.interval * interval_times 12 | #interval_times = 1 13 | 14 | 15 | #[[instances]] 16 | ## Used to collect domain name information. 17 | #domain = "google.com" 18 | 19 | ## append some labels for series 20 | #labels = { region="n9e", product="test2" } 21 | 22 | ## interval = global.interval * interval_times 23 | #interval_times = 1 24 | 25 | -------------------------------------------------------------------------------- /integrations/Redis/collect/redis_sentinel/redis_sentinel.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | [[instances]] 5 | # [protocol://][:password]@address[:port] 6 | # e.g. servers = ["tcp://localhost:26379"] 7 | servers = [] 8 | 9 | # # interval = global.interval * interval_times 10 | # interval_times = 1 11 | # add some dimension data by labels 12 | # labels = {} 13 | 14 | ## Optional TLS Config 15 | # use_tls = false 16 | # tls_min_version = "1.2" 17 | # tls_ca = "/etc/categraf/ca.pem" 18 | # tls_cert = "/etc/categraf/cert.pem" 19 | # tls_key = "/etc/categraf/key.pem" 20 | ## Use TLS but skip chain & host verification 21 | # insecure_skip_verify = true 22 | -------------------------------------------------------------------------------- /pkg/hash/hash_md5.go: -------------------------------------------------------------------------------- 1 | package hash 2 | 3 | import ( 4 | prommodel "github.com/prometheus/common/model" 5 | "github.com/toolkits/pkg/str" 6 | ) 7 | 8 | func GetHash2(m prommodel.Metric, ref string) string { 9 | var s string 10 | for k, v := range m { 11 | s += "/" 12 | s += string(k) 13 | s += "/" 14 | s += string(v) 15 | } 16 | s += "/" 17 | s += ref 18 | return str.MD5(s) 19 | } 20 | 21 | func GetTagHash2(m prommodel.Metric) string { 22 | var s string 23 | for k, v := range m { 24 | if k == "__name__" { 25 | continue 26 | } 27 | 28 | s += "/" 29 | s += string(k) 30 | s += "/" 31 | s += string(v) 32 | } 33 | return str.MD5(s) 34 | } 35 | -------------------------------------------------------------------------------- /integrations/XSKYApi/markdown/README.md: -------------------------------------------------------------------------------- 1 | # XSKY Api 2 | 3 | XSKY api 4 | 5 | ## Configations 6 | 7 | ```toml 8 | # # collect interval 9 | # interval = 15 10 | # 11 | [[instances]] 12 | # # append some labels for series 13 | # labels = { region="cloud", product="n9e" } 14 | 15 | # # interval = global.interval * interval_times 16 | # interval_times = 1 17 | 18 | ## must be one of oss/gfs/eus 19 | dss_type = "oss" 20 | 21 | ## URL of each server in the service's cluster 22 | servers = [ 23 | #"http://x.x.x.x:xx" 24 | ] 25 | 26 | ## Set response_timeout (default 5 seconds) 27 | response_timeout = "5s" 28 | 29 | xms_auth_tokens = [ 30 | #"xxxxxxxxxxxxxxx" 31 | ] 32 | 33 | 34 | ``` -------------------------------------------------------------------------------- /integrations/RabbitMQ/markdown/README.md: 
-------------------------------------------------------------------------------- 1 | # RabbitMQ 2 | 3 | 高版本(3.8以上版本)的 RabbitMQ,已经内置支持了暴露 Prometheus 协议的监控数据。所以,直接使用 categraf 的 prometheus 插件即可采集。开启 RabbitMQ Prometheus 访问: 4 | 5 | ```bash 6 | rabbitmq-plugins enable rabbitmq_prometheus 7 | ``` 8 | 9 | 启用成功的话,rabbitmq 默认会在 15692 端口起监听,访问 `http://localhost:15692/metrics` 即可看到符合 prometheus 协议的监控数据。 10 | 11 | 如果低于 3.8 的版本,还是需要使用 categraf 的 rabbitmq 插件来采集监控数据。 12 | 13 | ## 告警规则 14 | 15 | 夜莺内置了 RabbitMQ 的告警规则,克隆到自己的业务组下即可使用。 16 | 17 | ## 仪表盘 18 | 19 | 夜莺内置了 RabbitMQ 的仪表盘,克隆到自己的业务组下即可使用。`rabbitmq_v3.8_gt` 是大于等于 3.8 版本的仪表盘,`rabbitmq_v3.8_lt` 是小于 3.8 版本的仪表盘。 20 | 21 | ![20230802082542](https://download.flashcat.cloud/ulric/20230802082542.png) 22 | -------------------------------------------------------------------------------- /integrations/Ceph/markdown/README.md: -------------------------------------------------------------------------------- 1 | # ceph plugin 2 | 3 | 开启 ceph prometheus 支持 4 | 5 | ```bash 6 | ceph mgr module enable prometheus 7 | ``` 8 | 9 | ## 采集配置 10 | 11 | 既然 ceph 可以暴露 prometheus 协议的 metrics 数据,则直接使用 prometheus 插件抓取即可。 12 | 13 | categraf 配置文件:`conf/input.prometheus/prometheus.toml` 14 | 15 | ```yaml 16 | [[instances]] 17 | urls = [ 18 | "http://192.168.11.181:9283/metrics" 19 | ] 20 | labels = {service="ceph",cluster="ceph-cluster-001"} 21 | ``` 22 | 23 | 24 | ## 仪表盘效果 25 | 26 | 夜莺内置仪表盘中已经内置了 ceph 的仪表盘,导入即可使用。 27 | 28 | ![20230801152445](https://download.flashcat.cloud/ulric/20230801152445.png) 29 | 30 | ## 告警规则 31 | 32 | 夜莺内置告警规则中已经内置了 ceph 的告警规则,导入即可使用。 33 | 34 | ![20230801152431](https://download.flashcat.cloud/ulric/20230801152431.png) -------------------------------------------------------------------------------- /alert/dispatch/log.go: -------------------------------------------------------------------------------- 1 | package dispatch 2 | 3 | import ( 4 | "github.com/ccfos/nightingale/v6/models" 5 | 6 | "github.com/toolkits/pkg/logger" 7 | ) 8 | 9 | func LogEvent(event *models.AlertCurEvent, location string, err ...error) { 10 | status := "triggered" 11 | if event.IsRecovered { 12 | status = "recovered" 13 | } 14 | 15 | message := "" 16 | if len(err) > 0 && err[0] != nil { 17 | message = "error_message: " + err[0].Error() 18 | } 19 | 20 | logger.Infof( 21 | "event(%s %s) %s: rule_id=%d sub_id:%d cluster:%s %v%s@%d %s", 22 | event.Hash, 23 | status, 24 | location, 25 | event.RuleId, 26 | event.SubRuleId, 27 | event.Cluster, 28 | event.TagsJSON, 29 | event.TriggerValue, 30 | event.TriggerTime, 31 | message, 32 | ) 33 | } 34 | -------------------------------------------------------------------------------- /integrations/AutoMQ/markdown/overview.md: -------------------------------------------------------------------------------- 1 | ## 前言 2 | 3 | AuthMQ 官方文档提供了指标吐出方式以及和监控系统的整合方式,具体可以参考[AutoMQ](https://docs.automq.com/zh/docs/automq-opensource/LkwkwdQlwizjqckhj0dcc2IdnDh)。 4 | 5 | ## 推荐方式 6 | 7 | 建议采用 AutoMQ 文档中的方案二:使用 Prometheus OTLP Receiver 的方式,把所有的指标都收集到 OTel Collector 中,然后使用 Prometheus 或者 Categraf 直接去拉取数据即可。假如使用 Categraf,就是使用 prometheus 插件去拉取数据,比如我们为 prometheus 插件提供一个单独的 automq.toml 的配置文件:`conf/input.prometheus/automq.toml` ,内容如下: 8 | 9 | ```toml 10 | [[instances]] 11 | urls = [ 12 | "http://:/metrics" 13 | ] 14 | 15 | url_label_key = "otel_collector" 16 | url_label_value = "{{.Host}}" 17 | ``` 18 | 19 | 注意,url_label_key 一般都是指定为 instance,但是这里故意指定为其他字符串,是因为 AutoMQ 原始的指标中包含了 instance 标签,为了避免冲突,所以指定为其他字符串。 20 | 21 | 
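The RabbitMQ, Ceph and AutoMQ notes above all reduce to the same mechanic: the target exposes a Prometheus text-format `/metrics` endpoint and categraf's prometheus plugin pulls it on an interval. For readers unfamiliar with that pull step, here is a minimal, self-contained Go sketch — not categraf code — that fetches one endpoint and lists the metric families it exposes; the URL is a placeholder (RabbitMQ's default Prometheus port is 15692):

```go
package main

import (
	"fmt"
	"log"
	"net/http"

	"github.com/prometheus/common/expfmt"
)

func main() {
	// Placeholder endpoint; e.g. RabbitMQ serves :15692/metrics once the
	// rabbitmq_prometheus plugin is enabled.
	resp, err := http.Get("http://localhost:15692/metrics")
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// Parse the Prometheus text exposition format into metric families.
	var parser expfmt.TextParser
	families, err := parser.TextToMetricFamilies(resp.Body)
	if err != nil {
		log.Fatal(err)
	}

	for name, mf := range families {
		fmt.Printf("%s: %d series\n", name, len(mf.GetMetric()))
	}
}
```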
-------------------------------------------------------------------------------- /integrations/Dns_Query/collect/dns_query/dns_query.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | [[instances]] 5 | # # append some labels for series 6 | # labels = { region="cloud", product="n9e" } 7 | 8 | # # interval = global.interval * interval_times 9 | # interval_times = 1 10 | 11 | # # 12 | auto_detect_local_dns_server = false 13 | 14 | ## servers to query 15 | # servers = ["8.8.8.8"] 16 | servers = [] 17 | 18 | ## Network is the network protocol name. 19 | # network = "udp" 20 | 21 | ## Domains or subdomains to query. 22 | # domains = ["."] 23 | 24 | ## Query record type. 25 | ## Possible values: A, AAAA, CNAME, MX, NS, PTR, TXT, SOA, SPF, SRV. 26 | # record_type = "A" 27 | 28 | ## Dns server port. 29 | # port = 53 30 | 31 | ## Query timeout in seconds. 32 | # timeout = 2 -------------------------------------------------------------------------------- /integrations/Netstat_Filter/markdown/README.md: -------------------------------------------------------------------------------- 1 | # netstat_filter 2 | 3 | 该插件采集网络连接情况,并根据用户条件进行过滤统计,以达到监控用户关心链接情况 4 | ## 指标列表 5 | tcp_established 6 | tcp_syn_sent 7 | tcp_syn_recv 8 | tcp_fin_wait1 9 | tcp_fin_wait2 10 | tcp_time_wait 11 | tcp_close 12 | tcp_close_wait 13 | tcp_last_ack 14 | tcp_listen 15 | tcp_closing 16 | tcp_none 17 | tcp_send_queue 18 | tcp_recv_queue 19 | 20 | ## 功能说明 21 | 对源IP、源端口、目标IP和目标端口过滤后进行网卡recv-Q、send-Q进行采集,该指标可以很好反应出指定连接的质量,例如rtt时间过长,导致收到服务端ack确认很慢就会使send-Q长期大于0,可以及时通过监控发现,从而提前优化网络或程序 22 | 23 | 当过滤结果为多个连接时会将send和recv值进行加和 24 | 例如: 25 | 配置文件``raddr_port = 11883`` 26 | 当本地和不同IP的11883都有连接建立的情况下,会将多条连接的结果进行加和。或在并发多连接的情况下,会合并加合,总之过滤的越粗略被加合数就会越多。 27 | 28 | 多条规则请复制``[[instances]]``进行配置 29 | 30 | ## 注意事项 31 | netstat_filter_tcp_send_queue和netstat_filter_tcp_recv_queue指标目前只支持linux。windows用户默认为0。 32 | -------------------------------------------------------------------------------- /integrations/cAdvisor/collect/cadvisor/cadvisor.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | [[instances]] 5 | # url = "https://1.2.3.4:10250" 6 | # type = "kubelet" 7 | ## url = "http://1.2.3.4:8080/metrics" 8 | ## type = "cadvisor" 9 | 10 | # url_label_key = "instance" 11 | # url_label_value = "{{.Host}}" 12 | # bearer_token_string = "eyJlonglongxxxx.eyJlonglongyyyy.oQsXlonglongZZZ" 13 | ## bearer_token_file = "/path/to/token/file" 14 | 15 | # ignore_label_keys = ["id","name", "container_label*"] 16 | ## choose_label_keys = ["id"] 17 | 18 | # timeout = "3s" 19 | 20 | # use_tls = true 21 | ## tls_min_version = "1.2" 22 | ## tls_ca = "/etc/categraf/ca.pem" 23 | ## tls_cert = "/etc/categraf/cert.pem" 24 | ## tls_key = "/etc/categraf/key.pem" 25 | ## Use TLS but skip chain & host verification 26 | ## insecure_skip_verify = true -------------------------------------------------------------------------------- /models/chart_share.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import "github.com/ccfos/nightingale/v6/pkg/ctx" 4 | 5 | type ChartShare struct { 6 | Id int64 `json:"id" gorm:"primaryKey"` 7 | Cluster string `json:"cluster"` 8 | DatasourceId int64 `json:"datasource_id"` 9 | Configs string `json:"configs"` 10 | CreateBy string `json:"create_by"` 11 | CreateAt int64 `json:"create_at"` 12 | } 13 | 14 | func (cs *ChartShare) 
TableName() string { 15 | return "chart_share" 16 | } 17 | 18 | func (cs *ChartShare) Add(ctx *ctx.Context) error { 19 | return Insert(ctx, cs) 20 | } 21 | 22 | func ChartShareGetsByIds(ctx *ctx.Context, ids []int64) ([]ChartShare, error) { 23 | var lst []ChartShare 24 | if len(ids) == 0 { 25 | return lst, nil 26 | } 27 | 28 | err := DB(ctx).Where("id in ?", ids).Order("id").Find(&lst).Error 29 | return lst, err 30 | } 31 | -------------------------------------------------------------------------------- /alert/dispatch/notify_channel.go: -------------------------------------------------------------------------------- 1 | package dispatch 2 | 3 | // NotifyChannels channelKey -> bool 4 | type NotifyChannels map[string]bool 5 | 6 | func NewNotifyChannels(channels []string) NotifyChannels { 7 | nc := make(NotifyChannels) 8 | for _, ch := range channels { 9 | nc[ch] = true 10 | } 11 | return nc 12 | } 13 | 14 | func (nc NotifyChannels) OrMerge(other NotifyChannels) { 15 | nc.merge(other, func(a, b bool) bool { return a || b }) 16 | } 17 | 18 | func (nc NotifyChannels) AndMerge(other NotifyChannels) { 19 | nc.merge(other, func(a, b bool) bool { return a && b }) 20 | } 21 | 22 | func (nc NotifyChannels) merge(other NotifyChannels, f func(bool, bool) bool) { 23 | if other == nil { 24 | return 25 | } 26 | for k, v := range other { 27 | if curV, has := nc[k]; has { 28 | nc[k] = f(curV, v) 29 | } else { 30 | nc[k] = v 31 | } 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /fe.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cp -f ./docker/initsql/a-n9e.sql n9e.sql 4 | 5 | if [ ! -d "./pub" ]; then 6 | TAG=$(curl -sX GET https://api.github.com/repos/n9e/fe/releases/latest | awk '/tag_name/{print $4;exit}' FS='[""]') 7 | 8 | if ! curl -o n9e-fe-${TAG}.tar.gz -L https://github.com/n9e/fe/releases/download/${TAG}/n9e-fe-${TAG}.tar.gz; then 9 | echo "failed to download n9e-fe-${TAG}.tar.gz!" 10 | exit 1 11 | fi 12 | 13 | if ! tar zxf n9e-fe-${TAG}.tar.gz; then 14 | echo "failed to untar n9e-fe-${TAG}.tar.gz!" 15 | exit 2 16 | fi 17 | fi 18 | 19 | GOPATH=$(go env GOPATH) 20 | GOPATH=${GOPATH:-/home/runner/go} 21 | 22 | # Embed files into a go binary 23 | # go install github.com/rakyll/statik 24 | if ! $GOPATH/bin/statik -src=./pub -dest=./front; then 25 | echo "failed to embed files into a go binary!" 
26 | exit 4 27 | fi 28 | -------------------------------------------------------------------------------- /integrations/MinIO/markdown/README.md: -------------------------------------------------------------------------------- 1 | # MinIO 2 | 3 | 参考 [使用 Prometheus 采集 MinIO 指标](https://min.io/docs/minio/linux/operations/monitoring/collect-minio-metrics-using-prometheus.html?ref=docs-redirect#minio-metrics-collect-using-prometheus) 4 | 5 | 开启 MinIO Prometheus 访问; 6 | 7 | ```bash 8 | # 启动 MinIO 服务的时候加入下面的变量: 9 | MINIO_PROMETHEUS_AUTH_TYPE=public 10 | ``` 11 | 12 | ## 采集配置 13 | 14 | categraf 的 `conf/input.prometheus/prometheus.toml` 15 | 16 | ```toml 17 | [[instances]] 18 | urls = [ 19 | "http://192.168.1.188:9000/minio/v2/metrics/cluster" 20 | ] 21 | labels = {job="minio-cluster"} 22 | ``` 23 | 24 | ## Dashboard 25 | 26 | 夜莺内置了 MinIO 的仪表盘,克隆到自己的业务组下即可使用。 27 | 28 | ![20230801170735](https://download.flashcat.cloud/ulric/20230801170735.png) 29 | 30 | ## Alerts 31 | 32 | 夜莺内置了 MinIO 的告警规则,克隆到自己的业务组下即可使用。 33 | 34 | ![20230801170725](https://download.flashcat.cloud/ulric/20230801170725.png) -------------------------------------------------------------------------------- /cmd/cli/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "os" 7 | 8 | "github.com/ccfos/nightingale/v6/cli" 9 | "github.com/ccfos/nightingale/v6/pkg/version" 10 | ) 11 | 12 | var ( 13 | upgrade = flag.Bool("upgrade", false, "Upgrade the database.") 14 | showVersion = flag.Bool("version", false, "Show version.") 15 | configFile = flag.String("config", "", "Specify webapi.conf of v5.x version") 16 | ) 17 | 18 | func main() { 19 | flag.Parse() 20 | 21 | if *showVersion { 22 | fmt.Println(version.Version) 23 | os.Exit(0) 24 | } 25 | 26 | if *upgrade { 27 | if *configFile == "" { 28 | fmt.Println("Please specify the configuration directory.") 29 | os.Exit(1) 30 | } 31 | 32 | err := cli.Upgrade(*configFile) 33 | if err != nil { 34 | fmt.Println(err) 35 | os.Exit(1) 36 | } 37 | fmt.Print("Upgrade successfully.") 38 | os.Exit(0) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /integrations/AliYun/collect/aliyun/cloud.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 60 3 | [[instances]] 4 | # # endpoint region 参考 https://help.aliyun.com/document_detail/28616.html#section-72p-xhs-6qt 5 | # region="cn-beijing" 6 | # endpoint="metrics.cn-hangzhou.aliyuncs.com" 7 | # access_key_id="your-access-key-id" 8 | # access_key_secret="your-access-key-secret" 9 | # interval_times=4 10 | # delay="10m" 11 | # period="60s" 12 | # # namespace 参考 https://help.aliyun.com/document_detail/163515.htm?spm=a2c4g.11186623.0.0.44d65c58mhgNw3 13 | # namespaces=["acs_ecs_dashboard"] 14 | # [[instances.metric_filters]] 15 | # # metric name 参考 https://help.aliyun.com/document_detail/163515.htm?spm=a2c4g.11186623.0.0.401d15c73Z0dZh 16 | # # 参考页面中的Metric Id 填入下面的metricName ,页面中包含中文的Metric Name对应接口中的Description 17 | # metric_names=["cpu_cores","vm.TcpCount"] 18 | # namespace="" 19 | # ratelimit=25 20 | # catch_ttl="1h" 21 | # timeout="5s" 22 | -------------------------------------------------------------------------------- /integrations/Tomcat/collect/tomcat/tomcat.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # Gather metrics from the Tomcat server 
status page. 5 | [[instances]] 6 | ## URL of the Tomcat server status 7 | # url = "http://127.0.0.1:8080/manager/status/all?XML=true" 8 | url = "" 9 | 10 | ## HTTP Basic Auth Credentials 11 | # username = "tomcat" 12 | # password = "s3cret" 13 | 14 | ## Request timeout 15 | # timeout = "5s" 16 | 17 | # # interval = global.interval * interval_times 18 | # interval_times = 1 19 | 20 | # important! use global unique string to specify instance 21 | # labels = { instance="192.168.1.2:8080", url="-" } 22 | 23 | ## Optional TLS Config 24 | # use_tls = false 25 | # tls_min_version = "1.2" 26 | # tls_ca = "/etc/categraf/ca.pem" 27 | # tls_cert = "/etc/categraf/cert.pem" 28 | # tls_key = "/etc/categraf/key.pem" 29 | ## Use TLS but skip chain & host verification 30 | # insecure_skip_verify = true 31 | -------------------------------------------------------------------------------- /memsto/stat.go: -------------------------------------------------------------------------------- 1 | package memsto 2 | 3 | import "github.com/prometheus/client_golang/prometheus" 4 | 5 | type Stats struct { 6 | GaugeCronDuration *prometheus.GaugeVec 7 | GaugeSyncNumber *prometheus.GaugeVec 8 | } 9 | 10 | func NewSyncStats() *Stats { 11 | GaugeCronDuration := prometheus.NewGaugeVec(prometheus.GaugeOpts{ 12 | Namespace: "n9e", 13 | Subsystem: "cron", 14 | Name: "duration", 15 | Help: "Cron method use duration, unit: ms.", 16 | }, []string{"name"}) 17 | 18 | GaugeSyncNumber := prometheus.NewGaugeVec(prometheus.GaugeOpts{ 19 | Namespace: "n9e", 20 | Subsystem: "cron", 21 | Name: "sync_number", 22 | Help: "Cron sync number.", 23 | }, []string{"name"}) 24 | 25 | prometheus.MustRegister( 26 | GaugeCronDuration, 27 | GaugeSyncNumber, 28 | ) 29 | 30 | return &Stats{ 31 | GaugeCronDuration: GaugeCronDuration, 32 | GaugeSyncNumber: GaugeSyncNumber, 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /.github/workflows/n9e.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*' 7 | env: 8 | GO_VERSION: 1.18 9 | 10 | jobs: 11 | goreleaser: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout Source Code 15 | uses: actions/checkout@v3 16 | with: 17 | fetch-depth: 0 18 | - name: Setup Go Environment 19 | uses: actions/setup-go@v3 20 | with: 21 | go-version: ${{ env.GO_VERSION }} 22 | - uses: docker/login-action@v2 23 | with: 24 | username: ${{ secrets.DOCKERHUB_USERNAME }} 25 | password: ${{ secrets.DOCKERHUB_TOKEN }} 26 | - name: Run GoReleaser 27 | uses: goreleaser/goreleaser-action@v3 28 | with: 29 | distribution: goreleaser 30 | version: '~> v1' 31 | args: release --rm-dist 32 | env: 33 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 34 | -------------------------------------------------------------------------------- /integrations/Redis/collect/redis/redis.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | [[instances]] 5 | # address = "127.0.0.1:6379" 6 | # username = "" 7 | # password = "" 8 | # pool_size = 2 9 | 10 | # # Optional. Specify redis commands to retrieve values 11 | # commands = [ 12 | # {command = ["get", "sample-key1"], metric = "custom_metric_name1"}, 13 | # {command = ["get", "sample-key2"], metric = "custom_metric_name2"} 14 | # ] 15 | 16 | # # interval = global.interval * interval_times 17 | # interval_times = 1 18 | 19 | # important! 
use global unique string to specify instance 20 | # labels = { instance="n9e-10.2.3.4:6379" } 21 | 22 | ## Optional TLS Config 23 | # use_tls = false 24 | # tls_min_version = "1.2" 25 | # tls_ca = "/etc/categraf/ca.pem" 26 | # tls_cert = "/etc/categraf/cert.pem" 27 | # tls_key = "/etc/categraf/key.pem" 28 | ## Use TLS but skip chain & host verification 29 | # insecure_skip_verify = true 30 | -------------------------------------------------------------------------------- /pkg/ctx/ctx.go: -------------------------------------------------------------------------------- 1 | package ctx 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/ccfos/nightingale/v6/conf" 7 | 8 | "gorm.io/gorm" 9 | ) 10 | 11 | type Context struct { 12 | DB *gorm.DB 13 | CenterApi conf.CenterApi 14 | Ctx context.Context 15 | IsCenter bool 16 | } 17 | 18 | func NewContext(ctx context.Context, db *gorm.DB, isCenter bool, centerApis ...conf.CenterApi) *Context { 19 | var api conf.CenterApi 20 | if len(centerApis) > 0 { 21 | api = centerApis[0] 22 | } 23 | 24 | return &Context{ 25 | Ctx: ctx, 26 | DB: db, 27 | CenterApi: api, 28 | IsCenter: isCenter, 29 | } 30 | } 31 | 32 | // set db to Context 33 | func (c *Context) SetDB(db *gorm.DB) { 34 | c.DB = db 35 | } 36 | 37 | // get context from Context 38 | func (c *Context) GetContext() context.Context { 39 | return c.Ctx 40 | } 41 | 42 | // get db from Context 43 | func (c *Context) GetDB() *gorm.DB { 44 | return c.DB 45 | } 46 | -------------------------------------------------------------------------------- /integrations/HAProxy/collect/haproxy/haproxy.toml: -------------------------------------------------------------------------------- 1 | [[instances]] 2 | # URI on which to scrape HAProxy. 3 | # e.g. 4 | # uri = "http://localhost:5000/baz?stats;csv" 5 | # uri = "http://user:pass@haproxy.example.com/haproxy?stats;csv" 6 | # uri = "unix:/run/haproxy/admin.sock" 7 | uri = "" 8 | 9 | # Flag that enables SSL certificate verification for the scrape URI 10 | ssl_verify = false 11 | 12 | # Comma-separated list of exported server metrics. See http://cbonte.github.io/haproxy-dconv/configuration-1.5.html#9.1 13 | server_metric_fields = "" 14 | 15 | # Comma-separated list of exported server states to exclude. See https://cbonte.github.io/haproxy-dconv/1.8/management.html#9.1, field 17 status 16 | server_exclude_states = "" 17 | 18 | # Timeout for trying to get stats from HAProxy. 
19 | timeout = "5s" 20 | 21 | # Flag that enables using HTTP proxy settings from environment variables ($http_proxy, $https_proxy, $no_proxy) 22 | proxy_from_env = false 23 | -------------------------------------------------------------------------------- /integrations/Oracle/markdown/README.md: -------------------------------------------------------------------------------- 1 | # Oracle plugin 2 | 3 | Oracle 插件,用于监控 Oracle 数据库。默认无法跑在 Windows 上。如果你的 Oracle 部署在 Windows 上,也没问题,使用部署在 Linux 上的 Categraf 远程监控 Windows 上的 Oracle,也行得通。 4 | 5 | Oracle 插件的核心监控原理,就是执行下面 [这些 SQL 语句](https://github.com/flashcatcloud/categraf/blob/main/conf/input.oracle/metric.toml),然后解析出结果,上报到监控服务端。 6 | 7 | 以其中一个为例: 8 | 9 | ```toml 10 | [[metrics]] 11 | mesurement = "activity" 12 | metric_fields = [ "value" ] 13 | field_to_append = "name" 14 | timeout = "3s" 15 | request = ''' 16 | SELECT name, value FROM v$sysstat WHERE name IN ('parse count (total)', 'execute count', 'user commits', 'user rollbacks') 17 | ''' 18 | ``` 19 | 20 | - mesurement:指标类别 21 | - label_fields:作为 label 的字段 22 | - metric_fields:作为 metric 的字段,因为是作为 metric 的字段,所以这个字段的值必须是数字 23 | - field_to_append:表示这个字段附加到 metric_name 后面,作为 metric_name 的一部分 24 | - timeout:超时时间 25 | - request:具体查询的 SQL 语句 26 | 27 | 如果你想监控的指标,默认没有采集,只需要增加自定义的 `[[metrics]]` 配置即可。 28 | -------------------------------------------------------------------------------- /integrations/GoogleCloud/markdown/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # GCP 指标获取插件 4 | 5 | ## 需要权限 6 | ```shell 7 | https://www.googleapis.com/auth/monitoring.read 8 | ``` 9 | 10 | ## 配置 11 | ```toml 12 | #采集周期,建议 >= 1分钟 13 | interval=60 14 | [[instances]] 15 | #配置 project_id 16 | project_id="your-project-id" 17 | #配置认证的key文件 18 | credentials_file="/path/to/your/key.json" 19 | #或者配置认证的JSON 20 | credentials_json="xxx" 21 | 22 | # 指标的end time = now - delay 23 | #delay="2m" 24 | # 指标的start time = now - deley - period 25 | #period="1m" 26 | # 过滤器 27 | #filter="metric.type=\"compute.googleapis.com/instance/cpu/utilization\" AND resource.labels.zone=\"asia-northeast1-a\"" 28 | # 请求超时时间 29 | #timeout="5s" 30 | # 指标列表的缓存时长 ,filter为空时 启用 31 | #cache_ttl="1h" 32 | 33 | # 给gce的instance_name 取个别名,放到label中 34 | #gce_host_tag="xxx" 35 | # 每次最多有多少请求同时发起 36 | #request_inflight=30 37 | 38 | # request_inflight 取值(0,100] 39 | # 想配置更大的值 ,前提是你知道你在做什么 40 | force_request_inflight= 200 41 | ``` 42 | -------------------------------------------------------------------------------- /integrations/IPMI/collect/ipmi/conf.toml: -------------------------------------------------------------------------------- 1 | # Read metrics from the bare metal servers via freeipmi 2 | [[instances]] 3 | # target指定是本地采集还是远程采集 4 | #target="localhost" 5 | # 指定采集的用户名和密码,这里务必保证ipmi命令能获取正确输出,不是网上查到一个用户名 密码就可以。 6 | #user = "user" 7 | #pass = "1234" 8 | 9 | # ipmi协议版本,支持1.5 和 2.0 10 | #driver = "LAN_2_0" 11 | 12 | # 指定特权用户名 13 | #privilege = "user" 14 | 15 | ## session-timeout, ms 16 | #timeout = 100000 17 | 18 | # 支持的采集器 bmc, bmc-watchdog, ipmi, chassis, dcmi, sel,sm-lan-mode 19 | # 默认使用 bmc, ipmi, chassis和dcmi,建议保持下列配置便于仪表盘更好的展示 20 | collectors = [ "bmc", "ipmi", "chassis", "sel", "dcmi"] 21 | 22 | # 不关注的传感器,指定id 排除掉 23 | #exclude_sensor_ids = [ 2, 29, 32, 50, 52, 55 ] 24 | 25 | # 如果你想使用定制化的参数覆盖内置的命令,可以修改以下内容; 建议保持注释 26 | #[instances.collector_cmd] 27 | #ipmi = "sudo" 28 | #sel = "sudo" 29 | #[instances.default_args] 30 | #ipmi = [ "--bridge-sensors" ] 31 | #[instances.custom_args] 32 | #ipmi = [ 
"--bridge-sensors" ] 33 | #sel = [ "ipmi-sel" ] -------------------------------------------------------------------------------- /center/cconf/conf.go: -------------------------------------------------------------------------------- 1 | package cconf 2 | 3 | import "time" 4 | 5 | type Center struct { 6 | Plugins []Plugin 7 | MetricsYamlFile string 8 | OpsYamlFile string 9 | BuiltinIntegrationsDir string 10 | I18NHeaderKey string 11 | MetricDesc MetricDescType 12 | AnonymousAccess AnonymousAccess 13 | UseFileAssets bool 14 | FlashDuty FlashDuty 15 | EventHistoryGroupView bool 16 | } 17 | 18 | type Plugin struct { 19 | Id int64 `json:"id"` 20 | Category string `json:"category"` 21 | Type string `json:"plugin_type"` 22 | TypeName string `json:"plugin_type_name"` 23 | } 24 | 25 | type FlashDuty struct { 26 | Api string 27 | Headers map[string]string 28 | Timeout time.Duration 29 | } 30 | 31 | type AnonymousAccess struct { 32 | PromQuerier bool 33 | AlertDetail bool 34 | } 35 | 36 | func (c *Center) PreCheck() { 37 | if len(c.Plugins) == 0 { 38 | c.Plugins = Plugins 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /docker/compose-host-network/etc-prometheus/prometheus.yml: -------------------------------------------------------------------------------- 1 | # my global config 2 | global: 3 | scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute. 4 | evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute. 5 | # scrape_timeout is set to the global default (10s). 6 | 7 | # Alertmanager configuration 8 | alerting: 9 | alertmanagers: 10 | - static_configs: 11 | - targets: 12 | # - alertmanager:9093 13 | 14 | # Load rules once and periodically evaluate them according to the global 'evaluation_interval'. 15 | rule_files: 16 | # - "first_rules.yml" 17 | # - "second_rules.yml" 18 | 19 | scrape_configs: 20 | # The job name is added as a label `job=` to any timeseries scraped from this config. 21 | - job_name: 'prometheus' 22 | static_configs: 23 | - targets: ['localhost:9090'] 24 | 25 | - job_name: 'nightingale' 26 | static_configs: 27 | - targets: ['localhost:17000'] 28 | -------------------------------------------------------------------------------- /docker/compose-postgres/prometc_vm/prometheus.yml: -------------------------------------------------------------------------------- 1 | # my global config 2 | global: 3 | scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute. 4 | evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute. 5 | # scrape_timeout is set to the global default (10s). 6 | 7 | # A scrape configuration containing exactly one endpoint to scrape: 8 | # Here it's Prometheus itself. 9 | scrape_configs: 10 | # The job name is added as a label `job=` to any timeseries scraped from this config. 11 | - job_name: 'victoriametrics' 12 | # metrics_path defaults to '/metrics' 13 | # scheme defaults to 'http'. 
14 | static_configs: 15 | - targets: ['victoriametrics:8428'] 16 | 17 | - job_name: 'n9e' 18 | # static_configs: 19 | # - targets: ['n9e:17000'] 20 | file_sd_configs: 21 | - files: 22 | - targets.json 23 | 24 | remote_write: 25 | - url: 'http://n9e:17000/prometheus/v1/write' 26 | -------------------------------------------------------------------------------- /models/chart.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import "github.com/ccfos/nightingale/v6/pkg/ctx" 4 | 5 | type Chart struct { 6 | Id int64 `json:"id" gorm:"primaryKey"` 7 | GroupId int64 `json:"group_id"` 8 | Configs string `json:"configs"` 9 | Weight int `json:"weight"` 10 | } 11 | 12 | func (c *Chart) TableName() string { 13 | return "chart" 14 | } 15 | 16 | func ChartsOf(ctx *ctx.Context, chartGroupId int64) ([]Chart, error) { 17 | var objs []Chart 18 | err := DB(ctx).Where("group_id = ?", chartGroupId).Order("weight").Find(&objs).Error 19 | return objs, err 20 | } 21 | 22 | func (c *Chart) Add(ctx *ctx.Context) error { 23 | return Insert(ctx, c) 24 | } 25 | 26 | func (c *Chart) Update(ctx *ctx.Context, selectField interface{}, selectFields ...interface{}) error { 27 | return DB(ctx).Model(c).Select(selectField, selectFields...).Updates(c).Error 28 | } 29 | 30 | func (c *Chart) Del(ctx *ctx.Context) error { 31 | return DB(ctx).Where("id=?", c.Id).Delete(&Chart{}).Error 32 | } 33 | -------------------------------------------------------------------------------- /docker/compose-host-network-metric-log/etc-prometheus/prometheus.yml: -------------------------------------------------------------------------------- 1 | # my global config 2 | global: 3 | scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute. 4 | evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute. 5 | # scrape_timeout is set to the global default (10s). 6 | 7 | # Alertmanager configuration 8 | alerting: 9 | alertmanagers: 10 | - static_configs: 11 | - targets: 12 | # - alertmanager:9093 13 | 14 | # Load rules once and periodically evaluate them according to the global 'evaluation_interval'. 15 | rule_files: 16 | # - "first_rules.yml" 17 | # - "second_rules.yml" 18 | 19 | scrape_configs: 20 | # The job name is added as a label `job=` to any timeseries scraped from this config. 
21 | - job_name: 'prometheus' 22 | static_configs: 23 | - targets: ['localhost:9090'] 24 | 25 | - job_name: 'nightingale' 26 | static_configs: 27 | - targets: ['localhost:17000'] 28 | -------------------------------------------------------------------------------- /integrations/VictoriaMetrics/markdown/README.md: -------------------------------------------------------------------------------- 1 | # VictoriaMetrics 2 | 3 | VictoriaMetrics 既可以单机部署,也可以集群方式部署。不管哪种部署方式,VictoriaMetrics 的进程都会暴露 `/metrics` 接口,通过这个接口暴露 Prometheus 协议的监控数据。 4 | 5 | ## 采集配置 6 | 7 | categraf 的 `conf/input.prometheus/prometheus.toml`。因为 VictoriaMetrics 是暴露的 Prometheus 协议的监控数据,所以使用 categraf 的 prometheus 插件即可采集。 8 | 9 | ```toml 10 | # vmstorage 11 | [[instances]] 12 | urls = [ 13 | "http://127.0.0.1:8482/metrics" 14 | ] 15 | labels = {service="vmstorage"} 16 | 17 | # vmselect 18 | [[instances]] 19 | urls = [ 20 | "http://127.0.0.1:8481/metrics" 21 | ] 22 | 23 | labels = {service="vmselect"} 24 | 25 | # vminsert 26 | [[instances]] 27 | urls = [ 28 | "http://127.0.0.1:8480/metrics" 29 | ] 30 | labels = {service="vminsert"} 31 | ``` 32 | 33 | ## 告警规则 34 | 35 | 夜莺内置了 VictoriaMetrics 的告警规则,克隆到自己的业务组下即可使用。 36 | 37 | ## 仪表盘 38 | 39 | 夜莺内置了 VictoriaMetrics 的仪表盘,克隆到自己的业务组下即可使用。 40 | 41 | ![20230802090606](https://download.flashcat.cloud/ulric/20230802090606.png) 42 | 43 | -------------------------------------------------------------------------------- /docker/compose-postgres/n9eetc_pg/template/dingtalk.tpl: -------------------------------------------------------------------------------- 1 | #### {{if .IsRecovered}}💚{{.RuleName}}{{else}}💔{{.RuleName}}{{end}} 2 | 3 | --- 4 | {{$time_duration := sub now.Unix .FirstTriggerTime }}{{if .IsRecovered}}{{$time_duration = sub .LastEvalTime .FirstTriggerTime }}{{end}} 5 | - **告警级别**: {{.Severity}}级 6 | {{- if .RuleNote}} 7 | - **规则备注**: {{.RuleNote}} 8 | {{- end}} 9 | {{- if not .IsRecovered}} 10 | - **当次触发时值**: {{.TriggerValue}} 11 | - **当次触发时间**: {{timeformat .TriggerTime}} 12 | - **告警持续时长**: {{humanizeDurationInterface $time_duration}} 13 | {{- else}} 14 | {{- if .AnnotationsJSON.recovery_value}} 15 | - **恢复时值**: {{formatDecimal .AnnotationsJSON.recovery_value 4}} 16 | {{- end}} 17 | - **恢复时间**: {{timeformat .LastEvalTime}} 18 | - **告警持续时长**: {{humanizeDurationInterface $time_duration}} 19 | {{- end}} 20 | - **告警事件标签**: 21 | {{- range $key, $val := .TagsMap}} 22 | {{- if ne $key "rulename" }} 23 | - `{{$key}}`: `{{$val}}` 24 | {{- end}} 25 | {{- end}} -------------------------------------------------------------------------------- /integrations/SpringBoot/markdown/README.md: -------------------------------------------------------------------------------- 1 | # SpringBoot 2 | 3 | Java 生态的项目,如果要暴露 metrics 数据,一般可以选择 micrometer,不过 SpringBoot 项目可以直接使用 SpringBoot Actuator 暴露 metrics 数据,Actuator 底层也是使用 micrometer 来实现的,只是使用起来更加简单。 4 | 5 | ## 应用配置 6 | 7 | 在 application.properties 中加入如下配置: 8 | 9 | ```properties 10 | management.endpoint.metrics.enabled=true 11 | management.endpoints.web.exposure.include=* 12 | management.endpoint.prometheus.enabled=true 13 | management.metrics.export.prometheus.enabled=true 14 | ``` 15 | 16 | 完事启动项目,访问 `http://localhost:8080/actuator/prometheus` 即可看到符合 prometheus 协议的监控数据。 17 | 18 | ## 采集配置 19 | 20 | 既然暴露了 Prometheus 协议的监控数据,那通过 categraf prometheus 插件直接采集即可。配置文件是 `conf/input.prometheus/prometheus.toml`。配置样例如下: 21 | 22 | ```toml 23 | [[instances]] 24 | urls = [ 25 | "http://192.168.11.177:8080/actuator/prometheus" 26 | ] 27 | ``` 28 | 29 | ## 仪表盘 30 | 31 | 夜莺内置了一个 
SpringBoot 仪表盘,由网友贡献,克隆到自己的业务组下即可使用,欢迎大家一起来提 PR 完善。 32 | 33 | ![actuator2.0](http://download.flashcat.cloud/uPic/actuator_2.0.png) 34 | -------------------------------------------------------------------------------- /center/sso/sync.go: -------------------------------------------------------------------------------- 1 | package sso 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/ccfos/nightingale/v6/pkg/ctx" 7 | "github.com/toolkits/pkg/logger" 8 | ) 9 | 10 | func (s *SsoClient) SyncSsoUsers(ctx *ctx.Context) { 11 | if err := s.LDAP.SyncAddAndDelUsers(ctx); err != nil { 12 | fmt.Println("failed to sync the addition and deletion of ldap users:", err) 13 | } 14 | 15 | if err := s.LDAP.SyncDelUsers(ctx); err != nil { 16 | fmt.Println("failed to sync deletion of ldap users:", err) 17 | } 18 | 19 | go s.loopSyncSsoUsers(ctx) 20 | } 21 | 22 | func (s *SsoClient) loopSyncSsoUsers(ctx *ctx.Context) { 23 | for { 24 | select { 25 | case <-s.LDAP.Ticker.C: 26 | lc := s.LDAP.Copy() 27 | 28 | if err := lc.SyncAddAndDelUsers(ctx); err != nil { 29 | logger.Warningf("failed to sync the addition and deletion of ldap users: %v", err) 30 | } 31 | 32 | if err := lc.SyncDelUsers(ctx); err != nil { 33 | logger.Warningf("failed to sync deletion of ldap users: %v", err) 34 | } 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /pkg/fasttime/fasttime.go: -------------------------------------------------------------------------------- 1 | package fasttime 2 | 3 | import ( 4 | "sync/atomic" 5 | "time" 6 | ) 7 | 8 | func init() { 9 | go func() { 10 | ticker := time.NewTicker(time.Second) 11 | defer ticker.Stop() 12 | for tm := range ticker.C { 13 | t := uint64(tm.Unix()) 14 | atomic.StoreUint64(¤tTimestamp, t) 15 | } 16 | }() 17 | } 18 | 19 | var currentTimestamp = uint64(time.Now().Unix()) 20 | 21 | // UnixTimestamp returns the current unix timestamp in seconds. 22 | // 23 | // It is faster than time.Now().Unix() 24 | func UnixTimestamp() uint64 { 25 | return atomic.LoadUint64(¤tTimestamp) 26 | } 27 | 28 | // UnixDate returns date from the current unix timestamp. 29 | // 30 | // The date is calculated by dividing unix timestamp by (24*3600) 31 | func UnixDate() uint64 { 32 | return UnixTimestamp() / (24 * 3600) 33 | } 34 | 35 | // UnixHour returns hour from the current unix timestamp. 
36 | // 37 | // The hour is calculated by dividing unix timestamp by 3600 38 | func UnixHour() uint64 { 39 | return UnixTimestamp() / 3600 40 | } 41 | -------------------------------------------------------------------------------- /integrations/Net_Response/metrics/categraf.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": 0, 4 | "uuid": 1717556328185013000, 5 | "collector": "Categraf", 6 | "typ": "Net_Response", 7 | "name": "NET 探测结果状态码", 8 | "unit": "none", 9 | "note": "0 值表示正常,大于 0 就是异常,各个值的含义如下:\n\n- 0: Success\n- 1: Timeout\n- 2: ConnectionFailed\n- 3: ReadFailed\n- 4: StringMismatch", 10 | "lang": "zh_CN", 11 | "expression": "net_response_result_code", 12 | "created_at": 0, 13 | "created_by": "", 14 | "updated_at": 0, 15 | "updated_by": "" 16 | }, 17 | { 18 | "id": 0, 19 | "uuid": 1717556328186975000, 20 | "collector": "Categraf", 21 | "typ": "Net_Response", 22 | "name": "NET 探测耗时", 23 | "unit": "seconds", 24 | "note": "", 25 | "lang": "zh_CN", 26 | "expression": "net_response_response_time", 27 | "created_at": 0, 28 | "created_by": "", 29 | "updated_at": 0, 30 | "updated_by": "" 31 | } 32 | ] -------------------------------------------------------------------------------- /pkg/logx/logx.go: -------------------------------------------------------------------------------- 1 | package logx 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/pkg/errors" 7 | "github.com/toolkits/pkg/logger" 8 | ) 9 | 10 | type Config struct { 11 | Dir string 12 | Level string 13 | Output string 14 | KeepHours uint 15 | RotateNum int 16 | RotateSize uint64 17 | } 18 | 19 | func Init(c Config) (func(), error) { 20 | logger.SetSeverity(c.Level) 21 | 22 | if c.Output == "stderr" { 23 | logger.LogToStderr() 24 | } else if c.Output == "file" { 25 | lb, err := logger.NewFileBackend(c.Dir) 26 | if err != nil { 27 | return nil, errors.WithMessage(err, "NewFileBackend failed") 28 | } 29 | 30 | if c.KeepHours != 0 { 31 | lb.SetRotateByHour(true) 32 | lb.SetKeepHours(c.KeepHours) 33 | } else if c.RotateNum != 0 { 34 | lb.Rotate(c.RotateNum, c.RotateSize*1024*1024) 35 | } else { 36 | return nil, errors.New("KeepHours and Rotatenum both are 0") 37 | } 38 | 39 | logger.SetLogging(c.Level, lb) 40 | } 41 | 42 | return func() { 43 | fmt.Println("logger exiting") 44 | logger.Close() 45 | }, nil 46 | } 47 | -------------------------------------------------------------------------------- /integrations/Nginx/collect/nginx/nginx.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | [[instances]] 5 | ## An array of Nginx stub_status URI to gather stats. 
6 | urls = [ 7 | # "http://192.168.0.216:8000/nginx_status", 8 | # "https://www.baidu.com/ngx_status" 9 | ] 10 | 11 | ## append some labels for series 12 | # labels = { region="cloud", product="n9e" } 13 | 14 | ## interval = global.interval * interval_times 15 | # interval_times = 1 16 | 17 | ## Set response_timeout (default 5 seconds) 18 | response_timeout = "5s" 19 | 20 | ## Whether to follow redirects from the server (defaults to false) 21 | # follow_redirects = false 22 | 23 | ## Optional HTTP Basic Auth Credentials 24 | #username = "admin" 25 | #password = "admin" 26 | 27 | ## Optional headers 28 | # headers = ["X-From", "categraf", "X-Xyz", "abc"] 29 | 30 | ## Optional TLS Config 31 | # use_tls = false 32 | # tls_ca = "/etc/categraf/ca.pem" 33 | # tls_cert = "/etc/categraf/cert.pem" 34 | # tls_key = "/etc/categraf/key.pem" 35 | ## Use TLS but skip chain & host verification 36 | # insecure_skip_verify = false -------------------------------------------------------------------------------- /integrations/IPMI/markdown/README.md: -------------------------------------------------------------------------------- 1 | # IPMI plugin 2 | ipmi插件是从ipmi exporter迁移过来。 基本原理是通过执行ipmi的一系列命令并将命令输出转换为指标,如果ipmi没有配置好,是无法采集到指标的,请务必将ipmi配置好。 3 | 4 | categraf的ipmi插件配置举例如下: 5 | ```toml 6 | # Read metrics from the bare metal servers via freeipmi 7 | [[instances]] 8 | # target指定是本地采集还是远程采集 9 | #target="localhost" 10 | # 指定采集的用户名和密码,这里务必保证ipmi命令能获取正确输出,不是网上查到一个用户名 密码就可以。 11 | #user = "user" 12 | #pass = "1234" 13 | 14 | # ipmi协议版本,支持1.5 和 2.0 15 | #driver = "LAN_2_0" 16 | 17 | # 指定特权用户名 18 | #privilege = "user" 19 | 20 | ## session-timeout, ms 21 | #timeout = 100000 22 | 23 | # 支持的采集器 bmc, bmc-watchdog, ipmi, chassis, dcmi, sel,sm-lan-mode 24 | # 默认使用 bmc, ipmi, chassis和dcmi,建议保持下列配置便于仪表盘更好的展示 25 | collectors = [ "bmc", "ipmi", "chassis", "sel", "dcmi"] 26 | 27 | # 不关注的传感器,指定id 排除掉 28 | #exclude_sensor_ids = [ 2, 29, 32, 50, 52, 55 ] 29 | 30 | # 如果你想使用定制化的参数覆盖内置的命令,可以修改以下内容; 建议保持注释 31 | #[instances.collector_cmd] 32 | #ipmi = "sudo" 33 | #sel = "sudo" 34 | #[instances.default_args] 35 | #ipmi = [ "--bridge-sensors" ] 36 | #[instances.custom_args] 37 | #ipmi = [ "--bridge-sensors" ] 38 | #sel = [ "ipmi-sel" ] 39 | ``` -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.exe 2 | *.exe~ 3 | *.dll 4 | *.dylib 5 | *.test 6 | *.out 7 | *.prof 8 | *.log 9 | *.o 10 | *.a 11 | *.so 12 | *.sw[po] 13 | *.tar.gz 14 | *.[568vq] 15 | [568vq].out 16 | 17 | *.cgo1.go 18 | *.cgo2.c 19 | _cgo_defun.c 20 | _cgo_gotypes.go 21 | _cgo_export.* 22 | _testmain.go 23 | _obj 24 | _test 25 | 26 | /log* 27 | /bin 28 | /out 29 | /build 30 | /dist 31 | /etc/*.local.yml 32 | /etc/*.local.conf 33 | /etc/rsa/* 34 | /etc/plugins/*.local.yml 35 | /etc/script/rules.yaml 36 | /etc/script/alert-rules.json 37 | /etc/script/record-rules.json 38 | /data* 39 | /tarball 40 | /run 41 | /vendor 42 | /tmp 43 | /pub 44 | /n9e 45 | /docker/pub 46 | /docker/n9e 47 | /docker/compose-bridge/mysqldata 48 | /docker/compose-host-network/mysqldata 49 | /docker/compose-host-network-metric-log/mysqldata 50 | /docker/compose-host-network-metric-log/n9e-logs 51 | /docker/compose-postgres/pgdata 52 | /etc.local* 53 | /front/statik/statik.go 54 | /docker/compose-bridge/etc-nightingale/rsa/ 55 | 56 | .alerts 57 | .idea 58 | .index 59 | .vscode 60 | .DS_Store 61 | .cache-loader 62 | .payload 63 | queries.active 64 | 65 | /n9e-* 66 | n9e.sql 67 | 
-------------------------------------------------------------------------------- /models/builtin_cate.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "github.com/ccfos/nightingale/v6/pkg/ctx" 5 | ) 6 | 7 | type BuiltinCate struct { 8 | Id int64 `json:"id" gorm:"primaryKey"` 9 | Name string `json:"name"` 10 | UserId int64 `json:"user_id"` 11 | } 12 | 13 | func (b *BuiltinCate) TableName() string { 14 | return "builtin_cate" 15 | } 16 | 17 | // 创建 builtin_cate 18 | func (b *BuiltinCate) Create(c *ctx.Context) error { 19 | return Insert(c, b) 20 | } 21 | 22 | // 删除 builtin_cate 23 | func BuiltinCateDelete(c *ctx.Context, name string, userId int64) error { 24 | return DB(c).Where("name=? and user_id=?", name, userId).Delete(&BuiltinCate{}).Error 25 | } 26 | 27 | // 根据 userId 获取 builtin_cate 28 | func BuiltinCateGetByUserId(c *ctx.Context, userId int64) (map[string]BuiltinCate, error) { 29 | var builtinCates []BuiltinCate 30 | err := DB(c).Where("user_id=?", userId).Find(&builtinCates).Error 31 | var builtinCatesMap = make(map[string]BuiltinCate) 32 | for _, builtinCate := range builtinCates { 33 | builtinCatesMap[builtinCate.Name] = builtinCate 34 | } 35 | 36 | return builtinCatesMap, err 37 | } 38 | -------------------------------------------------------------------------------- /integrations/ElasticSearch/markdown/README.md: -------------------------------------------------------------------------------- 1 | # elasticsearch plugin 2 | 3 | ElasticSearch 通过 HTTP JSON 的方式暴露了自身的监控指标,通过 categraf 的 [elasticsearch](https://github.com/flashcatcloud/categraf/tree/main/inputs/elasticsearch) 插件抓取。 4 | 5 | 如果是小规模集群,设置 `local=false`,从集群中某一个节点抓取数据,即可拿到整个集群所有节点的监控数据。如果是大规模集群,建议设置 `local=true`,在集群的每个节点上都部署抓取器,抓取本地 elasticsearch 进程的监控数据。 6 | 7 | ElasticSearch 详细的监控讲解,请参考这篇 [文章](https://time.geekbang.org/column/article/628847)。 8 | 9 | ## 配置示例 10 | 11 | categraf 配置文件:`conf/input.elasticsearch/elasticsearch.toml` 12 | 13 | ```yaml 14 | [[instances]] 15 | servers = ["http://192.168.11.177:9200"] 16 | http_timeout = "10s" 17 | local = false 18 | cluster_health = true 19 | cluster_health_level = "cluster" 20 | cluster_stats = true 21 | indices_level = "" 22 | node_stats = ["jvm", "breaker", "process", "os", "fs", "indices", "thread_pool", "transport"] 23 | username = "elastic" 24 | password = "xxxxxxxx" 25 | num_most_recent_indices = 1 26 | labels = { service="es" } 27 | ``` 28 | 29 | ## 仪表盘效果 30 | 31 | 夜莺内置仪表盘中已经内置了 Elasticsearch 的仪表盘,导入即可使用。 32 | 33 | ![](http://download.flashcat.cloud/uPic/es-dashboard.jpeg) -------------------------------------------------------------------------------- /integrations/Switch_Legacy/markdown/README.md: -------------------------------------------------------------------------------- 1 | # switch_legacy 2 | 3 | 交换机监控插件,fork 自 [https://github.com/gaochao1/swcollector](https://github.com/gaochao1/swcollector) 可以自动探测网络设备型号,获取 CPU、内存使用率,当然,还有各个网口的监控数据,这是通用的 oid 4 | 5 | ## Configuration 6 | 7 | 最核心的配置就是指定 IP 列表,有三种写法: 8 | 9 | ```toml 10 | [[instances]] 11 | ips = [ 12 | "172.16.2.1", 13 | "172.16.4/24", 14 | "192.168.56.102-192.168.56.120" 15 | ] 16 | ``` 17 | 18 | 该插件只支持 SNMP v2c,所以认证信息就是一个 community 字符串 19 | 20 | ## 唯一标识标签 21 | 22 | 网络设备的监控数据,默认都会带有 ip 标签,指定监控数据来源于哪个设备,如果想把监控数据当做夜莺里的监控对象,让网络设备自动出现在夜莺的监控对象表格里,只需要把 switch_id_label 设置为 ident 即可,这样一来,网络设备的 IP 信息会作为 ident 标签的值上报,夜莺会自动读取 ident 标签的值入库 23 | 24 | ## 名称映射 25 | 26 | 有时,我们看到网络设备的 IP,无法分辨是具体哪个设备,此时可以给 IP 一个映射名称: 27 | 28 | ```ini 29 | [mappings] 30 | 
"192.168.88.160" = "switch001.bj" 31 | "192.168.88.161" = "switch002.bj" 32 | ``` 33 | 34 | 这样一来,上报的监控数据就不用 IP 做标识了,而是使用 switch001.bj 这样的字符串做标识,更易读一些 35 | 36 | ## 自定义 oid 37 | 38 | `[[instances.customs]]` 部分可以配置多个,表示自定义 oid,默认情况下,该插件采集的都是设备各个网口的监控数据以及CPU和内存的使用率,如果要采集别的 oid,就需要使用这个自定义功能 39 | 40 | ## 监控大盘 41 | 42 | 社区有小伙伴帮忙做了一个监控大盘,就在该 README 同级目录下,大家可以导入夜莺使用 -------------------------------------------------------------------------------- /integrations/Docker/markdown/README.md: -------------------------------------------------------------------------------- 1 | # docker 2 | 3 | forked from telegraf/inputs.docker 4 | 5 | ## change 6 | 7 | 1. Using `container_id` as label not field 8 | 1. Some metrics have been deleted 9 | 10 | ## 容器ID标签 11 | 12 | 通过下面两个配置来控制 container_id 这个标签: 13 | 14 | ```ini 15 | container_id_label_enable = true 16 | container_id_label_short_style = false 17 | ``` 18 | 19 | 默认 container_id_label_enable 设置为 true,表示启用,即会把容器ID放到标签里,container_id_label_short_style 是短格式,容器ID很长,如果把 short_style 设置为 true,就会只截取前面12位 20 | 21 | ## 权限问题 22 | 23 | Categraf 最好是用 root 账号来运行,否则,请求 docker.sock 可能会遇到权限问题,需要把 Categraf 的运行账号,加到 docker group 中,假设 Categraf 使用 categraf 账号运行: 24 | 25 | ``` 26 | sudo usermod -aG docker categraf 27 | ``` 28 | 29 | ## 运行在容器里 30 | 31 | 如果 Categraf 运行在容器中,docker 的 unix socket 就需要挂到 Categraf 的容器里,比如通过 `-v /var/run/docker.sock:/var/run/docker.sock` 这样的参数来启动 Categraf 的容器。如果是在 compose 环境下,也可以在 docker compose 配置中加上 volume 的配置: 32 | 33 | ```yaml 34 | volumes: 35 | - /var/run/docker.sock:/var/run/docker.sock 36 | ``` 37 | 38 | ## 停用该插件 39 | 40 | - 方法一:把 `input.docker` 目录改个别的名字,不用 `input.` 打头 41 | - 方法二:docker.toml 中的 endpoint 配置留空 -------------------------------------------------------------------------------- /pkg/hash/hash.go: -------------------------------------------------------------------------------- 1 | package hash 2 | 3 | import ( 4 | "sort" 5 | 6 | prommodel "github.com/prometheus/common/model" 7 | "github.com/spaolacci/murmur3" 8 | ) 9 | 10 | func GetHash(m prommodel.Metric, ref string) uint64 { 11 | var str string 12 | var strs []string 13 | // get keys from m 14 | for k, _ := range m { 15 | strs = append(strs, string(k)) 16 | } 17 | 18 | // sort keys use sort 19 | sort.Strings(strs) 20 | 21 | for _, k := range strs { 22 | str += "/" 23 | str += k 24 | str += "/" 25 | str += string(m[prommodel.LabelName(k)]) 26 | } 27 | str += "/" 28 | str += ref 29 | 30 | return murmur3.Sum64([]byte(str)) 31 | } 32 | 33 | func GetTagHash(m prommodel.Metric) uint64 { 34 | var str string 35 | var strs []string 36 | // get keys from m 37 | for k, _ := range m { 38 | if k == "__name__" { 39 | continue 40 | } 41 | strs = append(strs, string(k)) 42 | } 43 | 44 | // sort keys use sort 45 | sort.Strings(strs) 46 | 47 | for _, k := range strs { 48 | str += "/" 49 | str += k 50 | str += "/" 51 | str += string(m[prommodel.LabelName(k)]) 52 | } 53 | 54 | return murmur3.Sum64([]byte(str)) 55 | } 56 | -------------------------------------------------------------------------------- /pushgw/router/stat.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import "github.com/prometheus/client_golang/prometheus" 4 | 5 | const ( 6 | namespace = "n9e" 7 | subsystem = "pushgw" 8 | ) 9 | 10 | var ( 11 | CounterSampleTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 12 | Namespace: namespace, 13 | Subsystem: subsystem, 14 | Name: "samples_received_total", 15 | Help: "Total number samples received.", 16 | }, []string{"channel"}) 17 | 18 
| CounterDropSampleTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 19 | Namespace: namespace, 20 | Subsystem: subsystem, 21 | Name: "drop_sample_total", 22 | Help: "Number of drop sample.", 23 | }, []string{"client_ip"}) 24 | 25 | CounterSampleReceivedByIdent = prometheus.NewCounterVec(prometheus.CounterOpts{ 26 | Namespace: namespace, 27 | Subsystem: subsystem, 28 | Name: "sample_received_by_ident", 29 | Help: "Number of sample push by ident.", 30 | }, []string{"host_ident"}) 31 | ) 32 | 33 | func registerMetrics() { 34 | prometheus.MustRegister( 35 | CounterSampleTotal, 36 | CounterDropSampleTotal, 37 | CounterSampleReceivedByIdent, 38 | ) 39 | } 40 | -------------------------------------------------------------------------------- /integrations/Jolokia_Agent/collect/jolokia_agent/bitbucket.toml: -------------------------------------------------------------------------------- 1 | [[instances]] 2 | urls = ["http://localhost:8778/jolokia"] 3 | metrics_name_prefix = "bitbucket_" 4 | 5 | [[instances.metric]] 6 | name = "jvm_operatingsystem" 7 | mbean = "java.lang:type=OperatingSystem" 8 | 9 | [[instances.metric]] 10 | name = "jvm_runtime" 11 | mbean = "java.lang:type=Runtime" 12 | 13 | [[instances.metric]] 14 | name = "jvm_thread" 15 | mbean = "java.lang:type=Threading" 16 | 17 | [[instances.metric]] 18 | name = "jvm_memory" 19 | mbean = "java.lang:type=Memory" 20 | 21 | [[instances.metric]] 22 | name = "jvm_class_loading" 23 | mbean = "java.lang:type=ClassLoading" 24 | 25 | [[instances.metric]] 26 | name = "jvm_memory_pool" 27 | mbean = "java.lang:type=MemoryPool,name=*" 28 | 29 | [[instances.metric]] 30 | name = "webhooks" 31 | mbean = "com.atlassian.webhooks:name=*" 32 | 33 | [[instances.metric]] 34 | name = "atlassian" 35 | mbean = "com.atlassian.bitbucket:name=*" 36 | 37 | [[instances.metric]] 38 | name = "thread_pools" 39 | mbean = "com.atlassian.bitbucket.thread-pools:name=*" 40 | -------------------------------------------------------------------------------- /center/router/router_chart_share.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/ccfos/nightingale/v6/models" 7 | 8 | "github.com/gin-gonic/gin" 9 | "github.com/toolkits/pkg/ginx" 10 | "github.com/toolkits/pkg/str" 11 | ) 12 | 13 | func (rt *Router) chartShareGets(c *gin.Context) { 14 | ids := ginx.QueryStr(c, "ids", "") 15 | lst, err := models.ChartShareGetsByIds(rt.Ctx, str.IdsInt64(ids, ",")) 16 | ginx.NewRender(c).Data(lst, err) 17 | } 18 | 19 | type chartShareForm struct { 20 | DatasourceId int64 `json:"datasource_id"` 21 | Configs string `json:"configs"` 22 | } 23 | 24 | func (rt *Router) chartShareAdd(c *gin.Context) { 25 | username := c.MustGet("username").(string) 26 | 27 | var forms []chartShareForm 28 | ginx.BindJSON(c, &forms) 29 | 30 | ids := []int64{} 31 | now := time.Now().Unix() 32 | 33 | for _, f := range forms { 34 | chart := models.ChartShare{ 35 | DatasourceId: f.DatasourceId, 36 | Configs: f.Configs, 37 | CreateBy: username, 38 | CreateAt: now, 39 | } 40 | ginx.Dangerous(chart.Add(rt.Ctx)) 41 | ids = append(ids, chart.Id) 42 | } 43 | 44 | ginx.NewRender(c).Data(ids, nil) 45 | } 46 | -------------------------------------------------------------------------------- /integrations/NVIDIA/markdown/README.md: -------------------------------------------------------------------------------- 1 | # nvidia_smi 2 | 3 | 该采集插件的原理,就是读取 nvidia-smi 命令的内容输出,转换为Prometheus格式的监控数据上报给Nightingale夜莺。 4 | 5 | 是对 
[nvidia_gpu_exporter](https://github.com/utkuozdemir/nvidia_gpu_exporter) 代码的集成。 6 | 7 | ## Configuration 8 | 9 | 配置文件在 `conf/input.nvidia_smi/nvidia_smi.toml` 10 | 11 | ```toml 12 | # # collect interval 13 | # interval = 15 14 | 15 | # 下面这个配置是最重要的配置,如果要采集 nvidia-smi 的信息,就打开下面的配置, 16 | # 给出 nvidia-smi 命令的路径,最好是给绝对路径 17 | # 相当于让 Categraf 执行本机的 nvidia-smi 命令,获取本机 GPU 的状态信息 18 | # exec local command 19 | # nvidia_smi_command = "nvidia-smi" 20 | 21 | # 如果想远程方式采集远端机器的 GPU 状态信息,可以使用 ssh 命令,登录远端机器 22 | # 在远端机器执行 nvidia-smi 的命令输出,通常 Categraf 是部署在每个物理机上的 23 | # 所以,ssh 这种方式,理论上用不到 24 | # exec remote command 25 | # nvidia_smi_command = "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null SSH_USER@SSH_HOST nvidia-smi" 26 | 27 | # Comma-separated list of the query fields. 28 | # You can find out possible fields by running `nvidia-smi --help-query-gpus`. 29 | # The value `AUTO` will automatically detect the fields to query. 30 | query_field_names = "AUTO" 31 | ``` 32 | 33 | ## TODO 34 | 35 | GPU 卡已经关注哪些监控指标,缺少监控大盘JSON和告警规则JSON,欢迎大家 PR 36 | -------------------------------------------------------------------------------- /integrations/Net_Response/collect/net_response/net_response.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | [mappings] 5 | # "127.0.0.1:22"= {region="local",ssh="test"} 6 | # "127.0.0.1:22"= {region="local",ssh="redis"} 7 | 8 | [[instances]] 9 | targets = [ 10 | # "127.0.0.1:22", 11 | # "localhost:6379", 12 | # ":9090" 13 | ] 14 | 15 | # # append some labels for series 16 | # labels = { region="cloud", product="n9e" } 17 | 18 | # # interval = global.interval * interval_times 19 | # interval_times = 1 20 | 21 | ## Protocol, must be "tcp" or "udp" 22 | ## NOTE: because the "udp" protocol does not respond to requests, it requires 23 | ## a send/expect string pair (see below). 24 | # protocol = "tcp" 25 | 26 | ## Set timeout 27 | # timeout = "1s" 28 | 29 | ## Set read timeout (only used if expecting a response) 30 | # read_timeout = "1s" 31 | 32 | ## The following options are required for UDP checks. For TCP, they are 33 | ## optional. The plugin will send the given string to the server and then 34 | ## expect to receive the given 'expect' string back. 
35 | ## string sent to the server 36 | # send = "ssh" 37 | ## expected string in answer 38 | # expect = "ssh" 39 | -------------------------------------------------------------------------------- /pkg/version/version.go: -------------------------------------------------------------------------------- 1 | package version 2 | 3 | import ( 4 | "sync/atomic" 5 | "time" 6 | 7 | "github.com/hashicorp/go-version" 8 | "github.com/toolkits/pkg/logger" 9 | "github.com/toolkits/pkg/net/httplib" 10 | ) 11 | 12 | var Version = "unknown" 13 | var GithubVersion atomic.Value 14 | 15 | func CompareVersion(v1, v2 string) (int, error) { 16 | version1, err := version.NewVersion(v1) 17 | if err != nil { 18 | return 0, err 19 | } 20 | version2, err := version.NewVersion(v2) 21 | if err != nil { 22 | return 0, err 23 | } 24 | 25 | if version1.LessThan(version2) { 26 | return -1, nil 27 | } 28 | if version1.GreaterThan(version2) { 29 | return 1, nil 30 | } 31 | return 0, nil 32 | } 33 | 34 | func GetGithubVersion() { 35 | for { 36 | req := httplib.Get("https://api.github.com/repos/ccfos/nightingale/releases/latest") 37 | var release GithubRelease 38 | err := req.ToJSON(&release) 39 | if err != nil { 40 | logger.Errorf("get github version fail: %v", err) 41 | } 42 | 43 | GithubVersion.Store(release.TagName) 44 | time.Sleep(24 * time.Hour) 45 | } 46 | } 47 | 48 | type GithubRelease struct { 49 | TagName string `json:"tag_name"` 50 | } 51 | -------------------------------------------------------------------------------- /integrations/Procstat/collect/procstat/procstat.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # [[instances]] 5 | # # executable name (ie, pgrep ) 6 | # search_exec_substring = "nginx" 7 | 8 | # # pattern as argument for pgrep (ie, pgrep -f ) 9 | # search_cmdline_substring = "n9e server" 10 | 11 | # # windows service name 12 | # search_win_service = "" 13 | 14 | # # search process with specific user, option with exec_substring or cmdline_substring 15 | # search_user = "" 16 | 17 | # # append some labels for series 18 | # labels = { region="cloud", product="n9e" } 19 | 20 | # # interval = global.interval * interval_times 21 | # interval_times = 1 22 | 23 | # # mode to use when calculating CPU usage. can be one of 'solaris' or 'irix' 24 | # mode = "irix" 25 | 26 | # sum of threads/fd/io/cpu/mem, min of uptime/limit 27 | gather_total = true 28 | 29 | # will append pid as tag 30 | gather_per_pid = false 31 | 32 | # gather jvm metrics only when jstat is ready 33 | # gather_more_metrics = [ 34 | # "threads", 35 | # "fd", 36 | # "io", 37 | # "uptime", 38 | # "cpu", 39 | # "mem", 40 | # "limit", 41 | # "jvm" 42 | # ] 43 | -------------------------------------------------------------------------------- /integrations/Consul/collect/consul/consul.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | [[instances]] 5 | ## Consul server address 6 | # address = "localhost:8500" 7 | 8 | ## URI scheme for the Consul server, one of "http", "https" 9 | # scheme = "http" 10 | 11 | ## ACL token used in every request 12 | # token = "" 13 | 14 | ## HTTP Basic Authentication username and password. 15 | # username = "" 16 | # password = "" 17 | 18 | ## Data center to query the health checks from 19 | # datacenter = "" 20 | 21 | ## Allows any Consul server (non-leader) to service a read. 
22 | ## Default is true 23 | # allow_stale = true 24 | 25 | ## Forces the read to be fully consistent. 26 | ## Default is false 27 | # require_consistent = false 28 | 29 | ## Prefix from which to expose key/value pairs. 30 | # kv_prefix = "" 31 | 32 | ## Regex that determines which keys to expose. 33 | ## Default is ".*" 34 | # kv_filter = ".*" 35 | 36 | ## Optional TLS Config 37 | # tls_ca = "/etc/telegraf/ca.pem" 38 | # tls_cert = "/etc/telegraf/cert.pem" 39 | # tls_key = "/etc/telegraf/key.pem" 40 | ## Use TLS but skip chain & host verification 41 | # insecure_skip_verify = true 42 | -------------------------------------------------------------------------------- /integrations/Logstash/collect/logstash/logstash.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | # Read metrics exposed by Logstash 5 | [[instances]] 6 | # # interval = global.interval * interval_times 7 | # interval_times = 1 8 | 9 | # append labels 10 | # labels = { instance="x" } 11 | 12 | ## The URL of the exposed Logstash API endpoint. 13 | # url = "http://127.0.0.1:9600" 14 | url = "" 15 | 16 | ## Use Logstash 5 single pipeline API, set to true when monitoring 17 | ## Logstash 5. 18 | # single_pipeline = false 19 | 20 | ## Enable optional collection components. Can contain 21 | ## "pipelines", "process", and "jvm". 22 | # collect = ["pipelines", "process", "jvm"] 23 | 24 | ## Timeout for HTTP requests. 25 | # timeout = "5s" 26 | 27 | ## Optional HTTP Basic Auth credentials. 28 | # username = "username" 29 | # password = "pa$$word" 30 | 31 | ## Optional HTTP headers. 32 | # [inputs.logstash.headers] 33 | # "X-Special-Header" = "Special-Value" 34 | 35 | ## Optional TLS Config 36 | # use_tls = false 37 | # tls_min_version = "1.2" 38 | # tls_ca = "/etc/categraf/ca.pem" 39 | # tls_cert = "/etc/categraf/cert.pem" 40 | # tls_key = "/etc/categraf/key.pem" 41 | ## Use TLS but skip chain & host verification 42 | # insecure_skip_verify = true -------------------------------------------------------------------------------- /integrations/Redis/markdown/README.md: -------------------------------------------------------------------------------- 1 | # redis 2 | 3 | redis 的监控原理,就是连上 redis,执行 info 命令,解析结果,整理成监控数据上报。 4 | 5 | ## Configuration 6 | 7 | redis 插件的配置在 `conf/input.redis/redis.toml` 最简单的配置如下: 8 | 9 | ```toml 10 | [[instances]] 11 | address = "127.0.0.1:6379" 12 | username = "" 13 | password = "" 14 | labels = { instance="n9e-10.23.25.2:6379" } 15 | ``` 16 | 17 | 如果要监控多个 redis 实例,就增加 instances 即可: 18 | 19 | ```toml 20 | [[instances]] 21 | address = "10.23.25.2:6379" 22 | username = "" 23 | password = "" 24 | labels = { instance="n9e-10.23.25.2:6379" } 25 | 26 | [[instances]] 27 | address = "10.23.25.3:6379" 28 | username = "" 29 | password = "" 30 | labels = { instance="n9e-10.23.25.3:6379" } 31 | ``` 32 | 33 | 建议通过 labels 配置附加一个 instance 标签,便于后面复用监控大盘。 34 | 35 | ## 监控大盘和告警规则 36 | 37 | 夜莺内置了 redis 的告警规则和监控大盘,克隆到自己的业务组下即可使用。 38 | 39 | ## redis 集群如何监控 40 | 41 | 其实,redis 集群的监控,还是去监控每个 redis 实例。 42 | 43 | 如果一个 redis 集群有 3 个实例,对于业务应用来讲,发起一个请求,可能随机请求到某一个实例上去了,这个是没问题的,但是对于监控 client 而言,显然是希望到所有实例上获取数据的。 44 | 45 | 当然,如果多个 redis 实例组成了集群,我们希望有个标识来标识这个集群,这个时候,可以通过 labels 来实现,比如给每个实例增加一个 redis_clus 的标签,值为集群名字即可。 46 | 47 | 48 | # redis_sentinel 49 | forked from [telegraf/redis_sentinel](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/redis_sentinel) 50 | -------------------------------------------------------------------------------- 
/integrations/Ping/collect/ping/ping.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | [[instances]] 5 | # send ping packets to 6 | targets = [ 7 | # "www.baidu.com", 8 | # "127.0.0.1", 9 | # "10.4.5.6", 10 | # "10.4.5.7" 11 | ] 12 | 13 | # # append some labels for series 14 | # labels = { region="cloud", product="n9e" } 15 | 16 | # # interval = global.interval * interval_times 17 | # interval_times = 1 18 | 19 | ## Number of ping packets to send per interval. Corresponds to the "-c" 20 | ## option of the ping command. 21 | # count = 1 22 | 23 | ## Time to wait between sending ping packets in seconds. Operates like the 24 | ## "-i" option of the ping command. 25 | # ping_interval = 1.0 26 | 27 | ## If set, the time to wait for a ping response in seconds. Operates like 28 | ## the "-W" option of the ping command. 29 | # timeout = 3.0 30 | 31 | ## Interface or source address to send ping from. Operates like the -I or -S 32 | ## option of the ping command. 33 | # interface = "" 34 | 35 | ## Use only IPv6 addresses when resolving a hostname. 36 | # ipv6 = false 37 | 38 | ## Number of data bytes to be sent. Corresponds to the "-s" 39 | ## option of the ping command. 40 | # size = 56 41 | 42 | # max concurrency coroutine 43 | # concurrency = 50 44 | -------------------------------------------------------------------------------- /pkg/hash/hash_fnv.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2015 The Kubernetes Authors. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package hash 18 | 19 | import ( 20 | "hash" 21 | 22 | "github.com/davecgh/go-spew/spew" 23 | ) 24 | 25 | // DeepHashObject writes specified object to hash using the spew library 26 | // which follows pointers and prints actual values of the nested objects 27 | // ensuring the hash does not change when a pointer changes. 28 | func DeepHashObject(hasher hash.Hash, objectToWrite interface{}) { 29 | hasher.Reset() 30 | printer := spew.ConfigState{ 31 | Indent: " ", 32 | SortKeys: true, 33 | DisableMethods: true, 34 | SpewKeys: true, 35 | } 36 | printer.Fprintf(hasher, "%#v", objectToWrite) 37 | } 38 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.yml: -------------------------------------------------------------------------------- 1 | name: Bug Report & Usage Question 2 | description: Reporting a bug or asking a question about how to use Nightingale 3 | labels: [] 4 | 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | The more detailed the form is filled in, the easier the problem will be solved. 10 | 提供的信息越详细,问题解决的可能性就越大。另外, 提问之前请先搜索历史 issue (包括 close 的), 以免重复提问。 11 | - type: textarea 12 | id: question 13 | attributes: 14 | label: Question and Steps to reproduce 15 | description: Describe your question and steps to reproduce the bug. 
描述问题以及复现步骤 16 | validations: 17 | required: true 18 | - type: textarea 19 | id: logs 20 | attributes: 21 | label: Relevant logs and configurations 22 | description: Relevant logs and configurations. 报错日志([查看方法](https://flashcat.cloud/docs/content/flashcat-monitor/nightingale-v6/faq/how-to-check-logs/))以及各个相关组件的配置信息 23 | render: text 24 | validations: 25 | required: true 26 | - type: textarea 27 | id: system-info 28 | attributes: 29 | label: Version 30 | description: Include nightingale version, operating system, and other relevant details. 请告知夜莺的版本、操作系统的版本、CPU架构等信息 31 | validations: 32 | required: true 33 | 34 | -------------------------------------------------------------------------------- /alert/common/key.go: -------------------------------------------------------------------------------- 1 | package common 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/ccfos/nightingale/v6/models" 7 | ) 8 | 9 | func RuleKey(datasourceId, id int64) string { 10 | return fmt.Sprintf("alert-%d-%d", datasourceId, id) 11 | } 12 | 13 | func MatchTags(eventTagsMap map[string]string, itags []models.TagFilter) bool { 14 | for _, filter := range itags { 15 | value, has := eventTagsMap[filter.Key] 16 | if !has { 17 | return false 18 | } 19 | if !matchTag(value, filter) { 20 | return false 21 | } 22 | } 23 | return true 24 | } 25 | func MatchGroupsName(groupName string, groupFilter []models.TagFilter) bool { 26 | for _, filter := range groupFilter { 27 | if !matchTag(groupName, filter) { 28 | return false 29 | } 30 | } 31 | return true 32 | } 33 | 34 | func matchTag(value string, filter models.TagFilter) bool { 35 | switch filter.Func { 36 | case "==": 37 | return filter.Value == value 38 | case "!=": 39 | return filter.Value != value 40 | case "in": 41 | _, has := filter.Vset[value] 42 | return has 43 | case "not in": 44 | _, has := filter.Vset[value] 45 | return !has 46 | case "=~": 47 | return filter.Regexp.MatchString(value) 48 | case "!~": 49 | return !filter.Regexp.MatchString(value) 50 | } 51 | // unexpect func 52 | return false 53 | } 54 | -------------------------------------------------------------------------------- /integrations/ZooKeeper/markdown/README.md: -------------------------------------------------------------------------------- 1 | # zookeeper 2 | 3 | 注意: `>=3.6.0` zookeeper 版本内置 [prometheus 的支持](https://zookeeper.apache.org/doc/current/zookeeperMonitor.html),即,如果 zookeeper 启用了 prometheus,Categraf 可使用 prometheus 插件从这个 metrics 接口拉取数据即可。就无需使用 zookeeper 这个插件来采集了。 4 | 5 | ## 说明 6 | 7 | categraf zookeeper 采集插件移植于 [dabealu/zookeeper-exporter](https://github.com/dabealu/zookeeper-exporter),适用于 `<3.6.0` 版本的 zookeeper, 原理就是利用 Zookeper 提供的四字命令(The Four Letter Words)获取监控信息。 8 | 9 | 需要注意的是,在 zookeeper v3.4.10 以后添加了四字命令白名单,需要在 zookeeper 的配置文件 `zoo.cfg` 中新增白名单配置: 10 | 11 | ``` 12 | 4lw.commands.whitelist=mntr,ruok 13 | ``` 14 | 15 | ## 配置 16 | 17 | zookeeper 插件的配置在 `conf/input.zookeeper/zookeeper.toml` 集群中的多个实例地址请用空格分隔: 18 | 19 | ```toml 20 | [[instances]] 21 | cluster_name = "dev-zk-cluster" 22 | addresses = "127.0.0.1:2181" 23 | timeout = 10 24 | ``` 25 | 26 | 如果要监控多个 zookeeper 集群,就增加 instances 即可: 27 | 28 | ```toml 29 | [[instances]] 30 | cluster_name = "dev-zk-cluster" 31 | addresses = "127.0.0.1:2181" 32 | timeout = 10 33 | 34 | [[instances]] 35 | cluster_name = "test-zk-cluster" 36 | addresses = "127.0.0.1:2181 127.0.0.1:2182 127.0.0.1:2183" 37 | timeout = 10 38 | ``` 39 | 40 | ## 监控大盘和告警规则 41 | 42 | 夜莺内置了 zookeeper 的监控大盘和告警规则,克隆到自己的业务组下即可使用。虽说文件名带有 `by_exporter` 字样,没关系,可以在 categraf 中使用。 43 | 44 | 
-------------------------------------------------------------------------------- /center/cconf/metric.go: -------------------------------------------------------------------------------- 1 | package cconf 2 | 3 | import ( 4 | "path" 5 | 6 | "github.com/toolkits/pkg/file" 7 | ) 8 | 9 | // metricDesc , As load map happens before read map, there is no necessary to use concurrent map for metric desc store 10 | type MetricDescType struct { 11 | CommonDesc map[string]string `yaml:",inline" json:"common"` 12 | Zh map[string]string `yaml:"zh" json:"zh"` 13 | En map[string]string `yaml:"en" json:"en"` 14 | } 15 | 16 | var MetricDesc MetricDescType 17 | 18 | // GetMetricDesc , if metric is not registered, empty string will be returned 19 | func GetMetricDesc(lang, metric string) string { 20 | var m map[string]string 21 | 22 | switch lang { 23 | case "en": 24 | m = MetricDesc.En 25 | default: 26 | m = MetricDesc.Zh 27 | } 28 | 29 | if m != nil { 30 | if desc, ok := m[metric]; ok { 31 | return desc 32 | } 33 | } 34 | 35 | if MetricDesc.CommonDesc != nil { 36 | if desc, ok := MetricDesc.CommonDesc[metric]; ok { 37 | return desc 38 | } 39 | } 40 | 41 | return "" 42 | } 43 | func LoadMetricsYaml(configDir, metricsYamlFile string) error { 44 | fp := metricsYamlFile 45 | if fp == "" { 46 | fp = path.Join(configDir, "metrics.yaml") 47 | } 48 | if !file.IsExist(fp) { 49 | return nil 50 | } 51 | return file.ReadYaml(fp, &MetricDesc) 52 | } 53 | -------------------------------------------------------------------------------- /pkg/choice/choice.go: -------------------------------------------------------------------------------- 1 | // Package choice provides basic functions for working with 2 | // plugin options that must be one of several values. 3 | package choice 4 | 5 | import ( 6 | "fmt" 7 | "strings" 8 | ) 9 | 10 | // Contains return true if the choice in the list of choices. 11 | func Contains(choice string, choices []string) bool { 12 | for _, item := range choices { 13 | if item == choice { 14 | return true 15 | } 16 | } 17 | return false 18 | } 19 | 20 | // Contains return true if the choice in the list of choices. 21 | func ContainsPrefix(choice string, choices []string) bool { 22 | for _, item := range choices { 23 | if strings.HasPrefix(choice, item) { 24 | return true 25 | } 26 | } 27 | return false 28 | } 29 | 30 | // Check returns an error if a choice is not one of 31 | // the available choices. 32 | func Check(choice string, available []string) error { 33 | if !Contains(choice, available) { 34 | return fmt.Errorf("unknown choice %s", choice) 35 | } 36 | return nil 37 | } 38 | 39 | // CheckSlice returns an error if the choices is not a subset of 40 | // available. 
41 | func CheckSlice(choices, available []string) error { 42 | for _, choice := range choices { 43 | err := Check(choice, available) 44 | if err != nil { 45 | return err 46 | } 47 | } 48 | return nil 49 | } 50 | -------------------------------------------------------------------------------- /center/cstats/stats.go: -------------------------------------------------------------------------------- 1 | package cstats 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/prometheus/client_golang/prometheus" 7 | ) 8 | 9 | const Service = "n9e-center" 10 | 11 | var ( 12 | labels = []string{"service", "code", "path", "method"} 13 | 14 | uptime = prometheus.NewCounterVec( 15 | prometheus.CounterOpts{ 16 | Name: "uptime", 17 | Help: "HTTP service uptime.", 18 | }, []string{"service"}, 19 | ) 20 | 21 | RequestCounter = prometheus.NewCounterVec( 22 | prometheus.CounterOpts{ 23 | Name: "http_request_count_total", 24 | Help: "Total number of HTTP requests made.", 25 | }, labels, 26 | ) 27 | 28 | RequestDuration = prometheus.NewHistogramVec( 29 | prometheus.HistogramOpts{ 30 | Buckets: []float64{.01, .1, 1, 10}, 31 | Name: "http_request_duration_seconds", 32 | Help: "HTTP request latencies in seconds.", 33 | }, labels, 34 | ) 35 | ) 36 | 37 | func Init() { 38 | // Register the summary and the histogram with Prometheus's default registry. 39 | prometheus.MustRegister( 40 | uptime, 41 | RequestCounter, 42 | RequestDuration, 43 | ) 44 | 45 | go recordUptime() 46 | } 47 | 48 | // recordUptime increases service uptime per second. 49 | func recordUptime() { 50 | for range time.Tick(time.Second) { 51 | uptime.WithLabelValues(Service).Inc() 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /integrations/Nginx/collect/nginx_upstream_check/nginx_upstream_check.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | [[instances]] 5 | targets = [ 6 | # "http://127.0.0.1/status?format=json", 7 | # "http://10.2.3.56/status?format=json" 8 | ] 9 | 10 | # # append some labels for series 11 | # labels = { region="cloud", product="n9e" } 12 | 13 | # # interval = global.interval * interval_times 14 | # interval_times = 1 15 | 16 | ## Set http_proxy (categraf uses the system wide proxy settings if it's is not set) 17 | # http_proxy = "http://localhost:8888" 18 | 19 | ## Interface to use when dialing an address 20 | # interface = "eth0" 21 | 22 | ## HTTP Request Method 23 | # method = "GET" 24 | 25 | ## Set timeout (default 5 seconds) 26 | # timeout = "5s" 27 | 28 | ## Whether to follow redirects from the server (defaults to false) 29 | # follow_redirects = false 30 | 31 | ## Optional HTTP Basic Auth Credentials 32 | # username = "username" 33 | # password = "pa$$word" 34 | 35 | ## Optional headers 36 | # headers = ["X-From", "categraf", "X-Xyz", "abc"] 37 | 38 | ## Optional TLS Config 39 | # use_tls = false 40 | # tls_ca = "/etc/categraf/ca.pem" 41 | # tls_cert = "/etc/categraf/cert.pem" 42 | # tls_key = "/etc/categraf/key.pem" 43 | ## Use TLS but skip chain & host verification 44 | # insecure_skip_verify = false 45 | -------------------------------------------------------------------------------- /pkg/parser/calc.go: -------------------------------------------------------------------------------- 1 | package parser 2 | 3 | import ( 4 | "regexp" 5 | "strings" 6 | 7 | "github.com/expr-lang/expr" 8 | "github.com/toolkits/pkg/logger" 9 | ) 10 | 11 | func MathCalc(s string, data map[string]float64) 
(float64, error) { 12 | m := make(map[string]float64) 13 | for k, v := range data { 14 | m[cleanStr(k)] = v 15 | } 16 | 17 | program, err := expr.Compile(cleanStr(s), expr.Env(m)) 18 | if err != nil { 19 | return 0, err 20 | } 21 | 22 | output, err := expr.Run(program, m) 23 | if err != nil { 24 | return 0, err 25 | } 26 | 27 | if result, ok := output.(float64); ok { 28 | return result, nil 29 | } else if result, ok := output.(bool); ok { 30 | if result { 31 | return 1, nil 32 | } else { 33 | return 0, nil 34 | } 35 | } else { 36 | return 0, nil 37 | } 38 | } 39 | 40 | func Calc(s string, data map[string]float64) bool { 41 | v, err := MathCalc(s, data) 42 | if err != nil { 43 | logger.Errorf("Calc exp:%s data:%v error: %v", s, data, err) 44 | return false 45 | } 46 | 47 | return v > 0 48 | } 49 | 50 | func cleanStr(s string) string { 51 | s = replaceDollarSigns(s) 52 | s = strings.ReplaceAll(s, "$.", "") 53 | return s 54 | } 55 | 56 | func replaceDollarSigns(s string) string { 57 | re := regexp.MustCompile(`\$([A-Z])\.`) 58 | return re.ReplaceAllString(s, "${1}_") 59 | } 60 | -------------------------------------------------------------------------------- /models/notify_config.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | const WEBHOOKKEY = "webhook" 4 | const NOTIFYSCRIPT = "notify_script" 5 | const NOTIFYCHANNEL = "notify_channel" 6 | const NOTIFYCONTACT = "notify_contact" 7 | const SMTP = "smtp_config" 8 | const IBEX = "ibex_server" 9 | 10 | type Webhook struct { 11 | Enable bool `json:"enable"` 12 | Url string `json:"url"` 13 | BasicAuthUser string `json:"basic_auth_user"` 14 | BasicAuthPass string `json:"basic_auth_pass"` 15 | Timeout int `json:"timeout"` 16 | HeaderMap map[string]string `json:"headers"` 17 | Headers []string `json:"headers_str"` 18 | SkipVerify bool `json:"skip_verify"` 19 | Note string `json:"note"` 20 | } 21 | 22 | type NotifyScript struct { 23 | Enable bool `json:"enable"` 24 | Type int `json:"type"` // 0 script 1 path 25 | Content string `json:"content"` 26 | Timeout int `json:"timeout"` 27 | } 28 | 29 | type NotifyChannel struct { 30 | Name string `json:"name"` 31 | Ident string `json:"ident"` 32 | Hide bool `json:"hide"` 33 | BuiltIn bool `json:"built_in"` 34 | } 35 | 36 | type NotifyContact struct { 37 | Name string `json:"name"` 38 | Ident string `json:"ident"` 39 | Hide bool `json:"hide"` 40 | BuiltIn bool `json:"built_in"` 41 | } 42 | -------------------------------------------------------------------------------- /center/router/router_crypto.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "github.com/ccfos/nightingale/v6/pkg/secu" 5 | 6 | "github.com/gin-gonic/gin" 7 | "github.com/toolkits/pkg/ginx" 8 | ) 9 | 10 | type confPropCrypto struct { 11 | Data string `json:"data" binding:"required"` 12 | Key string `json:"key" binding:"required"` 13 | } 14 | 15 | func (rt *Router) confPropEncrypt(c *gin.Context) { 16 | var f confPropCrypto 17 | ginx.BindJSON(c, &f) 18 | 19 | k := len(f.Key) 20 | switch k { 21 | default: 22 | c.String(400, "The key length should be 16, 24 or 32") 23 | return 24 | case 16, 24, 32: 25 | break 26 | } 27 | 28 | s, err := secu.DealWithEncrypt(f.Data, f.Key) 29 | if err != nil { 30 | c.String(500, err.Error()) 31 | } 32 | 33 | c.JSON(200, gin.H{ 34 | "src": f.Data, 35 | "key": f.Key, 36 | "encrypt": s, 37 | }) 38 | } 39 | 40 | func (rt *Router) confPropDecrypt(c *gin.Context) { 41 | var f 
confPropCrypto 42 | ginx.BindJSON(c, &f) 43 | 44 | k := len(f.Key) 45 | switch k { 46 | default: 47 | c.String(400, "The key length should be 16, 24 or 32") 48 | return 49 | case 16, 24, 32: 50 | break 51 | } 52 | 53 | s, err := secu.DealWithDecrypt(f.Data, f.Key) 54 | if err != nil { 55 | c.String(500, err.Error()) 56 | } 57 | 58 | c.JSON(200, gin.H{ 59 | "src": f.Data, 60 | "key": f.Key, 61 | "decrypt": s, 62 | }) 63 | } 64 | -------------------------------------------------------------------------------- /models/sso_config.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/ccfos/nightingale/v6/pkg/ctx" 7 | ) 8 | 9 | type SsoConfig struct { 10 | Id int64 `json:"id"` 11 | Name string `json:"name"` 12 | Content string `json:"content"` 13 | UpdateAt int64 `json:"update_at"` 14 | } 15 | 16 | func (b *SsoConfig) TableName() string { 17 | return "sso_config" 18 | } 19 | 20 | // get all sso_config 21 | func SsoConfigGets(c *ctx.Context) ([]SsoConfig, error) { 22 | var lst []SsoConfig 23 | err := DB(c).Find(&lst).Error 24 | return lst, err 25 | } 26 | 27 | // create sso_config 28 | func (b *SsoConfig) Create(c *ctx.Context) error { 29 | return Insert(c, b) 30 | } 31 | 32 | func (b *SsoConfig) Update(c *ctx.Context) error { 33 | b.UpdateAt = time.Now().Unix() 34 | return DB(c).Model(b).Select("content", "update_at").Updates(b).Error 35 | } 36 | 37 | // get sso_config last update time 38 | func SsoConfigLastUpdateTime(c *ctx.Context) (int64, error) { 39 | var lastUpdateTime int64 40 | err := DB(c).Model(&SsoConfig{}).Select("max(update_at)").Row().Scan(&lastUpdateTime) 41 | return lastUpdateTime, err 42 | } 43 | 44 | // get sso_config count by name 45 | func SsoConfigCountByName(c *ctx.Context, name string) (int64, error) { 46 | var count int64 47 | err := DB(c).Model(&SsoConfig{}).Where("name = ?", name).Count(&count).Error 48 | return count, err 49 | } 50 | -------------------------------------------------------------------------------- /integrations/Linux/markdown/README.md: -------------------------------------------------------------------------------- 1 | # Linux 2 | 3 | Linux 类别下,包含多个内置插件,比如 cpu、mem、net、netstat、kernel_vmstat 等,这些插件大都是默认开启的,无需额外配置,可能有额外配置需求的插件如下。 4 | 5 | ## cpu 6 | 7 | 统计 CPU 使用率,默认只采集整机的情况,不采集每个 CPU Core 的情况,如果想采集每个 CPU Core 的情况,可以配置如下。 8 | 9 | ```ini 10 | collect_per_cpu = true 11 | ``` 12 | 13 | ## netstat 14 | 15 | 统计网络连接数,默认配置如下,可根据实际情况调整。 16 | 17 | ```ini 18 | # 默认开启了 summary 统计,类似 ss -s 命令的输出 19 | disable_summary_stats = false 20 | # 默认关闭了所有连接的详细统计,在连接数较多的机器上统计此数据会影响性能 21 | disable_connection_stats = true 22 | # 读取 /proc/net/netstat 的内容,默认关闭了,可以开启,这部分不影响性能 23 | tcp_ext = false 24 | ip_ext = false 25 | ``` 26 | 27 | ## disk 28 | 29 | 统计磁盘使用率,默认配置如下,可根据实际情况调整。 30 | 31 | ```ini 32 | # 严格指定要采集的挂载点,如果指定了,就只采集指定的挂载点 33 | # mount_points = ["/"] 34 | 35 | # 有些 fstype 没必要采集,可以忽略 36 | ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs", "nsfs", "CDFS", "fuse.juicefs"] 37 | 38 | # 有些挂载点没必要采集,可以忽略,这里可以配置前缀,符合前缀的挂载点都会被忽略 39 | ignore_mount_points = ["/boot", "/var/lib/kubelet/pods"] 40 | ``` 41 | 42 | ## kernel_vmstat 43 | 44 | 统计的信息来自 `/proc/vmstat`,只有高版本内核才支持,这个文件的内容较多,默认配置只采集了 oom_kill 次数,其他指标均未采集,如果你想打开其他采集开关,可以修改 white_list 部分的配置。下面是截取了一部分内容,供参考: 45 | 46 | ```toml 47 | [white_list] 48 | oom_kill = 1 49 | nr_free_pages = 0 50 | nr_alloc_batch = 0 51 | ...
52 | ``` 53 | 54 | ## arp_package 55 | 56 | 统计 ARP 包的数量,该插件依赖 cgo,如果需要该插件需要下载 `with-cgo` 的 categraf 发布包。 -------------------------------------------------------------------------------- /models/migrate/migrate_es_index_pattern.go: -------------------------------------------------------------------------------- 1 | package migrate 2 | 3 | import ( 4 | "github.com/toolkits/pkg/logger" 5 | "gorm.io/gorm" 6 | ) 7 | 8 | type EsIndexPattern struct { 9 | Id int64 `gorm:"primaryKey;type:bigint unsigned"` 10 | DatasourceId int64 `gorm:"type:bigint not null default '0';uniqueIndex:idx_ds_name"` 11 | Name string `gorm:"type:varchar(191) not null default '';uniqueIndex:idx_ds_name"` 12 | TimeField string `gorm:"type:varchar(128) not null default ''"` 13 | AllowHideSystemIndices int `gorm:"type:tinyint(1) not null default 0"` 14 | FieldsFormat string `gorm:"type:varchar(4096) not null default ''"` 15 | CreateAt int64 `gorm:"type:bigint default '0'"` 16 | CreateBy string `gorm:"type:varchar(64) default ''"` 17 | UpdateAt int64 `gorm:"type:bigint default '0'"` 18 | UpdateBy string `gorm:"type:varchar(64) default ''"` 19 | } 20 | 21 | func MigrateEsIndexPatternTable(db *gorm.DB) error { 22 | db = db.Set("gorm:table_options", "CHARSET=utf8mb4") 23 | if db.Migrator().HasTable("es_index_pattern") { 24 | return nil 25 | } 26 | 27 | err := db.Table("es_index_pattern").AutoMigrate(&EsIndexPattern{}) 28 | if err != nil { 29 | logger.Errorf("failed to migrate es index pattern table: %v", err) 30 | return err 31 | } 32 | 33 | return nil 34 | } 35 | -------------------------------------------------------------------------------- /conf/crypto.go: -------------------------------------------------------------------------------- 1 | package conf 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/ccfos/nightingale/v6/pkg/secu" 7 | ) 8 | 9 | func decryptConfig(config *ConfigType, cryptoKey string) error { 10 | decryptDsn, err := secu.DealWithDecrypt(config.DB.DSN, cryptoKey) 11 | if err != nil { 12 | return fmt.Errorf("failed to decrypt the db dsn: %s", err) 13 | } 14 | 15 | config.DB.DSN = decryptDsn 16 | 17 | for k := range config.HTTP.APIForService.BasicAuth { 18 | decryptPwd, err := secu.DealWithDecrypt(config.HTTP.APIForService.BasicAuth[k], cryptoKey) 19 | if err != nil { 20 | return fmt.Errorf("failed to decrypt http basic auth password: %s", err) 21 | } 22 | 23 | config.HTTP.APIForService.BasicAuth[k] = decryptPwd 24 | } 25 | 26 | for k := range config.HTTP.APIForAgent.BasicAuth { 27 | decryptPwd, err := secu.DealWithDecrypt(config.HTTP.APIForAgent.BasicAuth[k], cryptoKey) 28 | if err != nil { 29 | return fmt.Errorf("failed to decrypt http basic auth password: %s", err) 30 | } 31 | 32 | config.HTTP.APIForAgent.BasicAuth[k] = decryptPwd 33 | } 34 | 35 | for i, v := range config.Pushgw.Writers { 36 | decryptWriterPwd, err := secu.DealWithDecrypt(v.BasicAuthPass, cryptoKey) 37 | if err != nil { 38 | return fmt.Errorf("failed to decrypt writer basic auth password: %s", err) 39 | } 40 | 41 | config.Pushgw.Writers[i].BasicAuthPass = decryptWriterPwd 42 | } 43 | 44 | return nil 45 | } 46 | -------------------------------------------------------------------------------- /cli/upgrade/config.go: -------------------------------------------------------------------------------- 1 | package upgrade 2 | 3 | import ( 4 | "bytes" 5 | "path" 6 | 7 | "github.com/ccfos/nightingale/v6/pkg/cfg" 8 | "github.com/ccfos/nightingale/v6/pkg/ormx" 9 | "github.com/ccfos/nightingale/v6/pkg/tlsx" 10 | "github.com/koding/multiconfig" 11 | ) 12 | 13 
| type Config struct { 14 | DB ormx.DBConfig 15 | Clusters []ClusterOptions 16 | } 17 | 18 | type ClusterOptions struct { 19 | Name string 20 | Prom string 21 | 22 | BasicAuthUser string 23 | BasicAuthPass string 24 | 25 | Headers []string 26 | 27 | Timeout int64 28 | DialTimeout int64 29 | 30 | UseTLS bool 31 | tlsx.ClientConfig 32 | 33 | MaxIdleConnsPerHost int 34 | } 35 | 36 | func Parse(fpath string, configPtr interface{}) error { 37 | var ( 38 | tBuf []byte 39 | ) 40 | loaders := []multiconfig.Loader{ 41 | &multiconfig.TagLoader{}, 42 | &multiconfig.EnvironmentLoader{}, 43 | } 44 | s := cfg.NewFileScanner() 45 | 46 | s.Read(path.Join(fpath)) 47 | tBuf = append(tBuf, s.Data()...) 48 | tBuf = append(tBuf, []byte("\n")...) 49 | 50 | if s.Err() != nil { 51 | return s.Err() 52 | } 53 | 54 | if len(tBuf) != 0 { 55 | loaders = append(loaders, &multiconfig.TOMLLoader{Reader: bytes.NewReader(tBuf)}) 56 | } 57 | 58 | m := multiconfig.DefaultLoader{ 59 | Loader: multiconfig.MultiLoader(loaders...), 60 | Validator: multiconfig.MultiValidator(&multiconfig.RequiredValidator{}), 61 | } 62 | return m.Load(configPtr) 63 | } 64 | -------------------------------------------------------------------------------- /integrations/Kubernetes/markdown/README.md: -------------------------------------------------------------------------------- 1 | # Kubernetes 2 | 3 | 这个插件已经废弃。Kubernetes 监控系列可以参考这个 [文章](https://flashcat.cloud/categories/kubernetes%E7%9B%91%E6%8E%A7%E4%B8%93%E6%A0%8F/)。或者参考 [专栏](https://time.geekbang.org/column/article/630306)。 4 | 5 | 不过 Kubernetes 这个类别下的内置告警规则和内置仪表盘都是可以使用的。 6 | 7 | --- 8 | 9 | 下面是老插件文档: 10 | 11 | forked from telegraf/kubernetes. 这个插件的作用是通过kubelet提供的API获取监控数据,包括系统容器的监控数据、node的、pod数据卷的、pod网络的、pod容器的。 12 | 13 | ## Change 14 | 15 | 增加了一些控制开关: 16 | 17 | `gather_system_container_metrics = true` 18 | 19 | 是否采集 system 容器(kubelet、runtime、misc、pods),比如 kubelet 一般就是静态容器,非业务容器 20 | 21 | `gather_node_metrics = true` 22 | 23 | 是否采集 node 层面的指标,机器层面的指标其实 categraf 来采集了,这里理论上不需要再采集了,可以设置为 false,采集也没问题,也没多少数据 24 | 25 | `gather_pod_container_metrics = true` 26 | 27 | 是否采集 Pod 中的容器的指标,这些 Pod 一般是业务容器 28 | 29 | `gather_pod_volume_metrics = true` 30 | 31 | 是否采集 Pod 的数据卷的指标 32 | 33 | `gather_pod_network_metrics = true` 34 | 35 | 是否采集 Pod 的网络监控数据 36 | 37 | ## 容器监控 38 | 39 | 通过这些开关可以看出,kubernetes 这个插件,采集的只是 pod、容器的监控指标,这些指标数据来自 kubelet 的 `/stats/summary` `/pods` 等接口。那么问题来了,容器监控到底是应该读取 `/metrics/cadvisor` 接口还是应该用这个 kubernetes 插件?有几个决策依据: 40 | 41 | 1. `/metrics/cadvisor` 采集的数据没有业务自定义标签,kubernetes 这个插件会自动带上业务自定义标签。但是业务标签可能比较混乱,建议每个公司制定规范,比如要求业务只能打 project、region、env、service、app、job 等标签,其他标签都过滤掉,通过 kubernetes 插件的 label_include label_exclude 配置,可以做标签过滤。 42 | 2. 
kubernetes 这个插件采集的数据比 `/metrics/cadvisor` 吐出的指标要少,不过常见的 cpu、mem、net、volume 相关的也都有。 43 | -------------------------------------------------------------------------------- /integrations/Prometheus/markdown/README.md: -------------------------------------------------------------------------------- 1 | # prometheus 2 | 3 | prometheus 插件的作用,就是抓取 `/metrics` 接口的数据,上报给服务端。通过,各类 exporter 会暴露 `/metrics` 接口数据,越来越多的开源组件也会内置 prometheus SDK,吐出 prometheus 格式的监控数据,比如 rabbitmq 插件,其 README 中就有介绍。 4 | 5 | 这个插件 fork 自 telegraf/prometheus,做了一些删减改造,仍然支持通过 consul 做服务发现,管理所有的目标地址,删掉了 Kubernetes 部分,Kubernetes 部分准备放到其他插件里实现。 6 | 7 | 增加了两个配置:url_label_key 和 url_label_value。为了标识监控数据是从哪个 scrape url 拉取的,会为监控数据附一个标签来标识这个 url,默认的标签 KEY 是用 instance,当然,也可以改成别的,不过不建议。url_label_value 是标签值,支持 go template 语法,如果为空,就是整个 url 的内容,也可以通过模板变量只取一部分,比如 `http://localhost:9104/metrics`,只想取 IP 和端口部分,就可以写成: 8 | 9 | ```ini 10 | url_label_value = "{{.Host}}" 11 | ``` 12 | 13 | 如果 HTTP scheme 部分和 `/metrics` Path 部分都想取,可以这么写: 14 | 15 | ```ini 16 | url_label_value = "{{.Scheme}}://{{.Host}}{{.Path}}" 17 | ``` 18 | 19 | 相关变量是用这个方法生成的,供大家参考: 20 | 21 | ```go 22 | func (ul *UrlLabel) GenerateLabel(u *url.URL) (string, string, error) { 23 | if ul.LabelValue == "" { 24 | return ul.LabelKey, u.String(), nil 25 | } 26 | 27 | dict := map[string]string{ 28 | "Scheme": u.Scheme, 29 | "Host": u.Host, 30 | "Hostname": u.Hostname(), 31 | "Port": u.Port(), 32 | "Path": u.Path, 33 | "Query": u.RawQuery, 34 | "Fragment": u.Fragment, 35 | } 36 | 37 | var buffer bytes.Buffer 38 | err := ul.LabelValueTpl.Execute(&buffer, dict) 39 | if err != nil { 40 | return "", "", err 41 | } 42 | 43 | return ul.LabelKey, buffer.String(), nil 44 | } 45 | ``` -------------------------------------------------------------------------------- /alert/aconf/conf.go: -------------------------------------------------------------------------------- 1 | package aconf 2 | 3 | import ( 4 | "path" 5 | ) 6 | 7 | type Alert struct { 8 | Disable bool 9 | EngineDelay int64 10 | Heartbeat HeartbeatConfig 11 | Alerting Alerting 12 | } 13 | 14 | type SMTPConfig struct { 15 | Host string 16 | Port int 17 | User string 18 | Pass string 19 | From string 20 | InsecureSkipVerify bool 21 | Batch int 22 | } 23 | 24 | type HeartbeatConfig struct { 25 | IP string 26 | Interval int64 27 | Endpoint string 28 | EngineName string 29 | } 30 | 31 | type Alerting struct { 32 | Timeout int64 33 | TemplatesDir string 34 | NotifyConcurrency int 35 | } 36 | 37 | type CallPlugin struct { 38 | Enable bool 39 | PluginPath string 40 | Caller string 41 | } 42 | 43 | type RedisPub struct { 44 | Enable bool 45 | ChannelPrefix string 46 | ChannelKey string 47 | } 48 | 49 | func (a *Alert) PreCheck(configDir string) { 50 | if a.Alerting.TemplatesDir == "" { 51 | a.Alerting.TemplatesDir = path.Join(configDir, "template") 52 | } 53 | 54 | if a.Alerting.NotifyConcurrency == 0 { 55 | a.Alerting.NotifyConcurrency = 10 56 | } 57 | 58 | if a.Heartbeat.Interval == 0 { 59 | a.Heartbeat.Interval = 1000 60 | } 61 | 62 | if a.Heartbeat.EngineName == "" { 63 | a.Heartbeat.EngineName = "default" 64 | } 65 | 66 | if a.EngineDelay == 0 { 67 | a.EngineDelay = 30 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /center/router/router_server.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/ccfos/nightingale/v6/models" 7 | 8 | "github.com/gin-gonic/gin" 9 | "github.com/toolkits/pkg/ginx" 10 | ) 11 
| 12 | func (rt *Router) serversGet(c *gin.Context) { 13 | list, err := models.AlertingEngineGets(rt.Ctx, "") 14 | ginx.NewRender(c).Data(list, err) 15 | } 16 | 17 | func (rt *Router) serverClustersGet(c *gin.Context) { 18 | list, err := models.AlertingEngineGetsClusters(rt.Ctx, "") 19 | ginx.NewRender(c).Data(list, err) 20 | } 21 | 22 | func (rt *Router) serverHeartbeat(c *gin.Context) { 23 | var req models.HeartbeatInfo 24 | ginx.BindJSON(c, &req) 25 | err := models.AlertingEngineHeartbeatWithCluster(rt.Ctx, req.Instance, req.EngineCluster, req.DatasourceId) 26 | ginx.NewRender(c).Message(err) 27 | } 28 | 29 | func (rt *Router) serversActive(c *gin.Context) { 30 | datasourceId := ginx.QueryInt64(c, "dsid", 0) 31 | engineName := ginx.QueryStr(c, "engine_name", "") 32 | if engineName != "" { 33 | servers, err := models.AlertingEngineGetsInstances(rt.Ctx, "engine_cluster = ? and clock > ?", engineName, time.Now().Unix()-30) 34 | ginx.NewRender(c).Data(servers, err) 35 | return 36 | } 37 | 38 | if datasourceId == 0 { 39 | ginx.NewRender(c).Message("dsid is required") 40 | return 41 | } 42 | servers, err := models.AlertingEngineGetsInstances(rt.Ctx, "datasource_id = ? and clock > ?", datasourceId, time.Now().Unix()-30) 43 | ginx.NewRender(c).Data(servers, err) 44 | } 45 | -------------------------------------------------------------------------------- /integrations/Jolokia_Agent/collect/jolokia_agent/java.toml: -------------------------------------------------------------------------------- 1 | 2 | [[instances]] 3 | urls = ["http://localhost:8080/jolokia"] 4 | 5 | [[instances.metric]] 6 | name = "java_runtime" 7 | mbean = "java.lang:type=Runtime" 8 | paths = ["Uptime"] 9 | 10 | [[instances.metric]] 11 | name = "java_memory" 12 | mbean = "java.lang:type=Memory" 13 | paths = ["HeapMemoryUsage", "NonHeapMemoryUsage", "ObjectPendingFinalizationCount"] 14 | 15 | [[instances.metric]] 16 | name = "java_garbage_collector" 17 | mbean = "java.lang:name=*,type=GarbageCollector" 18 | paths = ["CollectionTime", "CollectionCount"] 19 | tag_keys = ["name"] 20 | 21 | [[instances.metric]] 22 | name = "java_last_garbage_collection" 23 | mbean = "java.lang:name=G1 Young Generation,type=GarbageCollector" 24 | paths = ["LastGcInfo/duration", "LastGcInfo/GcThreadCount", "LastGcInfo/memoryUsageAfterGc"] 25 | 26 | [[instances.metric]] 27 | name = "java_threading" 28 | mbean = "java.lang:type=Threading" 29 | paths = ["TotalStartedThreadCount", "ThreadCount", "DaemonThreadCount", "PeakThreadCount"] 30 | 31 | [[instances.metric]] 32 | name = "java_class_loading" 33 | mbean = "java.lang:type=ClassLoading" 34 | paths = ["LoadedClassCount", "UnloadedClassCount", "TotalLoadedClassCount"] 35 | 36 | [[instances.metric]] 37 | name = "java_memory_pool" 38 | mbean = "java.lang:name=*,type=MemoryPool" 39 | paths = ["Usage", "PeakUsage", "CollectionUsage"] 40 | tag_keys = ["name"] 41 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: prebuild build 2 | 3 | ROOT:=$(shell pwd -P) 4 | GIT_COMMIT:=$(shell git --work-tree ${ROOT} rev-parse 'HEAD^{commit}') 5 | _GIT_VERSION:=$(shell git --work-tree ${ROOT} describe --tags --abbrev=14 "${GIT_COMMIT}^{commit}" 2>/dev/null) 6 | TAG=$(shell echo "${_GIT_VERSION}" | awk -F"-" '{print $$1}') 7 | RELEASE_VERSION:="$(TAG)-$(GIT_COMMIT)" 8 | 9 | all: prebuild build 10 | 11 | prebuild: 12 | echo "begin download and embed the front-end file..." 
13 | sh fe.sh 14 | echo "front-end file download and embedding completed." 15 | 16 | build: 17 | go build -ldflags "-w -s -X github.com/ccfos/nightingale/v6/pkg/version.Version=$(RELEASE_VERSION)" -o n9e ./cmd/center/main.go 18 | 19 | build-edge: 20 | go build -ldflags "-w -s -X github.com/ccfos/nightingale/v6/pkg/version.Version=$(RELEASE_VERSION)" -o n9e-edge ./cmd/edge/ 21 | 22 | build-alert: 23 | go build -ldflags "-w -s -X github.com/ccfos/nightingale/v6/pkg/version.Version=$(RELEASE_VERSION)" -o n9e-alert ./cmd/alert/main.go 24 | 25 | build-pushgw: 26 | go build -ldflags "-w -s -X github.com/ccfos/nightingale/v6/pkg/version.Version=$(RELEASE_VERSION)" -o n9e-pushgw ./cmd/pushgw/main.go 27 | 28 | build-cli: 29 | go build -ldflags "-w -s -X github.com/ccfos/nightingale/v6/pkg/version.Version=$(RELEASE_VERSION)" -o n9e-cli ./cmd/cli/main.go 30 | 31 | run: 32 | nohup ./n9e > n9e.log 2>&1 & 33 | 34 | run-alert: 35 | nohup ./n9e-alert > n9e-alert.log 2>&1 & 36 | 37 | run-pushgw: 38 | nohup ./n9e-pushgw > n9e-pushgw.log 2>&1 & 39 | 40 | release: 41 | goreleaser --skip-validate --skip-publish --snapshot -------------------------------------------------------------------------------- /models/board_payload.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "errors" 5 | 6 | "github.com/ccfos/nightingale/v6/pkg/ctx" 7 | ) 8 | 9 | type BoardPayload struct { 10 | Id int64 `json:"id" gorm:"primaryKey"` 11 | Payload string `json:"payload"` 12 | } 13 | 14 | func (p *BoardPayload) TableName() string { 15 | return "board_payload" 16 | } 17 | 18 | func (p *BoardPayload) Update(ctx *ctx.Context, selectField interface{}, selectFields ...interface{}) error { 19 | return DB(ctx).Model(p).Select(selectField, selectFields...).Updates(p).Error 20 | } 21 | 22 | func BoardPayloadGets(ctx *ctx.Context, ids []int64) ([]*BoardPayload, error) { 23 | if len(ids) == 0 { 24 | return nil, errors.New("empty ids") 25 | } 26 | 27 | var arr []*BoardPayload 28 | err := DB(ctx).Where("id in ?", ids).Find(&arr).Error 29 | return arr, err 30 | } 31 | 32 | func BoardPayloadGet(ctx *ctx.Context, id int64) (string, error) { 33 | payloads, err := BoardPayloadGets(ctx, []int64{id}) 34 | if err != nil { 35 | return "", err 36 | } 37 | 38 | if len(payloads) == 0 { 39 | return "", nil 40 | } 41 | 42 | return payloads[0].Payload, nil 43 | } 44 | 45 | func BoardPayloadSave(ctx *ctx.Context, id int64, payload string) error { 46 | var bp BoardPayload 47 | err := DB(ctx).Where("id = ?", id).Find(&bp).Error 48 | if err != nil { 49 | return err 50 | } 51 | 52 | if bp.Id > 0 { 53 | // already exists 54 | bp.Payload = payload 55 | return bp.Update(ctx, "payload") 56 | } 57 | 58 | return Insert(ctx, &BoardPayload{ 59 | Id: id, 60 | Payload: payload, 61 | }) 62 | } 63 | -------------------------------------------------------------------------------- /pkg/poster/post_test.go: -------------------------------------------------------------------------------- 1 | package poster 2 | 3 | import ( 4 | "encoding/json" 5 | "net/http" 6 | "net/http/httptest" 7 | "testing" 8 | 9 | "github.com/ccfos/nightingale/v6/conf" 10 | "github.com/ccfos/nightingale/v6/pkg/ctx" 11 | ) 12 | 13 | func TestPostByUrls(t *testing.T) { 14 | 15 | server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 16 | response := DataResponse[interface{}]{Dat: "", Err: ""} 17 | json.NewEncoder(w).Encode(response) 18 | })) 19 | defer server.Close() 20 | 21 | ctx := &ctx.Context{ 22 
| CenterApi: conf.CenterApi{ 23 | Addrs: []string{server.URL}, 24 | }} 25 | 26 | if err := PostByUrls(ctx, "/v1/n9e/server-heartbeat", map[string]string{"a": "aa"}); err != nil { 27 | t.Errorf("PostByUrls() error = %v ", err) 28 | } 29 | } 30 | 31 | func TestPostByUrlsWithResp(t *testing.T) { 32 | 33 | expected := int64(123) 34 | 35 | server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 36 | response := DataResponse[int64]{Dat: expected, Err: ""} 37 | json.NewEncoder(w).Encode(response) 38 | })) 39 | defer server.Close() 40 | 41 | ctx := &ctx.Context{ 42 | CenterApi: conf.CenterApi{ 43 | Addrs: []string{server.URL}, 44 | }} 45 | 46 | gotT, err := PostByUrlsWithResp[int64](ctx, "/v1/n9e/event-persist", map[string]string{"b": "bb"}) 47 | if err != nil { 48 | t.Errorf("PostByUrlsWithResp() error = %v", err) 49 | return 50 | } 51 | if gotT != expected { 52 | t.Errorf("PostByUrlsWithResp() gotT = %v,expected = %v", gotT, expected) 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /integrations/Prometheus/collect/prometheus/prometheus.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | [[instances]] 5 | urls = [ 6 | # "http://localhost:19000/metrics" 7 | ] 8 | 9 | url_label_key = "instance" 10 | url_label_value = "{{.Host}}" 11 | 12 | ## Scrape Services available in Consul Catalog 13 | # [instances.consul] 14 | # enabled = false 15 | # agent = "http://localhost:8500" 16 | # query_interval = "5m" 17 | 18 | # [[instances.consul.query]] 19 | # name = "a service name" 20 | # tag = "a service tag" 21 | # url = 'http://{{if ne .ServiceAddress ""}}{{.ServiceAddress}}{{else}}{{.Address}}{{end}}:{{.ServicePort}}/{{with .ServiceMeta.metrics_path}}{{.}}{{else}}metrics{{end}}' 22 | # [instances.consul.query.tags] 23 | # host = "{{.Node}}" 24 | 25 | # bearer_token_string = "" 26 | 27 | # e.g. /run/secrets/kubernetes.io/serviceaccount/token 28 | # bearer_token_file = "" 29 | 30 | # # basic auth 31 | # username = "" 32 | # password = "" 33 | 34 | # headers = ["X-From", "categraf"] 35 | 36 | # # interval = global.interval * interval_times 37 | # interval_times = 1 38 | 39 | # labels = {} 40 | 41 | # support glob 42 | # ignore_metrics = [ "go_*" ] 43 | 44 | # support glob 45 | # ignore_label_keys = [] 46 | 47 | # timeout for every url 48 | # timeout = "3s" 49 | 50 | ## Optional TLS Config 51 | # use_tls = false 52 | # tls_min_version = "1.2" 53 | # tls_ca = "/etc/categraf/ca.pem" 54 | # tls_cert = "/etc/categraf/cert.pem" 55 | # tls_key = "/etc/categraf/key.pem" 56 | ## Use TLS but skip chain & host verification 57 | # insecure_skip_verify = true 58 | -------------------------------------------------------------------------------- /integrations/NFSClient/collect/nfsclient/nfsclient.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | ## Read more low-level metrics (optional, defaults to false) 5 | fullstat = false 6 | 7 | ## List of mounts to explictly include or exclude (optional) 8 | ## The pattern (Go regexp) is matched against the mount point (not the 9 | ## device being mounted). If include_mounts is set, all mounts are ignored 10 | ## unless present in the list. If a mount is listed in both include_mounts 11 | ## and exclude_mounts, it is excluded. Go regexp patterns can be used. 
12 | 13 | # include_mounts = [] 14 | # exclude_mounts = [] 15 | 16 | ## List of operations to include or exclude from collecting. This applies 17 | ## only when fullstat=true. Symantics are similar to {include,exclude}_mounts: 18 | ## the default is to collect everything; when include_operations is set, only 19 | ## those OPs are collected; when exclude_operations is set, all are collected 20 | ## except those listed. If include and exclude are set, the OP is excluded. 21 | ## See /proc/self/mountstats for a list of valid operations; note that 22 | ## NFSv3 and NFSv4 have different lists. While it is not possible to 23 | ## have different include/exclude lists for NFSv3/4, unused elements 24 | ## in the list should be okay. It is possible to have different lists 25 | ## for different mountpoints: use mulitple [[input.nfsclient]] stanzas, 26 | ## with their own lists. See "include_mounts" above, and be careful of 27 | ## duplicate metrics. 28 | 29 | # include_operations = ['READ','WRITE','ACCESS','GETATTR','READDIR','LOOKUP'] 30 | # exclude_operations = [] 31 | -------------------------------------------------------------------------------- /integrations/PHP/collect/phpfpm/phpfpm.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | [[instances]] 5 | ## An array of Nginx stub_status URI to gather stats. 6 | urls = [ 7 | ## HTTP: the URL must start with http:// or https://, ie: 8 | # "http://localhost/status", 9 | # "https://www.baidu.com/phpfpm-status", 10 | ## fcgi: the URL must start with fcgi:// or cgi://, and port must be present, ie: 11 | # "fcgi://127.0.0.1:9001", 12 | # "cgi://192.168.0.1:9000/status", 13 | ## Unix socket: path to fpm socket, ie: 14 | # "/run/php/php7.2-fpm.sock", 15 | ## or using a custom fpm status path: 16 | # "/var/run/php5-fpm.sock:/fpm-custom-status-path", 17 | ## glob patterns are also supported: 18 | # "/var/run/php*.sock" 19 | ] 20 | 21 | ## append some labels for series 22 | # labels = { region="cloud", product="n9e" } 23 | 24 | ## interval = global.interval * interval_times 25 | # interval_times = 1 26 | 27 | ## Set response_timeout (default 5 seconds),HTTP urls only 28 | response_timeout = "5s" 29 | 30 | ## Whether to follow redirects from the server (defaults to false),HTTP urls only 31 | # follow_redirects = false 32 | 33 | ## Optional HTTP Basic Auth Credentials,HTTP urls only 34 | #username = "admin" 35 | #password = "admin" 36 | 37 | ## Optional headers,HTTP urls only 38 | # headers = ["X-From", "categraf", "X-Xyz", "abc"] 39 | 40 | ## Optional TLS Config,only http 41 | # use_tls = false 42 | # tls_ca = "/etc/categraf/ca.pem" 43 | # tls_cert = "/etc/categraf/cert.pem" 44 | # tls_key = "/etc/categraf/key.pem" 45 | ## Use TLS but skip chain & host verification 46 | # insecure_skip_verify = false -------------------------------------------------------------------------------- /integrations/Net_Response/alerts/net_response_by_categraf.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": 0, 4 | "group_id": 0, 5 | "cate": "", 6 | "datasource_ids": null, 7 | "cluster": "", 8 | "name": "Network address probe failed", 9 | "note": "", 10 | "prod": "", 11 | "algorithm": "", 12 | "algo_params": null, 13 | "delay": 0, 14 | "severity": 2, 15 | "severities": null, 16 | "disabled": 0, 17 | "prom_for_duration": 60, 18 | "prom_ql": "net_response_result_code != 0", 19 | "rule_config": null, 20 | "prom_eval_interval": 15, 21 | 
"enable_stime": "00:00", 22 | "enable_stimes": null, 23 | "enable_etime": "23:59", 24 | "enable_etimes": null, 25 | "enable_days_of_week": [ 26 | "1", 27 | "2", 28 | "3", 29 | "4", 30 | "5", 31 | "6", 32 | "0" 33 | ], 34 | "enable_days_of_weeks": null, 35 | "enable_in_bg": 0, 36 | "notify_recovered": 1, 37 | "notify_channels": [], 38 | "notify_groups_obj": null, 39 | "notify_groups": null, 40 | "notify_repeat_step": 60, 41 | "notify_max_number": 0, 42 | "recover_duration": 0, 43 | "callbacks": [], 44 | "runbook_url": "", 45 | "append_tags": [], 46 | "annotations": null, 47 | "extra_config": null, 48 | "create_at": 0, 49 | "create_by": "", 50 | "update_at": 0, 51 | "update_by": "", 52 | "uuid": 1717556328182186000 53 | } 54 | ] -------------------------------------------------------------------------------- /alert/sender/lark.go: -------------------------------------------------------------------------------- 1 | package sender 2 | 3 | import ( 4 | "html/template" 5 | "strings" 6 | 7 | "github.com/ccfos/nightingale/v6/models" 8 | ) 9 | 10 | var ( 11 | _ CallBacker = (*LarkSender)(nil) 12 | ) 13 | 14 | type LarkSender struct { 15 | tpl *template.Template 16 | } 17 | 18 | func (lk *LarkSender) CallBack(ctx CallBackContext) { 19 | if len(ctx.Events) == 0 || len(ctx.CallBackURL) == 0 { 20 | return 21 | } 22 | 23 | body := feishu{ 24 | Msgtype: "text", 25 | Content: feishuContent{ 26 | Text: BuildTplMessage(models.Lark, lk.tpl, ctx.Events), 27 | }, 28 | } 29 | 30 | doSend(ctx.CallBackURL, body, models.Lark, ctx.Stats) 31 | ctx.Stats.AlertNotifyTotal.WithLabelValues("rule_callback").Inc() 32 | } 33 | 34 | func (lk *LarkSender) Send(ctx MessageContext) { 35 | if len(ctx.Users) == 0 || len(ctx.Events) == 0 { 36 | return 37 | } 38 | urls := lk.extract(ctx.Users) 39 | message := BuildTplMessage(models.Lark, lk.tpl, ctx.Events) 40 | for _, url := range urls { 41 | body := feishu{ 42 | Msgtype: "text", 43 | Content: feishuContent{ 44 | Text: message, 45 | }, 46 | } 47 | doSend(url, body, models.Lark, ctx.Stats) 48 | } 49 | } 50 | 51 | func (lk *LarkSender) extract(users []*models.User) []string { 52 | urls := make([]string, 0, len(users)) 53 | 54 | for _, user := range users { 55 | if token, has := user.ExtractToken(models.Lark); has { 56 | url := token 57 | if !strings.HasPrefix(token, "https://") && !strings.HasPrefix(token, "http://") { 58 | url = "https://open.larksuite.com/open-apis/bot/v2/hook/" + token 59 | } 60 | urls = append(urls, url) 61 | } 62 | } 63 | return urls 64 | } 65 | -------------------------------------------------------------------------------- /integrations/Appdynamics/collect/appdynamics/app.toml: -------------------------------------------------------------------------------- 1 | #interval=15s 2 | 3 | [[instances]] 4 | #url_base = "http://{{.ip}}:{{.port}}/a.json?metric-path={{.metric_path}}&time-range-type=BETWEEN_TIMES&start-time={{.start_time}}&end-time={{.end_time}}&output=JSON" 5 | #url_vars = [ 6 | # { ip="127.0.0.1", port="8090", application="cms", metric_path="Application Infrastructure Performance|AdminServer|Individual Nodes|xxxxx|Agent|App|Availability", start_time="$START_TIME", end_time="$END_TIME"}, 7 | #] 8 | 9 | # # 指定url_vars中哪些key 作为最终的label附加 10 | # url_var_label_keys= [] 11 | 12 | # #从url中提取变量 13 | # url_label_key="instance" 14 | # url_label_value="{{.Host}}" 15 | # #自定义 http header 16 | #headers = { Authorization="", X-Forwarded-For="", Host=""} 17 | # #每次请求的超时时间 18 | #timeout="5s" 19 | 20 | # # precision of start-time and end-time 21 | #precision="ms" 22 
| 23 | ## basic auth 24 | #username="" 25 | #password="" 26 | 27 | # # endtime = now - delay 28 | #delay = "1m" 29 | # # starttime = now - delay - period = endtime - period 30 | #period = "1m" 31 | 32 | # # 想要添加的额外label 33 | #labels = {application="cms"} 34 | # # 从返回中过滤哪些指标 35 | filters = ["current", "max", "min", "value","sum", "count"] 36 | 37 | # # 限制并发请求量, 最多同时有多少个请求 38 | # # 默认范围(0,100) 39 | #request_inflight= 10 40 | ## 强制开启100以上的并发请求 (不推荐) 41 | # force_request_inflight = 1000 42 | 43 | # # 是否开启 tls 44 | # use_tls = true 45 | # # tls 最小版本 46 | ## tls_min_version = "1.2" 47 | # # tls ca证书路径 48 | ## tls_ca = "/etc/categraf/ca.pem" 49 | # # tls cert 路径 50 | ## tls_cert = "/etc/categraf/cert.pem" 51 | # # tls key 路径 52 | ## tls_key = "/etc/categraf/key.pem" 53 | # # 是否跳过证书验证 54 | ## insecure_skip_verify = true 55 | -------------------------------------------------------------------------------- /models/role_operation.go: -------------------------------------------------------------------------------- 1 | package models 2 | 3 | import ( 4 | "github.com/ccfos/nightingale/v6/pkg/ctx" 5 | "github.com/toolkits/pkg/slice" 6 | ) 7 | 8 | type RoleOperation struct { 9 | RoleName string 10 | Operation string 11 | } 12 | 13 | func (RoleOperation) TableName() string { 14 | return "role_operation" 15 | } 16 | 17 | func RoleHasOperation(ctx *ctx.Context, roles []string, operation string) (bool, error) { 18 | if len(roles) == 0 { 19 | return false, nil 20 | } 21 | 22 | return Exists(DB(ctx).Model(&RoleOperation{}).Where("operation = ? and role_name in ?", operation, roles)) 23 | } 24 | 25 | func OperationsOfRole(ctx *ctx.Context, roles []string) ([]string, error) { 26 | session := DB(ctx).Model(&RoleOperation{}).Select("distinct(operation) as operation") 27 | 28 | if !slice.ContainsString(roles, AdminRole) { 29 | session = session.Where("role_name in ?", roles) 30 | } 31 | 32 | var ret []string 33 | err := session.Pluck("operation", &ret).Error 34 | return ret, err 35 | } 36 | 37 | func RoleOperationBind(ctx *ctx.Context, roleName string, operation []string) error { 38 | tx := DB(ctx).Begin() 39 | 40 | if err := tx.Where("role_name = ?", roleName).Delete(&RoleOperation{}).Error; err != nil { 41 | tx.Rollback() 42 | return err 43 | } 44 | 45 | if len(operation) == 0 { 46 | return tx.Commit().Error 47 | } 48 | 49 | var ops []RoleOperation 50 | for _, op := range operation { 51 | ops = append(ops, RoleOperation{ 52 | RoleName: roleName, 53 | Operation: op, 54 | }) 55 | } 56 | 57 | if err := tx.Create(&ops).Error; err != nil { 58 | tx.Rollback() 59 | return err 60 | } 61 | 62 | return tx.Commit().Error 63 | } 64 | -------------------------------------------------------------------------------- /center/router/router_role_operation.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "net/http" 5 | 6 | "github.com/ccfos/nightingale/v6/center/cconf" 7 | "github.com/ccfos/nightingale/v6/models" 8 | "github.com/gin-gonic/gin" 9 | "github.com/toolkits/pkg/ginx" 10 | "github.com/toolkits/pkg/i18n" 11 | ) 12 | 13 | func (rt *Router) operationOfRole(c *gin.Context) { 14 | id := ginx.UrlParamInt64(c, "id") 15 | role, err := models.RoleGet(rt.Ctx, "id=?", id) 16 | ginx.Dangerous(err) 17 | if role == nil { 18 | ginx.Bomb(http.StatusOK, "role not found") 19 | } 20 | 21 | if role.Name == "Admin" { 22 | var lst []string 23 | for _, ops := range cconf.Operations.Ops { 24 | lst = append(lst, ops.Ops...) 
25 | } 26 | ginx.NewRender(c).Data(lst, nil) 27 | return 28 | } 29 | 30 | ops, err := models.OperationsOfRole(rt.Ctx, []string{role.Name}) 31 | ginx.NewRender(c).Data(ops, err) 32 | } 33 | 34 | func (rt *Router) roleBindOperation(c *gin.Context) { 35 | id := ginx.UrlParamInt64(c, "id") 36 | role, err := models.RoleGet(rt.Ctx, "id=?", id) 37 | ginx.Dangerous(err) 38 | if role == nil { 39 | ginx.Bomb(http.StatusOK, "role not found") 40 | } 41 | 42 | if role.Name == "Admin" { 43 | ginx.Bomb(http.StatusOK, "admin role can not be modified") 44 | } 45 | 46 | var ops []string 47 | ginx.BindJSON(c, &ops) 48 | 49 | ginx.NewRender(c).Message(models.RoleOperationBind(rt.Ctx, role.Name, ops)) 50 | } 51 | 52 | func (rt *Router) operations(c *gin.Context) { 53 | var ops []cconf.Ops 54 | for _, v := range rt.Operations.Ops { 55 | v.Cname = i18n.Sprintf(c.GetHeader("X-Language"), v.Cname) 56 | ops = append(ops, v) 57 | } 58 | 59 | ginx.NewRender(c).Data(ops, nil) 60 | } 61 | -------------------------------------------------------------------------------- /integrations/Filecount/collect/filecount/filecount.toml: -------------------------------------------------------------------------------- 1 | # # collect interval 2 | # interval = 15 3 | 4 | [[instances]] 5 | # # append some labels for series 6 | # labels = { region="cloud", product="n9e" } 7 | 8 | # # interval = global.interval * interval_times 9 | # interval_times = 1 10 | 11 | ## Directories to gather stats about. 12 | ## This accept standard unit glob matching rules, but with the addition of 13 | ## ** as a "super asterisk". ie: 14 | ## /var/log/** -> recursively find all directories in /var/log and count files in each directories 15 | ## /var/log/*/* -> find all directories with a parent dir in /var/log and count files in each directories 16 | ## /var/log -> count all files in /var/log and all of its subdirectories 17 | ## directories = ["/var/cache/apt", "/tmp"] 18 | directories = ["/tmp"] 19 | 20 | ## Only count files that match the name pattern. Defaults to "*". 21 | file_name = "*" 22 | 23 | ## Count files in subdirectories. Defaults to true. 24 | recursive = true 25 | 26 | ## Only count regular files. Defaults to true. 27 | regular_only = true 28 | 29 | ## Follow all symlinks while walking the directory tree. Defaults to false. 30 | follow_symlinks = false 31 | 32 | ## Only count files that are at least this size. If size is 33 | ## a negative number, only count files that are smaller than the 34 | ## absolute value of size. Acceptable units are B, KiB, MiB, KB, ... 35 | ## Without quotes and units, interpreted as size in bytes. 36 | size = "0B" 37 | 38 | ## Only count files that have not been touched for at least this 39 | ## duration. If mtime is negative, only count files that have been 40 | ## touched in this duration. Defaults to "0s". 
41 | mtime = "0s" 42 | -------------------------------------------------------------------------------- /center/cconf/event_example.go: -------------------------------------------------------------------------------- 1 | package cconf 2 | 3 | const EVENT_EXAMPLE = ` 4 | { 5 | "id": 1000000, 6 | "cate": "prometheus", 7 | "datasource_id": 1, 8 | "group_id": 1, 9 | "group_name": "Default Busi Group", 10 | "hash": "2cb966f9ba1cdc7af94c3796e855955a", 11 | "rule_id": 23, 12 | "rule_name": "测试告警", 13 | "rule_note": "测试告警", 14 | "rule_prod": "metric", 15 | "rule_config": { 16 | "queries": [ 17 | { 18 | "key": "all_hosts", 19 | "op": "==", 20 | "values": [] 21 | } 22 | ], 23 | "triggers": [ 24 | { 25 | "duration": 3, 26 | "percent": 10, 27 | "severity": 3, 28 | "type": "pct_target_miss" 29 | } 30 | ] 31 | }, 32 | "prom_for_duration": 60, 33 | "prom_eval_interval": 30, 34 | "callbacks": ["https://n9e.github.io"], 35 | "notify_recovered": 1, 36 | "notify_channels": ["dingtalk"], 37 | "notify_groups": [], 38 | "notify_groups_obj": null, 39 | "target_ident": "host01", 40 | "target_note": "机器备注", 41 | "trigger_time": 1677229517, 42 | "trigger_value": "2273533952", 43 | "tags": [ 44 | "__name__=disk_free", 45 | "dc=qcloud-dev", 46 | "device=vda1", 47 | "fstype=ext4", 48 | "ident=tt-fc-dev00.nj" 49 | ], 50 | "is_recovered": false, 51 | "notify_users_obj": null, 52 | "last_eval_time": 1677229517, 53 | "last_sent_time": 1677229517, 54 | "notify_cur_number": 1, 55 | "first_trigger_time": 1677229517, 56 | "annotations": { 57 | "summary": "测试告警" 58 | } 59 | } 60 | ` 61 | -------------------------------------------------------------------------------- /integrations/Ping/markdown/README.md: -------------------------------------------------------------------------------- 1 | # ping 2 | 3 | ping 监控插件,探测远端目标地址能否 ping 通,如果机器没有禁 ping,这就是一个很好用的探测机器存活的手段 4 | 5 | ## Configuration 6 | 7 | categraf 的 `conf/input.ping/ping.toml`。 8 | 9 | 要探测的机器配置到 targets 中,targets 是个数组,可以配置多个,当然也可以拆成多个 `[[instances]]` 配置段,比如: 10 | 11 | ``` 12 | [[instances]] 13 | targets = [ "10.4.5.6" ] 14 | labels = { region="cloud", product="n9e" } 15 | 16 | [[instances]] 17 | targets = [ "10.4.5.7" ] 18 | labels = { region="cloud", product="zbx" } 19 | ``` 20 | 21 | 上例中是 ping 两个地址,为了信息更丰富,附加了 region 和 product 标签 22 | 23 | ## File Limit 24 | 25 | ```sh 26 | systemctl edit categraf 27 | ``` 28 | 29 | Increase the number of open files: 30 | 31 | ```ini 32 | [Service] 33 | LimitNOFILE=8192 34 | ``` 35 | 36 | Restart Categraf: 37 | 38 | ```sh 39 | systemctl restart categraf 40 | ``` 41 | 42 | ### Linux Permissions 43 | 44 | On most systems, ping requires `CAP_NET_RAW` capabilities or for Categraf to be run as root. 45 | 46 | With systemd: 47 | 48 | ```sh 49 | systemctl edit categraf 50 | ``` 51 | 52 | ```ini 53 | [Service] 54 | CapabilityBoundingSet=CAP_NET_RAW 55 | AmbientCapabilities=CAP_NET_RAW 56 | ``` 57 | 58 | ```sh 59 | systemctl restart categraf 60 | ``` 61 | 62 | Without systemd: 63 | 64 | ```sh 65 | setcap cap_net_raw=eip /usr/bin/categraf 66 | ``` 67 | 68 | Reference [`man 7 capabilities`][man 7 capabilities] for more information about 69 | setting capabilities. 70 | 71 | [man 7 capabilities]: http://man7.org/linux/man-pages/man7/capabilities.7.html 72 | 73 | ### Other OS Permissions 74 | 75 | When using `method = "native"`, you will need permissions similar to the executable ping program for your OS. 
76 | 77 | ## 监控大盘和告警规则 78 | 79 | 夜莺内置了告警规则和监控大盘,克隆到自己的业务组下即可使用。 80 | -------------------------------------------------------------------------------- /pkg/i18nx/i18n.go: -------------------------------------------------------------------------------- 1 | package i18nx 2 | 3 | import ( 4 | "encoding/json" 5 | "path" 6 | 7 | "github.com/toolkits/pkg/file" 8 | "github.com/toolkits/pkg/i18n" 9 | "github.com/toolkits/pkg/logger" 10 | ) 11 | 12 | func Init(configDir string) { 13 | filePath := path.Join(configDir, "i18n.json") 14 | m := make(map[string]map[string]string) 15 | builtInConf := make(map[string]map[string]string) 16 | 17 | var content = I18N 18 | var err error 19 | //use built-in config 20 | err = json.Unmarshal([]byte(content), &builtInConf) 21 | if err != nil { 22 | logger.Errorf("parse i18n config file %s fail: %s\n", filePath, err) 23 | return 24 | } 25 | if !file.IsExist(filePath) { 26 | m = builtInConf 27 | } else { 28 | //expand config 29 | //prioritize the settings within the expand config options in case of conflicts 30 | content, err = file.ToTrimString(filePath) 31 | if err != nil { 32 | logger.Errorf("read i18n config file %s fail: %s\n", filePath, err) 33 | return 34 | } 35 | err = json.Unmarshal([]byte(content), &m) 36 | if err != nil { 37 | logger.Errorf("parse i18n config file %s fail: %s\n", filePath, err) 38 | return 39 | } 40 | // json Example: 41 | //{ 42 | // "zh": { 43 | // "username":"用户名" 44 | // }, 45 | // "fr": { 46 | // "username":"nom d'utilisateur" 47 | // } 48 | //} 49 | for languageKey, languageDict := range builtInConf { 50 | if _, hasL := m[languageKey]; hasL { //languages 51 | for k, v := range languageDict { 52 | if _, has := m[languageKey][k]; !has { 53 | m[languageKey][k] = v 54 | } 55 | } 56 | } else { 57 | m[languageKey] = languageDict 58 | } 59 | } 60 | } 61 | 62 | i18n.DictRegister(m) 63 | } 64 | -------------------------------------------------------------------------------- /center/router/router_builtin_componet.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "net/http" 5 | 6 | "github.com/ccfos/nightingale/v6/models" 7 | "github.com/gin-gonic/gin" 8 | "github.com/toolkits/pkg/ginx" 9 | ) 10 | 11 | func (rt *Router) builtinComponentsAdd(c *gin.Context) { 12 | var lst []models.BuiltinComponent 13 | ginx.BindJSON(c, &lst) 14 | 15 | username := Username(c) 16 | 17 | count := len(lst) 18 | if count == 0 { 19 | ginx.Bomb(http.StatusBadRequest, "input json is empty") 20 | } 21 | 22 | reterr := make(map[string]string) 23 | for i := 0; i < count; i++ { 24 | if err := lst[i].Add(rt.Ctx, username); err != nil { 25 | reterr[lst[i].Ident] = err.Error() 26 | } 27 | } 28 | 29 | ginx.NewRender(c).Data(reterr, nil) 30 | } 31 | 32 | func (rt *Router) builtinComponentsGets(c *gin.Context) { 33 | query := ginx.QueryStr(c, "query", "") 34 | 35 | bc, err := models.BuiltinComponentGets(rt.Ctx, query) 36 | ginx.Dangerous(err) 37 | 38 | ginx.NewRender(c).Data(bc, nil) 39 | } 40 | 41 | func (rt *Router) builtinComponentsPut(c *gin.Context) { 42 | var req models.BuiltinComponent 43 | ginx.BindJSON(c, &req) 44 | 45 | bc, err := models.BuiltinComponentGet(rt.Ctx, "id = ?", req.ID) 46 | ginx.Dangerous(err) 47 | 48 | if bc == nil { 49 | ginx.NewRender(c, http.StatusNotFound).Message("No such builtin component") 50 | return 51 | } 52 | 53 | username := Username(c) 54 | req.UpdatedBy = username 55 | 56 | ginx.NewRender(c).Message(bc.Update(rt.Ctx, req)) 57 | } 58 | 59 | func (rt *Router) 
builtinComponentsDel(c *gin.Context) { 60 | var req idsForm 61 | ginx.BindJSON(c, &req) 62 | 63 | req.Verify() 64 | 65 | ginx.NewRender(c).Message(models.BuiltinComponentDels(rt.Ctx, req.Ids)) 66 | } 67 | -------------------------------------------------------------------------------- /cmd/edge/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "log" 7 | "os" 8 | "os/signal" 9 | "syscall" 10 | 11 | "github.com/ccfos/nightingale/v6/pkg/osx" 12 | "github.com/ccfos/nightingale/v6/pkg/version" 13 | 14 | "github.com/toolkits/pkg/runner" 15 | ) 16 | 17 | var ( 18 | showVersion = flag.Bool("version", false, "Show version.") 19 | configDir = flag.String("configs", osx.GetEnv("N9E_EDGE_CONFIGS", "etc"), "Specify configuration directory.(env:N9E_EDGE_CONFIGS)") 20 | cryptoKey = flag.String("crypto-key", "", "Specify the secret key for configuration file field encryption.") 21 | ) 22 | 23 | func main() { 24 | flag.Parse() 25 | 26 | if *showVersion { 27 | fmt.Println(version.Version) 28 | os.Exit(0) 29 | } 30 | 31 | printEnv() 32 | 33 | cleanFunc, err := Initialize(*configDir, *cryptoKey) 34 | if err != nil { 35 | log.Fatalln("failed to initialize:", err) 36 | } 37 | 38 | code := 1 39 | sc := make(chan os.Signal, 1) 40 | signal.Notify(sc, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT) 41 | 42 | EXIT: 43 | for { 44 | sig := <-sc 45 | fmt.Println("received signal:", sig.String()) 46 | switch sig { 47 | case syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGINT: 48 | code = 0 49 | break EXIT 50 | case syscall.SIGHUP: 51 | // reload configuration? 52 | default: 53 | break EXIT 54 | } 55 | } 56 | 57 | cleanFunc() 58 | fmt.Println("process exited") 59 | os.Exit(code) 60 | } 61 | 62 | func printEnv() { 63 | runner.Init() 64 | fmt.Println("runner.cwd:", runner.Cwd) 65 | fmt.Println("runner.hostname:", runner.Hostname) 66 | fmt.Println("runner.fd_limits:", runner.FdLimits()) 67 | fmt.Println("runner.vm_limits:", runner.VMLimits()) 68 | } 69 | -------------------------------------------------------------------------------- /integrations/Tomcat/markdown/README.md: -------------------------------------------------------------------------------- 1 | # tomcat 2 | 3 | tomcat 采集器,是读取 tomcat 的管理侧接口 `/manager/status/all` 这个接口需要鉴权。修改 `tomcat-users.xml` ,增加下面的内容: 4 | 5 | ```xml 6 | 7 | 8 | ``` 9 | 10 | 此外,还需要注释文件**webapps/manager/META-INF/context.xml**的以下内容, 11 | ```xml 12 | 14 | ``` 15 | 16 | 否则 tomcat 会报以下错误,导致 tomcat 采集器无法采集到数据。 17 | 18 | ```html 19 | 403 Access Denied 20 | You are not authorized to view this page. 21 | 22 | By default the Manager is only accessible from a browser running on the same machine as Tomcat. If you wish to modify this restriction, you'll need to edit the Manager's context.xml file. 23 | ``` 24 | 25 | ## Configuration 26 | 27 | 配置文件在 `conf/input.tomcat/tomcat.toml` 28 | 29 | ```toml 30 | [[instances]] 31 | ## URL of the Tomcat server status 32 | url = "http://127.0.0.1:8080/manager/status/all?XML=true" 33 | 34 | ## HTTP Basic Auth Credentials 35 | username = "tomcat" 36 | password = "s3cret" 37 | 38 | ## Request timeout 39 | # timeout = "5s" 40 | 41 | # # interval = global.interval * interval_times 42 | # interval_times = 1 43 | 44 | # important! 
use global unique string to specify instance 45 | # labels = { instance="192.168.1.2:8080", url="-" } 46 | 47 | ## Optional TLS Config 48 | # use_tls = false 49 | # tls_min_version = "1.2" 50 | # tls_ca = "/etc/categraf/ca.pem" 51 | # tls_cert = "/etc/categraf/cert.pem" 52 | # tls_key = "/etc/categraf/key.pem" 53 | ## Use TLS but skip chain & host verification 54 | # insecure_skip_verify = true 55 | ``` 56 | 57 | ## 监控大盘 58 | 59 | 夜莺内置了 tomcat 仪表盘,克隆到自己的业务组下使用即可。 60 | -------------------------------------------------------------------------------- /pkg/cfg/cfg.go: -------------------------------------------------------------------------------- 1 | package cfg 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "path" 7 | "strings" 8 | 9 | "github.com/koding/multiconfig" 10 | "github.com/toolkits/pkg/file" 11 | "github.com/toolkits/pkg/runner" 12 | ) 13 | 14 | func LoadConfigByDir(configDir string, configPtr interface{}) error { 15 | var ( 16 | tBuf []byte 17 | ) 18 | 19 | loaders := []multiconfig.Loader{ 20 | &multiconfig.TagLoader{}, 21 | &multiconfig.EnvironmentLoader{}, 22 | } 23 | 24 | if !file.IsExist(configDir) { 25 | return fmt.Errorf("config directory: %s not exist. working directory: %s", configDir, runner.Cwd) 26 | } 27 | 28 | files, err := file.FilesUnder(configDir) 29 | if err != nil { 30 | return fmt.Errorf("failed to list files under: %s : %v", configDir, err) 31 | } 32 | s := NewFileScanner() 33 | for _, fpath := range files { 34 | switch { 35 | case strings.HasSuffix(fpath, ".toml"): 36 | s.Read(path.Join(configDir, fpath)) 37 | tBuf = append(tBuf, s.Data()...) 38 | tBuf = append(tBuf, []byte("\n")...) 39 | case strings.HasSuffix(fpath, ".json"): 40 | loaders = append(loaders, &multiconfig.JSONLoader{Path: path.Join(configDir, fpath)}) 41 | case strings.HasSuffix(fpath, ".yaml") || strings.HasSuffix(fpath, ".yml"): 42 | loaders = append(loaders, &multiconfig.YAMLLoader{Path: path.Join(configDir, fpath)}) 43 | } 44 | if s.Err() != nil { 45 | return s.Err() 46 | } 47 | } 48 | 49 | if len(tBuf) != 0 { 50 | loaders = append(loaders, &multiconfig.TOMLLoader{Reader: bytes.NewReader(tBuf)}) 51 | } 52 | 53 | m := multiconfig.DefaultLoader{ 54 | Loader: multiconfig.MultiLoader(loaders...), 55 | Validator: multiconfig.MultiValidator(&multiconfig.RequiredValidator{}), 56 | } 57 | return m.Load(configPtr) 58 | } 59 | -------------------------------------------------------------------------------- /pkg/tplx/conv.go: -------------------------------------------------------------------------------- 1 | package tplx 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | ) 7 | 8 | // ToFloat64 convert interface to float64 9 | func ToFloat64(val interface{}) (float64, error) { 10 | switch v := val.(type) { 11 | case string: 12 | if f, err := strconv.ParseFloat(v, 64); err == nil { 13 | return f, nil 14 | } 15 | 16 | // try int 17 | if i, err := strconv.ParseInt(v, 0, 64); err == nil { 18 | return float64(i), nil 19 | } 20 | 21 | // try bool 22 | b, err := strconv.ParseBool(v) 23 | if err == nil { 24 | if b { 25 | return 1, nil 26 | } else { 27 | return 0, nil 28 | } 29 | } 30 | 31 | if v == "Yes" || v == "yes" || v == "YES" || v == "Y" || v == "ON" || v == "on" || v == "On" || v == "ok" || v == "up" { 32 | return 1, nil 33 | } 34 | 35 | if v == "No" || v == "no" || v == "NO" || v == "N" || v == "OFF" || v == "off" || v == "Off" || v == "fail" || v == "err" || v == "down" { 36 | return 0, nil 37 | } 38 | 39 | return 0, fmt.Errorf("unparseable value %v", v) 40 | case float64: 41 | return v, nil 42 | case uint64: 
43 | return float64(v), nil 44 | case uint32: 45 | return float64(v), nil 46 | case uint16: 47 | return float64(v), nil 48 | case uint8: 49 | return float64(v), nil 50 | case uint: 51 | return float64(v), nil 52 | case int64: 53 | return float64(v), nil 54 | case int32: 55 | return float64(v), nil 56 | case int16: 57 | return float64(v), nil 58 | case int8: 59 | return float64(v), nil 60 | case bool: 61 | if v { 62 | return 1, nil 63 | } else { 64 | return 0, nil 65 | } 66 | case int: 67 | return float64(v), nil 68 | case float32: 69 | return float64(v), nil 70 | default: 71 | return strconv.ParseFloat(fmt.Sprint(v), 64) 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /alert/process/alert_cur_event.go: -------------------------------------------------------------------------------- 1 | package process 2 | 3 | import ( 4 | "sync" 5 | 6 | "github.com/ccfos/nightingale/v6/models" 7 | ) 8 | 9 | type AlertCurEventMap struct { 10 | sync.RWMutex 11 | Data map[string]*models.AlertCurEvent 12 | } 13 | 14 | func NewAlertCurEventMap(data map[string]*models.AlertCurEvent) *AlertCurEventMap { 15 | if data == nil { 16 | return &AlertCurEventMap{ 17 | Data: make(map[string]*models.AlertCurEvent), 18 | } 19 | } 20 | return &AlertCurEventMap{ 21 | Data: data, 22 | } 23 | } 24 | 25 | func (a *AlertCurEventMap) SetAll(data map[string]*models.AlertCurEvent) { 26 | a.Lock() 27 | defer a.Unlock() 28 | a.Data = data 29 | } 30 | 31 | func (a *AlertCurEventMap) Set(key string, value *models.AlertCurEvent) { 32 | a.Lock() 33 | defer a.Unlock() 34 | a.Data[key] = value 35 | } 36 | 37 | func (a *AlertCurEventMap) Get(key string) (*models.AlertCurEvent, bool) { 38 | a.RLock() 39 | defer a.RUnlock() 40 | event, exists := a.Data[key] 41 | return event, exists 42 | } 43 | 44 | func (a *AlertCurEventMap) UpdateLastEvalTime(key string, lastEvalTime int64) { 45 | a.Lock() 46 | defer a.Unlock() 47 | event, exists := a.Data[key] 48 | if !exists { 49 | return 50 | } 51 | event.LastEvalTime = lastEvalTime 52 | } 53 | 54 | func (a *AlertCurEventMap) Delete(key string) { 55 | a.Lock() 56 | defer a.Unlock() 57 | delete(a.Data, key) 58 | } 59 | 60 | func (a *AlertCurEventMap) Keys() []string { 61 | a.RLock() 62 | defer a.RUnlock() 63 | keys := make([]string, 0, len(a.Data)) 64 | for k := range a.Data { 65 | keys = append(keys, k) 66 | } 67 | return keys 68 | } 69 | 70 | func (a *AlertCurEventMap) GetAll() map[string]*models.AlertCurEvent { 71 | a.RLock() 72 | defer a.RUnlock() 73 | return a.Data 74 | } 75 | -------------------------------------------------------------------------------- /pushgw/writer/stats.go: -------------------------------------------------------------------------------- 1 | package writer 2 | 3 | import "github.com/prometheus/client_golang/prometheus" 4 | 5 | const ( 6 | namespace = "n9e" 7 | subsystem = "pushgw" 8 | ) 9 | 10 | var ( 11 | // 发往后端TSDB,延迟如何 12 | ForwardDuration = prometheus.NewHistogramVec( 13 | prometheus.HistogramOpts{ 14 | Namespace: namespace, 15 | Subsystem: subsystem, 16 | Buckets: []float64{.1, 1, 10}, 17 | Name: "forward_duration_seconds", 18 | Help: "Forward samples to TSDB. 
--------------------------------------------------------------------------------
/integrations/Appdynamics/markdown/README.md:
--------------------------------------------------------------------------------
1 | ## Appdynamics
2 |
3 | Appdynamics collection plugin; it collects data from Appdynamics.
4 |
5 | ## Configuration
6 |
7 | ```toml
8 | #interval=15s
9 |
10 | [[instances]]
11 | #url_base = "http://{{.ip}}:{{.port}}/a.json?metric-path={{.metric_path}}&time-range-type=BETWEEN_TIMES&start-time={{.start_time}}&end-time={{.end_time}}&output=JSON"
12 | #url_vars = [
13 | #  { ip="127.0.0.1", port="8090", application="cms", metric_path="Application Infrastructure Performance|AdminServer|Individual Nodes|xxxxx|Agent|App|Availability", start_time="$START_TIME", end_time="$END_TIME"},
14 | #]
15 |
16 | # # which keys in url_vars should be attached as final labels
17 | # url_var_label_keys= []
18 |
19 | # # extract a variable from the url
20 | # url_label_key="instance"
21 | # url_label_value="{{.Host}}"
22 | # # custom http headers
23 | #headers = { Authorization="", X-Forwarded-For="", Host=""}
24 | # # timeout for each request
25 | #timeout="5s"
26 |
27 | # # precision of start-time and end-time
28 | #precision="ms"
29 |
30 | ## basic auth
31 | #username=""
32 | #password=""
33 |
34 | # # endtime = now - delay
35 | #delay = "1m"
36 | # # starttime = now - delay - period = endtime - period
37 | #period = "1m"
38 |
39 | # # extra labels to attach
40 | #labels = {application="cms"}
41 | # # which metrics to keep from the response
42 | filters = ["current", "max", "min", "value","sum", "count"]
43 |
44 | # # limit concurrency: the maximum number of requests in flight at the same time
45 | # # default range (0,100)
46 | #request_inflight= 10
47 | ## force more than 100 concurrent requests (not recommended)
48 | # force_request_inflight = 1000
49 |
50 | # # whether to enable tls
51 | # use_tls = true
52 | # # minimum tls version
53 | ## tls_min_version = "1.2"
54 | # # path to the tls ca certificate
55 | ## tls_ca = "/etc/categraf/ca.pem"
56 | # # path to the tls certificate
57 | ## tls_cert = "/etc/categraf/cert.pem"
58 | # # path to the tls key
59 | ## tls_key = "/etc/categraf/key.pem"
60 | # # whether to skip certificate verification
61 | ## insecure_skip_verify = true
62 |
63 | ```
--------------------------------------------------------------------------------
/cmd/alert/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 |     "flag"
5 |     "fmt"
6 |     "log"
7 |     "os"
8 |     "os/signal"
9 |     "syscall"
10 |
11 |     "github.com/ccfos/nightingale/v6/alert"
12 |     "github.com/ccfos/nightingale/v6/pkg/osx"
13 |     "github.com/ccfos/nightingale/v6/pkg/version"
14 |
15 |     "github.com/toolkits/pkg/runner"
16 | )
17 |
18 | var (
19 |     showVersion = flag.Bool("version", false, "Show version.")
20 |     configDir = flag.String("configs", osx.GetEnv("N9E_ALERT_CONFIGS", "etc"), "Specify configuration directory.(env:N9E_ALERT_CONFIGS)")
21 |     cryptoKey = flag.String("crypto-key", "", "Specify the secret key for configuration file field encryption.")
22 | )
23 |
24 | func main() {
25 |     flag.Parse()
26 |
27 |     if *showVersion {
28 |         fmt.Println(version.Version)
29 |         os.Exit(0)
30 |     }
31 |
32 |     printEnv()
33 |
34 |     cleanFunc, err := alert.Initialize(*configDir, *cryptoKey)
35 |     if err != nil {
36 |         log.Fatalln("failed to initialize:", err)
37 |     }
38 |
39 |     code := 1
40 |     sc := make(chan os.Signal, 1)
41 |     signal.Notify(sc, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)
42 |
43 | EXIT:
44 |     for {
45 |         sig := <-sc
46 |         fmt.Println("received signal:", sig.String())
47 |         switch sig {
48 |         case syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGINT:
49 |             code = 0
50 |             break EXIT
51 |         case syscall.SIGHUP:
52 |             // reload configuration?
53 |         default:
54 |             break EXIT
55 |         }
56 |     }
57 |
58 |     cleanFunc()
59 |     fmt.Println("process exited")
60 |     os.Exit(code)
61 | }
62 |
63 | func printEnv() {
64 |     runner.Init()
65 |     fmt.Println("runner.cwd:", runner.Cwd)
66 |     fmt.Println("runner.hostname:", runner.Hostname)
67 |     fmt.Println("runner.fd_limits:", runner.FdLimits())
68 |     fmt.Println("runner.vm_limits:", runner.VMLimits())
69 | }
70 |
--------------------------------------------------------------------------------
/cmd/pushgw/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 |     "flag"
5 |     "fmt"
6 |     "log"
7 |     "os"
8 |     "os/signal"
9 |     "syscall"
10 |
11 |     "github.com/ccfos/nightingale/v6/pkg/osx"
12 |     "github.com/ccfos/nightingale/v6/pkg/version"
13 |     "github.com/ccfos/nightingale/v6/pushgw"
14 |
15 |     "github.com/toolkits/pkg/runner"
16 | )
17 |
18 | var (
19 |     showVersion = flag.Bool("version", false, "Show version.")
20 |     configDir = flag.String("configs", osx.GetEnv("N9E_PUSHGW_CONFIGS", "etc"), "Specify configuration directory.(env:N9E_PUSHGW_CONFIGS)")
21 |     cryptoKey = flag.String("crypto-key", "", "Specify the secret key for configuration file field encryption.")
22 | )
23 |
24 | func main() {
25 |     flag.Parse()
26 |
27 |     if *showVersion {
28 |         fmt.Println(version.Version)
29 |         os.Exit(0)
30 |     }
31 |
32 |     printEnv()
33 |
34 |     cleanFunc, err := pushgw.Initialize(*configDir, *cryptoKey)
35 |     if err != nil {
36 |         log.Fatalln("failed to initialize:", err)
37 |     }
38 |
39 |     code := 1
40 |     sc := make(chan os.Signal, 1)
41 |     signal.Notify(sc, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)
42 |
43 | EXIT:
44 |     for {
45 |         sig := <-sc
46 |         fmt.Println("received signal:", sig.String())
47 |         switch sig {
48 |         case syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGINT:
49 |             code = 0
50 |             break EXIT
51 |         case syscall.SIGHUP:
52 |             // reload configuration?
53 |         default:
54 |             break EXIT
55 |         }
56 |     }
57 |
58 |     cleanFunc()
59 |     fmt.Println("process exited")
60 |     os.Exit(code)
61 | }
62 |
63 | func printEnv() {
64 |     runner.Init()
65 |     fmt.Println("runner.cwd:", runner.Cwd)
66 |     fmt.Println("runner.hostname:", runner.Hostname)
67 |     fmt.Println("runner.fd_limits:", runner.FdLimits())
68 |     fmt.Println("runner.vm_limits:", runner.VMLimits())
69 | }
70 |
--------------------------------------------------------------------------------
/integrations/Gitlab/markdown/README.md:
--------------------------------------------------------------------------------
1 | # Gitlab
2 |
3 | GitLab exposes monitoring data in the Prometheus format by default; see [Monitoring GitLab with Prometheus](https://docs.gitlab.com/ee/administration/monitoring/prometheus/). Categraf's prometheus plugin is therefore all you need to collect it.
4 |
5 | ## Collection configuration
6 |
7 | Configuration file: categraf's `conf/input.prometheus/prometheus.toml`
8 |
9 | ```toml
10 | [[instances]]
11 | urls = [
12 |     "http://192.168.11.77:9236/metrics"
13 | ]
14 | labels = {service="gitlab", job="gitaly"}
15 |
16 | [[instances]]
17 | urls = [
18 |     "http://192.168.11.77:9168/sidekiq"
19 | ]
20 | labels = {service="gitlab", job="gitlab-exporter-sidekiq"}
21 |
22 | [[instances]]
23 | urls = [
24 |     "http://192.168.11.77:9168/database"
25 | ]
26 | labels = {service="gitlab",job="gitlab-exporter-database"}
27 |
28 | [[instances]]
29 | urls = [
30 |     "http://192.168.11.77:8082/metrics"
31 | ]
32 | labels = {service="gitlab", job="gitlab-sidekiq"}
33 |
34 | [[instances]]
35 | urls = [
36 |     "http://192.168.11.77:8082/metrics"
37 | ]
38 | labels = {service="gitlab", job="gitlab-sidekiq"}
39 |
40 | [[instances]]
41 | urls = [
42 |     "http://192.168.11.77:9229/metrics"
43 | ]
44 | labels = {service="gitlab",job="gitlab-workhorse"}
45 |
46 | [[instances]]
47 | urls = [
48 |     "http://192.168.11.77:9100/metrics"
49 | ]
50 | labels = {service="gitlab", job="node"}
51 |
52 | [[instances]]
53 | urls = [
54 |     "http://192.168.11.77:9187/metrics"
55 | ]
56 | labels = {service="gitlab", job="postgres"}
57 |
58 | [[instances]]
59 | urls = [
60 |     "http://192.168.11.77:9121/metrics"
61 | ]
62 | labels = {service="gitlab", job="redis"}
63 |
64 | [[instances]]
65 | urls = [
66 |     "http://192.168.11.77:9999/metrics"
67 | ]
68 | labels = {service="gitlab", job="nginx"}
69 | ```
70 |
71 | ## Dashboards and alert rules
72 |
73 | Nightingale ships with built-in dashboards and alert rules for the individual GitLab components; import them into your own business group to use them.
74 |
75 |
--------------------------------------------------------------------------------
/integrations/NSQ/markdown/README.md:
--------------------------------------------------------------------------------
1 | # nsq
2 | forked from [telegraf/nsq](https://github.com/influxdata/telegraf/blob/master/plugins/inputs/nsq/nsq.go)
3 | ## Configuration
4 | - Configuration file: see the [reference example](https://github.com/flashcatcloud/categraf/blob/main/conf/input.nsq/nsq.toml)
5 |
6 | ## Metric list
7 | ### nsq_client metrics
8 | ready_count: number of messages ready to be consumed
9 | inflight_count: number of messages currently being processed
10 | message_count: total number of messages
11 | finish_count: number of finished messages
12 | requeue_count: number of requeued messages
13 |
14 | ### nsq_channel metrics
15 | depth: current backlog
16 | backend_depth: backlog in the message buffer queue
17 | inflight_count: number of messages currently being processed
18 | deferred_count: number of deferred messages
19 | message_count: total number of messages
20 | requeue_count: number of requeued messages
21 | timeout_count: number of timed-out messages
22 | client_count: number of clients
23 |
24 | ### nsq_topic metrics
25 | depth: message queue backlog
26 | backend_depth: backlog in the message buffer queue
27 | message_count: total number of messages
28 | channel_count: total number of channels (consumers)
29 |
30 | ## metrics
31 | The entries below can be cloned into nightingale's metrics.yaml file to serve as metric descriptions:
32 | # [nsq]
33 | nsq_server_server_count: "total number of nsq servers"
34 | nsq_server_topic_count: "total number of nsq topics"
35 |
36 | nsq_topic_depth: message queue backlog
37 | nsq_topic_backend_depth: backlog in the message buffer queue
38 | nsq_topic_message_count: total number of messages
39 | nsq_topic_channel_count: total number of channels (consumers)
40 |
41 | nsq_channel_depth: "current number of messages held in memory or spilled to disk, i.e. the current backlog"
42 | nsq_channel_backend_depth: backlog in the message buffer queue
43 | nsq_channel_inflight_count: "messages delivered but not yet consumed: sent without a FIN returned yet, requeued (REQ), or timed out (TIMEOUT)"
44 | nsq_channel_deferred_count: "requeued deferred messages that have not been republished yet, i.e. unconsumed scheduled (delayed) messages"
45 | nsq_channel_message_count: total number of new messages since the node started, i.e. the real message count
46 | nsq_channel_requeue_count: number of requeued messages, i.e. messages that returned REQ
47 | nsq_channel_timeout_count: number of messages requeued because no response arrived within the configured timeout
48 | nsq_channel_client_count: number of client connections
49 |
50 | nsq_client_ready_count: number of messages the client is ready to consume
51 | nsq_client_inflight_count: number of messages the client is currently processing
52 | nsq_client_message_count: total number of client messages
53 | nsq_client_finish_count: number of messages the client finished, i.e. messages that returned FIN
54 | nsq_client_requeue_count: number of messages the client requeued, i.e. messages that returned REQ
55 |
--------------------------------------------------------------------------------
/cmd/center/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 |     "flag"
5 |     "fmt"
6 |     "log"
7 |     "os"
8 |     "os/signal"
9 |     "syscall"
10 |
11 |     "github.com/ccfos/nightingale/v6/center"
12 |     "github.com/ccfos/nightingale/v6/pkg/osx"
13 |     "github.com/ccfos/nightingale/v6/pkg/version"
14 |
15 |     "github.com/toolkits/pkg/net/tcpx"
16 |     "github.com/toolkits/pkg/runner"
17 | )
18 |
19 | var (
20 |     showVersion = flag.Bool("version", false, "Show version.")
21 |     configDir = flag.String("configs", osx.GetEnv("N9E_CONFIGS", "etc"), "Specify configuration directory.(env:N9E_CONFIGS)")
22 |     cryptoKey = flag.String("crypto-key", "", "Specify the secret key for configuration file field encryption.")
23 | )
24 |
25 | func main() {
26 |     flag.Parse()
27 |
28 |     if *showVersion {
29 |         fmt.Println(version.Version)
30 |         os.Exit(0)
31 |     }
32 |
33 |     printEnv()
34 |
35 |     tcpx.WaitHosts()
36 |
37 |     cleanFunc, err := center.Initialize(*configDir, *cryptoKey)
38 |     if err != nil {
39 |         log.Fatalln("failed to initialize:", err)
40 |     }
41 |
42 |     code := 1
43 |     sc := make(chan os.Signal, 1)
44 |     signal.Notify(sc, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)
45 |
46 | EXIT:
47 |     for {
48 |         sig := <-sc
49 |         fmt.Println("received signal:", sig.String())
50 |         switch sig {
51 |         case syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGINT:
52 |             code = 0
53 |             break EXIT
54 |         case syscall.SIGHUP:
55 |             // reload configuration?
56 |         default:
57 |             break EXIT
58 |         }
59 |     }
60 |
61 |     cleanFunc()
62 |     fmt.Println("process exited")
63 |     os.Exit(code)
64 | }
65 |
66 | func printEnv() {
67 |     runner.Init()
68 |     fmt.Println("runner.cwd:", runner.Cwd)
69 |     fmt.Println("runner.hostname:", runner.Hostname)
70 |     fmt.Println("runner.fd_limits:", runner.FdLimits())
71 |     fmt.Println("runner.vm_limits:", runner.VMLimits())
72 | }
73 |
--------------------------------------------------------------------------------
/center/router/router_config.go:
--------------------------------------------------------------------------------
1 | package router
2 |
3 | import (
4 |     "encoding/json"
5 |
6 |     "github.com/ccfos/nightingale/v6/models"
7 |
8 |     "github.com/gin-gonic/gin"
9 |     "github.com/toolkits/pkg/ginx"
10 | )
11 |
12 | func (rt *Router) notifyChannelsGets(c *gin.Context) {
13 |     var labelAndKeys []models.LabelAndKey
14 |     cval, err := models.ConfigsGet(rt.Ctx, models.NOTIFYCHANNEL)
15 |     ginx.Dangerous(err)
16 |
17 |     if cval == "" {
18 |         ginx.NewRender(c).Data(labelAndKeys, nil)
19 |         return
20 |     }
21 |
22 |     var notifyChannels []models.NotifyChannel
23 |     err = json.Unmarshal([]byte(cval), &notifyChannels)
24 |     ginx.Dangerous(err)
25 |
26 |     for _, v := range notifyChannels {
27 |         if v.Hide {
28 |             continue
29 |         }
30 |         var labelAndKey models.LabelAndKey
31 |         labelAndKey.Label = v.Name
32 |         labelAndKey.Key = v.Ident
33 |         labelAndKeys = append(labelAndKeys, labelAndKey)
34 |     }
35 |
36 |     ginx.NewRender(c).Data(labelAndKeys, nil)
37 | }
38 |
39 | func (rt *Router) contactKeysGets(c *gin.Context) {
40 |     var labelAndKeys []models.LabelAndKey
41 |     cval, err := models.ConfigsGet(rt.Ctx, models.NOTIFYCONTACT)
42 |     ginx.Dangerous(err)
43 |
44 |     if cval == "" {
45 |         ginx.NewRender(c).Data(labelAndKeys, nil)
46 |         return
47 |     }
48 |
49 |     var notifyContacts []models.NotifyContact
50 |     err = json.Unmarshal([]byte(cval), &notifyContacts)
51 |     ginx.Dangerous(err)
52 |
53 |     for _, v := range notifyContacts {
54 |         if v.Hide {
55 |             continue
56 |         }
57 |         var labelAndKey models.LabelAndKey
58 |         labelAndKey.Label = v.Name
59 |         labelAndKey.Key = v.Ident
60 |         labelAndKeys = append(labelAndKeys, labelAndKey)
61 |     }
62 |
63 |     ginx.NewRender(c).Data(labelAndKeys, nil)
64 | }
65 |
66 | func (rt *Router) siteInfo(c *gin.Context) {
67 |     config, err := models.ConfigsGet(rt.Ctx, "site_info")
68 |     ginx.NewRender(c).Data(config, err)
69 | }
70 |
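
For context, `notifyChannelsGets` above expects the NOTIFYCHANNEL config value to be a JSON array of channel objects. The constant below is a hypothetical example; the `name`/`ident`/`hide` field names are an assumption about `models.NotifyChannel`'s JSON tags, which are not shown in this file:

```go
package router

// Hypothetical NOTIFYCHANNEL value: channels with "hide": true are skipped,
// the rest come back as {Label: name, Key: ident} pairs.
const notifyChannelsExample = `[
  {"name": "DingTalk", "ident": "dingtalk", "hide": false},
  {"name": "WeCom",    "ident": "wecom",    "hide": false},
  {"name": "SMS",      "ident": "sms",      "hide": true}
]`
```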
--------------------------------------------------------------------------------
/integrations/MySQL/collect/mysql/mysql.toml:
--------------------------------------------------------------------------------
1 | # # collect interval
2 | # interval = 15
3 |
4 | # [[queries]]
5 | # mesurement = "users"
6 | # metric_fields = [ "total" ]
7 | # label_fields = [ "service" ]
8 | # timeout = "3s"
9 | # request = '''
10 | # select 'n9e' as service, count(*) as total from n9e_v5.users
11 | # '''
12 |
13 |
14 | [[instances]]
15 | # address = "127.0.0.1:3306"
16 | # username = "root"
17 | # password = "1234"
18 |
19 | # # set tls=custom to enable tls
20 | # parameters = "tls=false"
21 |
22 | # extra_status_metrics = true
23 | # extra_innodb_metrics = false
24 | # gather_processlist_processes_by_state = false
25 | # gather_processlist_processes_by_user = false
26 | # gather_schema_size = true
27 | # gather_table_size = false
28 | # gather_system_table_size = false
29 | # gather_slave_status = true
30 |
31 | # # timeout
32 | # timeout_seconds = 3
33 |
34 | # # interval = global.interval * interval_times
35 | # interval_times = 1
36 |
37 | # important! use a globally unique string to identify the instance
38 | # labels = { instance="n9e-10.2.3.4:3306" }
39 |
40 | ## Optional TLS Config
41 | # use_tls = false
42 | # tls_min_version = "1.2"
43 | # tls_ca = "/etc/categraf/ca.pem"
44 | # tls_cert = "/etc/categraf/cert.pem"
45 | # tls_key = "/etc/categraf/key.pem"
46 | ## Use TLS but skip chain & host verification
47 | # insecure_skip_verify = true
48 |
49 | #[[instances.queries]]
50 | # mesurement = "lock_wait"
51 | # metric_fields = [ "total" ]
52 | # timeout = "3s"
53 | # request = '''
54 | #SELECT count(*) as total FROM information_schema.innodb_trx WHERE trx_state='LOCK WAIT'
55 | #'''
56 |
57 | # [[instances.queries]]
58 | # mesurement = "users"
59 | # metric_fields = [ "total" ]
60 | # label_fields = [ "service" ]
61 | # # field_to_append = ""
62 | # timeout = "3s"
63 | # request = '''
64 | # select 'n9e' as service, count(*) as total from n9e_v5.users
65 | # '''
66 |
--------------------------------------------------------------------------------
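
A filled-in sketch of the template above — one monitored instance plus one custom query. The address, credentials, and query are placeholders, and the `mesurement` spelling is kept because that is the key this template actually uses:

```toml
[[instances]]
address = "10.2.3.4:3306"
username = "categraf"
password = "change-me"
# globally unique identifier for this instance
labels = { instance = "n9e-10.2.3.4:3306" }

extra_status_metrics = true
gather_slave_status = true

[[instances.queries]]
mesurement = "long_running_queries"
metric_fields = [ "total" ]
timeout = "3s"
request = '''
SELECT COUNT(*) AS total FROM information_schema.processlist WHERE command != 'Sleep' AND time > 5
'''
```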