├── .dockerignore ├── collector ├── constants.go ├── exporter.go ├── logging.go ├── storage.go ├── projects.go ├── endpoints.go ├── scheduler.go ├── eventarc.go ├── compute.go ├── cloudrun.go ├── monitoring.go ├── iam.go ├── artifactregistry.go ├── functions.go ├── gke.go └── pubssub.go ├── images ├── gmail.png ├── prometheus.rules.png ├── alertmanager.firing.png ├── alertmanager.quiet.png ├── prometheus.alerts.png └── prometheus.targets.png ├── .gitignore ├── cosign.pub ├── .github ├── dependabot.yml └── workflows │ └── build.yml ├── kubernetes.rule.yaml ├── gcp └── account.go ├── Dockerfile ├── prometheus.yml ├── kubernetes.yaml ├── go.mod ├── docker-compose.yml ├── rules.yml ├── main.go ├── go.sum ├── LICENSE └── README.md /.dockerignore: -------------------------------------------------------------------------------- 1 | .github 2 | images 3 | bin 4 | -------------------------------------------------------------------------------- /collector/constants.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | const ( 4 | prefix = "gcp" 5 | ) 6 | -------------------------------------------------------------------------------- /images/gmail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DazWilkin/gcp-exporter/HEAD/images/gmail.png -------------------------------------------------------------------------------- /images/prometheus.rules.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DazWilkin/gcp-exporter/HEAD/images/prometheus.rules.png -------------------------------------------------------------------------------- /images/alertmanager.firing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DazWilkin/gcp-exporter/HEAD/images/alertmanager.firing.png 
-------------------------------------------------------------------------------- /images/alertmanager.quiet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DazWilkin/gcp-exporter/HEAD/images/alertmanager.quiet.png -------------------------------------------------------------------------------- /images/prometheus.alerts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DazWilkin/gcp-exporter/HEAD/images/prometheus.alerts.png -------------------------------------------------------------------------------- /images/prometheus.targets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DazWilkin/gcp-exporter/HEAD/images/prometheus.targets.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Contains secrets 2 | .env.test 3 | alertmanager.yml 4 | kubernetes.alerting.yaml 5 | 6 | # Cosign 7 | cosign.key 8 | 9 | # Visual Studio Code config 10 | .vscode/ 11 | -------------------------------------------------------------------------------- /cosign.pub: -------------------------------------------------------------------------------- 1 | -----BEGIN PUBLIC KEY----- 2 | MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEIuEcUJDOxuz3O+Kdw+/HAxAK98a4 3 | Bo+i8KSV5z5EbhI2Ger8iMkzqAB9Zl2BQMFbmKjvNpV+tzOo6fvGWSIg6A== 4 | -----END PUBLIC KEY----- 5 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | # Maintain dependencies for GitHub Actions 4 | - package-ecosystem: "github-actions" 5 | directory: "/" 6 | schedule: 7 | interval: "monthly" 8 | # Maintain dependencies for Docker 9 | - 
package-ecosystem: "docker" 10 | directory: "/" 11 | schedule: 12 | interval: "monthly" 13 | # Maintain dependencies for Golang Modules 14 | - package-ecosystem: "gomod" 15 | directory: "/" 16 | schedule: 17 | interval: "monthly" 18 | -------------------------------------------------------------------------------- /kubernetes.rule.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: PrometheusRule 3 | metadata: 4 | labels: 5 | prometheus: service-prometheus 6 | role: alert-rules 7 | name: gcp-exporter 8 | namespace: monitoring # Must be created in 'monitoring' namespace 9 | spec: 10 | groups: 11 | - name: GCP 12 | rules: 13 | - alert: GCPCloudRunServicesRunning 14 | expr: gcp_cloudrun_services{} > 0 15 | for: 60m 16 | labels: 17 | severity: warning 18 | annotations: 19 | summary: GCP Cloud Run services running 20 | -------------------------------------------------------------------------------- /gcp/account.go: -------------------------------------------------------------------------------- 1 | package gcp 2 | 3 | import ( 4 | "log" 5 | "sync" 6 | 7 | "google.golang.org/api/cloudresourcemanager/v1" 8 | ) 9 | 10 | // Account represents a Google Cloud Platform account 11 | type Account struct { 12 | mu sync.Mutex 13 | 14 | // Projects list that's shared across Collectors 15 | Projects []*cloudresourcemanager.Project 16 | } 17 | 18 | // NewAccount creates a new Account 19 | func NewAccount() *Account { 20 | projects := []*cloudresourcemanager.Project{} 21 | return &Account{ 22 | Projects: projects, 23 | } 24 | } 25 | 26 | // Update is a method that transactionally updates the list of GCP projects 27 | func (x *Account) Update(projects []*cloudresourcemanager.Project) { 28 | log.Printf("[Update] replacing projects") 29 | x.mu.Lock() 30 | x.Projects = projects 31 | x.mu.Unlock() 32 | } 33 | -------------------------------------------------------------------------------- /Dockerfile: 
-------------------------------------------------------------------------------- 1 | ARG GOLANG_VERSION=1.25.3 2 | 3 | ARG TARGETOS 4 | ARG TARGETARCH 5 | 6 | ARG COMMIT 7 | ARG VERSION 8 | 9 | FROM --platform=${TARGETARCH} docker.io/golang:${GOLANG_VERSION} AS build 10 | 11 | WORKDIR /gcp-exporter 12 | 13 | COPY go.* ./ 14 | COPY main.go . 15 | COPY collector ./collector 16 | COPY gcp ./gcp 17 | 18 | ARG TARGETOS 19 | ARG TARGETARCH 20 | 21 | ARG VERSION 22 | ARG COMMIT 23 | 24 | RUN CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} \ 25 | go build \ 26 | -ldflags "-X main.OSVersion=${VERSION} -X main.GitCommit=${COMMIT}" \ 27 | -a -installsuffix cgo \ 28 | -o /go/bin/gcp-exporter \ 29 | ./main.go 30 | 31 | FROM --platform=${TARGETARCH} gcr.io/distroless/static-debian12:latest 32 | 33 | LABEL org.opencontainers.image.description="Prometheus Exporter for GCP" 34 | LABEL org.opencontainers.image.source="https://github.com/DazWilkin/gcp-exporter" 35 | 36 | COPY --from=build /go/bin/gcp-exporter / 37 | 38 | EXPOSE 9402 39 | 40 | ENTRYPOINT ["/gcp-exporter"] 41 | -------------------------------------------------------------------------------- /prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 1m 3 | evaluation_interval: 1m 4 | 5 | rule_files: 6 | - "/etc/alertmanager/rules.yml" 7 | 8 | alerting: 9 | alertmanagers: 10 | - scheme: http 11 | static_configs: 12 | - targets: 13 | - "alertmanager:9093" 14 | 15 | scrape_configs: 16 | # Self 17 | - job_name: "prometheus-server" 18 | static_configs: 19 | - targets: 20 | - "localhost:9090" 21 | 22 | # Alertmanager 23 | - job_name: "alertmanager" 24 | static_configs: 25 | - targets: 26 | - "alertmanager:9093" 27 | 28 | # GCP Exporter 29 | - job_name: "gcp-exporter" 30 | scrape_interval: 15m # Reduced frequency for GCP 31 | scrape_timeout: 1m 32 | static_configs: 33 | - targets: 34 | - "gcp-exporter:9402" 35 | 36 | # GCP Status 37 | - job_name: 
"gcp-status" 38 | scrape_interval: 15m # Reduced frequency for GCP 39 | static_configs: 40 | - targets: 41 | - "gcp-status:9989" 42 | 43 | # cAdvisor exports metrics for *all* containers running on this host 44 | - job_name: cadvisor 45 | static_configs: 46 | - targets: 47 | - "cadvisor:8080" 48 | 49 | # Node Exporter 50 | - job_name: node-exporter 51 | static_configs: 52 | - targets: 53 | - "node-exporter:9100" 54 | -------------------------------------------------------------------------------- /kubernetes.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: List 3 | metadata: {} 4 | items: 5 | - kind: Service 6 | apiVersion: v1 7 | metadata: 8 | labels: 9 | app: gcp-exporter 10 | name: gcp-exporter 11 | spec: 12 | selector: 13 | app: gcp-exporter 14 | ports: 15 | - name: metrics 16 | port: 9402 17 | targetPort: 9402 18 | - kind: Deployment 19 | apiVersion: apps/v1 20 | metadata: 21 | labels: 22 | app: gcp-exporter 23 | name: gcp-exporter 24 | spec: 25 | replicas: 1 26 | selector: 27 | matchLabels: 28 | app: gcp-exporter 29 | template: 30 | metadata: 31 | labels: 32 | app: gcp-exporter 33 | spec: 34 | containers: 35 | - name: gcp-exporter 36 | image: ghcr.io/dazwilkin/gcp-exporter:b3498435eecadc7c01e024ee0ce16b8c1e41aa40 37 | env: 38 | - name: GOOGLE_APPLICATION_CREDENTIALS 39 | value: /secrets/client_secrets.json 40 | ports: 41 | - name: metrics 42 | containerPort: 9402 43 | volumeMounts: 44 | - name: secrets 45 | mountPath: /secrets 46 | restartPolicy: Always 47 | volumes: 48 | - name: secrets 49 | secret: 50 | secretName: gcp-exporter 51 | - kind: ServiceMonitor 52 | apiVersion: monitoring.coreos.com/v1 53 | metadata: 54 | name: gcp-exporter 55 | labels: 56 | app: gcp-exporter 57 | spec: 58 | selector: 59 | matchLabels: 60 | app: gcp-exporter 61 | endpoints: 62 | - port: metrics 63 | -------------------------------------------------------------------------------- /go.mod: 
-------------------------------------------------------------------------------- 1 | module github.com/DazWilkin/gcp-exporter 2 | 3 | go 1.25.3 4 | 5 | require ( 6 | github.com/prometheus/client_golang v1.23.2 7 | google.golang.org/api v0.256.0 8 | ) 9 | 10 | require ( 11 | cloud.google.com/go/auth v0.17.0 // indirect 12 | cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect 13 | cloud.google.com/go/compute/metadata v0.9.0 // indirect 14 | github.com/beorn7/perks v1.0.1 // indirect 15 | github.com/cespare/xxhash/v2 v2.3.0 // indirect 16 | github.com/felixge/httpsnoop v1.0.4 // indirect 17 | github.com/go-logr/logr v1.4.3 // indirect 18 | github.com/go-logr/stdr v1.2.2 // indirect 19 | github.com/google/s2a-go v0.1.9 // indirect 20 | github.com/google/uuid v1.6.0 // indirect 21 | github.com/googleapis/enterprise-certificate-proxy v0.3.7 // indirect 22 | github.com/googleapis/gax-go/v2 v2.15.0 // indirect 23 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect 24 | github.com/prometheus/client_model v0.6.2 // indirect 25 | github.com/prometheus/common v0.67.2 // indirect 26 | github.com/prometheus/procfs v0.19.1 // indirect 27 | go.opentelemetry.io/auto/sdk v1.2.1 // indirect 28 | go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 // indirect 29 | go.opentelemetry.io/otel v1.38.0 // indirect 30 | go.opentelemetry.io/otel/metric v1.38.0 // indirect 31 | go.opentelemetry.io/otel/trace v1.38.0 // indirect 32 | go.yaml.in/yaml/v2 v2.4.3 // indirect 33 | golang.org/x/crypto v0.45.0 // indirect 34 | golang.org/x/net v0.47.0 // indirect 35 | golang.org/x/oauth2 v0.33.0 // indirect 36 | golang.org/x/sys v0.38.0 // indirect 37 | golang.org/x/text v0.31.0 // indirect 38 | google.golang.org/genproto/googleapis/rpc v0.0.0-20251103181224-f26f9409b101 // indirect 39 | google.golang.org/grpc v1.76.0 // indirect 40 | google.golang.org/protobuf v1.36.10 // indirect 41 | ) 42 | 
-------------------------------------------------------------------------------- /collector/exporter.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "github.com/prometheus/client_golang/prometheus" 5 | ) 6 | 7 | // ExporterCollector collects metrics, mostly runtime, about this exporter in general. 8 | type ExporterCollector struct { 9 | gitCommit string 10 | goVersion string 11 | osVersion string 12 | startTime int64 13 | 14 | StartTime *prometheus.Desc 15 | BuildInfo *prometheus.Desc 16 | } 17 | 18 | // NewExporterCollector returns a new ExporterCollector. 19 | func NewExporterCollector(osVersion, goVersion, gitCommit string, startTime int64) *ExporterCollector { 20 | subsystem := "exporter" 21 | return &ExporterCollector{ 22 | osVersion: osVersion, 23 | goVersion: goVersion, 24 | gitCommit: gitCommit, 25 | startTime: startTime, 26 | 27 | StartTime: prometheus.NewDesc( 28 | prometheus.BuildFQName(prefix, subsystem, "start_time"), 29 | "Exporter start time in Unix epoch seconds", 30 | nil, 31 | nil, 32 | ), 33 | BuildInfo: prometheus.NewDesc( 34 | prometheus.BuildFQName(prefix, subsystem, "build_info"), 35 | "A metric with a constant '1' value labeled by OS version, Go version, and the Git commit of the exporter", 36 | []string{"os_version", "go_version", "git_commit"}, 37 | nil, 38 | ), 39 | } 40 | } 41 | 42 | // Collect implements Prometheus' Collector interface and is used to collect metrics 43 | func (c *ExporterCollector) Collect(ch chan<- prometheus.Metric) { 44 | ch <- prometheus.MustNewConstMetric( 45 | c.StartTime, 46 | prometheus.GaugeValue, 47 | float64(c.startTime), 48 | ) 49 | ch <- prometheus.MustNewConstMetric( 50 | c.BuildInfo, 51 | prometheus.CounterValue, 52 | 1.0, 53 | c.osVersion, c.goVersion, c.gitCommit, 54 | ) 55 | } 56 | 57 | // Describe implements Prometheus' Collector interface and is used to describe metrics 58 | func (c *ExporterCollector) Describe(ch chan<- 
*prometheus.Desc) { 59 | ch <- c.StartTime 60 | ch <- c.BuildInfo // emitted by Collect, so it must be described too 61 | } 62 | -------------------------------------------------------------------------------- /collector/logging.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | "sync" 8 | 9 | "github.com/DazWilkin/gcp-exporter/gcp" 10 | "github.com/prometheus/client_golang/prometheus" 11 | "google.golang.org/api/logging/v2" 12 | ) 13 | 14 | // LoggingCollector represents Cloud Logging 15 | type LoggingCollector struct { 16 | account *gcp.Account 17 | loggingService *logging.Service 18 | 19 | Logs *prometheus.Desc 20 | } 21 | 22 | // NewLoggingCollector creates a new LoggingCollector 23 | func NewLoggingCollector(account *gcp.Account) (*LoggingCollector, error) { 24 | subsystem := "cloud_logging" 25 | 26 | ctx := context.Background() 27 | loggingService, err := logging.NewService(ctx) 28 | if err != nil { 29 | log.Println(err) 30 | return nil, err 31 | } 32 | 33 | return &LoggingCollector{ 34 | account: account, 35 | loggingService: loggingService, 36 | 37 | Logs: prometheus.NewDesc( 38 | prometheus.BuildFQName(prefix, subsystem, "logs"), 39 | "Number of Logs", 40 | []string{ 41 | "project", 42 | }, 43 | nil, 44 | ), 45 | }, nil 46 | } 47 | 48 | // Collect implements Prometheus' Collector interface and is used to collect metrics 49 | func (c *LoggingCollector) Collect(ch chan<- prometheus.Metric) { 50 | ctx := context.Background() 51 | 52 | // Enumerate all projects 53 | var wg sync.WaitGroup 54 | for _, p := range c.account.Projects { 55 | log.Printf("[LoggingCollector] Project: %s", p.ProjectId) 56 | 57 | name := fmt.Sprintf("projects/%s", p.ProjectId) 58 | 59 | // Logs 60 | wg.Add(1) 61 | go func(project string) { 62 | defer wg.Done() 63 | 64 | count := 0 65 | rqst := c.loggingService.Projects.Logs.List(name) 66 | if err := rqst.Pages(ctx, func(page *logging.ListLogsResponse) error { 67 | count += len(page.LogNames) 68 | 
return nil 69 | }); err != nil { 70 | log.Println(err) 71 | return 72 | } 73 | 74 | if count != 0 { 75 | ch <- prometheus.MustNewConstMetric( 76 | c.Logs, 77 | prometheus.GaugeValue, 78 | float64(count), 79 | []string{ 80 | project, 81 | }..., 82 | ) 83 | } 84 | }(p.ProjectId) 85 | } 86 | 87 | // Wait for all projects to process 88 | wg.Wait() 89 | } 90 | 91 | // Describe implements Prometheus' Collector interface and is used to describe metrics 92 | func (c *LoggingCollector) Describe(ch chan<- *prometheus.Desc) { 93 | ch <- c.Logs 94 | } 95 | -------------------------------------------------------------------------------- /collector/storage.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "context" 5 | "log" 6 | "sync" 7 | 8 | "github.com/DazWilkin/gcp-exporter/gcp" 9 | "github.com/prometheus/client_golang/prometheus" 10 | 11 | "google.golang.org/api/cloudresourcemanager/v1" 12 | "google.golang.org/api/storage/v1" 13 | ) 14 | 15 | // StorageCollector represents Cloud Storage 16 | type StorageCollector struct { 17 | account *gcp.Account 18 | storageService *storage.Service 19 | 20 | Buckets *prometheus.Desc 21 | } 22 | 23 | // NewStorageCollector returns a StorageCollector 24 | func NewStorageCollector(account *gcp.Account) (*StorageCollector, error) { 25 | subsystem := "storage" 26 | 27 | ctx := context.Background() 28 | storageService, err := storage.NewService(ctx) 29 | if err != nil { 30 | log.Println(err) 31 | return nil, err 32 | } 33 | 34 | return &StorageCollector{ 35 | account: account, 36 | storageService: storageService, 37 | 38 | Buckets: prometheus.NewDesc( 39 | prometheus.BuildFQName(prefix, subsystem, "buckets"), 40 | "Number of buckets", 41 | []string{ 42 | "project", 43 | // "region", 44 | }, 45 | nil, 46 | ), 47 | }, nil 48 | } 49 | 50 | // Collect implements Prometheus' Collector interface and is used to collect metrics 51 | func (c *StorageCollector) Collect(ch chan<- 
prometheus.Metric) { 52 | ctx := context.Background() 53 | 54 | // Enumerate all of the projects 55 | var wg sync.WaitGroup 56 | for _, p := range c.account.Projects { 57 | wg.Add(1) 58 | go func(p *cloudresourcemanager.Project) { 59 | defer wg.Done() 60 | log.Printf("[StorageCollector] Project: %s", p.ProjectId) 61 | resp, err := c.storageService.Buckets.List(p.ProjectId).MaxResults(500).Context(ctx).Do() 62 | if err != nil { 63 | log.Println(err) 64 | return 65 | } 66 | if resp.NextPageToken != "" { 67 | log.Println("[StorageCollector] Some buckets are being excluded from the results") 68 | } 69 | // for _, b := range resp.Items { 70 | // } 71 | ch <- prometheus.MustNewConstMetric( 72 | c.Buckets, 73 | prometheus.GaugeValue, 74 | float64(len(resp.Items)), 75 | []string{ 76 | p.ProjectId, 77 | }..., 78 | ) 79 | }(p) 80 | } 81 | wg.Wait() 82 | } 83 | 84 | // Describe implements Prometheus' Collector interface and is used to describe metrics 85 | func (c *StorageCollector) Describe(ch chan<- *prometheus.Desc) { 86 | ch <- c.Buckets 87 | } 88 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | services: 4 | cadvisor: 5 | restart: always 6 | image: gcr.io/google-containers/cadvisor:v0.36.0 7 | container_name: cadvisor 8 | volumes: 9 | - "/:/rootfs:ro" 10 | - "/var/run:/var/run:rw" 11 | - "/sys:/sys:ro" 12 | # Default location 13 | # - "/var/lib/docker/:/var/lib/docker:ro" 14 | # Snap location 15 | - "/var/snap/docker/current:/var/lib/docker:ro" 16 | expose: 17 | - "8080" 18 | ports: 19 | - 8085:8080 20 | 21 | node-exporter: 22 | restart: unless-stopped 23 | image: prom/node-exporter:v1.1.2 24 | container_name: node_exporter 25 | command: 26 | - '--path.rootfs=/host' 27 | volumes: 28 | - '/:/host:ro,rslave' 29 | expose: 30 | - "9100" 31 | ports: 32 | - 9100:9100 33 | 34 | prometheus: 35 | restart: always 36 | 
depends_on: 37 | - gcp-exporter 38 | image: docker.io/prom/prometheus:v2.42.0 39 | container_name: prometheus 40 | command: 41 | - --config.file=/etc/prometheus/prometheus.yml 42 | # Permits `curl --request POST http://localhost:9090/-/reload` 43 | - --web.enable-lifecycle 44 | volumes: 45 | - ${PWD}/prometheus.yml:/etc/prometheus/prometheus.yml 46 | - ${PWD}/rules.yml:/etc/alertmanager/rules.yml 47 | expose: 48 | - "9090" 49 | ports: 50 | - 9090:9090 51 | 52 | alertmanager: 53 | restart: always 54 | depends_on: 55 | - prometheus 56 | image: docker.io/prom/alertmanager:v0.25.0 57 | container_name: alertmanager 58 | volumes: 59 | - ${PWD}/alertmanager.yml:/etc/alertmanager/alertmanager.yml 60 | expose: 61 | - "9093" 62 | ports: 63 | - 9093:9093 64 | 65 | gcp-exporter: 66 | image: ghcr.io/dazwilkin/gcp-exporter:b3498435eecadc7c01e024ee0ce16b8c1e41aa40 67 | container_name: gcp-exporter 68 | environment: 69 | - GOOGLE_APPLICATION_CREDENTIALS=/secrets/client_secrets.json 70 | volumes: 71 | # Default location 72 | # - ${HOME}/.config/gcloud/application_default_credentials.json:/secrets/client_secrets.json 73 | # Using Snap will not resolve ${HOME} and requires fully resolved path 74 | - /home/userid/.config/gcloud/application_default_credentials.json:/secrets/client_secrets.json 75 | expose: 76 | - "9402" 77 | ports: 78 | - 9402:9402 79 | 80 | gcp-status: 81 | image: ghcr.io/dazwilkin/gcp-status:e18a9c39494e3fc13f31a696a7703c97ba2a5017 82 | container_name: gcp-status 83 | expose: 84 | - "9989" 85 | ports: 86 | - 9989:9989 -------------------------------------------------------------------------------- /collector/projects.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "context" 5 | "log" 6 | 7 | "github.com/DazWilkin/gcp-exporter/gcp" 8 | "github.com/prometheus/client_golang/prometheus" 9 | "google.golang.org/api/cloudresourcemanager/v1" 10 | ) 11 | 12 | // ProjectsCollector represents Google 
Cloud Platform projects 13 | type ProjectsCollector struct { 14 | account *gcp.Account 15 | cloudresourcemanagerService *cloudresourcemanager.Service 16 | 17 | filter string 18 | pagesize int64 19 | 20 | Count *prometheus.Desc 21 | } 22 | 23 | // NewProjectsCollector returns a new ProjectsCollector 24 | func NewProjectsCollector(account *gcp.Account, filter string, pagesize int64) (*ProjectsCollector, error) { 25 | subsystem := "projects" 26 | 27 | // Combine any user-specified filter with "lifecycleState:ACTIVE" to only process active projects 28 | if filter != "" { 29 | filter += " " 30 | } 31 | filter = filter + "lifecycleState:ACTIVE" 32 | log.Printf("Projects filter: '%s'", filter) 33 | 34 | ctx := context.Background() 35 | cloudresourcemanagerService, err := cloudresourcemanager.NewService(ctx) 36 | if err != nil { 37 | log.Println(err) // log and return the error to the caller instead of exiting the process here 38 | return nil, err 39 | } 40 | 41 | return &ProjectsCollector{ 42 | account: account, 43 | cloudresourcemanagerService: cloudresourcemanagerService, 44 | 45 | filter: filter, 46 | pagesize: pagesize, 47 | 48 | Count: prometheus.NewDesc( 49 | prometheus.BuildFQName(prefix, subsystem, "count"), 50 | "Number of Projects", 51 | []string{}, 52 | nil, 53 | ), 54 | }, nil 55 | } 56 | 57 | // Collect implements Prometheus' Collector interface and is used to collect metrics 58 | func (c *ProjectsCollector) Collect(ch chan<- prometheus.Metric) { 59 | ctx := context.Background() 60 | 61 | // Create the Projects.List request 62 | // Return at most (!) 
'--pagesize' projects 63 | // Filter the results to only include the project ID and number 64 | req := c.cloudresourcemanagerService.Projects.List().PageSize(c.pagesize).Fields("projects.projectId", "projects.projectNumber").Filter(c.filter) 65 | 66 | projects := []*cloudresourcemanager.Project{} 67 | 68 | // Do request at least once 69 | for { 70 | resp, err := req.Context(ctx).Do() 71 | if err != nil { 72 | log.Println("Unable to list projects") 73 | return 74 | } 75 | 76 | if len(resp.Projects) == 0 { 77 | log.Println("There are 0 projects. Nothing to do") 78 | return 79 | } 80 | 81 | // Append projects 82 | projects = append(projects, resp.Projects...) 83 | 84 | if resp.NextPageToken == "" { 85 | break 86 | } 87 | 88 | } 89 | 90 | // Now we have a revised list of projects 91 | // Update the shard list 92 | c.account.Update(projects) 93 | 94 | // Update the metric 95 | ch <- prometheus.MustNewConstMetric( 96 | c.Count, 97 | prometheus.GaugeValue, 98 | float64(len(projects)), 99 | []string{}..., 100 | ) 101 | 102 | } 103 | 104 | // Describe implements Prometheus' Collector interface and is used to desribe metrics 105 | func (c *ProjectsCollector) Describe(ch chan<- *prometheus.Desc) { 106 | ch <- c.Count 107 | } 108 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | on: 3 | push: 4 | branches: 5 | - master 6 | paths-ignore: 7 | - '**.md' 8 | - '**.json' 9 | - '**.jsonnet' 10 | - '**.key' 11 | - '**.pub' 12 | - '**.sh' 13 | jobs: 14 | golangci: 15 | name: lint 16 | runs-on: ubuntu-latest 17 | steps: 18 | - name: Checkout 19 | uses: actions/checkout@v6 20 | - name: Setup 21 | uses: actions/setup-go@v6 22 | with: 23 | go-version: stable 24 | - name: golangci-lint 25 | uses: golangci/golangci-lint-action@v9 26 | with: 27 | version: latest 28 | gcp-exporter: 29 | name: gcp-exporter 30 | needs: golangci 31 | 
runs-on: ubuntu-latest 32 | env: 33 | REPO: dazwilkin/gcp-exporter 34 | steps: 35 | - name: Checkout 36 | uses: actions/checkout@v6 37 | - name: Setup 38 | uses: docker/setup-buildx-action@v3 39 | - name: QEMU 40 | uses: docker/setup-qemu-action@v3 41 | - name: Login 42 | uses: docker/login-action@v3 43 | with: 44 | registry: ghcr.io 45 | username: ${{ github.repository_owner }} 46 | password: ${{ secrets.GHCR }} 47 | - name: Get kernel version 48 | run: echo "VERSION=$(uname --kernel-release)" >> ${GITHUB_ENV} 49 | - name: Buildx Multi-platform Linux Docker Images 50 | id: docker-build-push-multi-platform 51 | uses: docker/build-push-action@v6 52 | with: 53 | context: . 54 | platforms: linux/amd64,linux/arm/v7,linux/arm64 55 | file: ./Dockerfile 56 | build-args: | 57 | VERSION=${{ env.VERSION }} 58 | COMMIT=${{ github.sha }} 59 | tags: ghcr.io/${{ env.REPO }}:${{ github.sha }} 60 | push: true 61 | - name: Install Cosign 62 | uses: sigstore/cosign-installer@main 63 | - name: Write signing key to disk (only needed for `cosign sign --key`) 64 | run: echo "${{ secrets.SIGNING }}" > ./cosign.key 65 | - name: Sign container image 66 | run: | 67 | DIGEST=${{ steps.docker-build-push-multi-platform.outputs.digest }} 68 | cosign sign \ 69 | --yes \ 70 | --key=./cosign.key \ 71 | --annotations="repo=${{ github.repository }}" \ 72 | --annotations="workflow=${{ github.workflow }}" \ 73 | --annotations="commit=${{ github.sha }}" \ 74 | --annotations="version=${{ env.VERSION }}" \ 75 | ghcr.io/${{ env.REPO }}@${DIGEST} 76 | env: 77 | COSIGN_PASSWORD: "" 78 | - name: revise occurrences of the image 79 | run: | 80 | git config --local user.email "action@github.com" 81 | git config --local user.name "GitHub Actions" 82 | 83 | for FILENAME in "./docker-compose.yml" "./kubernetes.yaml" "./README.md" 84 | do 85 | sed \ 86 | --in-place \ 87 | "s|ghcr.io/${{ env.REPO }}:[0-9a-f]\{40\}|ghcr.io/${{ env.REPO }}:${{ github.sha }}|g" \ 88 | ${FILENAME} 89 | git add ${FILENAME} 90 | done 91 | 
92 | git commit --message "GitHub Actions update image references" 93 | git push origin master 94 | -------------------------------------------------------------------------------- /collector/endpoints.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "context" 5 | "log" 6 | "net/http" 7 | "sync" 8 | 9 | "github.com/DazWilkin/gcp-exporter/gcp" 10 | "github.com/prometheus/client_golang/prometheus" 11 | 12 | "google.golang.org/api/cloudresourcemanager/v1" 13 | "google.golang.org/api/googleapi" 14 | "google.golang.org/api/servicemanagement/v1" 15 | ) 16 | 17 | var ( 18 | _ prometheus.Collector = (*EndpointsCollector)(nil) 19 | ) 20 | 21 | // EndpointsCollector represents Services Management services 22 | type EndpointsCollector struct { 23 | account *gcp.Account 24 | servicemanagementService *servicemanagement.APIService 25 | 26 | Services *prometheus.Desc 27 | } 28 | 29 | // NewEndpointsCollector returns a new ServiceManagementCollector 30 | func NewEndpointsCollector(account *gcp.Account) (*EndpointsCollector, error) { 31 | subsystem := "cloud_endpoints" 32 | 33 | ctx := context.Background() 34 | servicemanagementService, err := servicemanagement.NewService(ctx) 35 | if err != nil { 36 | log.Println(err) 37 | return nil, err 38 | } 39 | 40 | return &EndpointsCollector{ 41 | account: account, 42 | servicemanagementService: servicemanagementService, 43 | 44 | Services: prometheus.NewDesc( 45 | prometheus.BuildFQName(prefix, subsystem, "services"), 46 | "Number of Cloud Endpoints services", 47 | []string{ 48 | "project", 49 | }, 50 | nil, 51 | ), 52 | }, nil 53 | } 54 | 55 | // Collect implements Prometheus' Collector interface and is used to collect metrics 56 | func (c *EndpointsCollector) Collect(ch chan<- prometheus.Metric) { 57 | // Enumerate all of the projects 58 | var wg sync.WaitGroup 59 | for _, p := range c.account.Projects { 60 | wg.Add(1) 61 | go func(p 
*cloudresourcemanager.Project) { 62 | defer wg.Done() 63 | log.Printf("[ServiceManagementCollector] Project: %s", p.ProjectId) 64 | 65 | // Uses Service Management API but filters by the services 66 | // That have this project ID as their Producer Project ID 67 | // See: https://servicemanagement.googleapis.com/v1/services 68 | rqst := c.servicemanagementService.Services.List().ProducerProjectId(p.ProjectId) 69 | 70 | services := 0 71 | 72 | for { 73 | resp, err := rqst.Do() 74 | if err != nil { 75 | if e, ok := err.(*googleapi.Error); ok { 76 | if e.Code == http.StatusForbidden { 77 | // Probably Service Management API has not been enabled for Project (p) 78 | return 79 | } 80 | 81 | log.Printf("Google API Error: %d [%s]", e.Code, e.Message) 82 | return 83 | } 84 | 85 | log.Println(err) 86 | return 87 | } 88 | 89 | services += len(resp.Services) 90 | 91 | // If there are no more pages, we're done 92 | if resp.NextPageToken == "" { 93 | break 94 | } 95 | 96 | // Otherwise, next page 97 | rqst = rqst.PageToken(resp.NextPageToken) 98 | } 99 | 100 | ch <- prometheus.MustNewConstMetric( 101 | c.Services, 102 | prometheus.GaugeValue, 103 | float64(services), 104 | []string{ 105 | p.ProjectId, 106 | }..., 107 | ) 108 | }(p) 109 | } 110 | wg.Wait() 111 | } 112 | 113 | // Describe implements Prometheus' Collector interface and is used to describe metrics 114 | func (c *EndpointsCollector) Describe(ch chan<- *prometheus.Desc) { 115 | ch <- c.Services 116 | } 117 | -------------------------------------------------------------------------------- /collector/scheduler.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | "net/http" 8 | "sync" 9 | 10 | "github.com/DazWilkin/gcp-exporter/gcp" 11 | "github.com/prometheus/client_golang/prometheus" 12 | 13 | cloudresourcemanager "google.golang.org/api/cloudresourcemanager/v1" 14 | cloudscheduler 
"google.golang.org/api/cloudscheduler/v1" 15 | "google.golang.org/api/googleapi" 16 | ) 17 | 18 | var ( 19 | _ prometheus.Collector = (*SchedulerCollector)(nil) 20 | ) 21 | 22 | // SchedulerCollector represents Cloud Scheduler 23 | type SchedulerCollector struct { 24 | account *gcp.Account 25 | schedulerService *cloudscheduler.Service 26 | 27 | Jobs *prometheus.Desc 28 | } 29 | 30 | // NewSchedulerCollector returns a new SchedulerCollector 31 | func NewSchedulerCollector(account *gcp.Account) (*SchedulerCollector, error) { 32 | subsystem := "cloud_scheduler" 33 | 34 | ctx := context.Background() 35 | schedulerService, err := cloudscheduler.NewService(ctx) 36 | if err != nil { 37 | log.Println(err) 38 | return nil, err 39 | } 40 | 41 | return &SchedulerCollector{ 42 | account: account, 43 | schedulerService: schedulerService, 44 | 45 | Jobs: prometheus.NewDesc( 46 | prometheus.BuildFQName(prefix, subsystem, "jobs"), 47 | "Number of Jobs", 48 | []string{ 49 | "project", 50 | // "region", 51 | }, 52 | nil, 53 | ), 54 | }, nil 55 | } 56 | 57 | // Collect implements Prometheus' Collector interface and is used to collect metrics 58 | func (c *SchedulerCollector) Collect(ch chan<- prometheus.Metric) { 59 | ctx := context.Background() 60 | 61 | // Enumerate all of the projects 62 | var wg sync.WaitGroup 63 | for _, p := range c.account.Projects { 64 | wg.Add(1) 65 | go func(p *cloudresourcemanager.Project) { 66 | defer wg.Done() 67 | log.Printf("[SchedulerCollector] Project: %s", p.ProjectId) 68 | 69 | name := fmt.Sprintf("projects/%s", p.ProjectId) 70 | count := 0 71 | 72 | rqst := c.schedulerService.Projects.Locations.List(name) 73 | if err := rqst.Pages(ctx, func(page *cloudscheduler.ListLocationsResponse) error { 74 | for _, l := range page.Locations { 75 | log.Printf("[SchedulerCollector] Project: %s (Location: %s)", p.ProjectId, l.LocationId) 76 | 77 | name2 := fmt.Sprintf("%s/locations/%s", name, l.LocationId) 78 | rqst2 := 
c.schedulerService.Projects.Locations.Jobs.List(name2) 79 | if err := rqst2.Pages(ctx, func(page2 *cloudscheduler.ListJobsResponse) error { 80 | // Count the number of Jobs 81 | count += len(page2.Jobs) 82 | // for _, j := range page2.Jobs { 83 | // log.Printf("[SchedulerCollector] Job: %s", j) 84 | // } 85 | return nil 86 | }); err != nil { 87 | if e, ok := err.(*googleapi.Error); ok { 88 | log.Printf("Google API Error: %d [%s]", e.Code, e.Message) 89 | return nil 90 | } 91 | 92 | log.Println(err) 93 | return nil 94 | } 95 | } 96 | return nil 97 | }); err != nil { 98 | if e, ok := err.(*googleapi.Error); ok { 99 | if e.Code == http.StatusForbidden { 100 | // Probably (!) Cloud Scheduler API has not been enabled for Project (p) 101 | return 102 | } 103 | 104 | log.Printf("Googe API Error: %d (%s)", e.Code, e.Message) 105 | return 106 | } 107 | 108 | log.Println(err) 109 | return 110 | } 111 | 112 | if count != 0 { 113 | ch <- prometheus.MustNewConstMetric( 114 | c.Jobs, 115 | prometheus.GaugeValue, 116 | float64(count), 117 | []string{ 118 | p.ProjectId, 119 | }..., 120 | ) 121 | } 122 | }(p) 123 | wg.Wait() 124 | } 125 | } 126 | 127 | // Describe implements Prometheus' Collector interface and is used to describe metrics 128 | func (c *SchedulerCollector) Describe(ch chan<- *prometheus.Desc) { 129 | ch <- c.Jobs 130 | } 131 | -------------------------------------------------------------------------------- /collector/eventarc.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | "net/http" 8 | "sync" 9 | 10 | "github.com/DazWilkin/gcp-exporter/gcp" 11 | "github.com/prometheus/client_golang/prometheus" 12 | "google.golang.org/api/eventarc/v1" 13 | "google.golang.org/api/googleapi" 14 | ) 15 | 16 | // EventarcCollector represents EventArc 17 | type EventarcCollector struct { 18 | account *gcp.Account 19 | eventarcService *eventarc.Service 20 | 21 | Channels 
*prometheus.Desc 22 | Triggers *prometheus.Desc 23 | } 24 | 25 | // NewEventarcCollector creates a new EventarcCollector 26 | func NewEventarcCollector(account *gcp.Account) (*EventarcCollector, error) { 27 | subsystem := "eventarc" 28 | 29 | ctx := context.Background() 30 | eventarcService, err := eventarc.NewService(ctx) 31 | if err != nil { 32 | log.Println(err) 33 | return nil, err 34 | } 35 | 36 | return &EventarcCollector{ 37 | account: account, 38 | eventarcService: eventarcService, 39 | 40 | Channels: prometheus.NewDesc( 41 | prometheus.BuildFQName(prefix, subsystem, "channels"), 42 | "1 if the channel exists", 43 | []string{ 44 | "project", 45 | "name", 46 | "provider", 47 | "pubsubtopic", 48 | "state", 49 | }, 50 | nil, 51 | ), 52 | Triggers: prometheus.NewDesc( 53 | prometheus.BuildFQName(prefix, subsystem, "triggers"), 54 | "1 if the trigger exists", 55 | []string{ 56 | "project", 57 | "name", 58 | "channel", 59 | "contenttype", 60 | "destination", 61 | }, 62 | nil, 63 | ), 64 | }, nil 65 | } 66 | 67 | // Collect implements Prometheus' Collector interface and is used to collect metrics 68 | func (c *EventarcCollector) Collect(ch chan<- prometheus.Metric) { 69 | // Enumerate all of the projects 70 | var wg sync.WaitGroup 71 | for _, p := range c.account.Projects { 72 | log.Printf("[EventarcCollector] Project: %s", p.ProjectId) 73 | parent := fmt.Sprintf("projects/%s/locations/-", p.ProjectId) 74 | 75 | // Channels 76 | wg.Add(1) 77 | go func() { 78 | defer wg.Done() 79 | 80 | rqst := c.eventarcService.Projects.Locations.Channels.List(parent) 81 | resp, err := rqst.Do() 82 | if err != nil { 83 | if e, ok := err.(*googleapi.Error); ok { 84 | if e.Code == http.StatusForbidden { 85 | // Probably (!) 
Eventarc API has not enabled in this Project 86 | return 87 | } 88 | 89 | log.Printf("Google API Error: %d [%s]", e.Code, e.Message) 90 | return 91 | } 92 | 93 | log.Println(err) 94 | return 95 | } 96 | 97 | for _, channel := range resp.Channels { 98 | log.Printf("[EventarcCollector] channel: %s", channel.Name) 99 | ch <- prometheus.MustNewConstMetric( 100 | c.Channels, 101 | prometheus.CounterValue, 102 | 1.0, 103 | []string{ 104 | p.ProjectId, 105 | channel.Name, 106 | channel.Provider, 107 | channel.PubsubTopic, 108 | channel.State, 109 | }..., 110 | ) 111 | } 112 | }() 113 | 114 | // Triggers 115 | wg.Add(1) 116 | go func() { 117 | defer wg.Done() 118 | 119 | rqst := c.eventarcService.Projects.Locations.Triggers.List(parent) 120 | resp, err := rqst.Do() 121 | if err != nil { 122 | if e, ok := err.(*googleapi.Error); ok { 123 | if e.Code == http.StatusForbidden { 124 | // Probably (!) Eventarc API has not enabled in this Project 125 | return 126 | } 127 | 128 | log.Printf("Google API Error: %d [%s]", e.Code, e.Message) 129 | return 130 | } 131 | 132 | log.Println(err) 133 | return 134 | } 135 | 136 | for _, trigger := range resp.Triggers { 137 | log.Printf("[EventarcCollector] trigger: %s", trigger.Name) 138 | ch <- prometheus.MustNewConstMetric( 139 | c.Triggers, 140 | prometheus.CounterValue, 141 | 1.0, 142 | []string{ 143 | p.ProjectId, 144 | trigger.Name, 145 | trigger.Channel, 146 | trigger.EventDataContentType, 147 | func(d *eventarc.Destination) string { 148 | if d.CloudFunction != "" { 149 | return "cloudfunction" 150 | } 151 | if d.CloudRun != nil { 152 | return "cloudrun" 153 | } 154 | if d.Gke != nil { 155 | return "gke" 156 | } 157 | if d.Workflow != "" { 158 | return "workflow" 159 | } 160 | return "" 161 | }(trigger.Destination), 162 | }..., 163 | ) 164 | } 165 | }() 166 | } 167 | wg.Wait() 168 | } 169 | 170 | // Describe implements Prometheus' Collector interface and is used to describe metrics 171 | func (c *EventarcCollector) Describe(ch chan<- 
*prometheus.Desc) { 172 | ch <- c.Channels 173 | ch <- c.Triggers 174 | } 175 | -------------------------------------------------------------------------------- /collector/compute.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "context" 5 | "log" 6 | "sync" 7 | 8 | "github.com/DazWilkin/gcp-exporter/gcp" 9 | "github.com/prometheus/client_golang/prometheus" 10 | 11 | "google.golang.org/api/cloudresourcemanager/v1" 12 | "google.golang.org/api/compute/v1" 13 | "google.golang.org/api/googleapi" 14 | ) 15 | 16 | // ComputeCollector represents Compute Engine 17 | type ComputeCollector struct { 18 | account *gcp.Account 19 | computeService *compute.Service 20 | 21 | Instances *prometheus.Desc 22 | ForwardingRules *prometheus.Desc 23 | } 24 | 25 | // NewComputeCollector returns a new ComputeCollector 26 | func NewComputeCollector(account *gcp.Account) (*ComputeCollector, error) { 27 | subsystem := "compute_engine" 28 | 29 | ctx := context.Background() 30 | computeService, err := compute.NewService(ctx) 31 | if err != nil { 32 | log.Println(err) 33 | return nil, err 34 | } 35 | 36 | return &ComputeCollector{ 37 | account: account, 38 | computeService: computeService, 39 | 40 | Instances: prometheus.NewDesc( 41 | prometheus.BuildFQName(prefix, subsystem, "instances"), 42 | "Number of instances", 43 | []string{ 44 | "project", 45 | "zone", 46 | }, 47 | nil, 48 | ), 49 | ForwardingRules: prometheus.NewDesc( 50 | prometheus.BuildFQName(prefix, subsystem, "forwardingrules"), 51 | "Number of forwardingrules", 52 | []string{ 53 | "project", 54 | "region", 55 | }, 56 | nil, 57 | ), 58 | }, nil 59 | } 60 | 61 | // Collect implements Prometheus' Collector interface and is used to collect metrics 62 | func (c *ComputeCollector) Collect(ch chan<- prometheus.Metric) { 63 | ctx := context.Background() 64 | 65 | // Enumerate all of the projects 66 | // WaitGroup is used for project Instances|ForwardingRules 
only (not the projects themselves) 67 | var wg sync.WaitGroup 68 | for _, p := range c.account.Projects { 69 | log.Printf("[ComputeCollector] Project: %s", p.ProjectId) 70 | 71 | wg.Add(1) 72 | go func(p *cloudresourcemanager.Project) { 73 | defer wg.Done() 74 | // Compute Engine API instances.list requires zone 75 | // Must repeat the call for all possible zones 76 | zoneList, err := c.computeService.Zones.List(p.ProjectId).Context(ctx).Do() 77 | if err != nil { 78 | if e, ok := err.(*googleapi.Error); ok { 79 | log.Printf("[ComputeCollector] Project: %s -- Zones.List (%d)", p.ProjectId, e.Code) 80 | } 81 | return 82 | } 83 | for _, z := range zoneList.Items { 84 | wg.Add(1) 85 | go func(z *compute.Zone) { 86 | defer wg.Done() 87 | rqst := c.computeService.Instances.List(p.ProjectId, z.Name).MaxResults(500) 88 | count := 0 89 | // Page through more results 90 | if err := rqst.Pages(ctx, func(page *compute.InstanceList) error { 91 | count += len(page.Items) 92 | // for _, instance := range page.Items { 93 | // instance. 
94 | // } 95 | return nil 96 | }); err != nil { 97 | log.Println(err) 98 | return 99 | } 100 | if count != 0 { 101 | ch <- prometheus.MustNewConstMetric( 102 | c.Instances, 103 | prometheus.GaugeValue, 104 | float64(count), 105 | []string{ 106 | p.ProjectId, 107 | z.Name, 108 | }..., 109 | ) 110 | } 111 | }(z) 112 | } 113 | }(p) 114 | 115 | wg.Add(1) 116 | go func(p *cloudresourcemanager.Project) { 117 | defer wg.Done() 118 | // Compute Engine API forwardingrules.list requires region 119 | // Must repeat call for all possible regions 120 | regionList, err := c.computeService.Regions.List(p.ProjectId).Context(ctx).Do() 121 | if err != nil { 122 | if e, ok := err.(*googleapi.Error); ok { 123 | log.Printf("[ComputeCollector] Project: %s -- Regions.List (%d)", p.ProjectId, e.Code) 124 | } else { 125 | log.Println(err) 126 | } 127 | return 128 | } 129 | for _, r := range regionList.Items { 130 | wg.Add(1) 131 | go func(r *compute.Region) { 132 | defer wg.Done() 133 | rqst := c.computeService.ForwardingRules.List(p.ProjectId, r.Name).MaxResults(500) 134 | count := 0 135 | if err := rqst.Pages(ctx, func(page *compute.ForwardingRuleList) error { 136 | count += len(page.Items) 137 | return nil 138 | }); err != nil { 139 | log.Println(err) 140 | return 141 | } 142 | if count != 0 { 143 | ch <- prometheus.MustNewConstMetric( 144 | c.ForwardingRules, 145 | prometheus.GaugeValue, 146 | float64(count), 147 | []string{ 148 | p.ProjectId, 149 | r.Name, 150 | }..., 151 | ) 152 | } 153 | }(r) 154 | } 155 | }(p) 156 | } 157 | wg.Wait() 158 | } 159 | 160 | // Describe implements Prometheus' Collector interface and is used to describe metrics 161 | func (c *ComputeCollector) Describe(ch chan<- *prometheus.Desc) { 162 | ch <- c.Instances 163 | ch <- c.ForwardingRules 164 | } 165 | -------------------------------------------------------------------------------- /collector/cloudrun.go: -------------------------------------------------------------------------------- 1 | package collector 2 
| 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | "net/http" 8 | "sync" 9 | 10 | "github.com/DazWilkin/gcp-exporter/gcp" 11 | "github.com/prometheus/client_golang/prometheus" 12 | 13 | "google.golang.org/api/cloudresourcemanager/v1" 14 | "google.golang.org/api/googleapi" 15 | "google.golang.org/api/run/v1" 16 | ) 17 | 18 | var ( 19 | _ prometheus.Collector = (*CloudRunCollector)(nil) 20 | ) 21 | 22 | // CloudRunCollector represents Cloud Run 23 | type CloudRunCollector struct { 24 | account *gcp.Account 25 | cloudrunService *run.APIService 26 | 27 | Jobs *prometheus.Desc 28 | Services *prometheus.Desc 29 | } 30 | 31 | // NewCloudRunCollector returns a new CloudRunCollector 32 | func NewCloudRunCollector(account *gcp.Account) (*CloudRunCollector, error) { 33 | subsystem := "cloud_run" 34 | 35 | ctx := context.Background() 36 | cloudrunService, err := run.NewService(ctx) 37 | if err != nil { 38 | log.Println(err) 39 | return nil, err 40 | } 41 | 42 | return &CloudRunCollector{ 43 | account: account, 44 | cloudrunService: cloudrunService, 45 | 46 | Jobs: prometheus.NewDesc( 47 | prometheus.BuildFQName(prefix, subsystem, "jobs"), 48 | "Number of Jobs", 49 | []string{ 50 | "project", 51 | // "region", 52 | }, 53 | nil, 54 | ), 55 | Services: prometheus.NewDesc( 56 | prometheus.BuildFQName(prefix, subsystem, "services"), 57 | "Number of Services", 58 | []string{ 59 | "project", 60 | // "region", 61 | }, 62 | nil, 63 | ), 64 | }, nil 65 | } 66 | 67 | // Collect implements Prometheus' Collector interface and is used to collect metrics 68 | func (c *CloudRunCollector) Collect(ch chan<- prometheus.Metric) { 69 | // Enumerate all of the projects 70 | // WaitGroup is used for project Services|Jobs 71 | var wg sync.WaitGroup 72 | for _, p := range c.account.Projects { 73 | log.Printf("[CloudRunCollector] Project: %s", p.ProjectId) 74 | 75 | parent := fmt.Sprintf("namespaces/%s", p.ProjectId) 76 | 77 | // Cloud Run services 78 | wg.Add(1) 79 | go func(p 
*cloudresourcemanager.Project) {
80 | 			defer wg.Done()
81 | 
82 | 			// ListServicesResponse may (!) contain Metadata
83 | 			// If Metadata is present, it may (!) contain Continue iff there's more data
84 | 			// https://pkg.go.dev/google.golang.org/api@v0.43.0/run/v1#ListServicesResponse
85 | 			// https://pkg.go.dev/google.golang.org/api@v0.43.0/run/v1#ListMeta
86 | 
87 | 			rqst := c.cloudrunService.Namespaces.Services.List(parent)
88 | 
89 | 			// Do request at least once
90 | 			cont := ""
91 | 			count := 0
92 | 			for {
93 | 				rqst.Continue(cont)
94 | 				resp, err := rqst.Do()
95 | 				if err != nil {
96 | 					if e, ok := err.(*googleapi.Error); ok {
97 | 						if e.Code == http.StatusForbidden {
98 | 							// Probably (!) Cloud Run Admin API has not been used in this project
99 | 							return
100 | 						}
101 | 
102 | 						log.Printf("Google API Error: %d [%s]", e.Code, e.Message)
103 | 						return
104 | 					}
105 | 
106 | 					log.Println(err)
107 | 					return
108 | 				}
109 | 
110 | 				pageSize := len(resp.Items)
111 | 				count += pageSize
112 | 
113 | 				if resp.Metadata == nil || resp.Metadata.Continue == "" {
114 | 					// No Metadata, or no Continue token: this was the last page
115 | 					break
116 | 				}
117 | 
118 | 				// Otherwise, request the next page
119 | 				cont = resp.Metadata.Continue
120 | 			}
121 | 
122 | 			if count != 0 {
123 | 				ch <- prometheus.MustNewConstMetric(
124 | 					c.Services,
125 | 					prometheus.GaugeValue,
126 | 					float64(count),
127 | 					[]string{
128 | 						p.ProjectId,
129 | 					}...,
130 | 				)
131 | 			}
132 | 		}(p)
133 | 
134 | 		// Cloud Run jobs
135 | 		wg.Add(1)
136 | 		go func(p *cloudresourcemanager.Project) {
137 | 			defer wg.Done()
138 | 
139 | 			rqst := c.cloudrunService.Namespaces.Jobs.List(parent)
140 | 
141 | 			// Do request at least once
142 | 			cont := ""
143 | 			count := 0
144 | 			for {
145 | 				rqst.Continue(cont)
146 | 				resp, err := rqst.Do()
147 | 				if err != nil {
148 | 					if e, ok := err.(*googleapi.Error); ok {
149 | 						if e.Code == http.StatusForbidden {
150 | 							// Probably (!)
Cloud Run Admin API has not been used in this project 151 | return 152 | } 153 | 154 | log.Printf("Google API Error: %d [%s]", e.Code, e.Message) 155 | return 156 | } 157 | 158 | log.Println(err) 159 | return 160 | } 161 | 162 | pageSize := len(resp.Items) 163 | count += pageSize 164 | 165 | if resp.Metadata != nil { 166 | // If there's Metadata, update cont 167 | cont = resp.Metadata.Continue 168 | } else { 169 | // We're done 170 | break 171 | } 172 | } 173 | if count != 0 { 174 | ch <- prometheus.MustNewConstMetric( 175 | c.Jobs, 176 | prometheus.GaugeValue, 177 | float64(count), 178 | []string{ 179 | p.ProjectId, 180 | }..., 181 | ) 182 | } 183 | }(p) 184 | } 185 | wg.Wait() 186 | } 187 | 188 | // Describe implements Prometheus' Collector interface and is used to describe metrics 189 | func (c *CloudRunCollector) Describe(ch chan<- *prometheus.Desc) { 190 | ch <- c.Services 191 | ch <- c.Jobs 192 | } 193 | -------------------------------------------------------------------------------- /collector/monitoring.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | "sync" 8 | 9 | "github.com/DazWilkin/gcp-exporter/gcp" 10 | 11 | "github.com/prometheus/client_golang/prometheus" 12 | 13 | "google.golang.org/api/monitoring/v3" 14 | ) 15 | 16 | // MonitoringCollector represents Cloud Monitoring 17 | type MonitoringCollector struct { 18 | account *gcp.Account 19 | monitoringService *monitoring.Service 20 | 21 | AlertPolicies *prometheus.Desc 22 | Alerts *prometheus.Desc 23 | UptimeChecks *prometheus.Desc 24 | } 25 | 26 | // NewMonitoringCollector create a new MonitoringCollector 27 | func NewMonitoringCollector(account *gcp.Account) (*MonitoringCollector, error) { 28 | subsystem := "cloud_monitoring" 29 | 30 | ctx := context.Background() 31 | monitoringService, err := monitoring.NewService(ctx) 32 | if err != nil { 33 | log.Println(err) 34 | return nil, err 35 | } 
36 | 37 | return &MonitoringCollector{ 38 | account: account, 39 | monitoringService: monitoringService, 40 | 41 | AlertPolicies: prometheus.NewDesc( 42 | prometheus.BuildFQName(prefix, subsystem, "alert_policies"), 43 | "Number of Alert Policies", 44 | []string{ 45 | "project", 46 | }, 47 | nil, 48 | ), 49 | Alerts: prometheus.NewDesc( 50 | prometheus.BuildFQName(prefix, subsystem, "alerts"), 51 | "Number of Alerts", 52 | []string{ 53 | "project", 54 | }, 55 | nil, 56 | ), 57 | UptimeChecks: prometheus.NewDesc( 58 | prometheus.BuildFQName(prefix, subsystem, "uptime_checks"), 59 | "Number of Uptime Checks", 60 | []string{ 61 | "project", 62 | }, 63 | nil, 64 | ), 65 | }, nil 66 | } 67 | 68 | // Collect implements Prometheus' Collector interface and is used to collect metrics 69 | func (c *MonitoringCollector) Collect(ch chan<- prometheus.Metric) { 70 | ctx := context.Background() 71 | 72 | // Enumerate all projects 73 | // WaitGroup is used for project AlertPolicies|UptimeChecks 74 | var wg sync.WaitGroup 75 | for _, p := range c.account.Projects { 76 | log.Printf("[MonitoringCollector] Project: %s", p.ProjectId) 77 | 78 | parent := fmt.Sprintf("projects/%s", p.ProjectId) 79 | 80 | c.collectAlertPolicies(ctx, &wg, ch, parent, p.ProjectId) 81 | c.collectAlerts(ctx, &wg, ch, parent, p.ProjectId) 82 | c.collectUptimeChecks(ctx, &wg, ch, parent, p.ProjectId) 83 | } 84 | // Wait for all projects to process 85 | wg.Wait() 86 | } 87 | 88 | // collectAlertPolicies collects alert policy metrics 89 | func (c *MonitoringCollector) collectAlertPolicies(ctx context.Context, wg *sync.WaitGroup, ch chan<- prometheus.Metric, parent, projectID string) { 90 | wg.Add(1) 91 | go func(project string) { 92 | defer wg.Done() 93 | 94 | count := 0 95 | 96 | rqst := c.monitoringService.Projects.AlertPolicies.List(parent) 97 | if err := rqst.Pages(ctx, func(page *monitoring.ListAlertPoliciesResponse) error { 98 | count += len(page.AlertPolicies) 99 | return nil 100 | }); err != nil { 101 | 
log.Println(err) 102 | return 103 | } 104 | 105 | if count != 0 { 106 | ch <- prometheus.MustNewConstMetric( 107 | c.AlertPolicies, 108 | prometheus.GaugeValue, 109 | float64(count), 110 | []string{ 111 | project, 112 | }..., 113 | ) 114 | } 115 | }(projectID) 116 | } 117 | 118 | // collectAlerts collects alert metrics 119 | func (c *MonitoringCollector) collectAlerts(ctx context.Context, wg *sync.WaitGroup, ch chan<- prometheus.Metric, parent, projectID string) { 120 | wg.Add(1) 121 | go func(project string) { 122 | defer wg.Done() 123 | 124 | count := 0 125 | 126 | rqst := c.monitoringService.Projects.Alerts.List(parent) 127 | if err := rqst.Pages(ctx, func(page *monitoring.ListAlertsResponse) error { 128 | count += len(page.Alerts) 129 | return nil 130 | }); err != nil { 131 | log.Println(err) 132 | return 133 | } 134 | 135 | if count != 0 { 136 | ch <- prometheus.MustNewConstMetric( 137 | c.Alerts, 138 | prometheus.GaugeValue, 139 | float64(count), 140 | []string{ 141 | project, 142 | }..., 143 | ) 144 | } 145 | }(projectID) 146 | } 147 | 148 | // collectUptimeChecks collects uptime check metrics 149 | func (c *MonitoringCollector) collectUptimeChecks(ctx context.Context, wg *sync.WaitGroup, ch chan<- prometheus.Metric, parent, projectID string) { 150 | wg.Add(1) 151 | go func(project string) { 152 | defer wg.Done() 153 | 154 | count := 0 155 | 156 | rqst := c.monitoringService.Projects.UptimeCheckConfigs.List(parent) 157 | if err := rqst.Pages(ctx, func(page *monitoring.ListUptimeCheckConfigsResponse) error { 158 | count += len(page.UptimeCheckConfigs) 159 | return nil 160 | }); err != nil { 161 | log.Println(err) 162 | return 163 | } 164 | 165 | if count != 0 { 166 | ch <- prometheus.MustNewConstMetric( 167 | c.UptimeChecks, 168 | prometheus.GaugeValue, 169 | float64(count), 170 | []string{ 171 | project, 172 | }..., 173 | ) 174 | } 175 | }(projectID) 176 | } 177 | 178 | // Describe implements Prometheus' Collector interface and is used to describe metrics 
179 | func (c *MonitoringCollector) Describe(ch chan<- *prometheus.Desc) { 180 | ch <- c.AlertPolicies 181 | ch <- c.UptimeChecks 182 | } 183 | -------------------------------------------------------------------------------- /collector/iam.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | "net/http" 8 | "strings" 9 | "sync" 10 | 11 | "github.com/DazWilkin/gcp-exporter/gcp" 12 | "github.com/prometheus/client_golang/prometheus" 13 | "google.golang.org/api/cloudresourcemanager/v1" 14 | "google.golang.org/api/googleapi" 15 | "google.golang.org/api/iam/v1" 16 | ) 17 | 18 | // IAMCollector represents Identity and Access Management (IAM) 19 | type IAMCollector struct { 20 | account *gcp.Account 21 | iamService *iam.Service 22 | 23 | Up *prometheus.Desc 24 | ServiceAccounts *prometheus.Desc 25 | ServiceAccountKeys *prometheus.Desc 26 | } 27 | 28 | // NewIAMCollector creates a new IAMCollector 29 | func NewIAMCollector(account *gcp.Account) (*IAMCollector, error) { 30 | subsystem := "iam" 31 | 32 | ctx := context.Background() 33 | iamService, err := iam.NewService(ctx) 34 | if err != nil { 35 | log.Println(err) 36 | return nil, err 37 | } 38 | 39 | return &IAMCollector{ 40 | account: account, 41 | iamService: iamService, 42 | 43 | Up: prometheus.NewDesc( 44 | prometheus.BuildFQName(prefix, subsystem, "up"), 45 | "1 if the IAM service is up, 0 otherwise", 46 | nil, 47 | nil, 48 | ), 49 | ServiceAccounts: prometheus.NewDesc( 50 | prometheus.BuildFQName(prefix, subsystem, "service_accounts"), 51 | "Number of Service Accounts", 52 | []string{ 53 | "project", 54 | "name", 55 | "disabled", 56 | }, 57 | nil, 58 | ), 59 | ServiceAccountKeys: prometheus.NewDesc( 60 | prometheus.BuildFQName(prefix, subsystem, "service_account_keys"), 61 | "Number of Service Account Keys", 62 | []string{ 63 | "project", 64 | "service_account_email", 65 | "key", 66 | "type", 67 | "disabled", 
68 | }, 69 | nil, 70 | ), 71 | }, nil 72 | } 73 | 74 | // Collect implements Prometheus' Collector interface and is used to collect metrics 75 | func (c *IAMCollector) Collect(ch chan<- prometheus.Metric) { 76 | ctx := context.Background() 77 | 78 | // Enumerate all of the projects 79 | var wg sync.WaitGroup 80 | for _, p := range c.account.Projects { 81 | wg.Add(1) 82 | go func(p *cloudresourcemanager.Project) { 83 | defer wg.Done() 84 | log.Printf("IAMCollector:go] Project: %s", p.ProjectId) 85 | parent := fmt.Sprintf("projects/%s", p.ProjectId) 86 | resp, err := c.iamService.Projects.ServiceAccounts.List(parent).Context(ctx).Do() 87 | if err != nil { 88 | if e, ok := err.(*googleapi.Error); ok { 89 | if e.Code == http.StatusForbidden { 90 | // Probably (!) IAM API has not been enabled for Project (p) 91 | return 92 | } 93 | 94 | log.Printf("Google API Error: %d [%s]", e.Code, e.Message) 95 | return 96 | } 97 | 98 | log.Println(err) 99 | return 100 | } 101 | 102 | for _, account := range resp.Accounts { 103 | log.Printf("IAMCollector:go] ServiceAccount: %s", account.Name) 104 | 105 | // Record Service Account metrics 106 | ch <- prometheus.MustNewConstMetric( 107 | c.ServiceAccounts, 108 | prometheus.GaugeValue, 109 | 1.0, 110 | []string{ 111 | p.ProjectId, 112 | account.Email, 113 | fmt.Sprintf("%t", account.Disabled), 114 | }..., 115 | ) 116 | 117 | // Service Account Keys within Service Account 118 | name := fmt.Sprintf("projects/%s/serviceAccounts/%s", p.ProjectId, account.UniqueId) 119 | resp, err := c.iamService.Projects.ServiceAccounts.Keys.List(name).Context(ctx).Do() 120 | if err != nil { 121 | if e, ok := err.(*googleapi.Error); ok { 122 | if e.Code == http.StatusForbidden { 123 | // Probably (!) 
IAM API has not been enabled for Project (p) 124 | return 125 | } 126 | 127 | log.Printf("Google API Error: %d [%s]", e.Code, e.Message) 128 | return 129 | } 130 | 131 | log.Println(err) 132 | return 133 | } 134 | 135 | for _, key := range resp.Keys { 136 | log.Printf("[IAMCollector:go] ServiceAccountKey: %s", key.Name) 137 | 138 | // Name = projects/{PROJECT_ID}/serviceAccounts/{ACCOUNT}/keys/{key} 139 | keyID, err := func(name string) (string, error) { 140 | if name == "" { 141 | return "", fmt.Errorf("name is empty") 142 | } 143 | 144 | parts := strings.Split(name, "/") 145 | if len(parts) != 6 { 146 | return "", fmt.Errorf("expected 6 parts, got %d (%s)", len(parts), parts) 147 | } 148 | 149 | // Return the last part (key) 150 | key := parts[len(parts)-1] 151 | return key, nil 152 | }(key.Name) 153 | if err != nil { 154 | log.Printf("unable to extract {key} from %s", key.Name) 155 | continue 156 | } 157 | 158 | // Record Service Account Key metrics 159 | ch <- prometheus.MustNewConstMetric( 160 | c.ServiceAccountKeys, 161 | prometheus.GaugeValue, 162 | 1.0, 163 | []string{ 164 | p.ProjectId, 165 | account.Email, 166 | keyID, 167 | key.KeyType, 168 | fmt.Sprintf("%t", key.Disabled), 169 | }..., 170 | ) 171 | } 172 | } 173 | }(p) 174 | } 175 | wg.Wait() 176 | } 177 | 178 | // Describe implements Prometheus' Collector interface and is used to describe metrics 179 | func (c *IAMCollector) Describe(ch chan<- *prometheus.Desc) { 180 | ch <- c.Up 181 | ch <- c.ServiceAccounts 182 | ch <- c.ServiceAccountKeys 183 | } 184 | -------------------------------------------------------------------------------- /collector/artifactregistry.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | "net/http" 8 | "sync" 9 | 10 | "github.com/DazWilkin/gcp-exporter/gcp" 11 | 12 | artifactregistry "google.golang.org/api/artifactregistry/v1beta2" 13 | 
"google.golang.org/api/cloudresourcemanager/v1" 14 | "google.golang.org/api/googleapi" 15 | 16 | "github.com/prometheus/client_golang/prometheus" 17 | ) 18 | 19 | var ( 20 | _ prometheus.Collector = (*ArtifactRegistryCollector)(nil) 21 | ) 22 | 23 | // ArtifactRegistryCollector represents an Artifact Registry 24 | type ArtifactRegistryCollector struct { 25 | account *gcp.Account 26 | artifactregistryService *artifactregistry.Service 27 | 28 | Registries *prometheus.Desc 29 | Locations *prometheus.Desc 30 | Formats *prometheus.Desc 31 | } 32 | 33 | // NewArtifactRegistryCollector returns a new ArtifactRegistryCollector 34 | func NewArtifactRegistryCollector(account *gcp.Account) (*ArtifactRegistryCollector, error) { 35 | subsystem := "artifact_registry" 36 | 37 | ctx := context.Background() 38 | artifactregistryService, err := artifactregistry.NewService(ctx) 39 | if err != nil { 40 | log.Println(err) 41 | return nil, err 42 | } 43 | 44 | return &ArtifactRegistryCollector{ 45 | account: account, 46 | artifactregistryService: artifactregistryService, 47 | 48 | Registries: prometheus.NewDesc( 49 | prometheus.BuildFQName(prefix, subsystem, "registries"), 50 | "Number of Registries", 51 | []string{ 52 | "project", 53 | }, 54 | nil, 55 | ), 56 | Locations: prometheus.NewDesc( 57 | prometheus.BuildFQName(prefix, subsystem, "locations"), 58 | "Number of Locations", 59 | []string{ 60 | "project", 61 | "location", 62 | }, 63 | nil, 64 | ), 65 | Formats: prometheus.NewDesc( 66 | prometheus.BuildFQName(prefix, subsystem, "formats"), 67 | "Number of Formats", 68 | []string{ 69 | "project", 70 | "format", 71 | }, 72 | nil, 73 | ), 74 | }, nil 75 | } 76 | 77 | // Collect implements Prometheus' Collector interface and is used to collect metrics 78 | func (c *ArtifactRegistryCollector) Collect(ch chan<- prometheus.Metric) { 79 | // Enumerate all of the projects 80 | var wg sync.WaitGroup 81 | for _, p := range c.account.Projects { 82 | wg.Add(1) 83 | go func(p 
*cloudresourcemanager.Project) { 84 | defer wg.Done() 85 | log.Printf("[ArtifactRegistryCollector] Project: %s", p.ProjectId) 86 | name := fmt.Sprintf("projects/%s", p.ProjectId) 87 | rqst := c.artifactregistryService.Projects.Locations.List(name) 88 | resp, err := rqst.Do() 89 | if err != nil { 90 | if e, ok := err.(*googleapi.Error); ok { 91 | if e.Code == http.StatusForbidden { 92 | // Probably (!) Artifact Registry API has not been enabled for Project (p) 93 | return 94 | } 95 | 96 | log.Printf("Google API Error: %d [%s]", e.Code, e.Message) 97 | return 98 | } 99 | 100 | log.Println(err) 101 | return 102 | } 103 | 104 | repositories := 0 105 | locations := make(map[string]int) 106 | formats := make(map[string]int) 107 | 108 | // For each Location 109 | // Enumerate the list of repositories 110 | for _, l := range resp.Locations { 111 | // LocationID is the short form e.g. "us-west1" 112 | parent := fmt.Sprintf("projects/%s/locations/%s", p.ProjectId, l.LocationId) 113 | rqst := c.artifactregistryService.Projects.Locations.Repositories.List(parent) 114 | 115 | for { 116 | resp, err := rqst.Do() 117 | if err != nil { 118 | if e, ok := err.(*googleapi.Error); ok { 119 | if e.Code == http.StatusForbidden { 120 | // Probably (!) 
Cloud Functions API has not been enabled for Project (p) 121 | return 122 | } 123 | log.Printf("Google API Error: %d [%s]", e.Code, e.Message) 124 | } 125 | log.Println(err) 126 | return 127 | } 128 | 129 | // If there are any repositories in this location 130 | if len(resp.Repositories) > 0 { 131 | repositories += len(resp.Repositories) 132 | locations[l.LocationId]++ 133 | 134 | for _, repository := range resp.Repositories { 135 | formats[repository.Format]++ 136 | } 137 | } 138 | 139 | // If there are no more pages, we're done 140 | if resp.NextPageToken == "" { 141 | break 142 | } 143 | 144 | // Otherwise, next page 145 | rqst = rqst.PageToken(resp.NextPageToken) 146 | } 147 | } 148 | 149 | ch <- prometheus.MustNewConstMetric( 150 | c.Registries, 151 | prometheus.GaugeValue, 152 | float64(repositories), 153 | []string{ 154 | p.ProjectId, 155 | }..., 156 | ) 157 | 158 | for location, count := range locations { 159 | ch <- prometheus.MustNewConstMetric( 160 | c.Locations, 161 | prometheus.GaugeValue, 162 | float64(count), 163 | []string{ 164 | p.ProjectId, 165 | location, 166 | }..., 167 | ) 168 | } 169 | for format, count := range formats { 170 | ch <- prometheus.MustNewConstMetric( 171 | c.Formats, 172 | prometheus.GaugeValue, 173 | float64(count), 174 | []string{ 175 | p.ProjectId, 176 | format, 177 | }..., 178 | ) 179 | } 180 | }(p) 181 | } 182 | wg.Wait() 183 | } 184 | 185 | // Describe implements Prometheus' Collector interface and is used to describe metrics 186 | func (c *ArtifactRegistryCollector) Describe(ch chan<- *prometheus.Desc) { 187 | ch <- c.Registries 188 | ch <- c.Locations 189 | ch <- c.Formats 190 | } 191 | -------------------------------------------------------------------------------- /collector/functions.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | "net/http" 8 | "strings" 9 | "sync" 10 | 11 | 
"github.com/DazWilkin/gcp-exporter/gcp" 12 | "github.com/prometheus/client_golang/prometheus" 13 | 14 | "google.golang.org/api/cloudfunctions/v1" 15 | "google.golang.org/api/cloudresourcemanager/v1" 16 | "google.golang.org/api/googleapi" 17 | ) 18 | 19 | var ( 20 | _ prometheus.Collector = (*FunctionsCollector)(nil) 21 | ) 22 | 23 | // FunctionsCollector represents Cloud Functions 24 | type FunctionsCollector struct { 25 | account *gcp.Account 26 | cloudfunctionsService *cloudfunctions.Service 27 | 28 | Functions *prometheus.Desc 29 | Locations *prometheus.Desc 30 | Runtimes *prometheus.Desc 31 | } 32 | 33 | // NewFunctionsCollector returns a new FunctionsCollector 34 | func NewFunctionsCollector(account *gcp.Account) (*FunctionsCollector, error) { 35 | subsystem := "cloud_functions" 36 | 37 | ctx := context.Background() 38 | cloudfunctionsService, err := cloudfunctions.NewService(ctx) 39 | if err != nil { 40 | log.Println(err) 41 | return nil, err 42 | } 43 | 44 | return &FunctionsCollector{ 45 | account: account, 46 | cloudfunctionsService: cloudfunctionsService, 47 | 48 | Functions: prometheus.NewDesc( 49 | prometheus.BuildFQName(prefix, subsystem, "functions"), 50 | "Number of Cloud Functions", 51 | []string{ 52 | "project", 53 | }, 54 | nil, 55 | ), 56 | Locations: prometheus.NewDesc( 57 | prometheus.BuildFQName(prefix, subsystem, "locations"), 58 | "Number of Functions by Location", 59 | []string{ 60 | "project", 61 | "location", 62 | }, 63 | nil, 64 | ), 65 | Runtimes: prometheus.NewDesc( 66 | prometheus.BuildFQName(prefix, subsystem, "runtimes"), 67 | "Number of Functions by Runtime", 68 | []string{ 69 | "project", 70 | "runtime", 71 | }, 72 | nil, 73 | ), 74 | }, nil 75 | } 76 | 77 | // Collect implements Prometheus' Collector interface and is used to collect metrics 78 | func (c *FunctionsCollector) Collect(ch chan<- prometheus.Metric) { 79 | // Enumerate all of the projects 80 | var wg sync.WaitGroup 81 | for _, p := range c.account.Projects { 82 | 
wg.Add(1) 83 | go func(p *cloudresourcemanager.Project) { 84 | defer wg.Done() 85 | log.Printf("[CloudFunctionsCollector] Project: %s", p.ProjectId) 86 | parent := fmt.Sprintf("projects/%s/locations/-", p.ProjectId) 87 | rqst := c.cloudfunctionsService.Projects.Locations.Functions.List(parent) 88 | 89 | functions := 0 90 | locations := make(map[string]int) 91 | runtimes := make(map[string]int) 92 | 93 | // Do request at least once 94 | for { 95 | resp, err := rqst.Do() 96 | if err != nil { 97 | if e, ok := err.(*googleapi.Error); ok { 98 | if e.Code == http.StatusForbidden { 99 | // Probably (!) Cloud Functions API has not been enabled for Project (p) 100 | return 101 | } 102 | 103 | log.Printf("Google API Error: %d [%s]", e.Code, e.Message) 104 | return 105 | } 106 | 107 | log.Println(err) 108 | return 109 | } 110 | 111 | functions += len(resp.Functions) 112 | 113 | // https://cloud.google.com/functions/docs/reference/rest/v1/projects.locations.functions#CloudFunction 114 | for _, function := range resp.Functions { 115 | // Name == projects/*/locations/*/functions/* 116 | log.Printf("[CloudFunctionsCollector] function: %s", function.Name) 117 | parts := strings.Split(function.Name, "/") 118 | // 0="projects",1="{project}",2="locations",3="{location}",4="functions",5="{function}" 119 | if len(parts) != 6 { 120 | log.Printf("[CloudFunctionsCollector] Unable to parse function name: %s", function.Name) 121 | } 122 | // Increment locations count by this function's location 123 | locations[parts[3]]++ 124 | 125 | log.Printf("[CloudFunctionsCollector] runtime: %s", function.Runtime) 126 | // Increment runtimes count by this function's runtime 127 | runtimes[function.Runtime]++ 128 | } 129 | 130 | // If there are no more pages, we're done 131 | if resp.NextPageToken == "" { 132 | break 133 | } 134 | 135 | // Otherwise, next page 136 | rqst = rqst.PageToken(resp.NextPageToken) 137 | } 138 | 139 | // Now we know the number of Functions 140 | // Because this count is by 
project, include project labels to avoid duplication 141 | // Can always total by location across projects 142 | // gcp_cloudfunctions_locations{location="us-central1",project="gcp"} 1 143 | // gcp_cloudfunctions_locations{location="us-central1",project="yyy"} 1 144 | ch <- prometheus.MustNewConstMetric( 145 | c.Functions, 146 | prometheus.GaugeValue, 147 | float64(functions), 148 | []string{ 149 | p.ProjectId, 150 | }..., 151 | ) 152 | for location, count := range locations { 153 | ch <- prometheus.MustNewConstMetric( 154 | c.Locations, 155 | prometheus.GaugeValue, 156 | float64(count), 157 | []string{ 158 | p.ProjectId, 159 | location, 160 | }..., 161 | ) 162 | } 163 | // Can always total by runtime across projects 164 | // gcp_cloudfunctions_runtimes{project="gcp",runtime="go113"} 1 165 | // gcp_cloudfunctions_runtimes{project="yyy",runtime="go113"} 1 166 | for runtime, count := range runtimes { 167 | ch <- prometheus.MustNewConstMetric( 168 | c.Runtimes, 169 | prometheus.GaugeValue, 170 | float64(count), 171 | []string{ 172 | p.ProjectId, 173 | runtime, 174 | }..., 175 | ) 176 | } 177 | }(p) 178 | } 179 | wg.Wait() 180 | } 181 | 182 | // Describe implements Prometheus' Collector interface and is used to describe metrics 183 | func (c *FunctionsCollector) Describe(ch chan<- *prometheus.Desc) { 184 | ch <- c.Functions 185 | } 186 | -------------------------------------------------------------------------------- /collector/gke.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | "net/http" 8 | "strconv" 9 | "strings" 10 | "sync" 11 | 12 | "github.com/DazWilkin/gcp-exporter/gcp" 13 | "github.com/prometheus/client_golang/prometheus" 14 | 15 | "google.golang.org/api/cloudresourcemanager/v1" 16 | "google.golang.org/api/container/v1" 17 | "google.golang.org/api/googleapi" 18 | ) 19 | 20 | type GKECollector struct { 21 | account *gcp.Account 22 | containerService 
*container.Service 23 | 24 | enableExtendedMetrics bool 25 | 26 | Info *prometheus.Desc 27 | NodePoolsInfo *prometheus.Desc 28 | Nodes *prometheus.Desc 29 | Up *prometheus.Desc 30 | } 31 | 32 | func NewGKECollector(account *gcp.Account, enableExtendedMetrics bool) (*GKECollector, error) { 33 | subsystem := "gke" 34 | labelKeys := []string{"project", "name", "location", "version"} 35 | 36 | ctx := context.Background() 37 | containerService, err := container.NewService(ctx) 38 | if err != nil { 39 | log.Println(err) 40 | return nil, err 41 | } 42 | 43 | return &GKECollector{ 44 | account: account, 45 | containerService: containerService, 46 | 47 | enableExtendedMetrics: enableExtendedMetrics, 48 | 49 | Up: prometheus.NewDesc( 50 | prometheus.BuildFQName(prefix, subsystem, "up"), 51 | "1 if the cluster is running, 0 otherwise", 52 | labelKeys, nil, 53 | ), 54 | Info: prometheus.NewDesc( 55 | prometheus.BuildFQName(prefix, subsystem, "info"), 56 | "Cluster control plane information. 1 if the cluster is running, 0 otherwise", 57 | append(labelKeys, "id", "mode", "endpoint", "network", "subnetwork", 58 | "initial_cluster_version", "node_pools_count"), 59 | nil, 60 | ), 61 | Nodes: prometheus.NewDesc( 62 | prometheus.BuildFQName(prefix, subsystem, "nodes"), 63 | "Number of nodes currently in the cluster", 64 | labelKeys, nil, 65 | ), 66 | NodePoolsInfo: prometheus.NewDesc( 67 | prometheus.BuildFQName(prefix, subsystem, "node_pools_info"), 68 | "Cluster Node Pools Information. 
1 if the Node Pool is running, 0 otherwise", 69 | append(labelKeys, "etag", "cluster_id", "autoscaling", "disk_size_gb", 70 | "disk_type", "image_type", "machine_type", "locations", "spot", "preemptible"), 71 | nil, 72 | ), 73 | }, nil 74 | } 75 | 76 | func (c *GKECollector) Collect(ch chan<- prometheus.Metric) { 77 | ctx := context.Background() 78 | 79 | var wg sync.WaitGroup 80 | for _, p := range c.account.Projects { 81 | wg.Add(1) 82 | go func(p *cloudresourcemanager.Project) { 83 | defer wg.Done() 84 | c.collectProjectMetrics(ctx, c.containerService, p, ch) 85 | }(p) 86 | } 87 | wg.Wait() 88 | } 89 | 90 | func (c *GKECollector) collectProjectMetrics(ctx context.Context, containerService *container.Service, 91 | p *cloudresourcemanager.Project, ch chan<- prometheus.Metric) { 92 | 93 | log.Printf("[GKECollector:go] Project: %s", p.ProjectId) 94 | parent := fmt.Sprintf("projects/%s/locations/-", p.ProjectId) 95 | resp, err := containerService.Projects.Locations.Clusters.List(parent).Context(ctx).Do() 96 | 97 | if err != nil { 98 | if e, ok := err.(*googleapi.Error); ok && e.Code == http.StatusForbidden { 99 | log.Printf("Google API Error: %d [%s]", e.Code, e.Message) 100 | return 101 | } 102 | log.Println("Google API Error:", err) 103 | return 104 | } 105 | 106 | for _, cluster := range resp.Clusters { 107 | c.collectClusterMetrics(p, cluster, ch) 108 | } 109 | } 110 | 111 | func (c *GKECollector) collectClusterMetrics(p *cloudresourcemanager.Project, cluster *container.Cluster, 112 | ch chan<- prometheus.Metric) { 113 | 114 | log.Printf("[GKECollector] cluster: %s", cluster.Name) 115 | 116 | clusterStatus := 0.0 117 | if cluster.Status == "RUNNING" { 118 | clusterStatus = 1.0 119 | } 120 | 121 | ch <- prometheus.MustNewConstMetric(c.Up, prometheus.GaugeValue, clusterStatus, 122 | p.ProjectId, cluster.Name, cluster.Location, cluster.CurrentMasterVersion) 123 | 124 | ch <- prometheus.MustNewConstMetric(c.Nodes, prometheus.GaugeValue, 
float64(cluster.CurrentNodeCount), 125 | p.ProjectId, cluster.Name, cluster.Location, cluster.CurrentNodeVersion) 126 | 127 | if c.enableExtendedMetrics { 128 | c.collectExtendedMetrics(p, cluster, ch, clusterStatus) 129 | } 130 | } 131 | 132 | func (c *GKECollector) collectExtendedMetrics(p *cloudresourcemanager.Project, cluster *container.Cluster, 133 | ch chan<- prometheus.Metric, clusterStatus float64) { 134 | 135 | if len(cluster.NodePools) == 0 { 136 | return 137 | } 138 | 139 | nodePoolsSize := strconv.Itoa(len(cluster.NodePools)) 140 | clusterMode := "Standard" 141 | 142 | if cluster.Autopilot != nil && cluster.Autopilot.Enabled { 143 | clusterMode = "Autopilot" 144 | } 145 | 146 | ch <- prometheus.MustNewConstMetric(c.Info, prometheus.GaugeValue, clusterStatus, 147 | p.ProjectId, cluster.Name, cluster.Location, cluster.CurrentMasterVersion, 148 | cluster.Id, clusterMode, cluster.Endpoint, cluster.Network, cluster.Subnetwork, 149 | cluster.InitialClusterVersion, nodePoolsSize) 150 | 151 | for _, nodePool := range cluster.NodePools { 152 | nodePoolStatus := 0.0 153 | if nodePool.Status == "RUNNING" { 154 | nodePoolStatus = 1.0 155 | } 156 | 157 | boolToString := func(b bool) string { return strconv.FormatBool(b) } 158 | 159 | ch <- prometheus.MustNewConstMetric(c.NodePoolsInfo, prometheus.GaugeValue, nodePoolStatus, 160 | p.ProjectId, nodePool.Name, cluster.Location, nodePool.Version, nodePool.Etag, cluster.Id, 161 | boolToString(nodePool.Autoscaling.Enabled), 162 | strconv.FormatInt(nodePool.Config.DiskSizeGb, 10), nodePool.Config.DiskType, 163 | nodePool.Config.ImageType, nodePool.Config.MachineType, 164 | strings.Join(nodePool.Locations, ","), 165 | boolToString(nodePool.Config.Spot), 166 | boolToString(nodePool.Config.Preemptible)) 167 | } 168 | } 169 | 170 | func (c *GKECollector) Describe(ch chan<- *prometheus.Desc) { 171 | ch <- c.Info 172 | ch <- c.NodePoolsInfo 173 | ch <- c.Nodes 174 | ch <- c.Up 175 | } 176 | 
-------------------------------------------------------------------------------- /collector/pubssub.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | "path" 8 | "sync" 9 | 10 | "github.com/DazWilkin/gcp-exporter/gcp" 11 | "github.com/prometheus/client_golang/prometheus" 12 | 13 | "google.golang.org/api/cloudresourcemanager/v1" 14 | "google.golang.org/api/option" 15 | "google.golang.org/api/pubsub/v1" 16 | ) 17 | 18 | type PubSubCollector struct { 19 | account *gcp.Account 20 | pubsubService *pubsub.Service 21 | 22 | Schemas *prometheus.Desc 23 | Snapshots *prometheus.Desc 24 | Subscriptions *prometheus.Desc 25 | Topics *prometheus.Desc 26 | // Up *prometheus.Desc 27 | } 28 | 29 | func NewPubSubCollector(account *gcp.Account, endpoint string) (*PubSubCollector, error) { 30 | subsystem := "pubsub" 31 | 32 | ctx := context.Background() 33 | 34 | opts := []option.ClientOption{} 35 | if endpoint != "" { 36 | opts = append(opts, option.WithEndpoint(endpoint)) 37 | } 38 | 39 | pubsubService, err := pubsub.NewService(ctx, opts...) 
40 | if err != nil { 41 | log.Println(err) 42 | return nil, err 43 | } 44 | 45 | return &PubSubCollector{ 46 | account: account, 47 | pubsubService: pubsubService, 48 | 49 | // https://pkg.go.dev/google.golang.org/api@v0.242.0/pubsub/v1#Schema 50 | Schemas: prometheus.NewDesc( 51 | prometheus.BuildFQName(prefix, subsystem, "schemas"), 52 | "Number of schemas", 53 | []string{ 54 | "project", 55 | "name", 56 | "type", 57 | }, 58 | nil, 59 | ), 60 | // https://pkg.go.dev/google.golang.org/api@v0.242.0/pubsub/v1#Snapshot 61 | Snapshots: prometheus.NewDesc( 62 | prometheus.BuildFQName(prefix, subsystem, "snapshots"), 63 | "Number of Snapshots", 64 | []string{ 65 | "project", 66 | "name", 67 | "topic", 68 | }, 69 | nil, 70 | ), 71 | // https://pkg.go.dev/google.golang.org/api@v0.242.0/pubsub/v1#Subscription 72 | Subscriptions: prometheus.NewDesc( 73 | prometheus.BuildFQName(prefix, subsystem, "subscriptions"), 74 | "Number of subscriptions", 75 | []string{ 76 | "project", 77 | "name", 78 | "state", 79 | "topic", 80 | }, 81 | nil, 82 | ), 83 | // https://pkg.go.dev/google.golang.org/api@v0.242.0/pubsub/v1#Topic 84 | Topics: prometheus.NewDesc( 85 | prometheus.BuildFQName(prefix, subsystem, "topics"), 86 | "Number of topics", 87 | []string{ 88 | "project", 89 | "name", 90 | "state", 91 | }, 92 | nil, 93 | ), 94 | // Up: prometheus.NewDesc( 95 | // prometheus.BuildFQName(prefix, subsystem, "up"), 96 | // "1 if the topic is accessible, 0 otherwise", 97 | // []string{}, 98 | // nil, 99 | // ), 100 | }, nil 101 | } 102 | 103 | // Collect implements Prometheus' Collector interface and is used to collect metrics 104 | func (c *PubSubCollector) Collect(ch chan<- prometheus.Metric) { 105 | // ctx := context.Background() 106 | 107 | var wg sync.WaitGroup 108 | for _, p := range c.account.Projects { 109 | log.Printf("[PubSubCollector] Project: %s", p.ProjectId) 110 | 111 | // Schemas 112 | wg.Add(1) 113 | go c.collectSchemas(&wg, ch, p) 114 | 115 | // Snapshots 116 | wg.Add(1) 117 | 
go c.collectSnapshots(&wg, ch, p) 118 | 119 | // Subscriptions 120 | wg.Add(1) 121 | go c.collectSubscriptions(&wg, ch, p) 122 | 123 | // Topics 124 | wg.Add(1) 125 | go c.collectTopics(&wg, ch, p) 126 | } 127 | wg.Wait() 128 | } 129 | 130 | // collectSchemas collects schema metrics for a project 131 | func (c *PubSubCollector) collectSchemas(wg *sync.WaitGroup, ch chan<- prometheus.Metric, p *cloudresourcemanager.Project) { 132 | defer wg.Done() 133 | 134 | project := fmt.Sprintf("projects/%s", p.ProjectId) 135 | rqst := c.pubsubService.Projects.Schemas.List(project) 136 | resp, err := rqst.Do() 137 | if err != nil { 138 | log.Printf("[PubSubCollector] Error listing schemas for %s: %v", p.ProjectId, err) 139 | return 140 | } 141 | 142 | for _, s := range resp.Schemas { 143 | ch <- prometheus.MustNewConstMetric( 144 | c.Schemas, 145 | prometheus.GaugeValue, 146 | 1, 147 | []string{ 148 | p.ProjectId, 149 | // https://pkg.go.dev/path#Base 150 | path.Base(s.Name), 151 | s.Type, 152 | }..., 153 | ) 154 | } 155 | } 156 | 157 | // collectSnapshots collects snapshot metrics for a project 158 | func (c *PubSubCollector) collectSnapshots(wg *sync.WaitGroup, ch chan<- prometheus.Metric, p *cloudresourcemanager.Project) { 159 | defer wg.Done() 160 | 161 | project := fmt.Sprintf("projects/%s", p.ProjectId) 162 | rqst := c.pubsubService.Projects.Snapshots.List(project) 163 | resp, err := rqst.Do() 164 | if err != nil { 165 | log.Printf("[PubSubCollector] Error listing snapshots for %s: %v", p.ProjectId, err) 166 | return 167 | } 168 | 169 | for _, s := range resp.Snapshots { 170 | ch <- prometheus.MustNewConstMetric( 171 | c.Snapshots, 172 | prometheus.GaugeValue, 173 | 1, 174 | []string{ 175 | p.ProjectId, 176 | // https://pkg.go.dev/path#Base 177 | path.Base(s.Name), 178 | path.Base(s.Topic), 179 | }..., 180 | ) 181 | } 182 | } 183 | 184 | // collectSubscriptions collects subscription metrics for a project 185 | func (c *PubSubCollector) collectSubscriptions(wg 
*sync.WaitGroup, ch chan<- prometheus.Metric, p *cloudresourcemanager.Project) { 186 | defer wg.Done() 187 | 188 | project := fmt.Sprintf("projects/%s", p.ProjectId) 189 | rqst := c.pubsubService.Projects.Subscriptions.List(project) 190 | resp, err := rqst.Do() 191 | if err != nil { 192 | log.Printf("[PubSubCollector] Error listing subscriptions for %s: %v", p.ProjectId, err) 193 | return 194 | } 195 | 196 | for _, s := range resp.Subscriptions { 197 | ch <- prometheus.MustNewConstMetric( 198 | c.Subscriptions, 199 | prometheus.GaugeValue, 200 | 1, 201 | []string{ 202 | p.ProjectId, 203 | // https://pkg.go.dev/path#Base 204 | path.Base(s.Name), 205 | s.State, 206 | path.Base(s.Topic), 207 | }..., 208 | ) 209 | } 210 | } 211 | 212 | // collectTopics collects topic metrics for a project 213 | func (c *PubSubCollector) collectTopics(wg *sync.WaitGroup, ch chan<- prometheus.Metric, p *cloudresourcemanager.Project) { 214 | defer wg.Done() 215 | 216 | project := fmt.Sprintf("projects/%s", p.ProjectId) 217 | rqst := c.pubsubService.Projects.Topics.List(project) 218 | resp, err := rqst.Do() 219 | if err != nil { 220 | log.Printf("[PubSubCollector] Error listing topics for %s: %v", p.ProjectId, err) 221 | return 222 | } 223 | 224 | for _, t := range resp.Topics { 225 | ch <- prometheus.MustNewConstMetric( 226 | c.Topics, 227 | prometheus.GaugeValue, 228 | 1, 229 | []string{ 230 | p.ProjectId, 231 | // https://pkg.go.dev/path#Base 232 | path.Base(t.Name), 233 | t.State, 234 | }..., 235 | ) 236 | } 237 | } 238 | 239 | // Describe implements Prometheus' Collector interface and is used to describe metrics 240 | func (c *PubSubCollector) Describe(ch chan<- *prometheus.Desc) { 241 | ch <- c.Schemas 242 | ch <- c.Snapshots 243 | ch <- c.Subscriptions 244 | ch <- c.Topics 245 | // ch <- c.Up 246 | } 247 | -------------------------------------------------------------------------------- /rules.yml: -------------------------------------------------------------------------------- 1 | 
groups: 2 | - name: ackal 3 | rules: 4 | - alert: cloud_logging_logs 5 | expr: min_over_time(gcp_cloud_logging_logs{project=~"ackal-[0-9]{6}"}[15m]) > 39 6 | for: 6h 7 | labels: 8 | severity: page 9 | annotations: 10 | summary: "Ackal Project ({{ $labels.project}}) has {{ $value }} logs" 11 | - name: gcp_exporter 12 | rules: 13 | - alert: gcp_artifact_registry_repositories 14 | # `15m` matches the prometheus.yml scrape_interval 15 | expr: min_over_time(gcp_artifact_registry_registries{}[15m]) > 0 16 | for: 6h 17 | labels: 18 | severity: page 19 | annotations: 20 | summary: "GCP Artifact Registry repositories ({{ $value }}) exist (project: {{ $labels.project }})" 21 | - alert: gcp_cloud_endpoints_services 22 | # `15m` matches the prometheus.yml scrape_interval 23 | expr: min_over_time(gcp_cloud_endpoints_services{}[15m]) > 0 24 | for: 6h 25 | labels: 26 | severity: page 27 | annotations: 28 | summary: "GCP Cloud Endpoints services ({{ $value }}) deployed (project: {{ $labels.project }})" 29 | - alert: gcp_cloud_functions_running 30 | # `15m` matches the prometheus.yml scrape_interval 31 | expr: min_over_time(gcp_cloud_functions_functions{}[15m]) > 0 32 | for: 6h 33 | labels: 34 | severity: page 35 | annotations: 36 | summary: "GCP Cloud Functions ({{ $value }}) running (project: {{ $labels.project }})" 37 | - alert: gcp_cloud_run_jobs_running 38 | # `15m` matches the prometheus.yml scrape_interval 39 | expr: min_over_time(gcp_cloud_run_jobs{}[15m]) > 0 40 | for: 6h 41 | labels: 42 | severity: page 43 | annotations: 44 | summary: "GCP Cloud Run jobs ({{ $value }}) running (project: {{ $labels.project }})" 45 | - alert: gcp_cloud_run_services_running 46 | # `15m` matches the prometheus.yml scrape_interval 47 | expr: min_over_time(gcp_cloud_run_services{}[15m]) > 0 48 | for: 6h 49 | labels: 50 | severity: page 51 | annotations: 52 | summary: "GCP Cloud Run services ({{ $value }}) running (project: {{ $labels.project }})" 53 | - alert: 
gcp_cloud_monitoring_alert_policies_running 54 | expr: min_over_time(gcp_cloud_monitoring_alert_policies{}[15m]) > 0 55 | for: 6h 56 | labels: 57 | severity: page 58 | annotations: 59 | summary: "GCP Cloud Monitoring Alert Policies ({{ $value }}) running (project: {{ $labels.project }})" 60 | - alert: gcp_cloud_monitoring_uptime_checks_running 61 | expr: min_over_time(gcp_cloud_monitoring_uptime_checks{}[15m]) > 0 62 | for: 6h 63 | labels: 64 | severity: page 65 | annotations: 66 | summary: "GCP Cloud Monitoring Uptime Checks ({{ $value }}) running (project: {{ $labels.project }})" 67 | - alert: gcp_cloud_scheduler_jobs_running 68 | # `15m` matches the prometheus.yml scrape_interval 69 | expr: min_over_time(gcp_cloud_scheduler_jobs{}[15m]) > 0 70 | for: 6h 71 | labels: 72 | severity: page 73 | annotations: 74 | summary: "GCP Cloud Scheduler jobs ({{ $value }}) running (project: {{ $labels.project }})" 75 | - alert: gcp_compute_engine_instances_running 76 | expr: min_over_time(gcp_compute_engine_instances{}[15m]) > 0 77 | for: 6h 78 | labels: 79 | severity: page 80 | annotations: 81 | summary: "GCP Compute Engine Instances ({{ $value }}) running (project: {{ $labels.project }})" 82 | - alert: gcp_compute_engine_forwarding_rules_running 83 | expr: min_over_time(gcp_compute_engine_forwardingrules{}[15m]) > 0 84 | for: 6h 85 | labels: 86 | severity: page 87 | annotations: 88 | summary: "GCP Compute Engine ForwardingRules ({{ $value }}) running (project: {{ $labels.project }})" 89 | - alert: gcp_kubernetes_clusters_running 90 | # `15m` matches the prometheus.yml scrape_interval 91 | expr: min_over_time(gcp_gke_up{}[15m]) > 0 92 | for: 6h 93 | labels: 94 | severity: page 95 | annotations: 96 | summary: "GCP Kubernetes Engine clusters ({{ $value }}) running (project: {{ $labels.project }})" 97 | - alert: gcp_storage_buckets 98 | expr: min_over_time(gcp_storage_buckets{}[15m]) > 0 99 | for: 6h 100 | labels: 101 | severity: warning 102 | annotations: 
103 | summary: "GCP Storage buckets ({{ $value }}) exist (project: {{ $labels.project }})" 104 | - name: gcp_status 105 | rules: 106 | - alert: gcp_status_services 107 | # `15m` matches the prometheus.yml scrape_interval 108 | expr: gcp_status_services{} != 100 109 | for: 15m 110 | labels: 111 | severity: warning 112 | annotations: 113 | summary: GCP Status Services ({{ $labels.region }}) availability discrepancy ({{ $value }}!=100) 114 | - alert: gcp_status_up 115 | # `15m` matches the prometheus.yml scrape_interval 116 | expr: min_over_time(gcp_status_up{region="Americas"}[15m]) != 1.0 117 | for: 15m 118 | labels: 119 | severity: warning 120 | annotations: 121 | summary: GCP Status Service {{ $labels.service }} down 122 | - name: fly_exporter 123 | rules: 124 | - alert: fly_exporter_apps_running 125 | expr: min_over_time(fly_exporter_apps{}[15m]) > 0 126 | for: 6h 127 | labels: 128 | severity: page 129 | annotations: 130 | summary: fly.io {{ $value }} apps deployed 131 | - name: linode_exporter 132 | rules: 133 | - alert: linode_instance_up 134 | expr: min_over_time(linode_instance_up{}[15m])>0 135 | for: 3h 136 | labels: 137 | severity: page 138 | annotations: 139 | summary: Linode {{ $value }} Instances exist 140 | - alert: linode_kubernetes_up 141 | expr: min_over_time(linode_kubernetes_up{}[15m])>0 142 | for: 3h 143 | labels: 144 | severity: page 145 | annotations: 146 | summary: Linode {{ $value }} Kubernetes clusters exist 147 | - alert: linode_nodebalancer_up 148 | expr: min_over_time(linode_nodebalancer_up{}[15m])>0 149 | for: 3h 150 | labels: 151 | severity: page 152 | annotations: 153 | summary: Linode {{ $value }} NodeBalancers exist 154 | - alert: linode_volume_up 155 | expr: min_over_time(linode_volume_up{}[15m])>0 156 | for: 3h 157 | labels: 158 | severity: page 159 | annotations: 160 | summary: Linode {{ $value }} Volumes exist 161 | - name: vultr_exporter 162 | rules: 163 | - alert: vultr_block_storage_up 164 | expr: 
min_over_time(vultr_block_storage_up{}[15m]) > 0 165 | for: 3h 166 | labels: 167 | severity: page 168 | annotations: 169 | summary: Vultr {{ $value }} Block Storage volumes exist 170 | - alert: vultr_kubernetes_cluster_up 171 | expr: min_over_time(vultr_kubernetes_cluster_up{}[15m]) > 0 172 | for: 3h 173 | labels: 174 | severity: page 175 | annotations: 176 | summary: Vultr {{ $value }} Kubernetes Engine cluster running 177 | - alert: vultr_load_balancer_up 178 | expr: min_over_time(vultr_load_balancer_up{}[15m]) > 0 179 | for: 3h 180 | labels: 181 | severity: page 182 | annotations: 183 | summary: Vultr {{ $value }} Load Balancers running 184 | - alert: vultr_reserved_ips_up 185 | expr: min_over_time(vultr_reserved_ips_up{}[15m]) > 0 186 | for: 3h 187 | labels: 188 | severity: page 189 | annotations: 190 | summary: Vultr {{ $value }} Reserved IPs exist 191 | - name: azure_exporter 192 | rules: 193 | - alert: azure_container_apps_running 194 | expr: min_over_time(azure_container_apps_total{}[15m]) > 0 195 | for: 6h 196 | labels: 197 | severity: page 198 | annotations: 199 | summary: "Azure Container Apps ({{ $value }}) running (resource group: {{ $labels.resourcegroup }})" 200 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "html/template" 6 | "log" 7 | "net/http" 8 | _ "net/http/pprof" 9 | "runtime" 10 | "time" 11 | 12 | "github.com/DazWilkin/gcp-exporter/collector" 13 | "github.com/DazWilkin/gcp-exporter/gcp" 14 | 15 | "github.com/prometheus/client_golang/prometheus" 16 | "github.com/prometheus/client_golang/prometheus/promhttp" 17 | ) 18 | 19 | var ( 20 | // GitCommit is the git commit value and is expected to be set during build 21 | GitCommit string 22 | // GoVersion is the Golang runtime version 23 | GoVersion = runtime.Version() 24 | // OSVersion is the OS version (uname 
--kernel-release) and is expected to be set during build 25 | OSVersion string 26 | // StartTime is the start time of the exporter represented as a UNIX epoch 27 | StartTime = time.Now().Unix() 28 | ) 29 | 30 | var ( 31 | filter = flag.String("filter", "", "Filter the results of the request") 32 | pagesize = flag.Int64("max_projects", 10, "Maximum number of projects to include") 33 | endpoint = flag.String("endpoint", ":9402", "The endpoint of the HTTP server") 34 | metricsPath = flag.String("path", "/metrics", "The path on which Prometheus metrics will be served") 35 | 36 | profilingEnabled = flag.Bool("profiling_enabled", false, "Enable profiling endpoint") 37 | profilingEndpoint = flag.String("profiling_endpoint", ":6060", "The endpoint of the profiling server") 38 | 39 | disableArtifactRegistryCollector = flag.Bool("collector.artifact_registry.disable", false, "Disables the metrics collector for the Artifact Registry") 40 | disableCloudRunCollector = flag.Bool("collector.cloud_run.disable", false, "Disables the metrics collector for Cloud Run") 41 | disableComputeCollector = flag.Bool("collector.compute.disable", false, "Disables the metrics collector for Compute Engine") 42 | disableEndpointsCollector = flag.Bool("collector.endpoints.disable", false, "Disables the metrics collector for Cloud Endpoints") 43 | disableEventarcCollector = flag.Bool("collector.eventarc.disable", false, "Disables the metrics collector for Cloud Eventarc") 44 | disableFunctionsCollector = flag.Bool("collector.functions.disable", false, "Disables the metrics collector for Cloud Functions") 45 | disableIAMCollector = flag.Bool("collector.iam.disable", false, "Disables the metrics collector for Cloud IAM") 46 | disableGKECollector = flag.Bool("collector.gke.disable", false, "Disables the metrics collector for Google Kubernetes Engine (GKE)") 47 | disableLoggingCollector = flag.Bool("collector.logging.disable", false, "Disables the metrics collector for Cloud Logging") 48 | 
disableMonitoringCollector = flag.Bool("collector.monitoring.disable", false, "Disables the metrics collector for Cloud Monitoring") 49 | disablePubSubCollector = flag.Bool("collector.pubsub.disable", false, "Disables the metrics collector for Cloud Pub/Sub") 50 | disableSchedulerCollector = flag.Bool("collector.scheduler.disable", false, "Disables the metrics collector for Cloud Scheduler") 51 | disableStorageCollector = flag.Bool("collector.storage.disable", false, "Disables the metrics collector for Cloud Storage") 52 | 53 | endpointPubSub = flag.String("collector.pubsub.endpoint", "", "The endpoint of the Pub/Sub service or emulator") 54 | 55 | enableExtendedMetricsGKECollector = flag.Bool("collector.gke.extendedMetrics.enable", false, "Enable the metrics collector for Google Kubernetes Engine (GKE) to collect ControlPlane and NodePool metrics") 56 | ) 57 | 58 | const ( 59 | rootTemplate = ` 60 | 61 |
62 | 63 |