├── ui
├── src
│ ├── assets
│ │ └── .gitkeep
│ ├── favicon.ico
│ ├── environments
│ │ ├── environment.prod.ts
│ │ └── environment.ts
│ ├── typings.d.ts
│ ├── tsconfig.app.json
│ ├── tsconfig.spec.json
│ ├── app
│ │ ├── shared
│ │ │ └── socket.types.ts
│ │ ├── daemons
│ │ │ ├── daemons.component.html
│ │ │ └── daemons.component.ts
│ │ ├── app.component.ts
│ │ ├── app.module.ts
│ │ ├── checks
│ │ │ ├── checks.component.ts
│ │ │ └── checks.component.html
│ │ ├── results
│ │ │ ├── results.component.html
│ │ │ └── results.component.ts
│ │ └── socket.service.ts
│ ├── main.ts
│ ├── index.html
│ ├── test.ts
│ ├── styles.css
│ └── polyfills.ts
├── .editorconfig
├── tsconfig.json
├── .gitignore
├── protractor.conf.js
├── karma.conf.js
├── angular-cli.json
├── package.json
└── tslint.json
├── img
└── screenshot.png
├── models
├── basemodel.go
├── checks.go
├── deamon.go
├── types.go
└── results.go
├── install
├── ose-mon-hub.service
├── ose-mon-daemon.service
├── ose-mon-daemon.service.standalone
├── ose-mon-template.yaml
└── ose-mon-standalone-template.yaml
├── .gitignore
├── Dockerfile
├── daemon
├── client
│ ├── checks
│ │ ├── docker.go
│ │ ├── common_test.go
│ │ ├── networking.go
│ │ ├── storage.go
│ │ ├── common.go
│ │ ├── certificates.go
│ │ └── openshift.go
│ ├── webserver.go
│ ├── handlers
│ │ ├── general.go
│ │ ├── major.go
│ │ └── minor.go
│ ├── communication.go
│ └── daemon.go
└── main.go
├── hub
├── main.go
└── server
│ ├── webserver.go
│ └── hub.go
├── .travis.yml
├── README.md
└── LICENSE
/ui/src/assets/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/img/screenshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oscp/openshift-monitoring/HEAD/img/screenshot.png
--------------------------------------------------------------------------------
/ui/src/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oscp/openshift-monitoring/HEAD/ui/src/favicon.ico
--------------------------------------------------------------------------------
/ui/src/environments/environment.prod.ts:
--------------------------------------------------------------------------------
/** Build-time settings for production builds: Angular production mode is switched on. */
export const environment = { production: true };
4 |
--------------------------------------------------------------------------------
/ui/src/typings.d.ts:
--------------------------------------------------------------------------------
/* SystemJS module definition: shape of the ambient `module` object (only `id` is declared). */
interface NodeModule {
  id: string;
}

/* Ambient `module` variable typed with the interface above. */
declare var module: NodeModule;
6 |
--------------------------------------------------------------------------------
/models/basemodel.go:
--------------------------------------------------------------------------------
1 | package models
2 |
// BaseModel is a generic message envelope: a string tag in Type plus an
// arbitrary payload in Message. It is serialized to JSON with the keys
// "type" and "message".
type BaseModel struct {
	Type    string      `json:"type"`
	Message interface{} `json:"message"`
}
7 |
--------------------------------------------------------------------------------
/ui/src/tsconfig.app.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": "../tsconfig.json",
3 | "compilerOptions": {
4 | "outDir": "../out-tsc/app",
5 | "baseUrl": "./",
6 | "module": "es2015",
7 | "types": []
8 | },
9 | "exclude": [
10 | "test.ts",
11 | "**/*.spec.ts"
12 | ]
13 | }
14 |
--------------------------------------------------------------------------------
/ui/.editorconfig:
--------------------------------------------------------------------------------
1 | # Editor configuration, see http://editorconfig.org
2 | root = true
3 |
4 | [*]
5 | charset = utf-8
6 | indent_style = space
7 | indent_size = 2
8 | insert_final_newline = true
9 | trim_trailing_whitespace = true
10 |
11 | [*.md]
12 | max_line_length = off
13 | trim_trailing_whitespace = false
14 |
--------------------------------------------------------------------------------
/install/ose-mon-hub.service:
--------------------------------------------------------------------------------
1 | [Unit]
2 | Description=Openshift Update Monitoring Hub
3 |
4 | [Service]
5 | ExecStart=/opt/ose-mon/hub -RPC_ADDR=xxx -UI_ADDR=xxx -MASTER_API_URLS=xxx,xxx -DAEMON_PUBLIC_URL=xxx -ETCD_IPS=xxx
6 | Restart=always
7 | WorkingDirectory=/opt/ose-mon
8 |
9 | [Install]
10 | WantedBy=multi-user.target
--------------------------------------------------------------------------------
/install/ose-mon-daemon.service:
--------------------------------------------------------------------------------
1 | [Unit]
2 | Description=OSE Update Monitoring Daemon
3 |
4 | [Service]
5 | Environment=HUB_ADDRESS= ## add your value here ##
6 | Environment=DAEMON_TYPE= ## add your value here ##
7 | ExecStart=/opt/ose-mon/daemon
8 | Restart=always
9 | WorkingDirectory=/opt/ose-mon
10 |
11 | [Install]
12 | WantedBy=multi-user.target
--------------------------------------------------------------------------------
/ui/src/tsconfig.spec.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": "../tsconfig.json",
3 | "compilerOptions": {
4 | "outDir": "../out-tsc/spec",
5 | "baseUrl": "./",
6 | "module": "commonjs",
7 | "types": [
8 | "jasmine",
9 | "node"
10 | ]
11 | },
12 | "files": [
13 | "test.ts"
14 | ],
15 | "include": [
16 | "**/*.spec.ts",
17 | "**/*.d.ts"
18 | ]
19 | }
20 |
--------------------------------------------------------------------------------
/ui/src/app/shared/socket.types.ts:
--------------------------------------------------------------------------------
/**
 * Message type tags used in the `type` field of websocket messages.
 *
 * The values are constants, so every member is `readonly` to prevent an
 * accidental reassignment at runtime from silently breaking message dispatch.
 */
export class SocketType {
  // Daemon membership messages
  static readonly ALL_DAEMONS = 'ALL_DAEMONS';
  static readonly NEW_DAEMON = 'NEW_DAEMON';
  static readonly DAEMON_LEFT = 'DAEMON_LEFT';

  // Check control and result messages
  static readonly CURRENT_CHECKS = 'CURRENT_CHECKS';
  static readonly START_CHECKS = 'START_CHECKS';
  static readonly STOP_CHECKS = 'STOP_CHECKS';
  static readonly RESET_STATS = 'RESET_STATS';
  static readonly CHECK_RESULTS = 'CHECK_RESULTS';
}
12 |
--------------------------------------------------------------------------------
/ui/src/main.ts:
--------------------------------------------------------------------------------
1 | import { enableProdMode } from '@angular/core';
2 | import { platformBrowserDynamic } from '@angular/platform-browser-dynamic';
3 |
4 | import { AppModule } from './app/app.module';
5 | import { environment } from './environments/environment';
6 |
// Switch Angular to production mode when the active environment asks for it.
if (environment.production) {
  enableProdMode();
}

// Bootstrap the root module. A failed bootstrap is an error, so report it
// through console.error rather than console.log.
platformBrowserDynamic()
  .bootstrapModule(AppModule)
  .catch(err => console.error(err));
13 |
--------------------------------------------------------------------------------
/ui/src/environments/environment.ts:
--------------------------------------------------------------------------------
1 | // The file contents for the current environment will overwrite these during build.
2 | // The build system defaults to the dev environment which uses `environment.ts`, but if you do
3 | // `ng build --env=prod` then `environment.prod.ts` will be used instead.
4 | // The list of which env maps to which file can be found in `.angular-cli.json`.
5 |
/** Default (development) settings: Angular production mode stays off. */
export const environment = { production: false };
9 |
--------------------------------------------------------------------------------
/ui/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compileOnSave": false,
3 | "compilerOptions": {
4 | "outDir": "./dist/out-tsc",
5 | "sourceMap": true,
6 | "declaration": false,
7 | "moduleResolution": "node",
8 | "emitDecoratorMetadata": true,
9 | "experimentalDecorators": true,
10 | "target": "es5",
11 | "typeRoots": [
12 | "node_modules/@types"
13 | ],
14 | "lib": [
15 | "es2017",
16 | "dom"
17 | ]
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Compiled Object files, Static and Dynamic libs (Shared Objects)
2 | *.o
3 | *.a
4 | *.so
5 |
6 | # Folders
7 | _obj
8 | _test
9 |
10 | # Architecture specific extensions/prefixes
11 | *.[568vq]
12 | [568vq].out
13 |
14 | *.cgo1.go
15 | *.cgo2.c
16 | _cgo_defun.c
17 | _cgo_gotypes.go
18 | _cgo_export.*
19 |
20 | _testmain.go
21 |
22 | *.exe
23 | *.test
24 | *.prof
25 |
26 | *.iml
27 | *.idea/
28 | /target/
29 |
30 | node_modules/
31 | dist/
32 | tmp/
--------------------------------------------------------------------------------
/ui/src/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | OpenShift Update Monitoring
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM golang:1.8-jessie
2 |
3 | WORKDIR /go/src/github.com/oscp/openshift-monitoring/daemon/
4 |
5 | COPY . /go/src/github.com/oscp/openshift-monitoring/
6 |
7 | RUN go get github.com/cenkalti/rpc2 \
8 | && go get github.com/gorilla/websocket \
9 | && go get github.com/mitchellh/mapstructure \
10 | && go get gopkg.in/yaml.v2 \
11 | && go install -v
12 |
13 | # Install necessary tools
14 | RUN apt-get update && apt-get install -y --no-install-recommends dnsutils
15 |
16 | CMD ["daemon"]
--------------------------------------------------------------------------------
/models/checks.go:
--------------------------------------------------------------------------------
1 | package models
2 |
// Checks describes which checks are enabled and the parameters they use.
// Every field is serialized to JSON under the camelCase key given in its tag.
type Checks struct {
	IsRunning       bool   `json:"isRunning"`
	CheckInterval   int    `json:"checkInterval"` // NOTE(review): unit is not visible here — confirm
	MasterApiCheck  bool   `json:"masterApiCheck"`
	MasterApiUrls   string `json:"masterApiUrls"` // presumably a comma-separated list — verify against the hub flags
	DnsCheck        bool   `json:"dnsCheck"`
	HttpChecks      bool   `json:"httpChecks"`
	DaemonPublicUrl string `json:"daemonPublicUrl"`
	EtcdCheck       bool   `json:"etcdCheck"`
	EtcdIps         string `json:"etcdIps"` // presumably a comma-separated list — verify
	EtcdCertPath    string `json:"etcdCertPath"`
}
15 |
--------------------------------------------------------------------------------
/daemon/client/checks/docker.go:
--------------------------------------------------------------------------------
1 | package checks
2 |
3 | import (
4 | "fmt"
5 | "log"
6 | "os/exec"
7 | "strconv"
8 | )
9 |
10 | func CheckDockerPool(okSize int) error {
11 | log.Println("Checking docker pool used size")
12 |
13 | out, err := exec.Command("bash", "-c", "lvs -o data_percent,metadata_percent,LV_NAME --noheadings --units G --nosuffix | grep docker-pool").Output()
14 | if err != nil {
15 | // ignore errors. grep exits with 1 if docker-pool is not found
16 | return nil
17 | }
18 |
19 | isOk := isLvsSizeOk(string(out), okSize)
20 | if !isOk {
21 | return fmt.Errorf("Docker pool size is above: %v", strconv.Itoa(okSize))
22 | }
23 | return nil
24 | }
25 |
--------------------------------------------------------------------------------
/ui/src/app/daemons/daemons.component.html:
--------------------------------------------------------------------------------
1 |
2 |
Connected Daemons
3 |
4 |
5 |
6 | | Hostname |
7 | Type |
8 | Checks started/ok/failed |
9 |
10 |
11 |
12 |
13 | | {{d.hostname}} |
14 | {{d.daemonType}} |
15 | {{d.startedChecks}} {{d.successfulChecks}} {{d.failedChecks}}
17 | |
18 |
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/ui/.gitignore:
--------------------------------------------------------------------------------
1 | # See http://help.github.com/ignore-files/ for more about ignoring files.
2 |
3 | # compiled output
4 | /dist
5 | /dist-server
6 | /tmp
7 | /out-tsc
8 |
9 | # dependencies
10 | /node_modules
11 |
12 | # IDEs and editors
13 | /.idea
14 | .project
15 | .classpath
16 | .c9/
17 | *.launch
18 | .settings/
19 | *.sublime-workspace
20 |
21 | # IDE - VSCode
22 | .vscode/*
23 | !.vscode/settings.json
24 | !.vscode/tasks.json
25 | !.vscode/launch.json
26 | !.vscode/extensions.json
27 |
28 | # misc
29 | /.sass-cache
30 | /connect.lock
31 | /coverage
32 | /libpeerconnection.log
33 | npm-debug.log
34 | testem.log
35 | /typings
36 |
37 | # e2e
38 | /e2e/*.js
39 | /e2e/*.map
40 |
41 | # System Files
42 | .DS_Store
43 | Thumbs.db
44 |
--------------------------------------------------------------------------------
/ui/src/test.ts:
--------------------------------------------------------------------------------
1 | // This file is required by karma.conf.js and loads recursively all the .spec and framework files
2 |
3 | import 'zone.js/dist/zone-testing';
4 | import { getTestBed } from '@angular/core/testing';
5 | import {
6 | BrowserDynamicTestingModule,
7 | platformBrowserDynamicTesting
8 | } from '@angular/platform-browser-dynamic/testing';
9 |
// `require` is typed loosely so that `require.context` can be called below.
declare const require: any;

// Step 1: set up the Angular testing environment.
getTestBed().initTestEnvironment(
  BrowserDynamicTestingModule,
  platformBrowserDynamicTesting()
);

// Step 2: collect every *.spec.ts file below this directory (recursively) ...
const specContext = require.context('./', true, /\.spec\.ts$/);
// ... and load each matching module.
specContext.keys().map(specContext);
21 |
--------------------------------------------------------------------------------
/models/deamon.go:
--------------------------------------------------------------------------------
1 | package models
2 |
3 | import "github.com/cenkalti/rpc2"
4 |
// Daemon holds the identity of one daemon and its check counters.
// All fields are serialized to JSON under the camelCase keys in their tags.
type Daemon struct {
	Hostname         string `json:"hostname"`
	Namespace        string `json:"namespace"`
	DaemonType       string `json:"daemonType"` // compared against "MASTER", "NODE" and "POD" by the methods below
	StartedChecks    int    `json:"startedChecks"`
	SuccessfulChecks int    `json:"successfulChecks"`
	FailedChecks     int    `json:"failedChecks"`
}

// IsMaster reports whether DaemonType is exactly "MASTER".
func (d *Daemon) IsMaster() bool {
	return d.DaemonType == "MASTER"
}

// IsNode reports whether DaemonType is exactly "NODE".
func (d *Daemon) IsNode() bool {
	return d.DaemonType == "NODE"
}

// IsPod reports whether DaemonType is exactly "POD".
func (d *Daemon) IsPod() bool {
	return d.DaemonType == "POD"
}
25 |
// DaemonClient bundles a Daemon description with its rpc2 client and two
// channels.
type DaemonClient struct {
	Daemon Daemon
	Client *rpc2.Client
	Quit   chan bool        // NOTE(review): presumably signals shutdown — confirm against the users of this channel
	ToHub  chan CheckResult // NOTE(review): presumably carries check results towards the hub — confirm
}
32 |
--------------------------------------------------------------------------------
/daemon/client/webserver.go:
--------------------------------------------------------------------------------
1 | package client
2 |
3 | import (
4 | "github.com/oscp/openshift-monitoring/daemon/client/handlers"
5 | "log"
6 | "net/http"
7 | "os"
8 | )
9 |
10 | func RunWebserver(daemonType string) {
11 | addr := os.Getenv("SERVER_ADDRESS")
12 |
13 | if len(addr) == 0 {
14 | addr = ":8090"
15 | }
16 |
17 | log.Println("starting webserver on", addr)
18 |
19 | http.HandleFunc("/fast", handlers.FastHandler)
20 | http.HandleFunc("/slow", handlers.SlowHandler)
21 |
22 | http.HandleFunc("/checks/minor", func(w http.ResponseWriter, r *http.Request) {
23 | handlers.HandleMinorChecks(daemonType, w, r)
24 | })
25 | http.HandleFunc("/checks/major", func(w http.ResponseWriter, r *http.Request) {
26 | handlers.HandleMajorChecks(daemonType, w, r)
27 | })
28 |
29 | log.Fatal(http.ListenAndServe(addr, nil))
30 | }
31 |
--------------------------------------------------------------------------------
/models/types.go:
--------------------------------------------------------------------------------
1 | package models
2 |
// Websocket types: string tags that identify the kind of a websocket message.
const (
	// Daemon membership
	NewDaemon  = "NEW_DAEMON"
	AllDaemons = "ALL_DAEMONS"
	DaemonLeft = "DAEMON_LEFT"

	// Check control, results and statistics
	CurrentChecks = "CURRENT_CHECKS"
	StartChecks   = "START_CHECKS"
	StopChecks    = "STOP_CHECKS"
	CheckResults  = "CHECK_RESULTS"
	ResetStats    = "RESET_STATS"
)
15 |
// Check types: string tags that identify an individual check.
const (
	MasterApiCheck        = "MASTER_API_CHECK"
	DnsNslookupKubernetes = "DNS_NSLOOKUP_KUBERNETES"
	DnsServiceNode        = "DNS_SERVICE_NODE"
	DnsServicePod         = "DNS_SERVICE_POD"
	HttpPodServiceAB      = "HTTP_POD_SERVICE_A_B"
	HttpPodServiceAC      = "HTTP_POD_SERVICE_A_C"
	HttpHaProxy           = "HTTP_HAPROXY"
	HttpServiceABC        = "HTTP_SERVICE_ABC"
	EtcdHealth            = "ETCD_HEALTH"
)
28 |
--------------------------------------------------------------------------------
/ui/protractor.conf.js:
--------------------------------------------------------------------------------
1 | // Protractor configuration file, see link for more information
2 | // https://github.com/angular/protractor/blob/master/lib/config.ts
3 |
4 | const { SpecReporter } = require('jasmine-spec-reporter');
5 |
6 | exports.config = {
7 | allScriptsTimeout: 11000,
8 | specs: [
9 | './e2e/**/*.e2e-spec.ts'
10 | ],
11 | capabilities: {
12 | 'browserName': 'chrome'
13 | },
14 | directConnect: true,
15 | baseUrl: 'http://localhost:4200/',
16 | framework: 'jasmine',
17 | jasmineNodeOpts: {
18 | showColors: true,
19 | defaultTimeoutInterval: 30000,
20 | print: function() {}
21 | },
22 | onPrepare() {
23 | require('ts-node').register({
24 | project: 'e2e/tsconfig.e2e.json'
25 | });
26 | jasmine.getEnv().addReporter(new SpecReporter({ spec: { displayStacktrace: true } }));
27 | }
28 | };
29 |
--------------------------------------------------------------------------------
/daemon/client/checks/common_test.go:
--------------------------------------------------------------------------------
1 | package checks
2 |
3 | import (
4 | "io/ioutil"
5 | "log"
6 | "testing"
7 | )
8 |
// init redirects the standard logger to ioutil.Discard for this test binary.
func init() {
	// Omit standard log output when running tests to allow one to focus on
	// actual test results.
	log.SetOutput(ioutil.Discard)
}
14 |
15 | func TestIsVgSizeOk(t *testing.T) {
16 | tests := []struct {
17 | line string
18 | okSize int
19 | want bool
20 | }{
21 | {"invalid input", 99, false},
22 | {"5.37 26.84 vg_slow", 5, true},
23 | {"5.37 26.84 vg_slow", 25, false},
24 | {" 0 511.03 fedora", 10, false},
25 | {"\t25\t250 test", 10, true},
26 | {"10G 50G test", 15, true},
27 | {"10G 50G test", 25, false},
28 | }
29 | for _, tt := range tests {
30 | if got := isVgSizeOk(tt.line, tt.okSize); got != tt.want {
31 | t.Errorf("isVgSizeOk(%q, %v) = %v, want %v", tt.line, tt.okSize, got, tt.want)
32 | }
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/models/results.go:
--------------------------------------------------------------------------------
1 | package models
2 |
3 | import "time"
4 |
// Results aggregates check statistics: overall counters, per-type counters,
// a list of ticks and a list of failures. All fields are serialized to JSON
// under the keys in their tags.
type Results struct {
	SuccessfulChecks int `json:"successfulChecks"`
	FailedChecks     int `json:"failedChecks"`
	StartedChecks    int `json:"startedChecks"`
	FinishedChecks   int `json:"finishedChecks"`

	// Counters keyed by a string — presumably the check type; verify against the writer.
	SuccessfulChecksByType map[string]int `json:"successfulChecksByType"`
	FailedChecksByType     map[string]int `json:"failedChecksByType"`

	Ticks []Tick `json:"ticks"`
	// Note: the Go field is named Errors but its JSON key is "failures".
	Errors []Failures `json:"failures"`
}
17 |
// Tick is one entry of Results.Ticks holding a pair of success/failure counters.
// NOTE(review): the time span a tick covers is not visible here — confirm.
type Tick struct {
	SuccessfulChecks int `json:"successfulChecks"`
	FailedChecks     int `json:"failedChecks"`
}
22 |
// Failures describes a single failure entry (despite the plural name): a
// timestamp, the hostname, a type tag and a message.
type Failures struct {
	Date     time.Time `json:"date"`
	Hostname string    `json:"hostname"`
	Type     string    `json:"type"`
	Message  string    `json:"message"`
}
29 |
// CheckResult is the outcome of a check: the host, a type tag, whether it was
// OK and a message. The struct has no json tags, so encoding/json uses the Go
// field names ("Hostname", "Type", "IsOk", "Message") as keys.
type CheckResult struct {
	Hostname string
	Type     string
	IsOk     bool
	Message  string
}
36 |
--------------------------------------------------------------------------------
/install/ose-mon-daemon.service.standalone:
--------------------------------------------------------------------------------
1 | [Unit]
2 | Description=OSE Update Monitoring Daemon
3 |
4 | [Service]
5 | Environment=DAEMON_TYPE= ## add your value here ##
6 | Environment=SERVER_ADDRESS=localhost:2600
7 |
8 | ## MASTER
9 | Environment=EXTERNAL_SYSTEM_URL=https://www.google.ch
10 | Environment=HAWCULAR_SVC_IP=## ip here ##
11 | Environment=ETCD_IPS=https://192.168.125.1:2379,https://192.168.125.2:2379,https://192.168.125.3:2379
12 | Environment=REGISTRY_SVC_IP=## ip here ##
13 | Environment=ROUTER_IPS=## ip here ##
14 | Environment=PROJECTS_WITHOUT_LIMITS=## infra project count here ##
15 | Environment=CHECK_CERTIFICATE_URLS=## URLS to check for certificate validity here ##
16 | Environment=CHECK_CERTIFICATE_PATHS=## Paths to check for certificate validity here. Filter is *.crt ##
17 |
18 | ## STORAGE
19 | Environment=IS_GLUSTER_SERVER=true
20 | Environment=MOUNTPOINTS_TO_CHECK=/gluster/registry
21 |
22 | ExecStart=/opt/ose-mon/daemon
23 | Restart=always
24 | WorkingDirectory=/opt/ose-mon
25 | User=root
26 |
27 | [Install]
28 | WantedBy=multi-user.target
--------------------------------------------------------------------------------
/ui/karma.conf.js:
--------------------------------------------------------------------------------
1 | // Karma configuration file, see link for more information
2 | // https://karma-runner.github.io/1.0/config/configuration-file.html
3 |
4 | module.exports = function (config) {
5 | config.set({
6 | basePath: '',
7 | frameworks: ['jasmine', '@angular/cli'],
8 | plugins: [
9 | require('karma-jasmine'),
10 | require('karma-chrome-launcher'),
11 | require('karma-jasmine-html-reporter'),
12 | require('karma-coverage-istanbul-reporter'),
13 | require('@angular/cli/plugins/karma')
14 | ],
15 | client:{
16 | clearContext: false // leave Jasmine Spec Runner output visible in browser
17 | },
18 | coverageIstanbulReporter: {
19 | reports: [ 'html', 'lcovonly' ],
20 | fixWebpackSourcePaths: true
21 | },
22 | angularCli: {
23 | environment: 'dev'
24 | },
25 | reporters: ['progress', 'kjhtml'],
26 | port: 9876,
27 | colors: true,
28 | logLevel: config.LOG_INFO,
29 | autoWatch: true,
30 | browsers: ['Chrome'],
31 | singleRun: false
32 | });
33 | };
34 |
--------------------------------------------------------------------------------
/daemon/client/checks/networking.go:
--------------------------------------------------------------------------------
1 | package checks
2 |
3 | import (
4 | "errors"
5 | "log"
6 | "os"
7 | "os/exec"
8 | "strconv"
9 | "strings"
10 | )
11 |
// CheckBondNetworkInterface inspects /proc/net/bonding/bond0. When the file
// cannot be stat'ed the check is skipped and nil is returned. Otherwise it
// counts the lines containing 'MII Status: up' and returns an error unless the
// count is exactly 3, or when the count cannot be obtained or parsed.
func CheckBondNetworkInterface() error {
	log.Println("Checking bond0 interface")

	if _, statErr := os.Stat("/proc/net/bonding/bond0"); statErr != nil {
		log.Println("bond0 does not exist, skipping this check...")
		return nil
	}

	// bond0 exists, execute check
	raw, err := exec.Command("bash", "-c", "grep 'MII Status: up' /proc/net/bonding/bond0 | wc -l").Output()
	if err != nil {
		msg := "Could not evaluate bond0 status: " + err.Error()
		log.Println(msg)
		return errors.New(msg)
	}

	upCount, err := strconv.Atoi(strings.TrimSpace(string(raw)))
	if err != nil {
		return errors.New("Could not parse output to integer: " + string(raw))
	}

	// 3 is the expected number of occurrences
	if upCount != 3 {
		return errors.New("bond0 degraded: At least one interface is not 'UP'")
	}
	return nil
}
40 |
--------------------------------------------------------------------------------
/ui/src/app/app.component.ts:
--------------------------------------------------------------------------------
1 | import {Component} from '@angular/core';
2 |
3 | @Component({
4 | selector: 'app-root',
5 | template: `
6 |
12 |
13 |
14 |
27 | `
28 | })
29 | export class AppComponent {
30 | notificationOptions = {
31 | position: ['top', 'right'],
32 | timeOut: 3000,
33 | showProgressBar: true,
34 | maxStack: 8,
35 | preventDuplicates: true,
36 | maxLength: 10
37 | };
38 | }
39 |
--------------------------------------------------------------------------------
/daemon/client/handlers/general.go:
--------------------------------------------------------------------------------
1 | package handlers
2 |
3 | import (
4 | "encoding/json"
5 | "github.com/oscp/openshift-monitoring/models"
6 | "io"
7 | "math/rand"
8 | "net/http"
9 | "os"
10 | "time"
11 | )
12 |
13 | func FastHandler(w http.ResponseWriter, r *http.Request) {
14 | io.WriteString(w, "Hello, world")
15 | }
16 |
17 | func SlowHandler(w http.ResponseWriter, r *http.Request) {
18 | s := random(1, 60000)
19 | time.Sleep(time.Duration(s) * time.Millisecond)
20 |
21 | io.WriteString(w, "Hello, world")
22 | }
23 |
// random returns a pseudo-random integer in the half-open range [min, max).
//
// The generator is seeded with nanosecond resolution. The previous
// implementation reseeded the global generator with the current Unix second on
// every call, so all calls within the same second returned the same value.
// An empty or inverted range (max <= min) yields min instead of panicking
// inside rand.Intn.
func random(min, max int) int {
	if max <= min {
		return min
	}
	// Use a private generator so the package-level generator is left untouched.
	rng := rand.New(rand.NewSource(time.Now().UnixNano()))
	return rng.Intn(max-min) + min
}
28 |
29 | func generateResponse(w http.ResponseWriter, errors []string) {
30 | host, _ := os.Hostname()
31 | r := models.CheckResult{
32 | Hostname: host,
33 | Type: "OSE_CHECKS",
34 | IsOk: true,
35 | }
36 |
37 | for _, s := range errors {
38 | r.IsOk = false
39 | r.Message += " | " + s
40 | }
41 |
42 | json, err := json.Marshal(r)
43 | if err != nil {
44 | http.Error(w, "Error while generating response", http.StatusInternalServerError)
45 | return
46 | }
47 |
48 | w.Header().Set("Content-Type", "application/json")
49 | w.Write(json)
50 | }
51 |
--------------------------------------------------------------------------------
/daemon/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "github.com/oscp/openshift-monitoring/daemon/client"
5 | "log"
6 | "os"
7 | "os/signal"
8 | "syscall"
9 | )
10 |
11 | func main() {
12 | daemonType := os.Getenv("DAEMON_TYPE")
13 | hubAddr := os.Getenv("HUB_ADDRESS")
14 |
15 | if len(daemonType) == 0 {
16 | log.Fatal("env variable 'DAEMON_TYPE' must be specified")
17 | }
18 |
19 | // Communication with the hub is optional
20 | if len(hubAddr) > 0 {
21 | // Webserver for /slow /fast checks
22 | go client.RunWebserver(daemonType)
23 |
24 | namespace := os.Getenv("POD_NAMESPACE")
25 |
26 | if daemonType == "POD" && len(namespace) == 0 {
27 | log.Fatal("if type is 'POD' env variable 'POD_NAMESPACE' must be specified")
28 | }
29 |
30 | // Register on hub
31 | cl := client.StartDaemon(hubAddr, daemonType, namespace)
32 |
33 | // Exit gracefully
34 | c := make(chan os.Signal, 2)
35 | signal.Notify(c, os.Interrupt, syscall.SIGTERM)
36 | func() {
37 | <-c
38 | log.Println("got sigterm, unregistring on hub")
39 | client.StopDaemon(cl)
40 | os.Exit(1)
41 | }()
42 | } else {
43 | // Just run the webserver for external monitoring system
44 | client.RunWebserver(daemonType)
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/ui/src/app/app.module.ts:
--------------------------------------------------------------------------------
1 | import {BrowserModule} from '@angular/platform-browser';
2 | import {NgModule} from '@angular/core';
3 | import {FormsModule} from '@angular/forms';
4 | import {AppComponent} from './app.component';
5 | import {SocketService} from './socket.service';
6 | import {SimpleNotificationsModule} from 'angular2-notifications';
7 | import {DaemonsComponent} from './daemons/daemons.component';
8 | import { ChecksComponent } from './checks/checks.component';
9 | import { ResultsComponent } from './results/results.component';
10 | import {ChartsModule} from "ng2-charts";
11 | import {NotificationsService} from "angular2-notifications";
12 | import {HttpClientModule} from "@angular/common/http";
13 | import {BrowserAnimationsModule} from "@angular/platform-browser/animations";
14 |
/**
 * Root module of the UI: declares the app, daemons, checks and results
 * components, imports the browser, animation, forms, HTTP, notification and
 * chart modules, provides the socket and notification services, and
 * bootstraps AppComponent.
 */
@NgModule({
  declarations: [AppComponent, DaemonsComponent, ChecksComponent, ResultsComponent],
  imports: [
    BrowserModule,
    BrowserAnimationsModule,
    FormsModule,
    HttpClientModule,
    SimpleNotificationsModule,
    ChartsModule
  ],
  providers: [SocketService, NotificationsService],
  bootstrap: [AppComponent]
})
export class AppModule {}
35 |
--------------------------------------------------------------------------------
/ui/src/styles.css:
--------------------------------------------------------------------------------
1 | /* You can add global styles to this file, and also import other style files */
2 | .spinner {
3 | width: 50px;
4 | height: 40px;
5 | }
6 |
7 | .spinner > div {
8 | background-color: #333;
9 | height: 100%;
10 | width: 6px;
11 | display: inline-block;
12 |
13 | -webkit-animation: sk-stretchdelay 1.2s infinite ease-in-out;
14 | animation: sk-stretchdelay 1.2s infinite ease-in-out;
15 | }
16 |
17 | .spinner .rect2 {
18 | -webkit-animation-delay: -1.1s;
19 | animation-delay: -1.1s;
20 | }
21 |
22 | .spinner .rect3 {
23 | -webkit-animation-delay: -1.0s;
24 | animation-delay: -1.0s;
25 | }
26 |
27 | .spinner .rect4 {
28 | -webkit-animation-delay: -0.9s;
29 | animation-delay: -0.9s;
30 | }
31 |
32 | .spinner .rect5 {
33 | -webkit-animation-delay: -0.8s;
34 | animation-delay: -0.8s;
35 | }
36 |
37 | @-webkit-keyframes sk-stretchdelay {
38 | 0%, 40%, 100% { -webkit-transform: scaleY(0.4) }
39 | 20% { -webkit-transform: scaleY(1.0) }
40 | }
41 |
42 | @keyframes sk-stretchdelay {
43 | 0%, 40%, 100% {
44 | transform: scaleY(0.4);
45 | -webkit-transform: scaleY(0.4);
46 | } 20% {
47 | transform: scaleY(1.0);
48 | -webkit-transform: scaleY(1.0);
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/ui/src/app/checks/checks.component.ts:
--------------------------------------------------------------------------------
1 | import {Component, OnInit} from '@angular/core';
2 | import {SocketService} from "../socket.service";
3 | import {SocketType} from "../shared/socket.types";
4 |
/**
 * Shows and controls the check configuration.
 *
 * Requests the current checks on construction, keeps `checks` in sync with
 * incoming CURRENT_CHECKS messages and sends start/stop/reset commands over
 * the shared websocket.
 */
@Component({
  selector: 'app-checks',
  templateUrl: 'checks.component.html'
})
export class ChecksComponent implements OnInit {
  /** Check configuration; replaced whenever a CURRENT_CHECKS message arrives. */
  public checks = {};

  constructor(private socketService: SocketService) {
    this.getCurrentChecks();
  }

  /** Subscribes to the websocket and handles CURRENT_CHECKS messages. */
  ngOnInit() {
    this.socketService.websocket.subscribe(
      msg => {
        const data = JSON.parse(msg.data);
        switch (data.type) {
          case SocketType.CURRENT_CHECKS:
            this.checks = data.message;
            break;
        }
      }
    );
  }

  /** Sends START_CHECKS together with the current configuration. */
  public startChecks() {
    this.socketService.websocket.next({type: SocketType.START_CHECKS, message: this.checks});
  }

  /** Sends STOP_CHECKS. */
  public stopChecks() {
    this.socketService.websocket.next({type: SocketType.STOP_CHECKS});
  }

  /** Sends RESET_STATS. */
  public resetStats() {
    this.socketService.websocket.next({type: SocketType.RESET_STATS});
  }

  /**
   * Requests the current check configuration.
   * Uses the lowercase `type` key like every other outgoing message (the
   * original sent `Type`, which only works with case-insensitive receivers).
   */
  private getCurrentChecks() {
    this.socketService.websocket.next({type: SocketType.CURRENT_CHECKS});
  }
}
45 |
--------------------------------------------------------------------------------
/ui/src/app/daemons/daemons.component.ts:
--------------------------------------------------------------------------------
1 | import {Component, OnInit} from '@angular/core';
2 | import {SocketService} from '../socket.service';
3 | import {SocketType} from '../shared/socket.types';
4 | import {NotificationsService} from 'angular2-notifications';
5 |
/**
 * Lists the connected daemons.
 *
 * Requests the daemon list on construction, stores it sorted by hostname and
 * re-requests it (with a notification) whenever a daemon joins or leaves.
 */
@Component({
  selector: 'app-daemon-overview',
  templateUrl: './daemons.component.html'
})
export class DaemonsComponent implements OnInit {
  /**
   * Daemons as received in the last ALL_DAEMONS message, sorted by hostname.
   * Public because members bound in a template must be public for AOT builds.
   * NOTE(review): the binding in daemons.component.html is assumed — confirm.
   */
  public daemons: any;

  constructor(private socketService: SocketService, private notificationService: NotificationsService) {
    this.getDaemons();
  }

  /** Subscribes to the websocket and dispatches on the message type. */
  ngOnInit() {
    this.socketService.websocket.subscribe(
      msg => {
        const data = JSON.parse(msg.data);
        switch (data.type) {
          case SocketType.ALL_DAEMONS:
            // Ascending by hostname using plain string comparison.
            this.daemons = data.message.sort((a, b) => {
              return a.hostname > b.hostname ? 1 : ((b.hostname > a.hostname) ? -1 : 0);
            });
            break;
          case SocketType.NEW_DAEMON:
            this.notificationService.info('Daemon joined', 'New daemon joined: ' + data.message);
            this.getDaemons();
            break;
          case SocketType.DAEMON_LEFT:
            this.notificationService.info('Daemon left', 'Daemon left: ' + data.message);
            this.getDaemons();
            break;
        }
      }
    );
  }

  /** Asks for the full daemon list; the answer arrives as an ALL_DAEMONS message. */
  getDaemons() {
    this.socketService.websocket.next({type: SocketType.ALL_DAEMONS});
  }
}
43 |
--------------------------------------------------------------------------------
/ui/angular-cli.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "./node_modules/@angular/cli/lib/config/schema.json",
3 | "project": {
4 | "name": "openshift-monitoring-ui"
5 | },
6 | "apps": [
7 | {
8 | "root": "src",
9 | "outDir": "dist",
10 | "assets": [
11 | "assets",
12 | "favicon.ico"
13 | ],
14 | "index": "index.html",
15 | "main": "main.ts",
16 | "polyfills": "polyfills.ts",
17 | "test": "test.ts",
18 | "tsconfig": "tsconfig.app.json",
19 | "testTsconfig": "tsconfig.spec.json",
20 | "prefix": "app",
21 | "styles": [
22 | "styles.css",
23 | "../node_modules/bulma/css/bulma.css"
24 | ],
25 | "scripts": [],
26 | "environmentSource": "environments/environment.ts",
27 | "environments": {
28 | "dev": "environments/environment.ts",
29 | "prod": "environments/environment.prod.ts"
30 | }
31 | }
32 | ],
33 | "e2e": {
34 | "protractor": {
35 | "config": "./protractor.conf.js"
36 | }
37 | },
38 | "lint": [
39 | {
40 | "project": "src/tsconfig.app.json",
41 | "exclude": "**/node_modules/**"
42 | },
43 | {
44 | "project": "src/tsconfig.spec.json",
45 | "exclude": "**/node_modules/**"
46 | },
47 | {
48 | "project": "e2e/tsconfig.e2e.json",
49 | "exclude": "**/node_modules/**"
50 | }
51 | ],
52 | "test": {
53 | "karma": {
54 | "config": "./karma.conf.js"
55 | }
56 | },
57 | "defaults": {
58 | "styleExt": "css",
59 | "component": {}
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/hub/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "flag"
5 | "github.com/oscp/openshift-monitoring/hub/server"
6 | "log"
7 | "net/http"
8 | )
9 |
// Command-line flags read by main; each flag.String call registers the flag
// together with its default value and help text.
var (
	uiAddr          = flag.String("UI_ADDR", "localhost:8080", "http service endpoint")
	hubAddr         = flag.String("RPC_ADDR", "localhost:2600", "go hub rcp2 address")
	masterApiUrls   = flag.String("MASTER_API_URLS", "https://master1:8443,https://master2:8443", "addresses of master api's")
	daemonPublicUrl = flag.String("DAEMON_PUBLIC_URL", "http://daemon.yourroute.com", "external address of the daemon service (route)")
	etcdIps         = flag.String("ETCD_IPS", "https://localhost:2379,https://server1:2379", "adresses of etcd servers")
	etcdCertPath    = flag.String("ETCD_CERT_PATH", "/etc/etcd/", "Path of alternative etcd certificates")
)
16 |
17 | func main() {
18 | flag.Parse()
19 | log.Println("hub waiting for daemons on", *hubAddr)
20 | log.Println("ui server waiting for websocket on", *uiAddr)
21 | log.Println("master api urls are", *masterApiUrls)
22 | log.Println("daemons public url is", *daemonPublicUrl)
23 | log.Println("etcd ips are", *etcdIps)
24 | log.Println("etcdCertPath is", *etcdCertPath)
25 |
26 | // Start hub rcp server
27 | hub := server.NewHub(*hubAddr, *masterApiUrls, *daemonPublicUrl, *etcdIps, *etcdCertPath)
28 | go hub.Serve()
29 |
30 | // Serve UI & websockets
31 | fs := http.FileServer(http.Dir("static"))
32 | http.Handle("/", fs)
33 | http.HandleFunc("/ui", func(w http.ResponseWriter, r *http.Request) {
34 | server.OnUISocket(hub, w, r)
35 | })
36 |
37 | log.Fatal(http.ListenAndServe(*uiAddr, nil))
38 | }
39 |
--------------------------------------------------------------------------------
/ui/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "ose-monitoring-ui",
3 | "version": "1.0.0",
4 | "license": "MIT",
5 | "angular-cli": {},
6 | "scripts": {
7 | "ng": "ng",
8 | "start": "ng serve",
9 | "build": "ng build --prod --aot=false",
10 | "test": "ng test",
11 | "lint": "ng lint",
12 | "e2e": "ng e2e",
13 | "build-prod": "ng build --prod --aot=false --progress=false"
14 | },
15 | "private": true,
16 | "dependencies": {
17 | "@angular/animations": "=5.2.0",
18 | "@angular/common": "=5.2.0",
19 | "@angular/compiler": "=5.2.0",
20 | "@angular/core": "=5.2.0",
21 | "@angular/forms": "=5.2.0",
22 | "@angular/http": "=5.2.0",
23 | "@angular/platform-browser": "=5.2.0",
24 | "@angular/platform-browser-dynamic": "=5.2.0",
25 | "@angular/router": "=5.2.0",
26 | "angular2-notifications": "=0.9.7",
27 | "bulma": "=0.6.2",
28 | "chart.js": "=2.7.1",
29 | "core-js": "=2.4.1",
30 | "ng2-charts": "=1.6.0",
31 | "robust-websocket": "=0.3.0",
32 | "rxjs": "=5.5.6",
33 | "zone.js": "=0.8.19"
34 | },
35 | "devDependencies": {
36 | "@angular/cli": "=1.6.7",
37 | "@angular/compiler-cli": "=5.2.0",
38 | "@angular/language-service": "=5.2.0",
39 | "@types/jasmine": "=2.8.3",
40 | "@types/jasminewd2": "=2.0.2",
41 | "@types/node": "=6.0.60",
42 | "codelyzer": "=4.0.1",
43 | "jasmine-core": "=2.8.0",
44 | "jasmine-spec-reporter": "=4.2.1",
45 | "karma": "=2.0.0",
46 | "karma-chrome-launcher": "=2.2.0",
47 | "karma-coverage-istanbul-reporter": "=1.2.1",
48 | "karma-jasmine": "=1.1.0",
49 | "karma-jasmine-html-reporter": "=0.2.2",
50 | "protractor": "=5.1.2",
51 | "ts-node": "=4.1.0",
52 | "tslint": "=5.9.1",
53 | "typescript": "=2.5.3"
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: go
2 | go:
3 | - 1.8.x
4 |
5 | before_install:
6 | - go get github.com/mitchellh/gox
7 | - rm -rf ~/.nvm && git clone https://github.com/creationix/nvm.git ~/.nvm && (cd ~/.nvm && git checkout `git describe --abbrev=0 --tags`) && source ~/.nvm/nvm.sh && nvm install stable
8 |
9 | install:
10 | - go get github.com/cenkalti/rpc2
11 | - go get github.com/mitchellh/mapstructure
12 | - go get github.com/gorilla/websocket
13 | - go get gopkg.in/yaml.v2
14 |
15 | script:
16 | - mkdir -p dist/static
17 | - cp ./install/* ./dist/
18 | - gox -osarch="linux/amd64" -output "./dist/{{.Dir}}" github.com/oscp/openshift-monitoring/hub github.com/oscp/openshift-monitoring/daemon
19 | - cd ./ui
20 | - npm install
21 | - npm run build-prod
22 | - cp ./dist/* ../dist/static/
23 | - cd ..
24 | - tar -zcvf ose-mon.tar.gz dist
25 | - tar -zcvf ose-mon-daemon-standalone.tar.gz dist/daemon dist/ose-mon-daemon.service.standalone
26 |
27 | deploy:
28 | provider: releases
29 | api_key:
30 | secure: fk1WqHNOZm5cCXRQZ1wleFxP7lORnuZ5xqJLjyJmaw+APX25c3XL1ASiv004Ied7fNTqCzjLJMfntuDK4y/zkyE26p6p/7GvRYq+j6ejSA3w60UqzYPckA5kp21T2Kb2eED2LXJ1TaTlnJxCP+vXpHQUYeqZ45i5Dimq1TlIUOlBmBKQCCOucNB52RA62+919W1fB0hjKH1X9YSpO/uwf/BCbqWKzGLNz6zHE9+wIzRgAEpTgccEIkXOoxoNWGWv9Gbbe8t206qND+mscogiK3bDhJjoAW4SaQe2ZHf0H9cNN3f0S5FaAMZLEfVz6d7rDAsyTnSgEyPCRP0i9HjQMRwkPiXvtcZBfKS0C6rpbQVw+vhWL2AvPXUhHKVS2+g82+qQ+eOUqA3MOPPAMcMesNWuNSzE6sb6EZ6Z9bzL+FFAq1nZZkj/zGlsbhFxiCOVXWpUuHNnMJFZdcpnQrwLgQfcXbMBuiQkS/6oKGiPeiCb2FgfOAqGdDjBW4vyQeja36QNMA4qN2rh6agunKqTyZTEoJFH6FLA5yT2hAJQM4WiTU4WPP4yiz6lsQShhkF8IdQtv+6ST8Y+bJJttI22LLl2ka4f+JnPmMUsMbBOu2KhTj8v2eTPQNrvsf+5VckRgNDQM8K11FUyvGhX/aSFWGGWl7VyD7HjgGMEaR91UhE=
31 | file:
32 | - "ose-mon.tar.gz"
33 | - "ose-mon-daemon-standalone.tar.gz"
34 | skip_cleanup: true
35 | on:
36 | tags: true
37 |
38 |
--------------------------------------------------------------------------------
/hub/server/webserver.go:
--------------------------------------------------------------------------------
1 | package server
2 |
3 | import (
4 | "github.com/gorilla/websocket"
5 | "github.com/oscp/openshift-monitoring/models"
6 | "log"
7 | "net/http"
8 | )
9 |
10 | var upgrader = websocket.Upgrader{
11 | CheckOrigin: func(r *http.Request) bool {
12 | return true
13 | },
14 | }
15 |
16 | func OnUISocket(h *Hub, w http.ResponseWriter, r *http.Request) {
17 | log.Println("ui joined")
18 |
19 | c, err := upgrader.Upgrade(w, r, nil)
20 | if err != nil {
21 | log.Println("upgrade-error: ", err)
22 | return
23 | }
24 |
25 | go handleFromUI(h, c)
26 | go handleToUI(h, c)
27 | }
28 |
29 | func handleToUI(h *Hub, c *websocket.Conn) {
30 | for {
31 | var msg = <-h.toUi
32 |
33 | err := c.WriteJSON(msg)
34 | if err != nil {
35 | log.Println("socket to UI was closed, resending message", err)
36 | h.toUi <- msg
37 | break
38 | }
39 | }
40 | }
41 |
42 | func handleFromUI(h *Hub, c *websocket.Conn) {
43 | for {
44 | // parse message
45 | var msg models.BaseModel
46 | err := c.ReadJSON(&msg)
47 | if err != nil {
48 | log.Println("read-error on ws: ", err)
49 | break
50 | }
51 |
52 | var res interface{}
53 | switch msg.Type {
54 | case models.AllDaemons:
55 | res = models.BaseModel{Type: models.AllDaemons, Message: h.Daemons()}
56 | break
57 | case models.StartChecks:
58 | res = h.StartChecks(msg.Message)
59 | break
60 | case models.StopChecks:
61 | res = h.StopChecks()
62 | break
63 | case models.ResetStats:
64 | h.ResetStats <- true
65 | res = models.BaseModel{Type: models.AllDaemons, Message: h.Daemons()}
66 | break
67 | case models.CurrentChecks:
68 | res = models.BaseModel{Type: models.CurrentChecks, Message: h.currentChecks}
69 | }
70 |
71 | err = c.WriteJSON(res)
72 | if err != nil {
73 | log.Println("error sending message to UI on websocket: ", err)
74 | }
75 | }
76 | }
77 |
--------------------------------------------------------------------------------
/ui/src/app/results/results.component.html:
--------------------------------------------------------------------------------
1 |
2 |
Successful checks
3 |
4 |
5 |
10 |
11 |
12 |
18 |
19 |
20 |
24 |
25 |
26 |
27 |
Failed Checks
28 |
29 |
30 |
35 |
36 |
37 |
38 |
39 |
40 | | Date |
41 | Host |
42 | Type |
43 | Message |
44 |
45 |
46 |
47 |
48 | | {{e.date | date: 'dd.MM.yyyy HH:mm:ss'}} |
49 | {{e.hostname}} |
50 | {{e.type}} |
51 | {{e.message}} |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
--------------------------------------------------------------------------------
/daemon/client/communication.go:
--------------------------------------------------------------------------------
1 | package client
2 |
3 | import (
4 | "github.com/cenkalti/rpc2"
5 | "github.com/oscp/openshift-monitoring/models"
6 | "log"
7 | "os"
8 | "time"
9 | )
10 |
11 | func registerOnHub(h string, dc *models.DaemonClient) {
12 | log.Println("registring on the hub:", h)
13 | var rep string
14 | for {
15 | err := dc.Client.Call("register", dc.Daemon, &rep)
16 | if err != nil {
17 | log.Println("error registring on hub. Will try again in 5 seconds. Error: ", err)
18 | time.Sleep(5 * time.Second)
19 | } else {
20 | break
21 | }
22 | }
23 | if rep != "ok" {
24 | log.Fatalf("expected the hub to answer with ok. he did with: %+v", rep)
25 | }
26 | }
27 |
28 | func unregisterOnHub(c *rpc2.Client) {
29 | var rep string
30 | host, _ := os.Hostname()
31 | err := c.Call("unregister", host, &rep)
32 | if err != nil {
33 | log.Fatalf("error when unregistring from hub: %s", err)
34 | }
35 | c.Close()
36 | }
37 |
38 | func HandleCheckStarted(dc *models.DaemonClient) {
39 | dc.Daemon.StartedChecks++
40 | updateDaemonOnHub(dc)
41 | }
42 |
43 | func HandleCheckFinished(dc *models.DaemonClient, err error, t string) {
44 | // Update check counts
45 | if err == nil {
46 | dc.ToHub <- models.CheckResult{Type: t, IsOk: true, Message: ""}
47 | dc.Daemon.SuccessfulChecks++
48 | } else {
49 | dc.ToHub <- models.CheckResult{Type: t, IsOk: false, Message: err.Error()}
50 | dc.Daemon.FailedChecks++
51 | }
52 | updateDaemonOnHub(dc)
53 | }
54 |
55 | func HandleChecksStopped(dc *models.DaemonClient) {
56 | log.Println("stopped checks")
57 | updateDaemonOnHub(dc)
58 | }
59 |
60 | func updateDaemonOnHub(dc *models.DaemonClient) {
61 | var rep string
62 | err := dc.Client.Call("updateCheckcount", dc.Daemon, &rep)
63 | if err != nil {
64 | log.Println("error updating Checkcounts on hub: ", err)
65 | }
66 | }
67 |
68 | func handleCheckResultToHub(dc *models.DaemonClient) {
69 | for {
70 | var r = <-dc.ToHub
71 | r.Hostname = dc.Daemon.Hostname
72 |
73 | if err := dc.Client.Call("checkResult", r, nil); err != nil {
74 | log.Println("error sending CheckResult to hub", err)
75 | }
76 | }
77 | }
78 |
--------------------------------------------------------------------------------
/ui/src/app/socket.service.ts:
--------------------------------------------------------------------------------
1 | import {Injectable} from '@angular/core';
2 | import {Observer, Observable, Subject} from 'rxjs/Rx';
3 | import {NotificationsService} from "angular2-notifications";
4 |
/**
 * Shares one websocket connection to the hub between all components.
 *
 * `websocket` is created once and stays valid across reconnects:
 * - `websocket.next(obj)` serialises `obj` to JSON and sends it to the hub;
 *   messages sent while the socket is not open are queued and flushed as soon
 *   as a connection is (re-)established.
 * - subscribers receive the raw `MessageEvent`s of whichever socket is
 *   currently connected, so a reconnect does not silence existing subscribers.
 */
@Injectable()
export class SocketService {
  public websocket: Subject<any>;

  // Socket of the current connection attempt; replaced on every reconnect.
  private socket: WebSocket;
  // Long-lived stream that every socket instance forwards its messages to.
  private readonly incoming = new Subject<MessageEvent>();
  // Serialised messages waiting for the socket to open.
  private pending: string[] = [];

  constructor(private notificationService: NotificationsService) {
    const outgoing = {
      next: (data: object) => this.send(JSON.stringify(data))
    };
    this.websocket = Subject.create(outgoing, this.incoming.asObservable());
    this.connectToUI();
  }

  /** Notifies the user and opens a new connection after one second. */
  private reconnectWebsocket() {
    this.notificationService.error("Error on websocket", "Error on websocket. Reconnecting...");
    setTimeout(
      () => {
        console.log('reconnecting websocket');
        this.connectToUI();
      }
      , 1000
    );
  }

  /** Opens the websocket to the hub and wires its events to this service. */
  private connectToUI() {
    const origin = window.location.origin;
    // The dev server on port 4200 does not serve the hub endpoint itself.
    const hubUrl = origin === 'http://localhost:4200' ? 'http://localhost:8080/ui' : origin + '/ui';
    // Maps http -> ws and https -> wss.
    const socket = new WebSocket(hubUrl.replace(/^http/, 'ws'));
    this.socket = socket;

    socket.onopen = () => this.flushPending();
    socket.onmessage = (event: MessageEvent) => this.incoming.next(event);
    // Closing triggers onclose, which is the single place that reconnects;
    // reconnecting from both handlers would open two connections per failure.
    socket.onerror = () => socket.close();
    socket.onclose = () => this.reconnectWebsocket();
  }

  /** Sends the payload right away when connected, otherwise queues it. */
  private send(payload: string) {
    if (this.socket && this.socket.readyState === WebSocket.OPEN) {
      this.socket.send(payload);
    } else {
      this.pending.push(payload);
    }
  }

  /** Sends everything that was queued while the socket was not open. */
  private flushPending() {
    const queued = this.pending;
    this.pending = [];
    queued.forEach(payload => this.socket.send(payload));
  }
}
72 |
--------------------------------------------------------------------------------
/ui/src/polyfills.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * This file includes polyfills needed by Angular and is loaded before the app.
3 | * You can add your own extra polyfills to this file.
4 | *
5 | * This file is divided into 2 sections:
6 | * 1. Browser polyfills. These are applied before loading ZoneJS and are sorted by browsers.
7 | * 2. Application imports. Files imported after ZoneJS that should be loaded before your main
8 | * file.
9 | *
10 | * The current setup is for so-called "evergreen" browsers; the last versions of browsers that
11 | * automatically update themselves. This includes Safari >= 10, Chrome >= 55 (including Opera),
12 | * Edge >= 13 on the desktop, and iOS 10 and Chrome on mobile.
13 | *
14 | * Learn more in https://angular.io/docs/ts/latest/guide/browser-support.html
15 | */
16 |
17 | /***************************************************************************************************
18 | * BROWSER POLYFILLS
19 | */
20 |
21 | /** IE9, IE10 and IE11 require all of the following polyfills. **/
22 | // import 'core-js/es6/symbol';
23 | // import 'core-js/es6/object';
24 | // import 'core-js/es6/function';
25 | // import 'core-js/es6/parse-int';
26 | // import 'core-js/es6/parse-float';
27 | // import 'core-js/es6/number';
28 | // import 'core-js/es6/math';
29 | // import 'core-js/es6/string';
30 | // import 'core-js/es6/date';
31 | // import 'core-js/es6/array';
32 | // import 'core-js/es6/regexp';
33 | // import 'core-js/es6/map';
34 | // import 'core-js/es6/weak-map';
35 | // import 'core-js/es6/set';
36 |
37 | /** IE10 and IE11 require the following for NgClass support on SVG elements */
38 | // import 'classlist.js'; // Run `npm install --save classlist.js`.
39 |
40 | /** IE10 and IE11 require the following for the Reflect API. */
41 | // import 'core-js/es6/reflect';
42 |
43 |
44 | /** Evergreen browsers require these. **/
45 | // Used for reflect-metadata in JIT. If you use AOT (and only Angular decorators), you can remove.
46 | import 'core-js/es7/reflect';
47 |
48 |
49 | /**
50 | * Required to support Web Animations `@angular/platform-browser/animations`.
51 | * Needed for: All but Chrome, Firefox and Opera. http://caniuse.com/#feat=web-animation
52 | **/
53 | // import 'web-animations-js'; // Run `npm install --save web-animations-js`.
54 |
55 |
56 |
57 | /***************************************************************************************************
58 | * Zone JS is required by default for Angular itself.
59 | */
60 | import 'zone.js/dist/zone'; // Included with Angular CLI.
61 |
62 |
63 |
64 | /***************************************************************************************************
65 | * APPLICATION IMPORTS
66 | */
67 |
--------------------------------------------------------------------------------
/install/ose-mon-template.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Template
3 | metadata:
4 | creationTimestamp: null
5 | name: ose-mon
6 | objects:
7 | - apiVersion: v1
8 | kind: Route
9 | metadata:
10 | annotations:
11 | openshift.io/host.generated: "true"
12 | creationTimestamp: null
13 | labels:
14 | app: daemon
15 | name: daemon
16 | spec:
17 | host: ${DAEMON_PUBLIC_ROUTE}
18 | port:
19 | targetPort: 8090-tcp
20 | to:
21 | kind: Service
22 | name: daemon
23 | - apiVersion: v1
24 | kind: Service
25 | metadata:
26 | annotations:
27 | openshift.io/generated-by: OpenShiftNewApp
28 | creationTimestamp: null
29 | labels:
30 | app: daemon
31 | name: daemon
32 | spec:
33 | ports:
34 | - name: 8090-tcp
35 | port: 8090
36 | protocol: TCP
37 | targetPort: 8090
38 | selector:
39 | app: daemon
40 | sessionAffinity: None
41 | type: ClusterIP
42 | status:
43 | loadBalancer: {}
44 | - apiVersion: extensions/v1beta1
45 | kind: DaemonSet
46 | metadata:
47 | creationTimestamp: null
48 | generation: 2
49 | labels:
50 | app: daemon
51 | name: daemon
52 | spec:
53 | selector:
54 | matchLabels:
55 | app: daemon
56 | template:
57 | metadata:
58 | creationTimestamp: null
59 | labels:
60 | app: daemon
61 | spec:
62 | containers:
63 | - env:
64 | - name: DAEMON_TYPE
65 | value: POD
66 | - name: HUB_ADDRESS
67 | value: ${DS_HUB_ADDRESS}
68 | - name: POD_NAMESPACE
69 | valueFrom:
70 | fieldRef:
71 | apiVersion: v1
72 | fieldPath: metadata.namespace
73 | image: ${IMAGE_SPEC}
74 | imagePullPolicy: Always
75 | name: daemon
76 | resources: {}
77 | terminationMessagePath: /dev/termination-log
78 | dnsPolicy: ClusterFirst
79 | nodeSelector:
80 | purpose: workingnode
81 | restartPolicy: Always
82 | securityContext: {}
83 | terminationGracePeriodSeconds: 30
84 | status:
85 | currentNumberScheduled: 2
86 | desiredNumberScheduled: 2
87 | numberMisscheduled: 0
88 | parameters:
89 | - description: The public route of the daemon
90 | name: DAEMON_PUBLIC_ROUTE
91 | required: true
92 | value: daemon.namespace.defaultroute.com
93 | - description: The address & port of your hub
94 | name: DS_HUB_ADDRESS
95 | required: true
96 | value: yourmaster:2600
97 | - description: The pull spec of the image
98 | name: IMAGE_SPEC
99 | required: true
100 | value: 172.30.151.39:5000/project/daemon
--------------------------------------------------------------------------------
/ui/src/app/checks/checks.component.html:
--------------------------------------------------------------------------------
1 |
74 |
--------------------------------------------------------------------------------
/install/ose-mon-standalone-template.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Template
3 | metadata:
4 | creationTimestamp: null
5 | name: daemon.standalone.yml
6 | objects:
7 | - apiVersion: v1
8 | kind: Service
9 | metadata:
10 | annotations:
11 | openshift.io/generated-by: OpenShiftNewApp
12 | creationTimestamp: null
13 | labels:
14 | app: daemon
15 | name: daemon
16 | spec:
17 | ports:
18 | - name: 8090-tcp
19 | port: 8090
20 | protocol: TCP
21 | targetPort: 8090
22 | selector:
23 | app: daemon
24 | sessionAffinity: None
25 | type: ClusterIP
26 | status:
27 | loadBalancer: {}
28 | - apiVersion: v1
29 | kind: DeploymentConfig
30 | metadata:
31 | annotations:
32 | openshift.io/generated-by: OpenShiftNewApp
33 | creationTimestamp: null
34 | generation: 4
35 | labels:
36 | app: daemon
37 | name: daemon
38 | spec:
39 | replicas: 1
40 | selector:
41 | app: daemon
42 | deploymentconfig: daemon
43 | strategy:
44 | resources: {}
45 | rollingParams:
46 | intervalSeconds: 1
47 | maxSurge: 25%
48 | maxUnavailable: 25%
49 | timeoutSeconds: 600
50 | updatePeriodSeconds: 1
51 | type: Rolling
52 | template:
53 | metadata:
54 | annotations:
55 | openshift.io/container.daemon.image.entrypoint: '["daemon"]'
56 | openshift.io/generated-by: OpenShiftNewApp
57 | creationTimestamp: null
58 | labels:
59 | app: daemon
60 | deploymentconfig: daemon
61 | spec:
62 | containers:
63 | - env:
64 | - name: DAEMON_TYPE
65 | value: POD
66 | - name: POD_NAMESPACE
67 | valueFrom:
68 | fieldRef:
69 | apiVersion: v1
70 | fieldPath: metadata.namespace
71 | image: ${IMAGE_SPEC}
72 | imagePullPolicy: IfNotPresent
73 | name: daemon
74 | resources: {}
75 | terminationMessagePath: /dev/termination-log
76 | dnsPolicy: ClusterFirst
77 | restartPolicy: Always
78 | securityContext: {}
79 | terminationGracePeriodSeconds: 30
80 | status:
81 | availableReplicas: 1
82 | observedGeneration: 4
83 | replicas: 1
84 | updatedReplicas: 1
85 | - apiVersion: v1
86 | kind: Route
87 | metadata:
88 | annotations:
89 | openshift.io/host.generated: "true"
90 | creationTimestamp: null
91 | labels:
92 | app: daemon
93 | name: daemon
94 | spec:
95 | host: ${DAEMON_PUBLIC_ROUTE}
96 | port:
97 | targetPort: 8090-tcp
98 | to:
99 | kind: Service
100 | name: daemon
101 | weight: 100
102 | parameters:
103 | - description: The public route of the daemon
104 | name: DAEMON_PUBLIC_ROUTE
105 | required: true
106 | value: daemon.namespace.defaultroute.com
107 | - description: The pull spec of the image
108 | name: IMAGE_SPEC
109 | required: true
110 | value: 172.30.151.39:5000/project/daemon
--------------------------------------------------------------------------------
/ui/tslint.json:
--------------------------------------------------------------------------------
1 | {
2 | "rulesDirectory": [
3 | "node_modules/codelyzer"
4 | ],
5 | "rules": {
6 | "arrow-return-shorthand": true,
7 | "callable-types": true,
8 | "class-name": true,
9 | "comment-format": [
10 | true,
11 | "check-space"
12 | ],
13 | "curly": true,
14 | "deprecation": {
15 | "severity": "warn"
16 | },
17 | "eofline": true,
18 | "forin": true,
19 | "import-blacklist": [
20 | true,
21 | "rxjs",
22 | "rxjs/Rx"
23 | ],
24 | "import-spacing": true,
25 | "indent": [
26 | true,
27 | "spaces"
28 | ],
29 | "interface-over-type-literal": true,
30 | "label-position": true,
31 | "max-line-length": [
32 | true,
33 | 140
34 | ],
35 | "member-access": false,
36 | "member-ordering": [
37 | true,
38 | {
39 | "order": [
40 | "static-field",
41 | "instance-field",
42 | "static-method",
43 | "instance-method"
44 | ]
45 | }
46 | ],
47 | "no-arg": true,
48 | "no-bitwise": true,
49 | "no-console": [
50 | true,
51 | "debug",
52 | "info",
53 | "time",
54 | "timeEnd",
55 | "trace"
56 | ],
57 | "no-construct": true,
58 | "no-debugger": true,
59 | "no-duplicate-super": true,
60 | "no-empty": false,
61 | "no-empty-interface": true,
62 | "no-eval": true,
63 | "no-inferrable-types": [
64 | true,
65 | "ignore-params"
66 | ],
67 | "no-misused-new": true,
68 | "no-non-null-assertion": true,
69 | "no-shadowed-variable": true,
70 | "no-string-literal": false,
71 | "no-string-throw": true,
72 | "no-switch-case-fall-through": true,
73 | "no-trailing-whitespace": true,
74 | "no-unnecessary-initializer": true,
75 | "no-unused-expression": true,
76 | "no-use-before-declare": true,
77 | "no-var-keyword": true,
78 | "object-literal-sort-keys": false,
79 | "one-line": [
80 | true,
81 | "check-open-brace",
82 | "check-catch",
83 | "check-else",
84 | "check-whitespace"
85 | ],
86 | "prefer-const": true,
87 | "quotemark": [
88 | true,
89 | "single"
90 | ],
91 | "radix": true,
92 | "semicolon": [
93 | true,
94 | "always"
95 | ],
96 | "triple-equals": [
97 | true,
98 | "allow-null-check"
99 | ],
100 | "typedef-whitespace": [
101 | true,
102 | {
103 | "call-signature": "nospace",
104 | "index-signature": "nospace",
105 | "parameter": "nospace",
106 | "property-declaration": "nospace",
107 | "variable-declaration": "nospace"
108 | }
109 | ],
110 | "unified-signatures": true,
111 | "variable-name": false,
112 | "whitespace": [
113 | true,
114 | "check-branch",
115 | "check-decl",
116 | "check-operator",
117 | "check-separator",
118 | "check-type"
119 | ],
120 | "directive-selector": [
121 | true,
122 | "attribute",
123 | "app",
124 | "camelCase"
125 | ],
126 | "component-selector": [
127 | true,
128 | "element",
129 | "app",
130 | "kebab-case"
131 | ],
132 | "no-output-on-prefix": true,
133 | "use-input-property-decorator": true,
134 | "use-output-property-decorator": true,
135 | "use-host-property-decorator": true,
136 | "no-input-rename": true,
137 | "no-output-rename": true,
138 | "use-life-cycle-interface": true,
139 | "use-pipe-transform-interface": true,
140 | "component-class-suffix": true,
141 | "directive-class-suffix": true
142 | }
143 | }
144 |
--------------------------------------------------------------------------------
/ui/src/app/results/results.component.ts:
--------------------------------------------------------------------------------
1 | import {Component, OnInit, SimpleChanges, ViewChild} from '@angular/core';
2 | import {SocketService} from "../socket.service";
3 | import {SocketType} from "../shared/socket.types";
4 | import {BaseChartDirective} from "ng2-charts";
5 |
/**
 * Displays the check results received over the shared websocket: started vs.
 * finished checks, successful and failed checks per check type, a line chart
 * of successful/failed checks per tick and the list of failures.
 */
@Component({
  selector: 'app-results',
  templateUrl: 'results.component.html'
})
export class ResultsComponent implements OnInit {
  // Doughnut charts
  public dognutChartType = 'doughnut';
  public dognutChartOptions: any = {
    legend: {
      display: false
    }
  };

  public checkOverviewLabels: string[] = ['Started', 'Finished'];
  public checkOverviewData: number[] = [0, 0];

  public checkTypeLabels: string[] = ['MASTER_API_CHECK', 'DNS_NSLOOKUP_KUBERNETES', 'DNS_SERVICE_NODE',
    'DNS_SERVICE_POD', 'HTTP_POD_SERVICE_A_B', 'HTTP_POD_SERVICE_A_C', 'HTTP_SERVICE_ABC', 'HTTP_HAPROXY', 'ETCD_HEALTH'];
  // One counter per entry of checkTypeLabels.
  public errorData: number[] = this.checkTypeLabels.map(() => 0);
  public successData: number[] = this.checkTypeLabels.map(() => 0);

  public failures: Array<any> = [];

  // Line Chart
  @ViewChild(BaseChartDirective) chart: BaseChartDirective;
  public lineChartType = 'line';
  public checkLineData: any = [
    {data: [], label: 'Successful checks'},
    {data: [], label: 'Failed checks'}
  ];
  public checkLineLabels: Array<any> = [];
  public checkLineLegend = true;
  public checkLineOptions: any = {
    responsive: true
  };

  constructor(private socketService: SocketService) {
  }

  /** Subscribes to the websocket and processes CHECK_RESULTS messages. */
  ngOnInit() {
    this.socketService.websocket.subscribe(
      msg => {
        const data = JSON.parse(msg.data);
        if (!data) {
          // A frame whose JSON payload is `null` carries no type; skip it
          // instead of throwing on `data.type`.
          return;
        }

        switch (data.type) {
          case SocketType.CHECK_RESULTS:
            this.handleResults(data.message);
            break;
        }
      }
    );
  }

  /** Copies one results message into the chart and table bindings. */
  private handleResults(res) {
    // Failures, in reverse of the received order; a missing list is treated as empty.
    this.failures = (res.failures || []).slice().reverse();

    // Started & finished checks (a new array so the chart notices the change)
    this.checkOverviewData = [res.startedChecks, res.finishedChecks];

    // Success / failed by type
    this.handleFailedByType(res.failedChecksByType);
    this.handleSuccesfulByType(res.successfulChecksByType);

    // Handle Line-Charts
    this.handleLineResult(res);
  }

  /** Appends every tick that is not on the line chart yet. */
  private handleLineResult(res: any) {
    for (const [tick, counts] of Object.entries(res.ticks || {})) {
      if (!this.checkLineLabels.includes(tick)) {
        this.checkLineLabels.push(tick);
        this.checkLineData[0].data.push(counts.successfulChecks);
        this.checkLineData[1].data.push(counts.failedChecks);
      }
    }
    // Update UI because of bug in chartjs:
    if (this.chart) {
      this.chart.labels = this.checkLineLabels.slice();
    }
    this.checkLineData = this.checkLineData.slice();
  }

  /** Updates the failed-checks counters from a type -> count map. */
  private handleFailedByType(res: any) {
    this.errorData = this.mergeCountsByType(this.errorData, res);
  }

  /** Updates the successful-checks counters from a type -> count map. */
  private handleSuccesfulByType(res: any) {
    this.successData = this.mergeCountsByType(this.successData, res);
  }

  /**
   * Returns a copy of `current` in which the slot of every check type found
   * in `byType` holds the received count. Types that are not listed in
   * checkTypeLabels are ignored; writing them at index -1 would only add a
   * stray "-1" property to the array. Returning a new array enforces a chart
   * refresh.
   */
  private mergeCountsByType(current: number[], byType: any): number[] {
    const merged = current.slice();
    for (const [type, count] of Object.entries(byType || {})) {
      const idx = this.checkTypeLabels.indexOf(type);
      if (idx !== -1) {
        merged[idx] = count;
      }
    }
    return merged;
  }
}
110 |
--------------------------------------------------------------------------------
/daemon/client/handlers/major.go:
--------------------------------------------------------------------------------
1 | package handlers
2 |
3 | import (
4 | "github.com/oscp/openshift-monitoring/daemon/client/checks"
5 | "log"
6 | "net/http"
7 | "os"
8 | "strings"
9 | )
10 |
11 | func HandleMajorChecks(daemonType string, w http.ResponseWriter, r *http.Request) {
12 | errors := []string{}
13 | if daemonType == "NODE" {
14 | if err := checks.CheckDockerPool(90); err != nil {
15 | errors = append(errors, err.Error())
16 | }
17 |
18 | if err := checks.CheckDnsNslookupOnKubernetes(); err != nil {
19 | errors = append(errors, err.Error())
20 | }
21 |
22 | if err := checks.CheckDnsServiceNode(); err != nil {
23 | errors = append(errors, err.Error())
24 | }
25 | }
26 |
27 | if daemonType == "MASTER" || daemonType == "NODE" {
28 | certPaths := os.Getenv("CHECK_CERTIFICATE_PATHS")
29 | kubePaths := os.Getenv("CHECK_CERTIFICATE_KUBE_PATHS")
30 |
31 | if len(certPaths) == 0 || len(kubePaths) == 0 {
32 | log.Fatal("env variables 'CHECK_CERTIFICATE_PATHS', 'CHECK_CERTIFICATE_KUBE_PATHS' must be specified")
33 | }
34 |
35 | if err := checks.CheckFileSslCertificates(strings.Split(certPaths, ","), 30); err != nil {
36 | errors = append(errors, err.Error())
37 | }
38 |
39 | if err := checks.CheckKubeSslCertificates(strings.Split(kubePaths, ","), 30); err != nil {
40 | errors = append(errors, err.Error())
41 | }
42 | }
43 |
44 | if daemonType == "MASTER" {
45 | etcdIps := os.Getenv("ETCD_IPS")
46 | registryIp := os.Getenv("REGISTRY_SVC_IP")
47 | routerIps := os.Getenv("ROUTER_IPS")
48 | certUrls := os.Getenv("CHECK_CERTIFICATE_URLS")
49 |
50 | if len(etcdIps) == 0 || len(routerIps) == 0 || len(certUrls) == 0 {
51 | log.Fatal("env variables 'ETCD_IPS', 'ROUTER_IPS', 'CHECK_CERTIFICATE_URLS' must be specified on type 'MASTER'")
52 | }
53 |
54 | // boolean false means exclude buildnodes
55 | // boolean true means only buildnodes
56 | if err := checks.CheckOcGetNodes(false); err != nil {
57 | errors = append(errors, err.Error())
58 | }
59 |
60 | // check notready working nodes but only alert if no more capacity is available
61 | //if err := checks.CheckOcGetNodesRelaxed(); err != nil {
62 | // errors = append(errors, err.Error())
63 | //}
64 |
65 | if err := checks.CheckEtcdHealth(etcdIps, ""); err != nil {
66 | errors = append(errors, err.Error())
67 | }
68 |
69 | if err := checks.CheckRegistryHealth(registryIp); err != nil {
70 | errors = append(errors, err.Error())
71 | }
72 |
73 | for _, rip := range strings.Split(routerIps, ",") {
74 | if err := checks.CheckRouterHealth(rip); err != nil {
75 | errors = append(errors, err.Error())
76 | }
77 | }
78 |
79 | if err := checks.CheckMasterApis("https://localhost:8443/api"); err != nil {
80 | errors = append(errors, err.Error())
81 | }
82 |
83 | if err := checks.CheckDnsNslookupOnKubernetes(); err != nil {
84 | errors = append(errors, err.Error())
85 | }
86 |
87 | if err := checks.CheckDnsServiceNode(); err != nil {
88 | errors = append(errors, err.Error())
89 | }
90 |
91 | if err := checks.CheckUrlSslCertificates(strings.Split(certUrls, ","), 30); err != nil {
92 | errors = append(errors, err.Error())
93 | }
94 | }
95 |
96 | if daemonType == "STORAGE" {
97 | isGlusterServer := os.Getenv("IS_GLUSTER_SERVER")
98 |
99 | if isGlusterServer == "true" {
100 | if err := checks.CheckIfGlusterdIsRunning(); err != nil {
101 | errors = append(errors, err.Error())
102 | }
103 |
104 | if err := checks.CheckMountPointSizes(90); err != nil {
105 | errors = append(errors, err.Error())
106 | }
107 |
108 | if err := checks.CheckLVPoolSizes(90); err != nil {
109 | errors = append(errors, err.Error())
110 | }
111 |
112 | if err := checks.CheckVGSizes(5); err != nil {
113 | errors = append(errors, err.Error())
114 | }
115 | }
116 | }
117 |
118 | generateResponse(w, errors)
119 | }
120 |
--------------------------------------------------------------------------------
/daemon/client/handlers/minor.go:
--------------------------------------------------------------------------------
1 | package handlers
2 |
3 | import (
4 | "log"
5 | "net/http"
6 | "os"
7 | "strconv"
8 | "strings"
9 |
10 | "github.com/oscp/openshift-monitoring/daemon/client/checks"
11 | )
12 |
13 | func HandleMinorChecks(daemonType string, w http.ResponseWriter, r *http.Request) {
14 | errors := []string{}
15 | if daemonType == "NODE" {
16 | if err := checks.CheckDockerPool(80); err != nil {
17 | errors = append(errors, err.Error())
18 | }
19 |
20 | if err := checks.CheckHttpService(false); err != nil {
21 | errors = append(errors, err.Error())
22 | }
23 | }
24 |
25 | if daemonType == "MASTER" || daemonType == "NODE" {
26 | certPaths := os.Getenv("CHECK_CERTIFICATE_PATHS")
27 | kubePaths := os.Getenv("CHECK_CERTIFICATE_KUBE_PATHS")
28 |
29 | if len(certPaths) == 0 || len(kubePaths) == 0 {
30 | log.Fatal("env variables 'CHECK_CERTIFICATE_PATHS', 'CHECK_CERTIFICATE_KUBE_PATHS' must be specified")
31 | }
32 |
33 | if err := checks.CheckFileSslCertificates(strings.Split(certPaths, ","), 80); err != nil {
34 | errors = append(errors, err.Error())
35 | }
36 |
37 | if err := checks.CheckKubeSslCertificates(strings.Split(kubePaths, ","), 80); err != nil {
38 | errors = append(errors, err.Error())
39 | }
40 |
41 | if err := checks.CheckBondNetworkInterface(); err != nil {
42 | errors = append(errors, err.Error())
43 | }
44 | }
45 |
46 | if daemonType == "MASTER" {
47 | externalSystem := os.Getenv("EXTERNAL_SYSTEM_URL")
48 | hawcularIp := os.Getenv("HAWCULAR_SVC_IP")
49 | allowedWithoutLimits := os.Getenv("PROJECTS_WITHOUT_LIMITS")
50 | allowedWithoutQuota := os.Getenv("PROJECTS_WITHOUT_QUOTA")
51 | certUrls := os.Getenv("CHECK_CERTIFICATE_URLS")
52 |
53 | if len(externalSystem) == 0 || len(allowedWithoutLimits) == 0 || len(allowedWithoutQuota) == 0 || len(certUrls) == 0 {
54 | log.Fatal("env variables 'EXTERNAL_SYSTEM_URL', 'PROJECTS_WITHOUT_LIMITS', 'PROJECTS_WITHOUT_QUOTA', 'CHECK_CERTIFICATE_URLS' must be specified on type 'MASTER'")
55 | }
56 |
57 | allowedWithoutLimitsInt, err := strconv.Atoi(allowedWithoutLimits)
58 | if err != nil {
59 | log.Fatal("allowedWithoutLimits seems not to be an integer", allowedWithoutLimits)
60 | }
61 | allowedWithoutQuotaInt, err := strconv.Atoi(allowedWithoutQuota)
62 | if err != nil {
63 | log.Fatal("allowedWithoutLimits seems not to be an integer", allowedWithoutQuota)
64 | }
65 |
66 | // boolean false means exclude buildnodes
67 | // boolean true means only buildnodes
68 | if err := checks.CheckOcGetNodes(true); err != nil {
69 | errors = append(errors, err.Error())
70 | }
71 |
72 | if err := checks.CheckExternalSystem(externalSystem); err != nil {
73 | errors = append(errors, err.Error())
74 | }
75 |
76 | if err := checks.CheckHawcularHealth(hawcularIp); err != nil {
77 | errors = append(errors, err.Error())
78 | }
79 |
80 | if err := checks.CheckRouterRestartCount(); err != nil {
81 | errors = append(errors, err.Error())
82 | }
83 |
84 | if err := checks.CheckLimitsAndQuota(allowedWithoutLimitsInt, allowedWithoutQuotaInt); err != nil {
85 | errors = append(errors, err.Error())
86 | }
87 |
88 | if err := checks.CheckHttpService(false); err != nil {
89 | errors = append(errors, err.Error())
90 | }
91 |
92 | if err := checks.CheckLoggingRestartsCount(); err != nil {
93 | errors = append(errors, err.Error())
94 | }
95 |
96 | if err := checks.CheckUrlSslCertificates(strings.Split(certUrls, ","), 80); err != nil {
97 | errors = append(errors, err.Error())
98 | }
99 | }
100 |
101 | if daemonType == "STORAGE" {
102 | if err := checks.CheckOpenFileCount(); err != nil {
103 | errors = append(errors, err.Error())
104 | }
105 |
106 | if err := checks.CheckMountPointSizes(85); err != nil {
107 | errors = append(errors, err.Error())
108 | }
109 |
110 | if err := checks.CheckLVPoolSizes(80); err != nil {
111 | errors = append(errors, err.Error())
112 | }
113 |
114 | if err := checks.CheckVGSizes(10); err != nil {
115 | errors = append(errors, err.Error())
116 | }
117 | }
118 |
119 | if err := checks.CheckChrony(); err != nil {
120 | errors = append(errors, err.Error())
121 | }
122 |
123 | generateResponse(w, errors)
124 | }
125 |
--------------------------------------------------------------------------------
/daemon/client/daemon.go:
--------------------------------------------------------------------------------
1 | package client
2 |
3 | import (
4 | "github.com/cenkalti/rpc2"
5 | "github.com/oscp/openshift-monitoring/daemon/client/checks"
6 | "github.com/oscp/openshift-monitoring/models"
7 | "log"
8 | "net"
9 | "os"
10 | "strings"
11 | "time"
12 | )
13 |
14 | func StartDaemon(h string, dt string, ns string) *rpc2.Client {
15 | // Local state
16 | host, _ := os.Hostname()
17 | d := models.Daemon{Hostname: host,
18 | Namespace: ns,
19 | DaemonType: dt,
20 | StartedChecks: 0,
21 | FailedChecks: 0,
22 | SuccessfulChecks: 0}
23 |
24 | dc := &models.DaemonClient{Daemon: d,
25 | Quit: make(chan bool),
26 | ToHub: make(chan models.CheckResult)}
27 |
28 | // Register on hub
29 | conn, _ := net.Dial("tcp", h)
30 | dc.Client = rpc2.NewClient(conn)
31 | dc.Client.Handle("startChecks", func(client *rpc2.Client, checks *models.Checks, reply *string) error {
32 | startChecks(dc, checks)
33 | *reply = "ok"
34 | return nil
35 | })
36 | dc.Client.Handle("stopChecks", func(client *rpc2.Client, stop *bool, reply *string) error {
37 | stopChecks(dc)
38 | *reply = "ok"
39 | return nil
40 | })
41 |
42 | disc := dc.Client.DisconnectNotify()
43 | go func() {
44 | for {
45 | select {
46 | case <-disc:
47 | log.Println("Lost connection to host. Terminating.")
48 | os.Exit(0)
49 | }
50 | }
51 | }()
52 |
53 | // Start handling from & to hub
54 | go dc.Client.Run()
55 | go handleCheckResultToHub(dc)
56 |
57 | registerOnHub(h, dc)
58 |
59 | return dc.Client
60 | }
61 |
62 | func StopDaemon(c *rpc2.Client) {
63 | unregisterOnHub(c)
64 | }
65 |
66 | func startChecks(dc *models.DaemonClient, checkConfig *models.Checks) {
67 | tickExt := time.Tick(time.Duration(checkConfig.CheckInterval) * time.Millisecond)
68 | tickInt := time.Tick(3 * time.Second)
69 |
70 | log.Println("starting async checks")
71 |
72 | go func() {
73 | for {
74 | select {
75 | case <-dc.Quit:
76 | HandleChecksStopped(dc)
77 | return
78 | case <-tickInt:
79 | if checkConfig.MasterApiCheck {
80 | go func() {
81 | HandleCheckStarted(dc)
82 | err := checks.CheckMasterApis(checkConfig.MasterApiUrls)
83 | HandleCheckFinished(dc, err, models.MasterApiCheck)
84 | }()
85 | }
86 | if checkConfig.EtcdCheck && dc.Daemon.IsMaster() {
87 | go func() {
88 | HandleCheckStarted(dc)
89 | err := checks.CheckEtcdHealth(checkConfig.EtcdIps, checkConfig.EtcdCertPath)
90 | HandleCheckFinished(dc, err, models.EtcdHealth)
91 | }()
92 | }
93 | case <-tickExt:
94 | if checkConfig.DnsCheck {
95 | go func() {
96 | HandleCheckStarted(dc)
97 | err := checks.CheckDnsNslookupOnKubernetes()
98 | HandleCheckFinished(dc, err, models.DnsNslookupKubernetes)
99 | }()
100 |
101 | if dc.Daemon.IsNode() || dc.Daemon.IsMaster() {
102 | go func() {
103 | HandleCheckStarted(dc)
104 | err := checks.CheckDnsServiceNode()
105 | HandleCheckFinished(dc, err, models.DnsServiceNode)
106 | }()
107 | }
108 |
109 | if dc.Daemon.IsPod() {
110 | go func() {
111 | HandleCheckStarted(dc)
112 | err := checks.CheckDnsInPod()
113 | HandleCheckFinished(dc, err, models.DnsServicePod)
114 | }()
115 | }
116 | }
117 |
118 | if checkConfig.HttpChecks {
119 | if dc.Daemon.IsPod() && strings.HasSuffix(dc.Daemon.Namespace, "a") {
120 | go func() {
121 | HandleCheckStarted(dc)
122 | err := checks.CheckPodHttpAtoB()
123 | HandleCheckFinished(dc, err, models.HttpPodServiceAB)
124 | }()
125 | go func() {
126 | HandleCheckStarted(dc)
127 | err := checks.CheckPodHttpAtoC(false)
128 | HandleCheckFinished(dc, err, models.HttpPodServiceAC)
129 | }()
130 | go func() {
131 | HandleCheckStarted(dc)
132 | err := checks.CheckPodHttpAtoC(true)
133 | HandleCheckFinished(dc, err, models.HttpPodServiceAC)
134 | }()
135 | }
136 |
137 | if dc.Daemon.IsNode() || dc.Daemon.IsMaster() {
138 | go func() {
139 | HandleCheckStarted(dc)
140 | err := checks.CheckHttpService(false)
141 | HandleCheckFinished(dc, err, models.HttpServiceABC)
142 | }()
143 | go func() {
144 | HandleCheckStarted(dc)
145 | err := checks.CheckHttpService(true)
146 | HandleCheckFinished(dc, err, models.HttpServiceABC)
147 | }()
148 | }
149 |
150 | go func() {
151 | HandleCheckStarted(dc)
152 | err := checks.CheckHttpHaProxy(checkConfig.DaemonPublicUrl, false)
153 | HandleCheckFinished(dc, err, models.HttpHaProxy)
154 | }()
155 |
156 | go func() {
157 | HandleCheckStarted(dc)
158 | err := checks.CheckHttpHaProxy(checkConfig.DaemonPublicUrl, true)
159 | HandleCheckFinished(dc, err, models.HttpHaProxy)
160 | }()
161 | }
162 | }
163 | }
164 | }()
165 | }
166 |
167 | func stopChecks(dc *models.DaemonClient) {
168 | dc.Quit <- true
169 | }
170 |
--------------------------------------------------------------------------------
/daemon/client/checks/storage.go:
--------------------------------------------------------------------------------
1 | package checks
2 |
3 | import (
4 | "encoding/json"
5 | "errors"
6 | "fmt"
7 | "log"
8 | "os"
9 | "os/exec"
10 | "regexp"
11 | "strconv"
12 | "strings"
13 | "time"
14 | )
15 |
// CheckOpenFileCount reads the first field of /proc/sys/fs/file-nr through a
// shell pipeline and returns an error when that number is 200000 or higher.
//
// An error is also returned when the command fails or its output is not an
// integer.
func CheckOpenFileCount() error {
	const maxOpenFiles = 200000

	log.Println("Checking open files")

	raw, err := exec.Command("bash", "-c", "cat /proc/sys/fs/file-nr | cut -f1").Output()
	if err != nil {
		msg := "Could not evaluate open file count: " + err.Error()
		log.Println(msg)
		return errors.New(msg)
	}

	count, convErr := strconv.Atoi(strings.TrimSpace(string(raw)))
	if convErr != nil {
		return errors.New("Could not parse output to integer: " + string(raw))
	}

	if count >= maxOpenFiles {
		return errors.New("Open files are higher than 200'000 files!")
	}
	return nil
}
38 |
// CheckGlusterStatus runs "gstatus -o json" and returns an error unless the
// reported "status" field equals "healthy".
//
// When the first invocation fails with exit status 1, 12 or 16 the command is
// retried once after five seconds, because another gluster server may have run
// the same check at the same time. Any other failure, a failed retry, or
// output that cannot be decoded as JSON results in an error.
func CheckGlusterStatus() error {
	log.Println("Checking gluster status with gstatus")

	out, err := exec.Command("bash", "-c", "gstatus -o json").Output()
	if err != nil {
		if strings.Contains(err.Error(), "exit status 16") || strings.Contains(err.Error(), "exit status 1") || strings.Contains(err.Error(), "exit status 12") {
			// Other gluster server did the same check the same time
			// Try again 5 seconds
			time.Sleep(5 * time.Second)
			out, err = exec.Command("bash", "-c", "gstatus -o json").Output()
			if err != nil {
				msg := "Could not check gstatus output. Tryed 2 times. Error: " + err.Error()
				log.Println(msg)
				return errors.New(msg)
			}
		} else {
			msg := "Could not check gstatus output: " + err.Error()
			log.Println(msg)
			return errors.New(msg)
		}
	}

	// Sample output: a timestamp followed by the JSON document
	// 2017-03-27 12:34:17.626544 {"brick_count": 4, ..., "status": "healthy", ...}
	//
	// Locate the start of the JSON document instead of slicing at a fixed
	// offset, which panicked whenever the output was shorter than the
	// timestamp prefix.
	res := string(out)
	start := strings.Index(res, "{")
	if start < 0 {
		msg := "Error decoding gstatus output: " + res
		log.Println(msg)
		return errors.New(msg)
	}
	res = res[start:]

	var dat map[string]interface{}
	if err := json.Unmarshal([]byte(res), &dat); err != nil {
		msg := "Error decoding gstatus output: " + res
		log.Println(msg)
		return errors.New(msg)
	}

	if dat["status"] != "healthy" {
		return errors.New("Status of GlusterFS is not healthy")
	}

	return nil
}
78 |
79 | func CheckVGSizes(okSize int) error {
80 | log.Println("Checking VG free size")
81 |
82 | out, err := exec.Command("bash", "-c", "vgs -o vg_free,vg_size,vg_name --noheadings --units G | grep -v crash").Output()
83 | if err != nil {
84 | msg := "Could not evaluate VG sizes: " + err.Error()
85 | log.Println(msg)
86 | return errors.New(msg)
87 | }
88 |
89 | lines := strings.Split(string(out), "\n")
90 | for _, l := range lines {
91 | if len(l) > 0 {
92 | isOk := isVgSizeOk(l, okSize)
93 |
94 | log.Println("Checking VG size: ", l)
95 |
96 | if !isOk {
97 | return fmt.Errorf("VG free space is below: %v%% | (free,size,name) %v", strconv.Itoa(okSize), l)
98 | }
99 | }
100 | }
101 |
102 | return nil
103 | }
104 |
105 | func CheckLVPoolSizes(okSize int) error {
106 | log.Println("Checking LV pool used size")
107 |
108 | out, err := exec.Command("bash", "-c", "lvs -o data_percent,metadata_percent,LV_NAME --noheadings --units G --nosuffix | grep pool").Output()
109 | if err != nil {
110 | msg := "Could not evaluate LV pool size: " + err.Error()
111 | log.Println(msg)
112 | return errors.New(msg)
113 | }
114 |
115 | lines := strings.Split(string(out), "\n")
116 | for _, l := range lines {
117 | if len(l) > 0 {
118 | isOk := isLvsSizeOk(l, okSize)
119 |
120 | log.Println("Checking LV Pool: ", l)
121 |
122 | if !isOk {
123 | return fmt.Errorf("LV pool size is above: %v | %v", strconv.Itoa(okSize), l)
124 | }
125 | }
126 | }
127 |
128 | return nil
129 | }
130 |
131 | func CheckMountPointSizes(okSize int) error {
132 | mounts := os.Getenv("MOUNTPOINTS_TO_CHECK")
133 |
134 | if mounts == "" {
135 | return nil
136 | }
137 |
138 | mountList := strings.Split(mounts, ",")
139 |
140 | for _, m := range mountList {
141 | log.Printf("Checking free disk size of %v.", m)
142 |
143 | out, err := exec.Command("bash", "-c", "df --output=target,pcent | grep -w "+m).Output()
144 | if err != nil {
145 | msg := "Could not evaluate df of mount point: " + m + ". err: " + err.Error()
146 | log.Println(msg)
147 | return errors.New(msg)
148 | }
149 |
150 | // Example: /gluster/fast_registry 8%
151 | num := regexp.MustCompile(`\d+%`)
152 | usages := num.FindAllString(string(out), 1)
153 | log.Println(m, usages)
154 | if len(usages) != 1 {
155 | return errors.New("Could not parse output to integer: " + string(out))
156 | }
157 | usageInt, err := strconv.Atoi(strings.Replace(usages[0], "%", "", 1))
158 | if err != nil {
159 | return errors.New("Could not parse output to integer: " + string(out))
160 | }
161 |
162 | if usageInt > okSize {
163 | msg := fmt.Sprintf("Usage %% of volume %v is bigger than treshold. Is: %v%% - treshold: %v%%", m, usageInt, okSize)
164 | log.Println(msg)
165 | return errors.New(msg)
166 | }
167 | }
168 |
169 | return nil
170 | }
171 |
// CheckIfGlusterdIsRunning runs "systemctl status glusterd" and returns an
// error when the command fails or its output does not contain
// "active (running)".
func CheckIfGlusterdIsRunning() error {
	log.Print("Checking if glusterd is running")

	raw, err := exec.Command("bash", "-c", "systemctl status glusterd").Output()
	if err != nil {
		msg := "Could not run 'systemctl status glusterd'. err: " + err.Error()
		log.Println(msg)
		return errors.New(msg)
	}

	status := string(raw)
	if strings.Contains(status, "active (running)") {
		return nil
	}

	return fmt.Errorf("Glusterd seems not to be running! Output: %v", status)
}
188 |
--------------------------------------------------------------------------------
/daemon/client/checks/common.go:
--------------------------------------------------------------------------------
1 | package checks
2 |
3 | import (
4 | "crypto/tls"
5 | "errors"
6 | "fmt"
7 | "log"
8 | "net"
9 | "net/http"
10 | "os"
11 | "os/exec"
12 | "regexp"
13 | "strconv"
14 | "strings"
15 | )
16 |
17 | const (
18 | daemonDNSEndpoint = "daemon.ose-mon-a.endpoints.cluster.local"
19 | daemonDNSServiceA = "daemon.ose-mon-a.svc.cluster.local"
20 | daemonDNSServiceB = "daemon.ose-mon-b.svc.cluster.local"
21 | daemonDNSServiceC = "daemon.ose-mon-c.svc.cluster.local"
22 | daemonDNSPod = "daemon"
23 | kubernetesIP = "172.30.0.1"
24 | )
25 |
26 | var num = regexp.MustCompile(`\d+(?:\.\d+)?`)
27 |
28 | func CheckExternalSystem(url string) error {
29 | if err := checkHttp(url); err != nil {
30 | msg := "Call to " + url + " failed"
31 | log.Println(msg)
32 | return errors.New(msg)
33 | }
34 |
35 | return nil
36 | }
37 |
38 | func CheckChrony() error {
39 | log.Println("Checking output of 'chronyc tracking'")
40 |
41 | out, err := exec.Command("bash", "-c", "chronyc tracking").Output()
42 | if err != nil {
43 | msg := "Could not check chrony status: " + err.Error()
44 | log.Println(msg)
45 | return errors.New(msg)
46 | }
47 |
48 | offset, err := parseChronyOffset(string(out))
49 |
50 | if offset < -0.1 || offset > 0.1 { // 100 Millisekunden
51 | return errors.New("Time is not correct on the server or chrony is not running")
52 | } else {
53 | return nil
54 | }
55 | }
56 |
57 | func parseChronyOffset(out string) (float64, error) {
58 | for _, line := range strings.Split(string(out), "\n") {
59 | if strings.Contains(line, "Last offset") {
60 | // Example output
61 | // Reference ID : 0A7CD814 (some-ntp-server)
62 | // Stratum : 2
63 | // Ref time (UTC) : Thu May 31 13:41:40 2018
64 | // System time : 0.000037743 seconds fast of NTP time
65 | // Last offset : +0.000061081 seconds <--- SECONDS
66 | // RMS offset : 0.000333012 seconds
67 | // Frequency : 6.629 ppm fast
68 | // Residual freq : +0.004 ppm
69 | // Skew : 0.140 ppm
70 | // Root delay : 0.002649408 seconds
71 | // Root dispersion : 0.000559144 seconds
72 | // Update interval : 517.4 seconds
73 | // Leap status : Normal
74 | rgx := regexp.MustCompile("(.*offset\\s+:\\s+)(.*?)\\s+seconds")
75 | offset := rgx.FindStringSubmatch(line)
76 |
77 | log.Println("Found chrony offset:", offset[2])
78 | out, err := strconv.ParseFloat(offset[2], 64)
79 | if err != nil {
80 | return -1000, fmt.Errorf("couldn't parse chrony offset. Value was %v", offset[2])
81 | }
82 | return out, nil
83 | }
84 | }
85 | return -1000, fmt.Errorf("couldn't parse chrony offset. Offset line was not found.")
86 | }
87 |
88 | func CheckNtpd() error {
89 | log.Println("Checking output of 'ntpq -c rv 0 offset'")
90 |
91 | out, err := exec.Command("bash", "-c", "ntpq -c rv 0 offset").Output()
92 | if err != nil {
93 | msg := "Could not check ntpd status: " + err.Error()
94 | log.Println(msg)
95 | return errors.New(msg)
96 | }
97 |
98 | offset, err := parseNTPOffsetFromNTPD(string(out))
99 |
100 | if offset < -100 || offset > 100 {
101 | return errors.New("Time is not correct on the server or ntpd is not running")
102 | } else {
103 | return nil
104 | }
105 | }
106 |
107 | func parseNTPOffsetFromNTPD(out string) (float64, error) {
108 | for _, l := range strings.Split(string(out), "\n") {
109 | if strings.Contains(l, "offset") {
110 | // Example output
111 | // mintc=3, offset=0.400, frequency=-4.546, sys_jitter=1.015,
112 | // tc=10, mintc=3, offset=-0.648, frequency=3.934, sys_jitter=0.253,
113 | rgx := regexp.MustCompile("(.*offset=)(.*?),")
114 | offset := rgx.FindStringSubmatch(l)
115 |
116 | log.Println("Found ntpd offset:", offset[2])
117 | out, err := strconv.ParseFloat(offset[2], 64)
118 | if err != nil {
119 | return -1000, fmt.Errorf("couldn't parse ntp offset. Value was %v", offset[2])
120 | }
121 | return out, nil
122 | }
123 | }
124 | return -1000, fmt.Errorf("couldn't parse ntp offset. Offset line was not found.")
125 | }
126 |
// getIpsForName resolves the host name n with net.LookupIP and returns the
// resulting addresses. A failed lookup is logged and yields nil.
func getIpsForName(n string) []net.IP {
	addrs, err := net.LookupIP(n)
	if err == nil {
		return addrs
	}

	log.Println("failed to lookup ip for name ", n)
	return nil
}
135 |
136 | func getEnv(key, fallback string) string {
137 | if value, ok := os.LookupEnv(key); ok {
138 | return value
139 | }
140 | return fallback
141 | }
142 |
// checkHttp performs a GET request against toCall and returns the transport
// error, if any. The response status code is not inspected; only a failure to
// complete the request counts as an error.
//
// URLs starting with "https" are requested without certificate verification.
// The dedicated transport built for those requests disables keep-alives: it is
// discarded after a single call, so pooled connections would never be reused
// and would only linger.
func checkHttp(toCall string) error {
	log.Println("Checking access to:", toCall)

	client := http.DefaultClient
	if strings.HasPrefix(toCall, "https") {
		client = &http.Client{Transport: &http.Transport{
			TLSClientConfig:   &tls.Config{InsecureSkipVerify: true},
			DisableKeepAlives: true,
		}}
	}

	resp, err := client.Get(toCall)
	if err != nil {
		log.Println("error in http check: ", err)
		return err
	}
	resp.Body.Close()
	return nil
}
169 |
// getEndpoint maps the slow flag to the endpoint name: "slow" when slow is
// true, "fast" otherwise.
func getEndpoint(slow bool) string {
	endpoint := "fast"
	if slow {
		endpoint = "slow"
	}
	return endpoint
}
177 |
178 | // isVgSizeOk returns true if vgs output in stdOut indicates that the volume
179 | // group free space is equal or above the percentage treshold okSize, which is
180 | // expected to be in the range [0, 100].
181 | func isVgSizeOk(stdOut string, okSize int) bool {
182 | // Example
183 | // 5.37 26.84 vg_fast_registry
184 | // 5.37 26.84 vg_slow
185 | nums := num.FindAllString(stdOut, 2)
186 |
187 | if len(nums) != 2 {
188 | log.Println("Unable to parse vgs output:", stdOut)
189 | return false
190 | }
191 |
192 | free, err := strconv.ParseFloat(nums[0], 64)
193 | if err != nil {
194 | log.Println("Unable to parse first digit of output", stdOut)
195 | return false
196 | }
197 | size, err := strconv.ParseFloat(nums[1], 64)
198 | if err != nil {
199 | log.Println("Unable to parse second digit of output", stdOut)
200 | return false
201 | }
202 |
203 | // calculate usage
204 | if 100/size*free < float64(okSize) {
205 | msg := fmt.Sprintf("VG free size is below treshold. Size: %v, free: %v, treshold: %v %%", size, free, okSize)
206 | log.Println(msg)
207 | return false
208 | }
209 |
210 | return true
211 | }
212 |
213 | // isLvsSizeOk returns true if lvs output in stdOut indicates that the logical
214 | // volume percentage full for data and metadata are both below the threshold
215 | // okSize, which is expected to be in the range [0, 100].
216 | func isLvsSizeOk(stdOut string, okSize int) bool {
217 | // Examples
218 | // 42.10 8.86 docker-pool
219 | // 13.63 8.93 lv_fast_registry_pool
220 | checksOk := 0
221 | for _, nr := range num.FindAllString(stdOut, -1) {
222 | i, err := strconv.ParseFloat(nr, 64)
223 | if err != nil {
224 | log.Print("Unable to parse int:", nr)
225 | return false
226 | }
227 |
228 | if i < float64(okSize) {
229 | checksOk++
230 | } else {
231 | log.Println("LVM pool size exceeded okSize:", i)
232 | }
233 | }
234 |
235 | return checksOk == 2
236 | }
237 |
--------------------------------------------------------------------------------
/hub/server/hub.go:
--------------------------------------------------------------------------------
1 | package server
2 |
3 | import (
4 | "github.com/cenkalti/rpc2"
5 | "github.com/mitchellh/mapstructure"
6 | "github.com/oscp/openshift-monitoring/models"
7 | "log"
8 | "net"
9 | "time"
10 | )
11 |
12 | type Hub struct {
13 | hubAddr string
14 | daemons map[string]*models.DaemonClient
15 | currentChecks models.Checks
16 | result models.Results
17 | startChecks chan models.Checks
18 | stopChecks chan bool
19 | ResetStats chan bool
20 | toUi chan models.BaseModel
21 | updateStats bool
22 |
23 | // Temp values between ticks
24 | successfulSinceTick int
25 | failedSinceTick int
26 | }
27 |
28 | func NewHub(hubAddr string, masterApiUrls string, daemonPublicUrl string,
29 | etcdIps string, etcdCertPath string) *Hub {
30 |
31 | return &Hub{
32 | hubAddr: hubAddr,
33 | daemons: make(map[string]*models.DaemonClient),
34 | startChecks: make(chan models.Checks),
35 | stopChecks: make(chan bool),
36 | ResetStats: make(chan bool),
37 | toUi: make(chan models.BaseModel, 1000),
38 | updateStats: false,
39 | result: models.Results{
40 | SuccessfulChecksByType: make(map[string]int),
41 | FailedChecksByType: make(map[string]int),
42 | Ticks: []models.Tick{},
43 | Errors: []models.Failures{},
44 | },
45 | currentChecks: models.Checks{
46 | CheckInterval: 5000,
47 | MasterApiUrls: masterApiUrls,
48 | DaemonPublicUrl: daemonPublicUrl,
49 | MasterApiCheck: true,
50 | HttpChecks: true,
51 | DnsCheck: true,
52 | EtcdCheck: true,
53 | EtcdIps: etcdIps,
54 | EtcdCertPath: etcdCertPath,
55 | IsRunning: false},
56 | }
57 | }
58 |
59 | func (h *Hub) Daemons() []models.Daemon {
60 | r := []models.Daemon{}
61 | for _, d := range h.daemons {
62 | r = append(r, d.Daemon)
63 | }
64 | return r
65 | }
66 |
67 | func (h *Hub) Serve() {
68 | statsTicker := time.NewTicker(1 * time.Second)
69 | toUITicker := time.NewTicker(1 * time.Second)
70 |
71 | // Handle stats
72 | go func() {
73 | for {
74 | select {
75 |
76 | case <-h.ResetStats:
77 | h.resetStats()
78 | break
79 |
80 | case <-toUITicker.C:
81 | // Update checkresults & daemons
82 | h.toUi <- models.BaseModel{Type: models.CheckResults, Message: h.result}
83 | h.toUi <- models.BaseModel{Type: models.AllDaemons, Message: h.Daemons()}
84 | break
85 |
86 | case <-statsTicker.C:
87 | h.aggregateStats()
88 | break
89 |
90 | case checks := <-h.startChecks:
91 | h.updateStats = true
92 | for _, d := range h.daemons {
93 | if err := d.Client.Call("startChecks", checks, nil); err != nil {
94 | log.Println("error starting checks on daemon", err)
95 | }
96 | }
97 | break
98 |
99 | case stop := <-h.stopChecks:
100 | if stop {
101 | h.updateStats = false
102 | for _, d := range h.daemons {
103 | if err := d.Client.Call("stopChecks", stop, nil); err != nil {
104 | log.Println("error stopping checks on daemon", err)
105 | }
106 | }
107 | }
108 | break
109 | }
110 | }
111 | }()
112 |
113 | // Create rpc server for communication with clients
114 | srv := rpc2.NewServer()
115 | srv.Handle("register", func(c *rpc2.Client, d *models.Daemon, reply *string) error {
116 | h.AddDaemon(d, c)
117 | *reply = "ok"
118 | return nil
119 | })
120 | srv.Handle("unregister", func(cl *rpc2.Client, host *string, reply *string) error {
121 | h.RemoveDaemon(*host)
122 | *reply = "ok"
123 | return nil
124 | })
125 | srv.Handle("updateCheckcount", func(cl *rpc2.Client, d *models.Daemon, reply *string) error {
126 | h.daemons[d.Hostname].Daemon = *d
127 | *reply = "ok"
128 | return nil
129 | })
130 | srv.Handle("checkResult", func(cl *rpc2.Client, r *models.CheckResult, reply *string) error {
131 | go h.handleCheckResult(r)
132 | *reply = "ok"
133 | return nil
134 | })
135 | lis, err := net.Listen("tcp", h.hubAddr)
136 | srv.Accept(lis)
137 | if err != nil {
138 | log.Fatalf("Cannot start rpc2 server: %s", err)
139 | }
140 | }
141 |
142 | func (h *Hub) RemoveDaemon(host string) {
143 | log.Println("daemon left: ", host)
144 | delete(h.daemons, host)
145 |
146 | h.toUi <- models.BaseModel{Type: models.DaemonLeft, Message: host}
147 | }
148 |
149 | func (h *Hub) AddDaemon(d *models.Daemon, c *rpc2.Client) {
150 | log.Println("new daemon joined:", d)
151 |
152 | h.daemons[d.Hostname] = &models.DaemonClient{Client: c, Daemon: *d}
153 |
154 | if h.currentChecks.IsRunning {
155 | // Tell the new daemon to join the checks
156 | if err := c.Call("startChecks", h.currentChecks, nil); err != nil {
157 | log.Println("error starting checks on newly joined daemon", err)
158 | }
159 | }
160 |
161 | h.toUi <- models.BaseModel{Type: models.NewDaemon, Message: d.Hostname}
162 | }
163 |
164 | func (h *Hub) StopChecks() models.BaseModel {
165 | // Save current state & tell daemons
166 | h.currentChecks.IsRunning = false
167 | h.stopChecks <- true
168 |
169 | // Return ok to UI
170 | return models.BaseModel{Type: models.CurrentChecks, Message: h.currentChecks}
171 | }
172 |
173 | func (h *Hub) StartChecks(msg interface{}) models.BaseModel {
174 | checks := getChecksStruct(msg)
175 |
176 | // Save current state & tell daemons
177 | checks.IsRunning = true
178 | h.currentChecks = checks
179 | h.startChecks <- checks
180 |
181 | // Return ok to UI
182 | return models.BaseModel{Type: models.CurrentChecks, Message: checks}
183 | }
184 |
185 | func getChecksStruct(msg interface{}) models.Checks {
186 | var checks models.Checks
187 | err := mapstructure.Decode(msg, &checks)
188 | if err != nil {
189 | log.Println("error decoding checks", err)
190 | }
191 | return checks
192 | }
193 |
194 | func (h *Hub) handleCheckResult(r *models.CheckResult) {
195 | // Write values from check result to temp values
196 | if r.IsOk {
197 | h.result.SuccessfulChecks++
198 | h.result.SuccessfulChecksByType[r.Type]++
199 | h.successfulSinceTick++
200 | } else {
201 | h.result.FailedChecks++
202 | h.result.FailedChecksByType[r.Type]++
203 | h.failedSinceTick++
204 |
205 | h.result.Errors = append(h.result.Errors, models.Failures{
206 | Date: time.Now(),
207 | Type: r.Type,
208 | Hostname: r.Hostname,
209 | Message: r.Message,
210 | })
211 | }
212 | }
213 |
214 | func (h *Hub) aggregateStats() {
215 | // Update global fields
216 | h.result.StartedChecks = 0
217 | h.result.FinishedChecks = 0
218 | for _, d := range h.daemons {
219 | h.result.StartedChecks += d.Daemon.StartedChecks
220 | h.result.FinishedChecks += d.Daemon.SuccessfulChecks + d.Daemon.FailedChecks
221 | }
222 |
223 | if h.failedSinceTick > 0 || h.successfulSinceTick > 0 {
224 | // Create a new tick out of temp values since last tick
225 | h.result.Ticks = append(h.result.Ticks, models.Tick{
226 | FailedChecks: h.failedSinceTick,
227 | SuccessfulChecks: h.successfulSinceTick,
228 | })
229 |
230 | // Prepare for next tick
231 | h.failedSinceTick = 0
232 | h.successfulSinceTick = 0
233 | }
234 | }
235 |
236 | func (h *Hub) resetStats() {
237 | log.Println("resetting stats and current results.")
238 | h.result = models.Results{
239 | SuccessfulChecksByType: make(map[string]int),
240 | FailedChecksByType: make(map[string]int),
241 | Ticks: []models.Tick{},
242 | Errors: []models.Failures{},
243 | }
244 |
245 | for _, d := range h.daemons {
246 | d.Daemon.SuccessfulChecks = 0
247 | d.Daemon.FailedChecks = 0
248 | d.Daemon.StartedChecks = 0
249 | }
250 | }
251 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # General idea
2 | We at [@SchweizerischeBundesbahnen](https://github.com/SchweizerischeBundesbahnen) have lots of productive apps running in our OpenShift environment. So we try really hard to avoid any downtime.
3 | So we test new things (versions, configuration and so on) in our test environment. Because our test environment runs far fewer pods and much less traffic, we created this tool to check all important OpenShift components under pressure, especially during a change.
4 |
5 | Furthermore the daemon now also has a standalone mode. It runs checks based on a http call. So you can monitor all those things from an external monitoring system.
6 |
7 | # Screenshot
8 | 
9 |
10 | # Components
11 | - UI: The UI to control everything
12 | - Hub: The backend of the UI and the daemons
13 | - Daemon: Deploy them as DaemonSet & manually on master & nodes
14 |
15 | # Modes & Daemon Types
16 | ### Modes
17 | - HUB = Use the hub as control instance. Hub triggers checks on daemons asynchronously
18 | - STANDALONE = Daemon runs on its own and exposes a webserver to run the checks
19 |
20 | ### Daemon-Types
21 | - NODE = On a Node as systemd-service
22 | - MASTER = On a master as systemd-service
23 | - STORAGE = On glusterfs server as systemd-service
24 | - POD = Runs inside a docker container
25 |
26 | # Checks
27 | ### Hub mode
28 | | TYPE | CHECK |
29 | |--------|----------------------------------|
30 | | MASTER | Master-API check |
31 | | MASTER | ETCD health check |
32 | | MASTER | DNS via kubernetes |
33 | | MASTER | DNS via dnsmasq |
34 | | MASTER | HTTP check via service |
35 | | MASTER | HTTP check via ha-proxy |
36 | | NODE | Master-API check |
37 | | NODE | DNS via kubernetes |
38 | | NODE | DNS via dnsmasq |
39 | | NODE | HTTP check via service |
40 | | NODE | HTTP check via ha-proxy |
41 | | POD | Master-API check |
42 | | POD | DNS via kubernetes |
43 | | POD | DNS via Node > dnsmasq |
44 | | POD | SDN over http via service check |
45 | | POD | SDN over http via ha-proxy check |
46 |
47 | ### Standalone mode
48 | | TYPE | URL | CHECK |
49 | |---------|---------------|---------------------------------------------------------|
50 | | ALL | /fast | Fast endpoint for http-ping |
51 | | ALL | /slow | Slow endpoint for slow http-ping |
52 | | NODE | /checks/minor | Checks if the dockerpool is > 80% |
53 | | | | Checks ntpd synchronization status |
54 | | | | Checks if http access via service is ok |
55 | | NODE | /checks/major | Checks if the dockerpool is > 90% |
56 | | | | Check if dns is ok via kubernetes & dnsmasq |
57 | | MASTER | /checks/minor | Checks ntpd synchronization status |
58 | | | | Checks if external system is reachable |
59 | | | | Checks if hawkular is healthy |
60 | | | | Checks if ha-proxy has a high restart count |
61 | | | | Checks if all projects have limits & quotas |
62 | | | | Checks if logging pods are healthy |
63 | | | | Checks if http access via service is ok |
64 | | MASTER | /checks/major | Checks if output of 'oc get nodes' is fine |
65 | | | | Checks if etcd cluster is healthy |
66 | | | | Checks if docker registry is healthy |
67 | | | | Checks if all routers are healthy |
68 | | | | Checks if local master api is healthy |
69 | | | | Check if dns is ok via kubernetes & dnsmasq |
70 | | STORAGE | /checks/minor | Checks if open-files count is higher than 200'000 files |
71 | | | | Checks every lvs-pool size. Is the value above 80%? |
72 | | | | Checks every VG has at least 10% free storage |
73 | | | | Checks if every specified mount path has at least 15% free storage |
74 | | STORAGE | /checks/major | Checks if output of gstatus is 'healthy' |
75 | | | | Checks every lvs-pool size. Is the value above 90%? |
76 | | | | Checks every VG has at least 5% free storage |
77 | | | | Checks if every specified mount path has at least 10% free storage |
78 |
79 | # Config parameters
80 | ## Hub
81 | **NAME**|**DESCRIPTION**|**EXAMPLE**
82 | -----|-----|-----
83 | UI\_ADDR|The address & port where the UI should be hosted|10.10.10.1:80
84 | RPC\_ADDR|The address & port where the hub should be hosted|10.10.10.1:2600
85 | MASTER\_API\_URLS|Names or IPs of your masters with the API port|https://master1:8443
86 | DAEMON\_PUBLIC\_URL|Public url of your daemon|http://daemon.yourdefault.route.com
87 | ETCD\_IPS|Names or IPs where to call your etcd hosts|https://localhost:2379
88 | ETCD\_CERT\_PATH|Optional config of alternative etcd certificates path. This is used during certificate renew process of OpenShift to do checks with the old certificates. If this fails the default path will be checked as well|/etc/etcd/old/
89 |
90 | ## Daemon
91 | #### Hub mode
92 | **NAME**|**DESCRIPTION**|**EXAMPLE**
93 | -----|-----|-----
94 | HUB\_ADDRESS|Address & port of the hub|localhost:2600
95 | DAEMON\_TYPE|Type of the daemon out of [MASTER, NODE, POD]|NODE
96 | POD\_NAMESPACE|The namespace if the daemon runs inside a pod in OpenShift|ose-mon-a
97 |
98 | #### Standalone mode
99 | **NAME**|**DAEMON TYPE**|**DESCRIPTION**|**EXAMPLE**
100 | -----|-----|-----|-----
101 | WITH\_HUB|ALL|Disable communication with hub|false
102 | DAEMON\_TYPE|ALL|Type of the daemon out of [MASTER, NODE, STORAGE, POD]|NODE
103 | SERVER\_ADDRESS|ALL|The address & port where the webserver runs|localhost:2600
104 | POD\_NAMESPACE|NODE|The namespace if the daemon runs inside a pod in OpenShift|ose-mon-a
105 | EXTERNAL\_SYSTEM\_URL|MASTER|URL of an external system to call via http to check external connection|www.google.ch
106 | HAWCULAR\_SVC\_IP|MASTER|IP of the hawkular service|10.10.10.1
107 | ETCD\_IPS|MASTER|Ips of the etcd hosts with protocol & port|https://192.168.125.241:2379,https://192.168.125.244:2379
108 | REGISTRY\_SVC\_IP|MASTER|Ip of the registry service|10.10.10.1
109 | ROUTER\_IPS|MASTER|Ips of the routers services|10.10.10.1,10.10.10.2
110 | PROJECTS\_WITHOUT\_LIMITS|MASTER|Number of system projects that have no limits |4
111 | PROJECTS\_WITHOUT\_QUOTA|MASTER|Number of system projects that have no quotas |4
112 | IS\_GLUSTER\_SERVER|STORAGE|Boolean value indicating whether the node is a gluster server|true/false
113 | MOUNTPOINTS\_TO\_CHECK|A list of mount points where free size should be checked|/gluster/registry/,/gluster/xxx
114 | CHECK\_CERTIFICATE\_URLS|A list of urls to check for validity of certificate|https://master-ip:8443
115 | CHECK\_CERTIFICATE\_PATHS|A list of paths to check for validity of certificates. Filter is *.crt|/etc/origin/master,/etc/origin/node
116 |
117 |
118 | # Installation
119 | ### OpenShift
120 | ```bash
121 | oc new-project ose-mon-a
122 | oc new-project ose-mon-b
123 | oc new-project ose-mon-c
124 |
125 | # Join projects a <> c
126 | oc adm pod-network join-projects --to=ose-mon-a ose-mon-c
127 |
128 | # Use the template install/ose-mon-template.yaml
129 | # Do this for each project a,b,c
130 | oc project ose-mon-a
131 |
132 | # HUB-Mode: IMAGE_SPEC = If you want to use our image use "oscp/openshift-monitoring:version"
133 | oc process -f ose-mon-template.yaml -p DAEMON_PUBLIC_ROUTE=xxx,DS_HUB_ADDRESS=xxx,IMAGE_SPEC=xxx | oc create -f -
134 |
135 | # Standalone-Mode:
136 | oc process -f ose-mon-standalone-template.yaml -p DAEMON_PUBLIC_ROUTE=daemon-ose-mon-b.your-route.com IMAGE_SPEC=oscp/openshift-monitoring:xxxx | oc create -f -
137 | ```
138 |
139 | ### Master nodes
140 | ```bash
141 | mkdir -p /opt/ose-mon
142 |
143 | # Download and unpack from releases or build it yourself (https://github.com/oscp/openshift-monitoring/releases)
144 |
145 | chmod +x /opt/ose-mon/hub /opt/ose-mon/daemon
146 |
147 | # Add your params to the service definition files
148 | cp /opt/ose-mon/ose-mon-hub.service /etc/systemd/system/ose-mon-hub.service
149 | cp /opt/ose-mon/ose-mon-daemon.service /etc/systemd/system/ose-mon-daemon.service
150 |
151 | systemctl start ose-mon-hub.service
152 | systemctl enable ose-mon-hub.service
153 |
154 | systemctl start ose-mon-daemon.service
155 | systemctl enable ose-mon-daemon.service
156 | ```
157 |
158 | ### Install the UI
159 | ```bash
160 | cd /opt/ose-mon
161 | mkdir static
162 |
163 | # The UI is included in the download above
164 | ```
165 |
166 | ### Worker / storage nodes
167 | - Do the same as above, just without the hub
168 |
--------------------------------------------------------------------------------
/daemon/client/checks/certificates.go:
--------------------------------------------------------------------------------
1 | package checks
2 |
3 | import (
4 | "crypto/tls"
5 | "crypto/x509"
6 | "encoding/base64"
7 | "encoding/pem"
8 | "errors"
9 | "fmt"
10 | "gopkg.in/yaml.v2"
11 | "io/ioutil"
12 | "log"
13 | "net/http"
14 | "os"
15 | "path/filepath"
16 | "time"
17 | )
18 |
// Cert is one certificate that is about to expire (or already has).
type Cert struct {
	// File is the path of the file the certificate was read from.
	File string
	// DaysLeft is the number of whole days until the certificate's NotAfter date.
	DaysLeft int
}
23 |
// KubeConfig mirrors the parts of a kubeconfig YAML document that are needed
// to find the embedded certificates. Certificate and key data are stored in
// the file as base64 strings and are kept in that form here.
type KubeConfig struct {
	APIVersion string `yaml:"apiVersion"`

	// Clusters lists the known clusters with their CA data and API server.
	Clusters []struct {
		Cluster struct {
			CertificateAuthorityData string `yaml:"certificate-authority-data"`
			Server                   string `yaml:"server"`
		} `yaml:"cluster"`
		Name string `yaml:"name"`
	} `yaml:"clusters"`

	// Contexts ties a cluster, a namespace and a user together under a name.
	Contexts []struct {
		Context struct {
			Cluster   string `yaml:"cluster"`
			Namespace string `yaml:"namespace"`
			User      string `yaml:"user"`
		} `yaml:"context"`
		Name string `yaml:"name"`
	} `yaml:"contexts"`

	CurrentContext string `yaml:"current-context"`
	Kind           string `yaml:"kind"`

	// Preferences carries no fields; it only maps the "preferences" key.
	Preferences struct {
	} `yaml:"preferences"`

	// Users lists the credentials (client certificate and key) per user.
	Users []struct {
		Name string `yaml:"name"`
		User struct {
			ClientCertificateData string `yaml:"client-certificate-data"`
			ClientKeyData         string `yaml:"client-key-data"`
		} `yaml:"user"`
	} `yaml:"users"`
}
53 |
54 | func decodeCertBlocks(data []byte) []*pem.Block {
55 | var blocks []*pem.Block
56 | block, rest := pem.Decode([]byte(data))
57 |
58 | if block != nil {
59 | blocks = append(blocks, block)
60 | }
61 |
62 | if len(rest) > 0 {
63 | return append(blocks, decodeCertBlocks(rest)...)
64 | } else {
65 | return blocks
66 | }
67 | }
68 |
69 | func getKubeFiles(paths []string) (error, []string) {
70 | var kubeFiles []string
71 |
72 | for _, path := range paths {
73 | file, err := os.Stat(path)
74 | if os.IsNotExist(err) {
75 | log.Printf("Path %s does not exist.", path)
76 | continue
77 | }
78 |
79 | if file.IsDir() {
80 | files, err := ioutil.ReadDir(path)
81 | if err != nil {
82 | msg := fmt.Sprintf("could not read directory %s (%s)", path, err.Error())
83 | log.Println(msg)
84 | return errors.New(msg), nil
85 | }
86 |
87 | for _, file := range files {
88 | if file.IsDir() || filepath.Ext(file.Name()) != ".kubeconfig" {
89 | continue
90 | }
91 |
92 | kubeFiles = append(kubeFiles, filepath.Join(path, file.Name()))
93 | }
94 | } else {
95 | kubeFiles = append(kubeFiles, path)
96 | }
97 | }
98 |
99 | return nil, kubeFiles
100 | }
101 |
102 | func CheckKubeSslCertificates(kubePaths []string, days int) error {
103 | log.Printf("Checking expiry date for SSL certificates (%d days) in kube config files.", days)
104 |
105 | var certErrorList []string
106 |
107 | err, kubeFiles := getKubeFiles(kubePaths)
108 | if err != nil {
109 | return errors.New(fmt.Sprintf("could not get kube files (%s)", err.Error()))
110 | }
111 |
112 | for _, kubeFile := range kubeFiles {
113 |
114 | data, err := ioutil.ReadFile(kubeFile)
115 | if err != nil {
116 | msg := fmt.Sprintf("could not read file %s", kubeFile)
117 | log.Println(msg)
118 | return errors.New(msg)
119 | }
120 |
121 | var kubeConfig KubeConfig
122 |
123 | err = yaml.Unmarshal(data, &kubeConfig)
124 | if err != nil {
125 | msg := fmt.Sprintf("unmarshalling %s failed (%s)", kubeFile, err.Error())
126 | log.Println(msg)
127 | return errors.New(msg)
128 | }
129 |
130 | for _, cluster := range kubeConfig.Clusters {
131 | if len(cluster.Cluster.CertificateAuthorityData) > 0 {
132 |
133 | certBytes, err := base64.StdEncoding.DecodeString(cluster.Cluster.CertificateAuthorityData)
134 | if err != nil {
135 | msg := fmt.Sprintf("can't base64 decode cert (%s)", err.Error())
136 | log.Println(msg)
137 | return errors.New(msg)
138 | }
139 |
140 | block, _ := pem.Decode(certBytes)
141 |
142 | cert, err := x509.ParseCertificate(block.Bytes)
143 | if err != nil {
144 | msg := fmt.Sprintf("certificate parsing error (%s)", err.Error())
145 | log.Println(msg)
146 | return errors.New(msg)
147 | }
148 |
149 | daysLeft := cert.NotAfter.Sub(time.Now()).Hours() / 24
150 |
151 | if int(daysLeft) <= days {
152 | msg := fmt.Sprintf("%s from %s expires in %d days", cert.Subject, kubeFile, int(daysLeft))
153 | log.Println(msg)
154 | certErrorList = append(certErrorList, msg)
155 | }
156 | }
157 | }
158 |
159 | for _, user := range kubeConfig.Users {
160 | if len(user.User.ClientCertificateData) > 0 {
161 |
162 | certBytes, _ := base64.StdEncoding.DecodeString(user.User.ClientCertificateData)
163 | block, _ := pem.Decode(certBytes)
164 |
165 | cert, err := x509.ParseCertificate(block.Bytes)
166 |
167 | if err != nil {
168 | msg := fmt.Sprintf("certificate parsing error (%s)", err.Error())
169 | log.Println(msg)
170 | return errors.New(msg)
171 | }
172 |
173 | daysLeft := cert.NotAfter.Sub(time.Now()).Hours() / 24
174 |
175 | if int(daysLeft) <= days {
176 | msg := fmt.Sprintf("%s from %s expires in %d days", cert.Subject, kubeFile, int(daysLeft))
177 | log.Println(msg)
178 | certErrorList = append(certErrorList, msg)
179 | }
180 | }
181 | }
182 | }
183 |
184 | if len(certErrorList) > 0 {
185 | var errorMessage string
186 | for _, msg := range certErrorList {
187 | errorMessage = errorMessage + msg + " "
188 | }
189 | return errors.New(errorMessage)
190 | }
191 |
192 | return nil
193 | }
194 |
// CheckUrlSslCertificates connects to every URL in urls and checks the
// certificates presented by the server for upcoming expiry.
//
// Certificate verification is switched off on purpose: the goal is to read
// the expiry date even of certificates that are self-signed or already
// invalid. Every peer certificate with days or fewer days left is collected
// and reported in one combined error. A request that cannot be created or
// sent aborts the check with an error. nil is returned when no certificate
// expires within the window.
//
// NOTE(review): the client has no timeout, so an unresponsive endpoint blocks
// the check; consider adding one if callers do not guard against this.
func CheckUrlSslCertificates(urls []string, days int) error {
	log.Printf("Checking expiry date for SSL certificates (%d days) via urls.", days)

	var certErrorList []string

	// One transport for all URLs. Creating a transport per request leaves its
	// idle connections open with nobody to close them.
	tr := &http.Transport{
		TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
	}
	defer tr.CloseIdleConnections()

	hc := &http.Client{Transport: tr}

	for _, url := range urls {
		req, err := http.NewRequest("GET", url, nil)
		if err != nil {
			msg := fmt.Sprintf("creating request failed for %s (%s)", url, err.Error())
			log.Println(msg)
			return errors.New(msg)
		}

		resp, err := hc.Do(req)
		if err != nil {
			msg := fmt.Sprintf("get request failed for %s (%s)", url, err.Error())
			log.Println(msg)
			return errors.New(msg)
		}

		// resp.TLS is nil for plain http responses.
		if resp.TLS != nil {
			for _, cert := range resp.TLS.PeerCertificates {
				daysLeft := int(cert.NotAfter.Sub(time.Now()).Hours() / 24)

				if daysLeft <= days {
					msg := fmt.Sprintf("%s from %s expires in %d days", cert.Subject, url, daysLeft)
					log.Println(msg)
					certErrorList = append(certErrorList, msg)
				}
			}
		}

		resp.Body.Close()
	}

	if len(certErrorList) > 0 {
		var errorMessage string
		for _, msg := range certErrorList {
			errorMessage = errorMessage + msg + " "
		}
		return errors.New(errorMessage)
	}

	return nil
}
247 |
248 | func getCertFiles(filePaths []string) (error, []string) {
249 | var certFiles []string
250 |
251 | for _, path := range filePaths {
252 | file, err := os.Stat(path)
253 | if os.IsNotExist(err) {
254 | log.Printf("Path %s does not exist.", path)
255 | continue
256 | }
257 |
258 | if file.IsDir() {
259 |
260 | files, err := ioutil.ReadDir(path)
261 | if err != nil {
262 | msg := fmt.Sprintf("could not read directory %s (%s)", path, err.Error())
263 | log.Println(msg)
264 | return errors.New(msg), nil
265 | }
266 |
267 | for _, file := range files {
268 | if file.IsDir() || filepath.Ext(file.Name()) != ".crt" {
269 | continue
270 | }
271 |
272 | certFiles = append(certFiles, filepath.Join(path, file.Name()))
273 | }
274 | } else {
275 | certFiles = append(certFiles, path)
276 | }
277 | }
278 |
279 | return nil, certFiles
280 | }
281 |
282 | func getExpiredCerts(filePaths []string, days int) (error, []Cert) {
283 | var expiredCerts []Cert
284 |
285 | err, certFiles := getCertFiles(filePaths)
286 | if err != nil {
287 | return errors.New(fmt.Sprintf("could not get files (%s)", err.Error())), nil
288 | }
289 |
290 | for _, file := range certFiles {
291 |
292 | if _, err := os.Stat(file); os.IsNotExist(err) {
293 | continue
294 | }
295 |
296 | data, err := ioutil.ReadFile(file)
297 | if err != nil {
298 | return errors.New(fmt.Sprintf("could not read file %s", file)), nil
299 | }
300 |
301 | blocks := decodeCertBlocks([]byte(data))
302 |
303 | for _, block := range blocks {
304 | cert, err := x509.ParseCertificate(block.Bytes)
305 | if err != nil {
306 | return errors.New(fmt.Sprintf("certificate parsing error (%s)", err.Error())), nil
307 | }
308 |
309 | daysLeft := cert.NotAfter.Sub(time.Now()).Hours() / 24
310 |
311 | if int(daysLeft) <= days {
312 | log.Println(fmt.Sprintf("%s expires in %d days", file, int(daysLeft)))
313 | expiredCerts = append(expiredCerts, Cert{File: file, DaysLeft: int(daysLeft)})
314 | }
315 | }
316 | }
317 |
318 | return nil, expiredCerts
319 | }
320 |
321 | func CheckFileSslCertificates(filePaths []string, days int) error {
322 | log.Printf("Checking expiry date for SSL certificates (%d days) in files.", days)
323 |
324 | var certErrorList []string
325 |
326 | err, expiredCerts := getExpiredCerts(filePaths, days)
327 | if err != nil {
328 | msg := "could not get expired certificates"
329 | log.Println(msg)
330 | return errors.New(msg)
331 | }
332 |
333 | for _, expiredCert := range expiredCerts {
334 | certErrorList = append(certErrorList, fmt.Sprintf("%s expires in %d days", expiredCert.File, expiredCert.DaysLeft))
335 | }
336 |
337 | if len(certErrorList) > 0 {
338 | var errorMessage string
339 | for _, msg := range certErrorList {
340 | errorMessage = errorMessage + msg + " "
341 | }
342 | return errors.New(errorMessage)
343 | }
344 |
345 | return nil
346 | }
347 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/daemon/client/checks/openshift.go:
--------------------------------------------------------------------------------
1 | package checks
2 |
3 | import (
4 | "bytes"
5 | "errors"
6 | "fmt"
7 | "log"
8 | "os/exec"
9 | "regexp"
10 | "strconv"
11 | "strings"
12 | "time"
13 | )
14 |
15 | func CheckMasterApis(urls string) error {
16 | log.Println("Checking master apis. At least one has to be up")
17 |
18 | urlArr := strings.Split(urls, ",")
19 |
20 | oneApiOk := false
21 | var msg string
22 | for _, u := range urlArr {
23 | if err := checkHttp(u); err == nil {
24 | oneApiOk = true
25 | } else {
26 | msg += u + " is not reachable. "
27 | }
28 | }
29 |
30 | if oneApiOk {
31 | return nil
32 | } else {
33 | return errors.New(msg)
34 | }
35 | }
36 |
37 | func CheckOcGetNodes(buildNodes bool) error {
38 | log.Println("Checking oc get nodes output")
39 | var out string
40 | var err error
41 | for i := 0; i < 5; i++ {
42 | out, err = runOcGetNodes(buildNodes)
43 | if err != nil {
44 | return err
45 | }
46 | if strings.Contains(out, "NotReady") {
47 | // Wait a few seconds and see if still NotReady
48 | // to avoid wrong alerts
49 | time.Sleep(10 * time.Second)
50 | continue
51 | }
52 | return nil
53 | }
54 | var purpose string
55 | if buildNodes {
56 | purpose = "Buildnode "
57 | } else {
58 | purpose = "Workingnode "
59 | }
60 | return errors.New(purpose + getNotReadyNodeNames(out) + " is not ready! 'oc get nodes' output contained NotReady. Output: " + out)
61 | }
62 |
63 | func CheckOcGetNodesRelaxed() error {
64 | log.Println("Checking oc get nodes output")
65 |
66 | var notReadyCount int
67 | var availablePodHardLimit int
68 | var out string
69 | var err error
70 | for i := 0; i < 5; i++ {
71 | out, err = runOcGetNodes(false)
72 | if err != nil {
73 | return err
74 | }
75 | notReadyCount = nodesNotReady(out)
76 | availablePodHardLimit, err = getAvailablePodHardLimit(out)
77 | if err != nil {
78 | return err
79 | }
80 | max_pods, err := strconv.Atoi(getEnv("OPENSHIFT_MAX_PODS", "100"))
81 | if err != nil {
82 | return errors.New("Could not parse OPENSHIFT_MAX_PODS environment variable: " + err.Error())
83 | }
84 | if notReadyCount*max_pods < availablePodHardLimit {
85 | return nil
86 | }
87 | // wait a few seconds and then check again
88 | time.Sleep(10 * time.Second)
89 | }
90 | return fmt.Errorf("Capacity overload! Workingnode %v is not ready! AvailablePodHardLimit: %v 'oc get nodes' output contained NotReady. Output: %v", getNotReadyNodeNames(out), availablePodHardLimit, out)
91 | }
92 |
93 | func getAvailablePodHardLimit(output string) (int, error) {
94 | totalPods, err := getTotalPods()
95 | if err != nil {
96 | return 0, err
97 | }
98 | totalCapacity, err := getTotalPodCapacity(output)
99 | if err != nil {
100 | return 0, err
101 | }
102 | return totalCapacity - totalPods, nil
103 | }
104 |
// nodesNotReady counts the (non-overlapping) occurrences of "NotReady" in the
// given 'oc get nodes' output.
func nodesNotReady(output string) int {
	notReadyPattern := regexp.MustCompile("NotReady")
	return len(notReadyPattern.FindAllStringIndex(output, -1))
}
110 |
// getTotalPods returns the line count printed by
// 'oc get pods --all-namespaces' after lines containing "Error" or
// "Completed" have been filtered out. Both a failing command and unparsable
// output are reported with the same error prefix.
func getTotalPods() (int, error) {
	const countPodsCmd = "oc get pods --all-namespaces | grep -v Error | grep -v Completed | wc -l"

	raw, err := exec.Command("bash", "-c", countPodsCmd).Output()
	if err == nil {
		var count int
		if count, err = strconv.Atoi(strings.TrimSpace(string(raw))); err == nil {
			return count, nil
		}
	}

	return 0, errors.New("Could not parse oc get pods output: " + err.Error())
}
123 |
124 | func getTotalPodCapacity(output string) (int, error) {
125 | out, err := exec.Command("bash", "-c", "oc get nodes "+getReadyWorkingNodeNames(output)+` -o=jsonpath='{range .items[*]}{.status.capacity.pods}{"\n"}{end}' | paste -sd+ | bc`).Output()
126 | if err != nil {
127 | return 0, errors.New("Could not parse oc get nodes output: " + err.Error())
128 | }
129 | trimmed := strings.TrimSpace(string(out))
130 | i, err := strconv.Atoi(trimmed)
131 | if err != nil {
132 | return 0, errors.New("Could not parse oc get nodes output: " + err.Error())
133 | }
134 | return i, nil
135 | }
136 |
// getNotReadyNodeNames extracts the first whitespace-separated field of every
// line in out that contains "NotReady" and joins those fields with ", ".
// An empty string is returned when no line matches.
func getNotReadyNodeNames(out string) string {
	var names []string
	for _, row := range strings.Split(out, "\n") {
		if !strings.Contains(row, "NotReady") {
			continue
		}
		// A row containing "NotReady" always has at least one field.
		names = append(names, strings.Fields(row)[0])
	}
	return strings.Join(names, ", ")
}
148 |
// getReadyWorkingNodeNames returns the space-separated names (first field of
// each line) of all nodes in the given `oc get nodes` output that are usable
// as regular worker nodes. Lines are skipped when they
//   - are empty or consist only of whitespace,
//   - contain "NotReady",
//   - contain "SchedulingDisabled", or
//   - contain the label "purpose=buildnode".
//
// An empty string is returned when no line qualifies.
func getReadyWorkingNodeNames(out string) string {
	excluded := []string{"NotReady", "SchedulingDisabled", "purpose=buildnode"}

	var readyWorkingNodes []string
lines:
	for _, line := range strings.Split(out, "\n") {
		fields := strings.Fields(line)
		// Guard against blank and whitespace-only lines; indexing fields[0]
		// on such a line would panic.
		if len(fields) == 0 {
			continue
		}
		for _, marker := range excluded {
			if strings.Contains(line, marker) {
				continue lines
			}
		}
		readyWorkingNodes = append(readyWorkingNodes, fields[0])
	}
	return strings.Join(readyWorkingNodes, " ")
}
170 |
// runOcGetNodes runs `oc get nodes --show-labels --no-headers` and filters the
// result in the shell: lines containing "monitoring=false" or
// "SchedulingDisabled" are always removed. When buildNodes is true only lines
// containing "purpose=buildnode" are kept; when it is false those lines are
// removed instead.
//
// The trailing `|| test $? -eq 1` turns an exit status of 1 from the pipeline
// (grep selected no lines) into success, so an empty result is not an error.
//
// It returns the filtered output, or an error (which is also logged) when the
// command fails.
func runOcGetNodes(buildNodes bool) (string, error) {
	// "-v" inverts the last grep so that build nodes are dropped.
	grepFlag := ""
	if !buildNodes {
		grepFlag = "-v"
	}

	script := fmt.Sprintf("oc get nodes --show-labels --no-headers | grep -v monitoring=false | grep -v SchedulingDisabled | grep %s purpose=buildnode || test $? -eq 1", grepFlag)
	raw, err := exec.Command("bash", "-c", script).Output()
	if err == nil {
		return string(raw), nil
	}

	failure := "Could not parse oc get nodes output: " + err.Error()
	log.Println(failure)
	return "", errors.New(failure)
}
184 |
185 | func CheckDnsNslookupOnKubernetes() error {
186 | log.Println("Checking nslookup to kubernetes ip")
187 |
188 | cmd := exec.Command("nslookup", daemonDNSEndpoint+".", kubernetesIP)
189 | var out bytes.Buffer
190 | cmd.Stdout = &out
191 | err := cmd.Run()
192 | if err != nil {
193 | msg := "DNS resolution via nslookup & kubernetes failed." + err.Error()
194 | log.Println(msg)
195 | return errors.New(msg)
196 | }
197 |
198 | stdOut := out.String()
199 |
200 | if strings.Contains(stdOut, "Server") && strings.Count(stdOut, "Address") >= 2 && strings.Contains(stdOut, "Name") {
201 | return nil
202 | } else {
203 | return errors.New("Problem with dns to kubernetes. nsLookup had wrong output")
204 | }
205 | }
206 |
207 | func CheckDnsServiceNode() error {
208 | log.Println("Checking dns to a openshift service")
209 |
210 | ips := getIpsForName(daemonDNSServiceA)
211 |
212 | if ips == nil {
213 | return errors.New("Failed to lookup ip on node (dnsmasq) for name " + daemonDNSServiceA)
214 | } else {
215 | return nil
216 | }
217 | }
218 |
219 | func CheckDnsInPod() error {
220 | log.Println("Checking dns to a openshift service inside a pod")
221 |
222 | ips := getIpsForName(daemonDNSPod)
223 |
224 | if ips == nil {
225 | return errors.New("Failed to lookup ip in pod for name " + daemonDNSPod)
226 | } else {
227 | return nil
228 | }
229 | }
230 |
231 | func CheckPodHttpAtoB() error {
232 | log.Println("Checking if http connection does not work if network not joined")
233 |
234 | // This should fail as we do not have access to this project
235 | if err := checkHttp("http://" + daemonDNSServiceB + ":8090/hello"); err == nil {
236 | errors.New("Pod A could access pod b. This should not be allowed!")
237 | }
238 |
239 | return nil
240 | }
241 |
242 | func CheckPodHttpAtoC(slow bool) error {
243 | log.Println("Checking if http connection does work with joined network")
244 |
245 | if err := checkHttp("http://" + daemonDNSServiceC + ":8090/" + getEndpoint(slow)); err != nil {
246 | return errors.New("Pod A could access pod C. This should not work. Route/Router problem?")
247 | }
248 |
249 | return nil
250 | }
251 |
252 | func CheckHttpService(slow bool) error {
253 | errA := checkHttp("http://" + daemonDNSServiceA + ":8090/" + getEndpoint(slow))
254 | errB := checkHttp("http://" + daemonDNSServiceB + ":8090/" + getEndpoint(slow))
255 | errC := checkHttp("http://" + daemonDNSServiceC + ":8090/" + getEndpoint(slow))
256 |
257 | if errA != nil || errB != nil || errC != nil {
258 | msg := "Could not reach one of the services (a/b/c)"
259 | log.Println(msg)
260 | return errors.New(msg)
261 | }
262 |
263 | return nil
264 | }
265 |
266 | func CheckHttpHaProxy(publicUrl string, slow bool) error {
267 | log.Println("Checking http via HA-Proxy")
268 |
269 | if err := checkHttp(publicUrl + ":80/" + getEndpoint(slow)); err != nil {
270 | return errors.New("Could not access pods via haproxy. Route/Router problem?")
271 | }
272 |
273 | return nil
274 | }
275 |
276 | func CheckRegistryHealth(ip string) error {
277 | if len(ip) == 0 {
278 | return nil
279 | }
280 |
281 | log.Println("Checking registry health")
282 |
283 | if err := checkHttp("http://" + ip + ":5000/healthz"); err != nil {
284 | time.Sleep(10 * time.Second)
285 |
286 | if err2 := checkHttp("http://" + ip + ":5000/healthz"); err2 != nil {
287 | return fmt.Errorf("Registry health check failed. %v", err2.Error())
288 | }
289 | }
290 |
291 | return nil
292 | }
293 |
294 | func CheckHawcularHealth(ip string) error {
295 | log.Println("Checking metrics health")
296 |
297 | if err := checkHttp("https://" + ip + ":443"); err != nil {
298 | return errors.New("Hawcular health check failed")
299 | }
300 |
301 | return nil
302 | }
303 |
304 | func CheckRouterHealth(ip string) error {
305 | log.Println("Checking router health", ip)
306 |
307 | if err := checkHttp("http://" + ip + ":1936/healthz"); err != nil {
308 | time.Sleep(10 * time.Second)
309 |
310 | if err2 := checkHttp("http://" + ip + ":5000/healthz"); err2 != nil {
311 | return fmt.Errorf("Router health check failed for %v, %v", ip, err2.Error())
312 | }
313 | }
314 |
315 | return nil
316 | }
317 |
// CheckLoggingRestartsCount lists the pods labelled app=sematext-agent in the
// "logging" namespace, takes the fourth space-separated column of every line
// and fails when any value is greater than 7.
//
// Lines starting with "RESTARTS" and blank lines are ignored. Values that
// cannot be parsed as an integer are treated as 0. When several values exceed
// the limit, the returned error mentions the last one encountered.
func CheckLoggingRestartsCount() error {
	log.Println("Checking log-container restart count")

	raw, err := exec.Command("bash", "-c", "oc get pods -n logging -o wide -l app=sematext-agent | tr -s ' ' | cut -d ' ' -f 4").Output()
	if err != nil {
		failure := "Could not parse logging container restart count: " + err.Error()
		log.Println(failure)
		return errors.New(failure)
	}

	const restartLimit = 7
	var failure string
	exceeded := false
	for _, field := range strings.Split(string(raw), "\n") {
		if strings.HasPrefix(field, "RESTARTS") || len(strings.TrimSpace(field)) == 0 {
			continue
		}
		restarts, _ := strconv.Atoi(field)
		if restarts > restartLimit {
			// Keep scanning: a later offender overwrites the message.
			failure = "A logging-container has restart count bigger than 7 - " + strconv.Itoa(restarts)
			exceeded = true
		}
	}

	if exceeded {
		return errors.New(failure)
	}
	return nil
}
346 |
// CheckRouterRestartCount lists the pods in the "default" namespace whose line
// contains "router" but not "deploy", takes the fourth space-separated column
// of every line and fails when any value is greater than 5.
//
// Lines starting with "RESTARTS" and blank lines are ignored. Values that
// cannot be parsed as an integer are treated as 0. When several values exceed
// the limit, the returned error mentions the last one encountered.
func CheckRouterRestartCount() error {
	log.Println("Checking router restart count")

	raw, err := exec.Command("bash", "-c", "oc get po -n default | grep router | grep -v deploy | tr -s ' ' | cut -d ' ' -f 4").Output()
	if err != nil {
		failure := "Could not parse router restart count: " + err.Error()
		log.Println(failure)
		return errors.New(failure)
	}

	const restartLimit = 5
	var failure string
	exceeded := false
	for _, field := range strings.Split(string(raw), "\n") {
		if strings.HasPrefix(field, "RESTARTS") || len(strings.TrimSpace(field)) == 0 {
			continue
		}
		restarts, _ := strconv.Atoi(field)
		if restarts > restartLimit {
			// Keep scanning: a later offender overwrites the message.
			failure = "A Router has restart count bigger than 5 - " + strconv.Itoa(restarts)
			exceeded = true
		}
	}

	if exceeded {
		return errors.New(failure)
	}
	return nil
}
375 |
376 | func CheckEtcdHealth(etcdIps string, etcdCertPath string) error {
377 | log.Println("Checking etcd health")
378 |
379 | var msg string
380 | isOk := true
381 |
382 | if len(etcdCertPath) > 0 {
383 | // Check etcd with custom certs path
384 | isOk = checkEtcdHealthWithCertPath(&msg, etcdCertPath, etcdIps)
385 |
386 | if !isOk {
387 | log.Println("etcd health check with custom cert path failed, trying with default")
388 |
389 | // Check etcd with default certs path
390 | isOk = checkEtcdHealthWithCertPath(&msg, "/etc/etcd/", etcdIps)
391 | }
392 | } else {
393 | // Check etcd with default certs path
394 | isOk = checkEtcdHealthWithCertPath(&msg, "/etc/etcd/", etcdIps)
395 | }
396 |
397 | if !isOk {
398 | return errors.New(msg)
399 | } else {
400 | return nil
401 | }
402 | }
403 |
// checkEtcdHealthWithCertPath runs `etcdctl ... cluster-health` against the
// peers in etcdIps, using ca.crt, peer.key and peer.crt located directly under
// certPath (certPath is concatenated as-is, so it needs a trailing slash).
//
// It returns true when the command succeeds and its standard output contains
// neither "unhealthy" nor "unreachable". On failure it returns false and
// reports the reason through msg: a command error replaces *msg, an unhealthy
// cluster report is appended to whatever *msg already holds.
func checkEtcdHealthWithCertPath(msg *string, certPath string, etcdIps string) bool {
	args := []string{
		"--peers", etcdIps,
		"--ca-file", certPath + "ca.crt",
		"--key-file", certPath + "peer.key",
		"--cert-file", certPath + "peer.crt",
		"cluster-health",
	}

	var stdout bytes.Buffer
	health := exec.Command("etcdctl", args...)
	health.Stdout = &stdout
	if runErr := health.Run(); runErr != nil {
		log.Println("error while running etcd health check", runErr)
		*msg = "etcd health check failed: " + runErr.Error()
		return false
	}

	report := stdout.String()
	for _, marker := range []string{"unhealthy", "unreachable"} {
		if strings.Contains(report, marker) {
			*msg += "Etcd health check was 'cluster unhealthy'"
			return false
		}
	}
	return true
}
425 |
// CheckLimitsAndQuota compares the number of projects with the number of
// limit and quota objects in the cluster.
//
// The three numbers are the line counts (`wc -l`) of `oc get projects`,
// `oc get limits --all-namespaces` and `oc get quota --all-namespaces`.
// NOTE(review): none of the commands passes --no-headers, so a header line is
// presumably part of each count — confirm that the allowances account for it.
//
// allowedWithoutLimits / allowedWithoutQuota are the number of projects that
// may legitimately lack a limit / quota. An error is returned when more
// projects than allowed are uncovered, when a command fails, or when a count
// cannot be parsed as an integer.
func CheckLimitsAndQuota(allowedWithoutLimits int, allowedWithoutQuota int) error {
	log.Println("Checking limits & quotas")

	// Count projects
	pCount, err := ocLineCount("oc get projects | wc -l", "project")
	if err != nil {
		return err
	}

	// Count limits
	lCount, err := ocLineCount("oc get limits --all-namespaces | wc -l", "limit")
	if err != nil {
		return err
	}

	// Count quota
	qCount, err := ocLineCount("oc get quota --all-namespaces | wc -l", "quota")
	if err != nil {
		return err
	}

	log.Println("Parsed values (projects,limits,quotas)", pCount, lCount, qCount)

	// Projects are missing a limit/quota when, even after subtracting the
	// allowed exceptions, there are more projects than limit/quota objects.
	if pCount-allowedWithoutLimits > lCount {
		return errors.New("There are some projects without limits")
	}
	if pCount-allowedWithoutQuota > qCount {
		return errors.New("There are some projects without quota")
	}

	return nil
}

// ocLineCount runs script through bash and parses its trimmed output as an
// integer; what names the counted object in the logged and returned error.
func ocLineCount(script string, what string) (int, error) {
	raw, err := exec.Command("bash", "-c", script).Output()
	if err == nil {
		var n int
		if n, err = strconv.Atoi(strings.TrimSpace(string(raw))); err == nil {
			return n, nil
		}
	}
	msg := "Could not parse " + what + " count" + err.Error()
	log.Println(msg)
	return 0, errors.New(msg)
}
469 |
--------------------------------------------------------------------------------