├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.md ├── dependabot.yml └── workflows │ ├── check.yml │ └── publish.yml ├── .gitignore ├── .vscode └── launch.json ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── README.md ├── alertmanager ├── alertmanager.go ├── alertmanager_test.go ├── dingtalk │ ├── dingtalk.go │ └── dingtalk_test.go ├── discord │ ├── discord.go │ └── discord_test.go ├── email │ ├── email.go │ └── email_test.go ├── feishu │ ├── feishu.go │ └── feishu_test.go ├── googlechat │ ├── googlechat.go │ └── googlechat_test.go ├── matrix │ ├── matrix.go │ └── matrix_test.go ├── mattermost │ ├── mattermost.go │ └── mattermost_test.go ├── opsgenie │ ├── opsgenie.go │ └── opsgenie_test.go ├── pagerduty │ ├── pagerduty.go │ └── pagerduty_test.go ├── rocketchat │ ├── rocketchat.go │ └── rocketchat_test.go ├── slack │ ├── slack.go │ └── slack_test.go ├── teams │ ├── teams.go │ └── teams_test.go ├── telegram │ ├── telegram.go │ └── telegram_test.go ├── webhook │ ├── webhook.go │ └── webhook_test.go └── zenduty │ ├── zenduty.go │ └── zenduty_test.go ├── assets ├── discord.png ├── email.png ├── feishu.png ├── googlechat.png ├── highlevelarchitecture.drawio ├── highlevelarchitecture.png ├── logo.png ├── matrix.png ├── mattermost.png ├── opsgenie.png ├── pagerduty.png ├── rocketchat.png ├── slack.png ├── teams.png ├── telegram.png ├── users │ ├── ibec-systems.svg │ ├── justwatch.png │ └── trella.png └── zenduty.png ├── client └── client.go ├── config ├── config.go ├── config_test.go ├── defaultConfig.go └── loadConfig.go ├── constant └── constant.go ├── deploy ├── chart │ ├── .helmignore │ ├── Chart.yaml │ ├── README.md │ ├── templates │ │ ├── _helpers.tpl │ │ ├── configmap.yaml │ │ ├── deployment.yaml │ │ ├── rbac.yaml │ │ └── serviceaccount.yaml │ ├── values.schema.json │ └── values.yaml ├── config.yaml └── deploy.yaml ├── event ├── event.go └── format.go ├── filter ├── 
containerKillingFilter.go ├── containerLogsFilter.go ├── containerNameFilter.go ├── containerReasonsFilter.go ├── containerRestartsFilter.go ├── containerStateFilter.go ├── eventFilter.go ├── filter.go ├── namespaceFilter.go ├── podEventsFilter.go ├── podNameFilter.go ├── podOwnersFilter.go └── podStatusFilter.go ├── go.mod ├── go.sum ├── handler ├── executeContainersFilters.go ├── executePodFilters.go ├── handler.go ├── processNode.go └── processPod.go ├── main.go ├── pvcmonitor ├── checkUsage.go ├── getUsage.go └── pvc.go ├── storage ├── memory │ ├── memory.go │ └── memory_test.go └── storage.go ├── upgrader └── upgrader.go ├── util ├── util.go └── util_test.go ├── version └── version.go └── watcher ├── start.go └── watcher.go /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # These owners will be the default owners for everything in 2 | # the repo 3 | * @abahmed -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | 5 | --- 6 | 7 | **Describe the bug** 8 | A clear and concise description of what the bug is. 9 | 10 | **To Reproduce** 11 | Steps to reproduce the behavior: 12 | 13 | **Expected behavior** 14 | A clear and concise description of what you expected to happen. 15 | 16 | **Actual behavior** 17 | A clear and concise description of what really happens. 18 | 19 | **Version/Commit** 20 | A version or commit of kwatch -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | 5 | --- 6 | 7 | **Is your feature request related to a problem? 
Please describe.** 8 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 9 | 10 | **Describe the solution you'd like** 11 | A clear and concise description of what you want to happen. -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | Fixes # . 2 | 3 | Changes proposed in this pull request: 4 | - 5 | - 6 | - 7 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "gomod" 4 | directory: "/" 5 | schedule: 6 | interval: "daily" 7 | -------------------------------------------------------------------------------- /.github/workflows/check.yml: -------------------------------------------------------------------------------- 1 | name: Check 2 | on: 3 | push: 4 | branches: [ main ] 5 | pull_request: 6 | branches: [ main ] 7 | 8 | jobs: 9 | check: 10 | name: Check 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout code 14 | uses: actions/checkout@v4 15 | with: 16 | ref: ${{ github.ref }} 17 | - name: Install Go 18 | uses: actions/setup-go@v5 19 | with: 20 | go-version: '1.24' 21 | - name: Build 22 | run: go build 23 | - name: Test 24 | run: go test -race --coverprofile=coverage.txt --covermode=atomic ./... 
25 | - name: Upload coverage to Codecov 26 | if: success() && github.ref == 'refs/heads/main' 27 | uses: codecov/codecov-action@v5 28 | with: 29 | token: ${{ secrets.CODECOV_TOKEN }} 30 | fail_ci_if_error: false -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | push_to_registry: 9 | name: Push Docker image to GitHub Container Registry 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Check out the repo 13 | uses: actions/checkout@v4 14 | - name: Set up QEMU 15 | uses: docker/setup-qemu-action@v3 16 | - name: Set up Docker Buildx 17 | uses: docker/setup-buildx-action@v3 18 | - name: Login to GitHub Container Registry 19 | uses: docker/login-action@v3 20 | with: 21 | registry: ghcr.io 22 | username: ${{ github.actor }} 23 | password: ${{ secrets.GITHUB_TOKEN }} 24 | - name: Extract metadata (tags, labels) for Docker 25 | id: meta 26 | uses: docker/metadata-action@v5 27 | with: 28 | images: ghcr.io/abahmed/kwatch 29 | - name: Build and push Docker image 30 | uses: docker/build-push-action@v6 31 | with: 32 | platforms: linux/amd64,linux/arm64,linux/arm/v6,linux/arm/v7 33 | context: . 
34 | push: true 35 | tags: ${{ steps.meta.outputs.tags }} 36 | labels: ${{ steps.meta.outputs.labels }} 37 | build-args: RELEASE_VERSION=${{ steps.meta.outputs.version }} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## IDEA ### 2 | .idea 3 | 4 | ### Go ### 5 | # Binaries for programs and plugins 6 | *.exe 7 | *.exe~ 8 | *.dll 9 | *.so 10 | *.dylib 11 | kwatch 12 | 13 | # Test binary, built with `go test -c` 14 | *.test 15 | coverage.txt 16 | 17 | # Output of the go coverage tool, specifically when used with LiteIDE 18 | *.out 19 | 20 | # Dependency directories (remove the comment below to include it) 21 | # vendor/ 22 | 23 | # Config files 24 | config.yaml 25 | 26 | ### Go Patch ### 27 | /vendor/ 28 | /Godeps/ 29 | *.DS_Store 30 | 31 | # debug 32 | __debug_bin* -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | { 5 | "name": "Launch kwatch", 6 | "type": "go", 7 | "request": "launch", 8 | "mode": "auto", 9 | "program": "${workspaceRoot}", 10 | "showLog": true, 11 | "env": { 12 | "CONFIG_FILE": "config.yaml" 13 | } 14 | } 15 | ] 16 | } -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or 
sexual identity and orientation. 6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 
34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at kwatch@googlegroups.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Contributing to kwatch 2 | 3 | :tada: Anyone can contribute to kwatch. Newcomers are always welcome to contribute to kwatch, and we are happy to offer help to newcomers. 4 | Before making changes, please first discuss the change you want to make through [Discord](https://discord.gg/kzJszdKmJ7) 5 | 6 | 7 | ### There are many ways to contribute: 8 | 9 | + [Suggest new features to be implemented](https://github.com/abahmed/kwatch/issues) 10 | + [Report issues](https://github.com/abahmed/kwatch/issues) 11 | + [Improve Documentation](https://github.com/abahmed/kwatch) 12 | + [Fix issues](https://github.com/abahmed/kwatch/issues) 13 | 14 | 15 | ### Code Contribution 16 | 17 | If you wish to work on an issue, please comment on the issue that you want to work on it. 
This is to prevent duplicated efforts on the same issue. 18 | 19 | 20 | Contributions to kwatch should be made in the form of pull requests to the **main** branch. Each pull request will be reviewed by someone with permission to land patches. After reviewing the patch, it could be landed in the main branch or given feedback for changes. 21 | 22 | ### Code of Conduct 23 | We expect everyone to follow the [Code Of Conduct](./CODE_OF_CONDUCT.md) 24 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:alpine AS builder 2 | ARG RELEASE_VERSION="nothing" 3 | LABEL maintainer="Abdelrahman Ahmed " 4 | 5 | RUN apk update && \ 6 | apk add git build-base && \ 7 | rm -rf /var/cache/apk/* && \ 8 | mkdir -p "/build" 9 | 10 | WORKDIR /build 11 | COPY go.mod go.sum /build/ 12 | RUN go mod download 13 | 14 | COPY . /build/ 15 | RUN sed -i 's/dev/'"${RELEASE_VERSION}"'/g' version/version.go 16 | RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o kwatch . 
17 | 18 | FROM alpine:latest 19 | RUN apk add --update ca-certificates 20 | COPY --from=builder /build/kwatch /bin/kwatch 21 | ENTRYPOINT ["/bin/kwatch"] 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Abdelrahman Ahmed 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /alertmanager/alertmanager.go: -------------------------------------------------------------------------------- 1 | package alertmanager 2 | 3 | import ( 4 | "reflect" 5 | "strings" 6 | 7 | "github.com/abahmed/kwatch/alertmanager/dingtalk" 8 | "github.com/abahmed/kwatch/alertmanager/discord" 9 | "github.com/abahmed/kwatch/alertmanager/email" 10 | "github.com/abahmed/kwatch/alertmanager/feishu" 11 | "github.com/abahmed/kwatch/alertmanager/googlechat" 12 | "github.com/abahmed/kwatch/alertmanager/matrix" 13 | "github.com/abahmed/kwatch/alertmanager/mattermost" 14 | "github.com/abahmed/kwatch/alertmanager/opsgenie" 15 | "github.com/abahmed/kwatch/alertmanager/pagerduty" 16 | "github.com/abahmed/kwatch/alertmanager/rocketchat" 17 | "github.com/abahmed/kwatch/alertmanager/slack" 18 | "github.com/abahmed/kwatch/alertmanager/teams" 19 | "github.com/abahmed/kwatch/alertmanager/telegram" 20 | "github.com/abahmed/kwatch/alertmanager/webhook" 21 | "github.com/abahmed/kwatch/alertmanager/zenduty" 22 | "github.com/abahmed/kwatch/config" 23 | "github.com/abahmed/kwatch/event" 24 | "github.com/sirupsen/logrus" 25 | ) 26 | 27 | type AlertManager struct { 28 | providers []Provider 29 | } 30 | 31 | // Provider interface 32 | type Provider interface { 33 | Name() string 34 | SendEvent(*event.Event) error 35 | SendMessage(string) error 36 | } 37 | 38 | // Init initializes AlertManager with provided config 39 | func (a *AlertManager) Init( 40 | alertCfg map[string]map[string]interface{}, 41 | appCfg *config.App) { 42 | a.providers = make([]Provider, 0) 43 | for k, v := range alertCfg { 44 | lowerCaseKey := strings.ToLower(k) 45 | var pvdr Provider = nil 46 | if lowerCaseKey == "slack" { 47 | pvdr = slack.NewSlack(v, appCfg) 48 | } else if lowerCaseKey == "pagerduty" { 49 | pvdr = pagerduty.NewPagerDuty(v, appCfg) 50 | } else if lowerCaseKey == "discord" { 51 | pvdr = discord.NewDiscord(v, appCfg) 52 | 
} else if lowerCaseKey == "telegram" { 53 | pvdr = telegram.NewTelegram(v, appCfg) 54 | } else if lowerCaseKey == "teams" { 55 | pvdr = teams.NewTeams(v, appCfg) 56 | } else if lowerCaseKey == "email" { 57 | pvdr = email.NewEmail(v, appCfg) 58 | } else if lowerCaseKey == "rocketchat" { 59 | pvdr = rocketchat.NewRocketChat(v, appCfg) 60 | } else if lowerCaseKey == "mattermost" { 61 | pvdr = mattermost.NewMattermost(v, appCfg) 62 | } else if lowerCaseKey == "opsgenie" { 63 | pvdr = opsgenie.NewOpsgenie(v, appCfg) 64 | } else if lowerCaseKey == "matrix" { 65 | pvdr = matrix.NewMatrix(v, appCfg) 66 | } else if lowerCaseKey == "dingtalk" { 67 | pvdr = dingtalk.NewDingTalk(v, appCfg) 68 | } else if lowerCaseKey == "feishu" { 69 | pvdr = feishu.NewFeiShu(v, appCfg) 70 | } else if lowerCaseKey == "webhook" { 71 | pvdr = webhook.NewWebhook(v, appCfg) 72 | } else if lowerCaseKey == "zenduty" { 73 | pvdr = zenduty.NewZenduty(v, appCfg) 74 | } else if lowerCaseKey == "googlechat" { 75 | pvdr = googlechat.NewGoogleChat(v, appCfg) 76 | } 77 | 78 | if !reflect.ValueOf(pvdr).IsNil() { 79 | a.providers = append(a.providers, pvdr) 80 | } 81 | } 82 | } 83 | 84 | // Notify sends string msg to all providers 85 | func (a *AlertManager) Notify(msg string) { 86 | logrus.Infof("sending message: %s", msg) 87 | 88 | for _, prv := range a.providers { 89 | if err := prv.SendMessage(msg); err != nil { 90 | logrus.Errorf( 91 | "failed to send msg with %s: %s", 92 | prv.Name(), 93 | err.Error()) 94 | } 95 | } 96 | } 97 | 98 | // NotifyEvent sends event to all providers 99 | func (a *AlertManager) NotifyEvent(event event.Event) { 100 | logrus.Infof("sending event: %+v", event) 101 | 102 | for _, prv := range a.providers { 103 | if err := prv.SendEvent(&event); err != nil { 104 | logrus.Errorf( 105 | "failed to send event with %s: %s", 106 | prv.Name(), 107 | err.Error(), 108 | ) 109 | } 110 | } 111 | } 112 | -------------------------------------------------------------------------------- 
/alertmanager/alertmanager_test.go: -------------------------------------------------------------------------------- 1 | package alertmanager 2 | 3 | import ( 4 | "errors" 5 | "testing" 6 | 7 | "github.com/abahmed/kwatch/config" 8 | "github.com/abahmed/kwatch/event" 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | type fakeProvider struct{} 13 | 14 | func (p *fakeProvider) SendMessage(msg string) error { 15 | return nil 16 | } 17 | func (p *fakeProvider) SendEvent(evt *event.Event) error { 18 | return nil 19 | } 20 | func (p *fakeProvider) Name() string { 21 | return "Slack" 22 | } 23 | 24 | type fakeProviderWithError struct{} 25 | 26 | func (p *fakeProviderWithError) SendMessage(msg string) error { 27 | return errors.New("error") 28 | } 29 | func (p *fakeProviderWithError) SendEvent(evt *event.Event) error { 30 | return errors.New("error") 31 | } 32 | func (p *fakeProviderWithError) Name() string { 33 | return "Slack Error" 34 | } 35 | func TestAlertManagerNoConfig(t *testing.T) { 36 | assert := assert.New(t) 37 | alertmanager := AlertManager{} 38 | alertmanager.Init(nil, nil) 39 | assert.Len(alertmanager.providers, 0) 40 | } 41 | 42 | func TestGetProviders(t *testing.T) { 43 | assert := assert.New(t) 44 | 45 | alertMap := map[string]map[string]interface{}{ 46 | "slack": { 47 | "webhook": "test", 48 | }, 49 | "pagerduty": { 50 | "integrationKey": "test", 51 | }, 52 | "discord": { 53 | "webhook": "test/id", 54 | }, 55 | "telegram": { 56 | "token": "test", 57 | "chatId": "test", 58 | }, 59 | "teams": { 60 | "webhook": "test", 61 | }, 62 | "mattermost": { 63 | "webhook": "test", 64 | }, 65 | "rocketchat": { 66 | "webhook": "test", 67 | }, 68 | "opsgenie": { 69 | "apiKey": "test", 70 | }, 71 | "email": { 72 | "from": "test@test.com", 73 | "to": "test2@test.com", 74 | "host": "chat.google.com", 75 | "port": "5432", 76 | "password": "test", 77 | }, 78 | "matrix": { 79 | "homeServer": "localhost", 80 | "accessToken": "testToken", 81 | "internalRoomId": "room1", 
82 | }, 83 | "dingtalk": { 84 | "accessToken": "testToken", 85 | }, 86 | "feishu": { 87 | "webhook": "test", 88 | }, 89 | "webhook": { 90 | "url": "test", 91 | }, 92 | "zenduty": { 93 | "integrationKey": "test", 94 | }, 95 | "googlechat": { 96 | "webhook": "test", 97 | }, 98 | } 99 | 100 | alertmanager := AlertManager{} 101 | alertmanager.Init(alertMap, &config.App{ClusterName: "dev"}) 102 | 103 | assert.Len( 104 | alertmanager.providers, 105 | len(alertMap), 106 | "get providers returned %d expected %d") 107 | } 108 | 109 | func TestSendProvidersEvent(t *testing.T) { 110 | alertmanager := AlertManager{} 111 | alertmanager.providers = append( 112 | alertmanager.providers, 113 | &fakeProvider{}, 114 | &fakeProviderWithError{}, 115 | ) 116 | alertmanager.NotifyEvent(event.Event{}) 117 | } 118 | 119 | func TestSendProvidersMsg(t *testing.T) { 120 | alertmanager := AlertManager{} 121 | alertmanager.providers = append( 122 | alertmanager.providers, 123 | &fakeProvider{}, 124 | &fakeProviderWithError{}, 125 | ) 126 | alertmanager.Notify("hello world!") 127 | } 128 | -------------------------------------------------------------------------------- /alertmanager/dingtalk/dingtalk.go: -------------------------------------------------------------------------------- 1 | package dingtalk 2 | 3 | import ( 4 | "bytes" 5 | "crypto/hmac" 6 | "crypto/sha256" 7 | "encoding/base64" 8 | "encoding/json" 9 | "fmt" 10 | "io" 11 | "net/http" 12 | "net/url" 13 | "time" 14 | 15 | "github.com/abahmed/kwatch/config" 16 | "github.com/abahmed/kwatch/constant" 17 | "github.com/abahmed/kwatch/event" 18 | "github.com/sirupsen/logrus" 19 | ) 20 | 21 | const ( 22 | dingTalkAPIURL = "https://oapi.dingtalk.com/robot/send?access_token=%s" 23 | ) 24 | 25 | type dingResponse struct { 26 | Errcode int `json:"errcode"` 27 | Errmsg string `json:"errmsg"` 28 | } 29 | 30 | type DingTalk struct { 31 | accessToken string 32 | secret string 33 | url string 34 | title string 35 | 36 | // reference for general app 
configuration 37 | appCfg *config.App 38 | } 39 | 40 | // NewDingTalk returns new DingTalk instance 41 | func NewDingTalk(config map[string]interface{}, appCfg *config.App) *DingTalk { 42 | accessToken, ok := config["accessToken"].(string) 43 | if !ok || len(accessToken) == 0 { 44 | logrus.Warnf("initializing dingtalk with empty access token") 45 | return nil 46 | } 47 | 48 | logrus.Infof("initializing dingtalk with access token: %s", accessToken) 49 | 50 | title, _ := config["title"].(string) 51 | secret, _ := config["secret"].(string) 52 | 53 | return &DingTalk{ 54 | accessToken: accessToken, 55 | url: dingTalkAPIURL, 56 | title: title, 57 | secret: secret, 58 | appCfg: appCfg, 59 | } 60 | } 61 | 62 | // Name returns name of the provider 63 | func (d *DingTalk) Name() string { 64 | return "DingTalk" 65 | } 66 | 67 | // SendEvent sends event to the provider 68 | func (d *DingTalk) SendEvent(e *event.Event) error { 69 | 70 | // use custom title if it's provided, otherwise use default 71 | title := d.title 72 | if len(title) == 0 { 73 | title = constant.DefaultTitle 74 | } 75 | 76 | msg := e.FormatMarkdown(d.appCfg.ClusterName, "", "") 77 | 78 | body := fmt.Sprintf(`{ 79 | "msgtype": "markdown", 80 | "markdown": { "title": "%s", "text: "%s" } 81 | }`, title, msg) 82 | 83 | return d.sendAPI(body) 84 | } 85 | 86 | // SendMessage sends text message to the provider 87 | func (d *DingTalk) SendMessage(msg string) error { 88 | body := fmt.Sprintf(`{ 89 | "msgtype": "text", 90 | "text": { "content": "%s"} 91 | }`, msg) 92 | return d.sendAPI(body) 93 | } 94 | 95 | func (d *DingTalk) sendAPI(msg string) error { 96 | buffer := bytes.NewBuffer([]byte(msg)) 97 | 98 | url := fmt.Sprintf(d.url, d.accessToken) 99 | if len(d.secret) != 0 { 100 | url += getSignature(d.secret) 101 | } 102 | 103 | request, err := http.NewRequest( 104 | http.MethodPost, 105 | url, 106 | buffer, 107 | ) 108 | if err != nil { 109 | return err 110 | } 111 | 112 | request.Header.Set("Content-Type", 
// getSignature returns the query-string suffix that signs a DingTalk request
// with the given secret.
//
// The current time in milliseconds and the secret are joined with a newline,
// authenticated with HMAC-SHA256 keyed by the secret, base64-encoded and then
// query-escaped. The result has the form "&timestamp=<ms>&sign=<signature>"
// and is meant to be appended to a URL that already carries a query string.
func getSignature(secret string) string {
	timeStr := fmt.Sprintf("%d", time.Now().UnixMilli())

	sign := fmt.Sprintf("%s\n%s", timeStr, secret)
	signData := computeHmacSha256(sign, secret)
	encodeURL := url.QueryEscape(signData)

	return fmt.Sprintf("&timestamp=%s&sign=%s", timeStr, encodeURL)
}

// computeHmacSha256 returns the base64 (standard encoding) HMAC-SHA256 of
// message, keyed with secret.
func computeHmacSha256(message string, secret string) string {
	key := []byte(secret)
	h := hmac.New(sha256.New, key)
	h.Write([]byte(message))

	return base64.StdEncoding.EncodeToString(h.Sum(nil))
}
&config.App{ClusterName: "dev"}) 27 | assert.NotNil(c) 28 | 29 | assert.Equal(c.Name(), "DingTalk") 30 | } 31 | 32 | func TestSendMessage(t *testing.T) { 33 | assert := assert.New(t) 34 | 35 | s := httptest.NewServer( 36 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 37 | w.Write([]byte(`{"isOk": true}`)) 38 | })) 39 | 40 | defer s.Close() 41 | 42 | configMap := map[string]interface{}{ 43 | "accessToken": "testToken", 44 | "secret": "secret1", 45 | } 46 | c := NewDingTalk(configMap, &config.App{ClusterName: "dev"}) 47 | assert.NotNil(c) 48 | c.url = s.URL + "/send?accessToken=%s" 49 | 50 | assert.Nil(c.SendMessage("test")) 51 | } 52 | 53 | func TestSendMessageInvalidBody(t *testing.T) { 54 | assert := assert.New(t) 55 | 56 | s := httptest.NewServer( 57 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 58 | w.Header().Set("Content-Length", "1") 59 | })) 60 | 61 | defer s.Close() 62 | 63 | configMap := map[string]interface{}{ 64 | "accessToken": "testToken", 65 | } 66 | c := NewDingTalk(configMap, &config.App{ClusterName: "dev"}) 67 | assert.NotNil(c) 68 | c.url = s.URL + "/send?accessToken=%s" 69 | 70 | assert.NotNil(c.SendMessage("test")) 71 | } 72 | 73 | func TestSendMessageInvalidJson(t *testing.T) { 74 | assert := assert.New(t) 75 | 76 | s := httptest.NewServer( 77 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 78 | w.Write([]byte(`{"isOk": true`)) 79 | })) 80 | 81 | defer s.Close() 82 | 83 | configMap := map[string]interface{}{ 84 | "accessToken": "testToken", 85 | } 86 | c := NewDingTalk(configMap, &config.App{ClusterName: "dev"}) 87 | assert.NotNil(c) 88 | c.url = s.URL + "/send?accessToken=%s" 89 | 90 | assert.NotNil(c.SendMessage("test")) 91 | } 92 | 93 | func TestSendMessageErrorResponse(t *testing.T) { 94 | assert := assert.New(t) 95 | 96 | s := httptest.NewServer( 97 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 98 | w.Write([]byte(`{"errcode": 1234, "errmsg": "error"}`)) 99 | })) 
100 | 101 | defer s.Close() 102 | 103 | configMap := map[string]interface{}{ 104 | "accessToken": "testToken", 105 | } 106 | c := NewDingTalk(configMap, &config.App{ClusterName: "dev"}) 107 | assert.NotNil(c) 108 | c.url = s.URL + "/send?accessToken=%s" 109 | 110 | assert.NotNil(c.SendMessage("test")) 111 | } 112 | 113 | func TestSendEvent(t *testing.T) { 114 | assert := assert.New(t) 115 | 116 | s := httptest.NewServer( 117 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 118 | w.Write([]byte(`{"isOk": true}`)) 119 | })) 120 | 121 | defer s.Close() 122 | 123 | configMap := map[string]interface{}{ 124 | "accessToken": "testToken", 125 | } 126 | c := NewDingTalk(configMap, &config.App{ClusterName: "dev"}) 127 | assert.NotNil(c) 128 | c.url = s.URL + "/send?accessToken=%s" 129 | 130 | ev := event.Event{ 131 | PodName: "test-pod", 132 | ContainerName: "test-container", 133 | Namespace: "default", 134 | Reason: "OOMKILLED", 135 | Logs: "test\ntestlogs", 136 | Events: "event1-event2-event3-event1-event2-event3-event1-event2-" + 137 | "event3\nevent5\nevent6-event8-event11-event12", 138 | } 139 | assert.Nil(c.SendEvent(&ev)) 140 | } 141 | 142 | func TestInvaildHttpRequest(t *testing.T) { 143 | assert := assert.New(t) 144 | 145 | configMap := map[string]interface{}{ 146 | "accessToken": "testToken", 147 | } 148 | c := NewDingTalk(configMap, &config.App{ClusterName: "dev"}) 149 | assert.NotNil(c) 150 | c.url = "h ttp://localhost" + "/send?accessToken=%s" 151 | 152 | assert.NotNil(c.SendMessage("test")) 153 | 154 | configMap = map[string]interface{}{ 155 | "accessToken": "testToken", 156 | } 157 | c = NewDingTalk(configMap, &config.App{ClusterName: "dev"}) 158 | assert.NotNil(c) 159 | c.url = "http://localhost:132323" + "/send?accessToken=%s" 160 | 161 | assert.NotNil(c.SendMessage("test")) 162 | } 163 | -------------------------------------------------------------------------------- /alertmanager/discord/discord.go: 
-------------------------------------------------------------------------------- 1 | package discord 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/abahmed/kwatch/config" 7 | "github.com/abahmed/kwatch/constant" 8 | "github.com/abahmed/kwatch/event" 9 | 10 | discordgo "github.com/bwmarrin/discordgo" 11 | "github.com/sirupsen/logrus" 12 | ) 13 | 14 | type Discord struct { 15 | id string 16 | token string 17 | title string 18 | text string 19 | send func(webhookID, 20 | token string, 21 | wait bool, 22 | data *discordgo.WebhookParams, 23 | options ...discordgo.RequestOption) (st *discordgo.Message, err error) 24 | 25 | // reference for general app configuration 26 | appCfg *config.App 27 | } 28 | 29 | // NewDiscord returns new Discord instance 30 | func NewDiscord(config map[string]interface{}, appCfg *config.App) *Discord { 31 | webhook, ok := config["webhook"].(string) 32 | if !ok || len(webhook) == 0 { 33 | logrus.Warnf("initializing discord with empty webhook url") 34 | return nil 35 | } 36 | 37 | webhookList := strings.Split(webhook, "/") 38 | if len(webhookList) <= 1 { 39 | logrus.Warnf("initializing discord with missing id or token") 40 | return nil 41 | } 42 | logrus.Infof("initializing discord with webhook url: %s", webhook) 43 | 44 | webhookToken := webhookList[len(webhookList)-1] 45 | webhookID := webhookList[len(webhookList)-2] 46 | 47 | discordClient, _ := discordgo.New("") 48 | 49 | title, _ := config["title"].(string) 50 | text, _ := config["text"].(string) 51 | 52 | return &Discord{ 53 | id: webhookID, 54 | token: webhookToken, 55 | title: title, 56 | text: text, 57 | send: discordClient.WebhookExecute, 58 | appCfg: appCfg, 59 | } 60 | } 61 | 62 | // Name returns name of the provider 63 | func (s *Discord) Name() string { 64 | return "Discord" 65 | } 66 | 67 | // SendEvent sends event to the provider 68 | func (s *Discord) SendEvent(ev *event.Event) error { 69 | logrus.Debugf("sending to discord event: %v", ev) 70 | 71 | // initialize fields with 
basic info 72 | fields := []*discordgo.MessageEmbedField{ 73 | { 74 | Name: "Cluster", 75 | Value: s.appCfg.ClusterName, 76 | Inline: true, 77 | }, 78 | { 79 | Name: "Name", 80 | Value: ev.PodName, 81 | Inline: true, 82 | }, 83 | { 84 | Name: "Container", 85 | Value: ev.ContainerName, 86 | Inline: true, 87 | }, 88 | { 89 | Name: "Namespace", 90 | Value: ev.Namespace, 91 | Inline: true, 92 | }, 93 | { 94 | Name: "Reason", 95 | Value: ev.Reason, 96 | Inline: true, 97 | }, 98 | } 99 | 100 | // add events part if it exists 101 | events := strings.TrimSpace(ev.Events) 102 | if len(events) > 0 { 103 | fields = append(fields, &discordgo.MessageEmbedField{ 104 | Name: ":mag: Events", 105 | Value: "```\n" + events + "```", 106 | }) 107 | } 108 | 109 | // add logs part if it exists 110 | logs := strings.TrimSpace(ev.Logs) 111 | if len(logs) > 0 { 112 | logData := logs 113 | 114 | if len(logData) > 1024 { 115 | logData = logs[:1024] 116 | } 117 | 118 | fields = append(fields, &discordgo.MessageEmbedField{ 119 | Name: ":memo: Logs", 120 | Value: "```\n" + logData + "```", 121 | }) 122 | } 123 | 124 | // use custom title if it's provided, otherwise use default 125 | title := s.title 126 | if len(title) == 0 { 127 | title = constant.DefaultTitle 128 | } 129 | 130 | // use custom text if it's provided, otherwise use default 131 | text := s.text 132 | if len(text) == 0 { 133 | text = constant.DefaultText 134 | } 135 | 136 | // send message 137 | _, err := s.send( 138 | s.id, 139 | s.token, 140 | false, 141 | &discordgo.WebhookParams{ 142 | Embeds: []*discordgo.MessageEmbed{ 143 | { 144 | Color: 13041664, 145 | Title: title, 146 | Description: text, 147 | Fields: fields, 148 | Footer: &discordgo.MessageEmbedFooter{ 149 | Text: constant.Footer, 150 | }, 151 | }, 152 | }, 153 | }) 154 | return err 155 | } 156 | 157 | // SendMessage sends text message to the provider 158 | func (s *Discord) SendMessage(msg string) error { 159 | // send message 160 | _, err := s.send( 161 | s.id, 162 | 
s.token, 163 | false, 164 | &discordgo.WebhookParams{ 165 | Content: msg, 166 | }) 167 | return err 168 | } 169 | -------------------------------------------------------------------------------- /alertmanager/discord/discord_test.go: -------------------------------------------------------------------------------- 1 | package discord 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/abahmed/kwatch/config" 7 | "github.com/abahmed/kwatch/event" 8 | discordgo "github.com/bwmarrin/discordgo" 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | func mockedSend( 13 | webhookID, 14 | token string, 15 | wait bool, 16 | data *discordgo.WebhookParams, 17 | options ...discordgo.RequestOption) (st *discordgo.Message, err error) { 18 | return nil, nil 19 | } 20 | 21 | func TestDiscordEmptyConfig(t *testing.T) { 22 | assert := assert.New(t) 23 | 24 | c := NewDiscord(map[string]interface{}{}, &config.App{ClusterName: "dev"}) 25 | assert.Nil(c) 26 | } 27 | 28 | func TestDiscordInvalidConfig(t *testing.T) { 29 | assert := assert.New(t) 30 | 31 | configMap := map[string]interface{}{ 32 | "webhook": "testtest", 33 | } 34 | c := NewDiscord(configMap, &config.App{ClusterName: "dev"}) 35 | assert.Nil(c) 36 | } 37 | 38 | func TestDiscord(t *testing.T) { 39 | assert := assert.New(t) 40 | 41 | configMap := map[string]interface{}{ 42 | "webhook": "test/test", 43 | } 44 | c := NewDiscord(configMap, &config.App{ClusterName: "dev"}) 45 | assert.NotNil(c) 46 | 47 | assert.Equal(c.Name(), "Discord") 48 | } 49 | 50 | func TestSendMessage(t *testing.T) { 51 | assert := assert.New(t) 52 | 53 | configMap := map[string]interface{}{ 54 | "webhook": "test/test", 55 | } 56 | c := NewDiscord(configMap, &config.App{ClusterName: "dev"}) 57 | assert.NotNil(c) 58 | 59 | c.send = mockedSend 60 | assert.Nil(c.SendMessage("test")) 61 | } 62 | 63 | func TestSendEvent(t *testing.T) { 64 | assert := assert.New(t) 65 | 66 | configMap := map[string]interface{}{ 67 | "webhook": "test/test", 68 | } 69 | c := 
NewDiscord(configMap, &config.App{ClusterName: "dev"}) 70 | assert.NotNil(c) 71 | 72 | c.send = mockedSend 73 | 74 | ev := event.Event{ 75 | PodName: "test-pod", 76 | ContainerName: "test-container", 77 | Namespace: "default", 78 | Reason: "OOMKILLED", 79 | Logs: "Nam quis nulla. Integer malesuada. In in enim a arcu " + 80 | "imperdiet malesuada. Sed vel lectus. Donec odio urna, tempus " + 81 | "molestie, porttitor ut, iaculis quis, sem. Phasellus rhoncus.\n" + 82 | "Nam quis nulla. Integer malesuada. In in enim a arcu " + 83 | "imperdiet malesuada. Sed vel lectus. Donec odio urna, tempus " + 84 | "molestie, porttitor ut, iaculis quis, sem. Phasellus rhoncus.\n" + 85 | "Nam quis nulla. Integer malesuada. In in enim a arcu " + 86 | "imperdiet malesuada. Sed vel lectus. Donec odio urna, tempus " + 87 | "molestie, porttitor ut, iaculis quis, sem. Phasellus rhoncus.\n" + 88 | "Nam quis nulla. Integer malesuada. In in enim a arcu " + 89 | "imperdiet malesuada. Sed vel lectus. Donec odio urna, tempus " + 90 | "molestie, porttitor ut, iaculis quis, sem. Phasellus rhoncus.\n" + 91 | "Nam quis nulla. Integer malesuada. In in enim a arcu " + 92 | "imperdiet malesuada. Sed vel lectus. Donec odio urna, tempus " + 93 | "molestie, porttitor ut, iaculis quis, sem. Phasellus rhoncus.\n" + 94 | "Nam quis nulla. Integer malesuada. In in enim a arcu " + 95 | "imperdiet malesuada. Sed vel lectus. Donec odio urna, tempus " + 96 | "molestie, porttitor ut, iaculis quis, sem. 
Phasellus rhoncus.\n", 97 | Events: "BackOff Back-off restarting failed container\n" + 98 | "event3\nevent5\nevent6-event8-event11-event12", 99 | } 100 | assert.Nil(c.SendEvent(&ev)) 101 | } 102 | -------------------------------------------------------------------------------- /alertmanager/email/email.go: -------------------------------------------------------------------------------- 1 | package email 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | "strconv" 7 | "strings" 8 | 9 | "github.com/abahmed/kwatch/config" 10 | "github.com/abahmed/kwatch/event" 11 | "github.com/abahmed/kwatch/util" 12 | "github.com/sirupsen/logrus" 13 | gomail "gopkg.in/mail.v2" 14 | ) 15 | 16 | type Email struct { 17 | from string 18 | to string 19 | send func(m ...*gomail.Message) error 20 | 21 | // reference for general app configuration 22 | appCfg *config.App 23 | } 24 | 25 | // NewEmail returns new email instance 26 | func NewEmail(config map[string]interface{}, appCfg *config.App) *Email { 27 | from, ok := config["from"].(string) 28 | if !ok || len(from) == 0 { 29 | logrus.Warnf("initializing email with an empty from") 30 | return nil 31 | } 32 | 33 | to, ok := config["to"].(string) 34 | if !ok || len(to) == 0 { 35 | logrus.Warnf("initializing email with an empty to") 36 | return nil 37 | } 38 | 39 | password, ok := config["password"].(string) 40 | if !ok || len(password) == 0 { 41 | logrus.Warnf("initializing email with an empty password") 42 | return nil 43 | } 44 | 45 | host, ok := config["host"].(string) 46 | if !ok || len(host) == 0 { 47 | logrus.Warnf("initializing email with an empty host") 48 | return nil 49 | } 50 | 51 | port, ok := config["port"].(string) 52 | if !ok || len(port) == 0 { 53 | logrus.Warnf("initializing email with an empty port number") 54 | return nil 55 | } 56 | portNumber, err := strconv.Atoi(port) 57 | if err != nil { 58 | logrus.Warnf("initializing email with an invalid port number: %s", err) 59 | return nil 60 | } 61 | 62 | if portNumber > math.MaxUint16 { 
63 | logrus.Warnf("initializing email with an invalid range for port number") 64 | return nil 65 | } 66 | 67 | d := gomail.NewDialer(host, portNumber, from, password) 68 | 69 | return &Email{ 70 | from: from, 71 | to: to, 72 | send: d.DialAndSend, 73 | appCfg: appCfg, 74 | } 75 | } 76 | 77 | // Name returns name of the provider 78 | func (e *Email) Name() string { 79 | return "Email" 80 | } 81 | 82 | // SendEvent sends event to the provider 83 | func (e *Email) SendEvent(event *event.Event) error { 84 | subject, body := e.buildMessageSubjectAndBody(event) 85 | 86 | m := gomail.NewMessage() 87 | m.SetHeader("From", e.from) 88 | m.SetHeader("To", strings.Split(e.to, ",")...) 89 | m.SetHeader("Subject", subject) 90 | m.SetBody("text/plain", body) 91 | 92 | return e.send(m) 93 | } 94 | 95 | // SendMessage sends text message to the provider 96 | func (e *Email) SendMessage(s string) error { 97 | return nil 98 | } 99 | 100 | func (e *Email) buildMessageSubjectAndBody( 101 | ev *event.Event) (string, string) { 102 | eventsText := "No events captured" 103 | logsText := "No logs captured" 104 | 105 | // add events part if it exists 106 | events := strings.TrimSpace(ev.Events) 107 | if len(events) > 0 { 108 | eventsText = util.JsonEscape(ev.Events) 109 | } 110 | 111 | // add logs part if it exists 112 | logs := strings.TrimSpace(ev.Logs) 113 | if len(logs) > 0 { 114 | logsText = util.JsonEscape(ev.Logs) 115 | } 116 | 117 | subject := fmt.Sprintf("⛑ Kwatch detected a crash in pod %s ", ev.ContainerName) 118 | body := fmt.Sprintf( 119 | "An alert for cluster: *%s* Name: *%s* Container: *%s* "+ 120 | "Namespace: *%s* "+ 121 | "has been triggered:\\n—\\n "+ 122 | "Logs: *%s* \\n "+ 123 | "Events: *%s* ", 124 | e.appCfg.ClusterName, 125 | ev.PodName, 126 | ev.ContainerName, 127 | ev.Namespace, 128 | logsText, 129 | eventsText, 130 | ) 131 | return subject, body 132 | } 133 | -------------------------------------------------------------------------------- 
/alertmanager/email/email_test.go: -------------------------------------------------------------------------------- 1 | package email 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/abahmed/kwatch/config" 7 | "github.com/abahmed/kwatch/event" 8 | "github.com/stretchr/testify/assert" 9 | gomail "gopkg.in/mail.v2" 10 | ) 11 | 12 | func mockedSend(m ...*gomail.Message) error { 13 | return nil 14 | } 15 | 16 | func TestEmailEmptyConfig(t *testing.T) { 17 | assert := assert.New(t) 18 | 19 | c := NewEmail(map[string]interface{}{}, &config.App{ClusterName: "dev"}) 20 | assert.Nil(c) 21 | } 22 | 23 | func TestEmailInvalidConfig(t *testing.T) { 24 | assert := assert.New(t) 25 | 26 | configMap := map[string]interface{}{ 27 | "from": "test@test.com", 28 | } 29 | c := NewEmail(configMap, &config.App{ClusterName: "dev"}) 30 | assert.Nil(c) 31 | 32 | configMap = map[string]interface{}{ 33 | "from": "test@test.com", 34 | "to": "test12@test.com", 35 | } 36 | c = NewEmail(configMap, &config.App{ClusterName: "dev"}) 37 | assert.Nil(c) 38 | 39 | configMap = map[string]interface{}{ 40 | "from": "test@test.com", 41 | "to": "test12@test.com", 42 | "password": "testPassword", 43 | } 44 | c = NewEmail(configMap, &config.App{ClusterName: "dev"}) 45 | assert.Nil(c) 46 | 47 | configMap = map[string]interface{}{ 48 | "from": "test@test.com", 49 | "to": "test12@test.com", 50 | "password": "testPassword", 51 | "host": "chat.google.com", 52 | } 53 | c = NewEmail(configMap, &config.App{ClusterName: "dev"}) 54 | assert.Nil(c) 55 | 56 | configMap = map[string]interface{}{ 57 | "from": "test@test.com", 58 | "to": "test12@test.com", 59 | "password": "testPassword", 60 | "host": "chat.google.com", 61 | "port": "string", 62 | } 63 | c = NewEmail(configMap, &config.App{ClusterName: "dev"}) 64 | assert.Nil(c) 65 | 66 | configMap = map[string]interface{}{ 67 | "from": "test@test.com", 68 | "to": "test12@test.com", 69 | "password": "testPassword", 70 | "host": "chat.google.com", 71 | "port": "65539", 72 
| } 73 | c = NewEmail(configMap, &config.App{ClusterName: "dev"}) 74 | assert.Nil(c) 75 | } 76 | 77 | func TestEmail(t *testing.T) { 78 | assert := assert.New(t) 79 | 80 | configMap := map[string]interface{}{ 81 | "from": "test@test.com", 82 | "to": "test12@test.com", 83 | "password": "testPassword", 84 | "host": "chat.google.com", 85 | "port": "587", 86 | } 87 | c := NewEmail(configMap, &config.App{ClusterName: "dev"}) 88 | assert.NotNil(c) 89 | 90 | assert.Equal(c.Name(), "Email") 91 | } 92 | 93 | func TestSendMessage(t *testing.T) { 94 | assert := assert.New(t) 95 | 96 | configMap := map[string]interface{}{ 97 | "from": "test@test.com", 98 | "to": "test12@test.com", 99 | "password": "testPassword", 100 | "host": "chat.google.com", 101 | "port": "587", 102 | } 103 | c := NewEmail(configMap, &config.App{ClusterName: "dev"}) 104 | assert.NotNil(c) 105 | 106 | c.send = mockedSend 107 | assert.Nil(c.SendMessage("test")) 108 | } 109 | 110 | func TestSendEvent(t *testing.T) { 111 | assert := assert.New(t) 112 | 113 | configMap := map[string]interface{}{ 114 | "from": "test@test.com", 115 | "to": "test12@test.com", 116 | "password": "testPassword", 117 | "host": "chat.google.com", 118 | "port": "587", 119 | } 120 | c := NewEmail(configMap, &config.App{ClusterName: "dev"}) 121 | assert.NotNil(c) 122 | 123 | c.send = mockedSend 124 | ev := event.Event{ 125 | PodName: "test-pod", 126 | ContainerName: "test-container", 127 | Namespace: "default", 128 | Reason: "OOMKILLED", 129 | Logs: "test\ntestlogs", 130 | Events: "event1-event2-event3-event1-event2-event3-event1-event2-" + 131 | "event3\nevent5\nevent6-event8-event11-event12", 132 | } 133 | assert.Nil(c.SendEvent(&ev)) 134 | } 135 | -------------------------------------------------------------------------------- /alertmanager/feishu/feishu.go: -------------------------------------------------------------------------------- 1 | package feishu 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "fmt" 7 | "io" 8 | 
"net/http" 9 | 10 | "github.com/abahmed/kwatch/config" 11 | "github.com/abahmed/kwatch/event" 12 | "github.com/sirupsen/logrus" 13 | ) 14 | 15 | type FeiShu struct { 16 | webhook string 17 | title string 18 | 19 | // reference for general app configuration 20 | appCfg *config.App 21 | } 22 | 23 | type feiShuWebhookContent struct { 24 | Tag string `json:"tag"` 25 | Content string `json:"content"` 26 | } 27 | 28 | // NewFeiShu returns new feishu web bot instance 29 | func NewFeiShu(config map[string]interface{}, appCfg *config.App) *FeiShu { 30 | webhook, ok := config["webhook"].(string) 31 | if !ok || len(webhook) == 0 { 32 | logrus.Warnf("initializing Fei Shu with empty webhook url") 33 | return nil 34 | } 35 | 36 | logrus.Infof("initializing Fei Shu with webhook url: %s", webhook) 37 | 38 | title, _ := config["title"].(string) 39 | 40 | return &FeiShu{ 41 | webhook: webhook, 42 | title: title, 43 | appCfg: appCfg, 44 | } 45 | 46 | } 47 | 48 | // Name returns name of the provider 49 | func (r *FeiShu) Name() string { 50 | return "Fei Shu" 51 | } 52 | 53 | // SendEvent sends event to the provider 54 | func (r *FeiShu) SendEvent(e *event.Event) error { 55 | formattedMsg := e.FormatMarkdown(r.appCfg.ClusterName, "", "") 56 | return r.sendByFeiShuApi(r.buildRequestBodyFeiShu(formattedMsg)) 57 | } 58 | 59 | func (r *FeiShu) sendByFeiShuApi(reqBody string) error { 60 | client := &http.Client{} 61 | buffer := bytes.NewBuffer([]byte(reqBody)) 62 | request, err := http.NewRequest(http.MethodPost, r.webhook, buffer) 63 | if err != nil { 64 | return err 65 | } 66 | 67 | request.Header.Set("Content-Type", "application/json") 68 | 69 | response, err := client.Do(request) 70 | if err != nil { 71 | return err 72 | } 73 | defer response.Body.Close() 74 | 75 | if response.StatusCode != 200 { 76 | body, _ := io.ReadAll(response.Body) 77 | return fmt.Errorf( 78 | "call to rocket chat alert returned status code %d: %s", 79 | response.StatusCode, 80 | string(body)) 81 | } 82 | 83 | 
return nil 84 | } 85 | 86 | // SendMessage sends text message to the provider 87 | func (r *FeiShu) SendMessage(msg string) error { 88 | return r.sendByFeiShuApi(r.buildRequestBodyFeiShu(msg)) 89 | } 90 | 91 | func (r *FeiShu) buildRequestBodyFeiShu( 92 | text string) string { 93 | var content = []feiShuWebhookContent{ 94 | { 95 | Tag: "markdown", 96 | Content: text, 97 | }, 98 | } 99 | jsonBytes, _ := json.Marshal(content) 100 | 101 | body := "{\"msg_type\": \"interactive\",\"card\": {\"config\": {\"wide_screen_mode\": true},\"header\": {\"title\": {\"tag\": \"plain_text\",\"content\": \"" + 102 | r.title + 103 | "\"},\"template\": \"blue\"},\"elements\": " + string(jsonBytes) + "}}" 104 | return body 105 | } 106 | -------------------------------------------------------------------------------- /alertmanager/feishu/feishu_test.go: -------------------------------------------------------------------------------- 1 | package feishu 2 | 3 | import ( 4 | "net/http" 5 | "net/http/httptest" 6 | "testing" 7 | 8 | "github.com/abahmed/kwatch/config" 9 | "github.com/abahmed/kwatch/event" 10 | "github.com/stretchr/testify/assert" 11 | ) 12 | 13 | func TestEmptyConfig(t *testing.T) { 14 | assertions := assert.New(t) 15 | 16 | c := NewFeiShu(map[string]interface{}{}, &config.App{ClusterName: "dev"}) 17 | assertions.Nil(c) 18 | } 19 | 20 | func TestRocketChat(t *testing.T) { 21 | assertions := assert.New(t) 22 | 23 | configMap := map[string]interface{}{ 24 | "webhook": "testtest", 25 | } 26 | c := NewFeiShu(configMap, &config.App{ClusterName: "dev"}) 27 | assertions.NotNil(c) 28 | 29 | assertions.Equal(c.Name(), "Fei Shu") 30 | } 31 | 32 | func TestBuildRequestBodyFeiShu(t *testing.T) { 33 | assertions := assert.New(t) 34 | s := httptest.NewServer( 35 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 36 | w.Write([]byte(`{"isOk": true}`)) 37 | })) 38 | 39 | defer s.Close() 40 | 41 | configMap := map[string]interface{}{ 42 | "webhook": s.URL, 43 | } 44 | c := 
NewFeiShu(configMap, &config.App{ClusterName: "dev"}) 45 | assertions.NotNil(c) 46 | ev := event.Event{ 47 | PodName: "test-pod", 48 | ContainerName: "test-container", 49 | Namespace: "default", 50 | Reason: "OOMKILLED", 51 | Logs: "test\ntestlogs", 52 | Events: "test", 53 | } 54 | formattedMsg := ev.FormatMarkdown(c.appCfg.ClusterName, "", "") 55 | 56 | expectMessage := "{\"msg_type\": \"interactive\",\"card\": {\"config\": {\"wide_screen_mode\": true},\"header\": {\"title\": {\"tag\": \"plain_text\",\"content\": \"\"},\"template\": \"blue\"},\"elements\": [{\"tag\":\"markdown\",\"content\":\"There is an issue with container in a pod!\\n**Cluster:** dev\\n**Pod:** test-pod\\n**Container:** test-container\\n**Namespace:** default\\n**Reason:** OOMKILLED\\n**Events:**\\n```\\ntest\\n```\\n**Logs:**\\n```\\ntest\\ntestlogs\\n```\"}]}}" 57 | 58 | assertions.Equal(expectMessage, c.buildRequestBodyFeiShu(formattedMsg)) 59 | } 60 | 61 | func TestSendMessage(t *testing.T) { 62 | assertions := assert.New(t) 63 | 64 | s := httptest.NewServer( 65 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 66 | w.Write([]byte(`{"isOk": true}`)) 67 | })) 68 | 69 | defer s.Close() 70 | 71 | configMap := map[string]interface{}{ 72 | "webhook": s.URL, 73 | } 74 | c := NewFeiShu(configMap, &config.App{ClusterName: "dev"}) 75 | assertions.NotNil(c) 76 | 77 | assertions.Nil(c.SendMessage("test")) 78 | } 79 | 80 | func TestSendMessageError(t *testing.T) { 81 | assertions := assert.New(t) 82 | 83 | s := httptest.NewServer( 84 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 85 | w.WriteHeader(http.StatusBadGateway) 86 | })) 87 | 88 | defer s.Close() 89 | 90 | configMap := map[string]interface{}{ 91 | "webhook": s.URL, 92 | } 93 | c := NewFeiShu(configMap, &config.App{ClusterName: "dev"}) 94 | assertions.NotNil(c) 95 | 96 | assertions.NotNil(c.SendMessage("test")) 97 | } 98 | 99 | func TestSendEvent(t *testing.T) { 100 | assertions := assert.New(t) 101 | 102 | s 
:= httptest.NewServer( 103 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 104 | w.Write([]byte(`{"isOk": true}`)) 105 | })) 106 | 107 | defer s.Close() 108 | 109 | configMap := map[string]interface{}{ 110 | "webhook": s.URL, 111 | } 112 | c := NewFeiShu(configMap, &config.App{ClusterName: "dev"}) 113 | assertions.NotNil(c) 114 | 115 | ev := event.Event{ 116 | PodName: "test-pod", 117 | ContainerName: "test-container", 118 | Namespace: "default", 119 | Reason: "OOMKILLED", 120 | Logs: "test\ntestlogs", 121 | Events: "event1-event2-event3-event1-event2-event3-event1-event2-" + 122 | "event3\nevent5\nevent6-event8-event11-event12", 123 | } 124 | assertions.Nil(c.SendEvent(&ev)) 125 | } 126 | 127 | func TestInvalidHttpRequest(t *testing.T) { 128 | assertions := assert.New(t) 129 | 130 | configMap := map[string]interface{}{ 131 | "webhook": "h ttp://localhost", 132 | } 133 | c := NewFeiShu(configMap, &config.App{ClusterName: "dev"}) 134 | assertions.NotNil(c) 135 | 136 | assertions.NotNil(c.SendMessage("test")) 137 | 138 | configMap = map[string]interface{}{ 139 | "webhook": "http://localhost:132323", 140 | } 141 | c = NewFeiShu(configMap, &config.App{ClusterName: "dev"}) 142 | assertions.NotNil(c) 143 | 144 | assertions.NotNil(c.SendMessage("test")) 145 | } 146 | -------------------------------------------------------------------------------- /alertmanager/googlechat/googlechat.go: -------------------------------------------------------------------------------- 1 | package googlechat 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "fmt" 7 | "io" 8 | "net/http" 9 | 10 | "github.com/abahmed/kwatch/config" 11 | "github.com/abahmed/kwatch/event" 12 | "github.com/sirupsen/logrus" 13 | ) 14 | 15 | type GoogleChat struct { 16 | webhook string 17 | text string 18 | 19 | // reference for general app configuration 20 | appCfg *config.App 21 | } 22 | 23 | type payload struct { 24 | Text string `json:"text"` 25 | } 26 | 27 | // NewGoogleChat returns new 
google chat instance 28 | func NewGoogleChat(config map[string]interface{}, appCfg *config.App) *GoogleChat { 29 | webhook, ok := config["webhook"].(string) 30 | if !ok || len(webhook) == 0 { 31 | logrus.Warnf("initializing Google Chat with empty webhook url") 32 | return nil 33 | } 34 | 35 | logrus.Infof("initializing Google Chat with webhook url: %s", webhook) 36 | 37 | text, _ := config["text"].(string) 38 | 39 | return &GoogleChat{ 40 | webhook: webhook, 41 | text: text, 42 | appCfg: appCfg, 43 | } 44 | } 45 | 46 | // Name returns name of the provider 47 | func (r *GoogleChat) Name() string { 48 | return "Google Chat" 49 | } 50 | 51 | // SendEvent sends event to the provider 52 | func (r *GoogleChat) SendEvent(e *event.Event) error { 53 | formattedMsg := e.FormatText(r.appCfg.ClusterName, r.text) 54 | return r.sendAPI(r.buildRequestBody(formattedMsg)) 55 | } 56 | 57 | func (r *GoogleChat) sendAPI(reqBody string) error { 58 | client := &http.Client{} 59 | buffer := bytes.NewBuffer([]byte(reqBody)) 60 | request, err := http.NewRequest(http.MethodPost, r.webhook, buffer) 61 | if err != nil { 62 | return err 63 | } 64 | 65 | request.Header.Set("Content-Type", "application/json") 66 | 67 | response, err := client.Do(request) 68 | if err != nil { 69 | return err 70 | } 71 | defer response.Body.Close() 72 | 73 | if response.StatusCode != 200 { 74 | body, _ := io.ReadAll(response.Body) 75 | return fmt.Errorf( 76 | "call to google chat alert returned status code %d: %s", 77 | response.StatusCode, 78 | string(body)) 79 | } 80 | 81 | return nil 82 | } 83 | 84 | // SendMessage sends text message to the provider 85 | func (r *GoogleChat) SendMessage(msg string) error { 86 | return r.sendAPI(r.buildRequestBody(msg)) 87 | } 88 | 89 | func (r *GoogleChat) buildRequestBody(text string) string { 90 | msgPayload := &payload{ 91 | Text: text, 92 | } 93 | 94 | jsonBytes, _ := json.Marshal(msgPayload) 95 | return string(jsonBytes) 96 | } 97 | 
-------------------------------------------------------------------------------- /alertmanager/googlechat/googlechat_test.go: -------------------------------------------------------------------------------- 1 | package googlechat 2 | 3 | import ( 4 | "net/http" 5 | "net/http/httptest" 6 | "testing" 7 | 8 | "github.com/abahmed/kwatch/config" 9 | "github.com/abahmed/kwatch/event" 10 | "github.com/stretchr/testify/assert" 11 | ) 12 | 13 | func TestEmptyConfig(t *testing.T) { 14 | assert := assert.New(t) 15 | 16 | c := NewGoogleChat(map[string]interface{}{}, &config.App{ClusterName: "dev"}) 17 | assert.Nil(c) 18 | } 19 | 20 | func TestGoogleChat(t *testing.T) { 21 | assert := assert.New(t) 22 | 23 | configMap := map[string]interface{}{ 24 | "webhook": "testtest", 25 | } 26 | c := NewGoogleChat(configMap, &config.App{ClusterName: "dev"}) 27 | assert.NotNil(c) 28 | 29 | assert.Equal(c.Name(), "Google Chat") 30 | } 31 | 32 | func TestSendMessage(t *testing.T) { 33 | assert := assert.New(t) 34 | 35 | s := httptest.NewServer( 36 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 37 | w.Write([]byte(`{"isOk": true}`)) 38 | })) 39 | 40 | defer s.Close() 41 | 42 | configMap := map[string]interface{}{ 43 | "webhook": s.URL, 44 | } 45 | c := NewGoogleChat(configMap, &config.App{ClusterName: "dev"}) 46 | assert.NotNil(c) 47 | 48 | assert.Nil(c.SendMessage("test")) 49 | } 50 | 51 | func TestSendMessageError(t *testing.T) { 52 | assert := assert.New(t) 53 | 54 | s := httptest.NewServer( 55 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 56 | w.WriteHeader(http.StatusBadGateway) 57 | })) 58 | 59 | defer s.Close() 60 | 61 | configMap := map[string]interface{}{ 62 | "webhook": s.URL, 63 | } 64 | c := NewGoogleChat(configMap, &config.App{ClusterName: "dev"}) 65 | assert.NotNil(c) 66 | 67 | assert.NotNil(c.SendMessage("test")) 68 | } 69 | 70 | func TestSendEvent(t *testing.T) { 71 | assert := assert.New(t) 72 | 73 | s := httptest.NewServer( 74 | 
http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 75 | w.Write([]byte(`{"isOk": true}`)) 76 | })) 77 | 78 | defer s.Close() 79 | 80 | configMap := map[string]interface{}{ 81 | "webhook": s.URL, 82 | } 83 | c := NewGoogleChat(configMap, &config.App{ClusterName: "dev"}) 84 | assert.NotNil(c) 85 | 86 | ev := event.Event{ 87 | PodName: "test-pod", 88 | ContainerName: "test-container", 89 | Namespace: "default", 90 | Reason: "OOMKILLED", 91 | Logs: "test\ntestlogs", 92 | Events: "event1-event2-event3-event1-event2-event3-event1-event2-" + 93 | "event3\nevent5\nevent6-event8-event11-event12", 94 | } 95 | assert.Nil(c.SendEvent(&ev)) 96 | } 97 | 98 | func TestInvaildHttpRequest(t *testing.T) { 99 | assert := assert.New(t) 100 | 101 | configMap := map[string]interface{}{ 102 | "webhook": "h ttp://localhost", 103 | } 104 | c := NewGoogleChat(configMap, &config.App{ClusterName: "dev"}) 105 | assert.NotNil(c) 106 | 107 | assert.NotNil(c.SendMessage("test")) 108 | 109 | configMap = map[string]interface{}{ 110 | "webhook": "http://localhost:132323", 111 | } 112 | c = NewGoogleChat(configMap, &config.App{ClusterName: "dev"}) 113 | assert.NotNil(c) 114 | 115 | assert.NotNil(c.SendMessage("test")) 116 | } 117 | -------------------------------------------------------------------------------- /alertmanager/matrix/matrix.go: -------------------------------------------------------------------------------- 1 | package matrix 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "io" 7 | "net/http" 8 | "net/url" 9 | "regexp" 10 | 11 | "github.com/abahmed/kwatch/config" 12 | "github.com/abahmed/kwatch/event" 13 | "github.com/abahmed/kwatch/util" 14 | "github.com/sirupsen/logrus" 15 | ) 16 | 17 | type Matrix struct { 18 | homeServer string 19 | accessToken string 20 | internalRoomID string 21 | title string 22 | text string 23 | 24 | // reference for general app configuration 25 | appCfg *config.App 26 | } 27 | 28 | // NewMatrix returns new Matrix instance 29 | func NewMatrix(config 
map[string]interface{}, appCfg *config.App) *Matrix { 30 | homeServer, ok := config["homeServer"].(string) 31 | if !ok || len(homeServer) == 0 { 32 | logrus.Warnf("initializing slack with empty homeServer") 33 | return nil 34 | } 35 | 36 | accessToken, ok := config["accessToken"].(string) 37 | if !ok || len(accessToken) == 0 { 38 | logrus.Warnf("initializing slack with empty accessToken") 39 | return nil 40 | } 41 | 42 | internalRoomID, ok := config["internalRoomId"].(string) 43 | if !ok || len(internalRoomID) == 0 { 44 | logrus.Warnf("initializing slack with empty internalRoomId") 45 | return nil 46 | } 47 | 48 | title, _ := config["title"].(string) 49 | text, _ := config["text"].(string) 50 | 51 | return &Matrix{ 52 | homeServer: homeServer, 53 | accessToken: accessToken, 54 | internalRoomID: internalRoomID, 55 | title: title, 56 | text: text, 57 | appCfg: appCfg, 58 | } 59 | } 60 | 61 | func (m *Matrix) Name() string { 62 | return "Matrix" 63 | } 64 | 65 | func (m *Matrix) SendMessage(msg string) error { 66 | return m.sendAPI(msg) 67 | } 68 | 69 | func (m *Matrix) SendEvent(e *event.Event) error { 70 | return m.sendAPI(e.FormatHtml(m.appCfg.ClusterName, m.text)) 71 | } 72 | 73 | func (m *Matrix) sendAPI(formattedMsg string) error { 74 | plainMsg := stripHtmlRegex(formattedMsg) 75 | msg := fmt.Sprintf(`{ 76 | "msgtype": "m.text", 77 | "format": "org.matrix.custom.html", 78 | "body": "%s", 79 | "formatted_body": "%s" 80 | }`, 81 | util.JsonEscape(plainMsg), 82 | util.JsonEscape(formattedMsg), 83 | ) 84 | request, err := http.NewRequest( 85 | http.MethodPut, 86 | fmt.Sprintf( 87 | "%s/_matrix/client/v3/rooms/%s/send/m.room.message/%s"+ 88 | "?access_token=%s", 89 | m.homeServer, 90 | url.PathEscape(m.internalRoomID), 91 | util.RandomString(24), 92 | url.QueryEscape(m.accessToken), 93 | ), 94 | bytes.NewBuffer([]byte(msg)), 95 | ) 96 | if err != nil { 97 | return err 98 | } 99 | 100 | request.Header.Set("Content-Type", "application/json") 101 | client := 
// htmlTagPattern matches the shortest run of characters enclosed in angle
// brackets on a single line. It is compiled once instead of on every call.
var htmlTagPattern = regexp.MustCompile(`<.*?>`)

// stripHtmlRegex uses a regular expression to remove HTML tags from s and
// returns the remaining text. Text without a closing ">" is left untouched.
func stripHtmlRegex(s string) string {
	return htmlTagPattern.ReplaceAllString(s, "")
}
46 | func TestMatrix(t *testing.T) { 47 | assert := assert.New(t) 48 | 49 | configMap := map[string]interface{}{ 50 | "homeServer": "https://matrix-client.matrix.org", 51 | "accessToken": "testToken", 52 | "internalRoomId": "room1", 53 | } 54 | c := NewMatrix(configMap, &config.App{ClusterName: "dev"}) 55 | assert.NotNil(c) 56 | 57 | assert.Equal(c.Name(), "Matrix") 58 | } 59 | 60 | func TestSendMessage(t *testing.T) { 61 | assert := assert.New(t) 62 | 63 | s := httptest.NewServer( 64 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 65 | w.Write([]byte(`{"isOk": true}`)) 66 | })) 67 | 68 | defer s.Close() 69 | 70 | configMap := map[string]interface{}{ 71 | "homeServer": s.URL, 72 | "accessToken": "testToken", 73 | "internalRoomId": "room1", 74 | } 75 | c := NewMatrix(configMap, &config.App{ClusterName: "dev"}) 76 | assert.NotNil(c) 77 | 78 | assert.Nil(c.SendMessage("test")) 79 | } 80 | 81 | func TestSendMessageError(t *testing.T) { 82 | assert := assert.New(t) 83 | 84 | s := httptest.NewServer( 85 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 86 | w.WriteHeader(http.StatusBadGateway) 87 | })) 88 | 89 | defer s.Close() 90 | 91 | configMap := map[string]interface{}{ 92 | "homeServer": s.URL, 93 | "accessToken": "testToken", 94 | "internalRoomId": "room1", 95 | } 96 | c := NewMatrix(configMap, &config.App{ClusterName: "dev"}) 97 | assert.NotNil(c) 98 | 99 | assert.NotNil(c.SendMessage("test")) 100 | } 101 | 102 | func TestSendEvent(t *testing.T) { 103 | assert := assert.New(t) 104 | 105 | s := httptest.NewServer( 106 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 107 | w.Write([]byte(`{"isOk": true}`)) 108 | })) 109 | 110 | defer s.Close() 111 | 112 | configMap := map[string]interface{}{ 113 | "homeServer": s.URL, 114 | "accessToken": "testToken", 115 | "internalRoomId": "room1", 116 | } 117 | c := NewMatrix(configMap, &config.App{ClusterName: "dev"}) 118 | assert.NotNil(c) 119 | 120 | ev := event.Event{ 121 | 
PodName: "test-pod", 122 | ContainerName: "test-container", 123 | Namespace: "default", 124 | Reason: "OOMKILLED", 125 | Logs: "test\ntestlogs", 126 | Events: "event1-event2-event3-event1-event2-event3-event1-event2-" + 127 | "event3\nevent5\nevent6-event8-event11-event12", 128 | } 129 | assert.Nil(c.SendEvent(&ev)) 130 | } 131 | 132 | func TestInvaildHttpRequest(t *testing.T) { 133 | assert := assert.New(t) 134 | 135 | configMap := map[string]interface{}{ 136 | "homeServer": "h ttp://localhost", 137 | "accessToken": "testToken", 138 | "internalRoomId": "room1", 139 | } 140 | c := NewMatrix(configMap, &config.App{ClusterName: "dev"}) 141 | assert.NotNil(c) 142 | 143 | assert.NotNil(c.SendMessage("test")) 144 | 145 | configMap = map[string]interface{}{ 146 | "homeServer": "http://localhost:132323", 147 | "accessToken": "testToken", 148 | "internalRoomId": "room1", 149 | } 150 | c = NewMatrix(configMap, &config.App{ClusterName: "dev"}) 151 | assert.NotNil(c) 152 | 153 | assert.NotNil(c.SendMessage("test")) 154 | } 155 | -------------------------------------------------------------------------------- /alertmanager/mattermost/mattermost.go: -------------------------------------------------------------------------------- 1 | package mattermost 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "fmt" 7 | "io" 8 | 9 | "net/http" 10 | 11 | "github.com/abahmed/kwatch/config" 12 | "github.com/abahmed/kwatch/constant" 13 | "github.com/abahmed/kwatch/event" 14 | "github.com/sirupsen/logrus" 15 | ) 16 | 17 | type Mattermost struct { 18 | webhook string 19 | title string 20 | text string 21 | 22 | // reference for general app configuration 23 | appCfg *config.App 24 | } 25 | 26 | type mmField struct { 27 | Short bool `json:"short"` 28 | Title string `json:"title"` 29 | Value interface{} `json:"value"` 30 | } 31 | 32 | type mmAttachment struct { 33 | Title string `json:"title"` 34 | Text string `json:"text"` 35 | Fields []mmField `json:"fields"` 36 | } 37 | 38 | type mmPayload 
struct { 39 | Text string `json:"text"` 40 | Attachments []mmAttachment `json:"attachments"` 41 | } 42 | 43 | // NewMattermost returns new mattermost instance, or nil when no webhook is configured 44 | func NewMattermost(config map[string]interface{}, appCfg *config.App) *Mattermost { 45 | webhook, ok := config["webhook"].(string) 46 | if !ok || len(webhook) == 0 { 47 | logrus.Warnf("initializing mattermost with empty webhook url") 48 | return nil 49 | } 50 | // do not log the webhook url: anyone holding it can post to the channel 51 | logrus.Infof("initializing mattermost with the provided webhook url") 52 | 53 | title, _ := config["title"].(string) 54 | text, _ := config["text"].(string) 55 | 56 | return &Mattermost{ 57 | webhook: webhook, 58 | title: title, 59 | text: text, 60 | appCfg: appCfg, 61 | } 62 | } 63 | 64 | // Name returns name of the provider 65 | func (m *Mattermost) Name() string { 66 | return "Mattermost" 67 | } 68 | 69 | // SendMessage sends text message to the provider 70 | func (m *Mattermost) SendMessage(msg string) error { 71 | logrus.Debugf("sending to mattermost msg: %s", msg) 72 | 73 | return m.sendAPI(m.buildMessage(nil, &msg)) 74 | } 75 | 76 | // SendEvent sends event to the provider 77 | func (m *Mattermost) SendEvent(e *event.Event) error { 78 | logrus.Debugf("sending to mattermost event: %v", e) 79 | 80 | return m.sendAPI(m.buildMessage(e, nil)) 81 | } 82 | // sendAPI posts the JSON payload to the webhook; any status other than 200 is returned as an error 83 | func (m *Mattermost) sendAPI(content []byte) error { 84 | client := &http.Client{} 85 | buffer := bytes.NewBuffer(content) 86 | request, err := http.NewRequest(http.MethodPost, m.webhook, buffer) 87 | if err != nil { 88 | return err 89 | } 90 | 91 | request.Header.Set("Content-Type", "application/json") 92 | 93 | response, err := client.Do(request) 94 | if err != nil { 95 | return err 96 | } 97 | defer response.Body.Close() 98 | 99 | if response.StatusCode != 200 { 100 | body, _ := io.ReadAll(response.Body) 101 | return fmt.Errorf( 102 | "call to mattermost alert returned status code %d: %s", 103 | response.StatusCode, 104 | string(body)) 105 | } 106 | 107 | return nil 108 | } 109 |
110 | func (m *Mattermost) buildMessage(e *event.Event, msg *string) []byte { 111 | payload := mmPayload{} 112 | 113 | if msg != nil && len(*msg) > 0 { 114 | payload.Text = *msg 115 | } 116 | 117 | if e != nil { 118 | logs := constant.DefaultLogs 119 | if len(e.Logs) > 0 { 120 | logs = (e.Logs) 121 | } 122 | 123 | events := constant.DefaultEvents 124 | if len(e.Events) > 0 { 125 | events = (e.Events) 126 | } 127 | 128 | // use custom title if it's provided, otherwise use default 129 | title := m.title 130 | if len(title) == 0 { 131 | title = constant.DefaultTitle 132 | } 133 | 134 | // use custom text if it's provided, otherwise use default 135 | text := m.text 136 | if len(text) == 0 { 137 | text = constant.DefaultText 138 | } 139 | 140 | payload.Attachments = []mmAttachment{ 141 | { 142 | Title: title, 143 | Text: text, 144 | Fields: []mmField{ 145 | { 146 | Title: "Cluster", 147 | Value: m.appCfg.ClusterName, 148 | Short: true, 149 | }, 150 | { 151 | Title: "Name", 152 | Value: e.PodName, 153 | Short: true, 154 | }, 155 | { 156 | Title: "Container", 157 | Value: e.ContainerName, 158 | Short: true, 159 | }, 160 | { 161 | Title: "Namespace", 162 | Value: e.Namespace, 163 | Short: true, 164 | }, 165 | { 166 | Title: "Reason", 167 | Value: e.Reason, 168 | Short: true, 169 | }, 170 | { 171 | Title: ":mag: Events", 172 | Value: "```\n" + events + " \n```", 173 | Short: false, 174 | }, 175 | { 176 | Title: ":memo: Logs", 177 | Value: "```\n" + logs + "\n```", 178 | Short: false, 179 | }, 180 | }, 181 | }, 182 | } 183 | } 184 | 185 | str, _ := json.Marshal(payload) 186 | return str 187 | } 188 | -------------------------------------------------------------------------------- /alertmanager/mattermost/mattermost_test.go: -------------------------------------------------------------------------------- 1 | package mattermost 2 | 3 | import ( 4 | "net/http" 5 | "net/http/httptest" 6 | "testing" 7 | 8 | "github.com/abahmed/kwatch/config" 9 | "github.com/abahmed/kwatch/event" 
10 | "github.com/stretchr/testify/assert" 11 | ) 12 | 13 | func TestMattermostEmptyConfig(t *testing.T) { 14 | assert := assert.New(t) 15 | 16 | c := NewMattermost(map[string]interface{}{}, &config.App{ClusterName: "dev"}) 17 | assert.Nil(c) 18 | } 19 | 20 | func TestMattermost(t *testing.T) { 21 | assert := assert.New(t) 22 | 23 | configMap := map[string]interface{}{ 24 | "webhook": "testtest", 25 | } 26 | c := NewMattermost(configMap, &config.App{ClusterName: "dev"}) 27 | assert.NotNil(c) 28 | 29 | assert.Equal(c.Name(), "Mattermost") 30 | } 31 | 32 | func TestSendMessage(t *testing.T) { 33 | assert := assert.New(t) 34 | 35 | s := httptest.NewServer( 36 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 37 | w.Write([]byte(`{"isOk": true}`)) 38 | })) 39 | 40 | defer s.Close() 41 | 42 | configMap := map[string]interface{}{ 43 | "webhook": s.URL, 44 | } 45 | c := NewMattermost(configMap, &config.App{ClusterName: "dev"}) 46 | assert.NotNil(c) 47 | 48 | assert.Nil(c.SendMessage("test")) 49 | } 50 | 51 | func TestSendMessageError(t *testing.T) { 52 | assert := assert.New(t) 53 | 54 | s := httptest.NewServer( 55 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 56 | w.WriteHeader(http.StatusBadGateway) 57 | })) 58 | 59 | defer s.Close() 60 | 61 | configMap := map[string]interface{}{ 62 | "webhook": s.URL, 63 | } 64 | c := NewMattermost(configMap, &config.App{ClusterName: "dev"}) 65 | assert.NotNil(c) 66 | 67 | assert.NotNil(c.SendMessage("test")) 68 | } 69 | 70 | func TestSendEvent(t *testing.T) { 71 | assert := assert.New(t) 72 | 73 | s := httptest.NewServer( 74 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 75 | w.Write([]byte(`{"isOk": true}`)) 76 | })) 77 | 78 | defer s.Close() 79 | 80 | configMap := map[string]interface{}{ 81 | "webhook": s.URL, 82 | } 83 | c := NewMattermost(configMap, &config.App{ClusterName: "dev"}) 84 | assert.NotNil(c) 85 | 86 | ev := event.Event{ 87 | PodName: "test-pod", 88 | ContainerName: 
"test-container", 89 | Namespace: "default", 90 | Reason: "OOMKILLED", 91 | Logs: "test\ntestlogs", 92 | Events: "event1-event2-event3-event1-event2-event3-event1-event2-" + 93 | "event3\nevent5\nevent6-event8-event11-event12", 94 | } 95 | assert.Nil(c.SendEvent(&ev)) 96 | } 97 | 98 | func TestInvaildHttpRequest(t *testing.T) { 99 | assert := assert.New(t) 100 | 101 | configMap := map[string]interface{}{ 102 | "webhook": "h ttp://localhost", 103 | } 104 | c := NewMattermost(configMap, &config.App{ClusterName: "dev"}) 105 | assert.NotNil(c) 106 | 107 | assert.NotNil(c.SendMessage("test")) 108 | 109 | configMap = map[string]interface{}{ 110 | "webhook": "http://localhost:132323", 111 | } 112 | c = NewMattermost(configMap, &config.App{ClusterName: "dev"}) 113 | assert.NotNil(c) 114 | 115 | assert.NotNil(c.SendMessage("test")) 116 | } 117 | -------------------------------------------------------------------------------- /alertmanager/opsgenie/opsgenie.go: -------------------------------------------------------------------------------- 1 | package opsgenie 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "fmt" 7 | "io" 8 | "net/http" 9 | 10 | "github.com/abahmed/kwatch/config" 11 | "github.com/abahmed/kwatch/constant" 12 | "github.com/abahmed/kwatch/event" 13 | "github.com/sirupsen/logrus" 14 | ) 15 | 16 | const ( 17 | defaultOpsgenieTitle = "kwatch detected a crash in pod: %s" 18 | defaultOpsgenieText = "There is an issue with container (%s) in pod (%s)" 19 | opsgenieAPIURL = "https://api.opsgenie.com/v2/alerts" 20 | ) 21 | 22 | type Opsgenie struct { 23 | apikey string 24 | url string 25 | title string 26 | text string 27 | 28 | // reference for general app configuration 29 | appCfg *config.App 30 | } 31 | 32 | type ogPayload struct { 33 | Message string `json:"message"` 34 | Description string `json:"description"` 35 | Details interface{} `json:"details"` 36 | Priority string `json:"priority"` 37 | } 38 | 39 | // NewOpsgenie returns new opsgenie instance 40 | 
func NewOpsgenie(config map[string]interface{}, appCfg *config.App) *Opsgenie { 41 | apiKey, ok := config["apiKey"].(string) 42 | if !ok || len(apiKey) == 0 { 43 | logrus.Warnf("initializing opsgenie with empty api key") 44 | return nil 45 | } 46 | 47 | logrus.Infof("initializing opsgenie with secret apikey") 48 | 49 | title, _ := config["title"].(string) 50 | text, _ := config["text"].(string) 51 | 52 | return &Opsgenie{ 53 | apikey: apiKey, 54 | url: opsgenieAPIURL, 55 | title: title, 56 | text: text, 57 | appCfg: appCfg, 58 | } 59 | } 60 | 61 | // Name returns name of the provider 62 | func (m *Opsgenie) Name() string { 63 | return "Opsgenie" 64 | } 65 | 66 | // SendMessage is a no-op for this provider and always returns nil 67 | func (m *Opsgenie) SendMessage(msg string) error { 68 | return nil 69 | } 70 | 71 | // SendEvent sends event to the provider 72 | func (m *Opsgenie) SendEvent(e *event.Event) error { 73 | return m.sendAPI(m.buildMessage(e)) 74 | } 75 | 76 | // sendAPI sends http request to Opsgenie API; any status other than 202 is returned as an error 77 | func (m *Opsgenie) sendAPI(content []byte) error { 78 | client := &http.Client{} 79 | buffer := bytes.NewBuffer(content) 80 | request, err := http.NewRequest(http.MethodPost, m.url, buffer) 81 | if err != nil { 82 | return err 83 | } 84 | 85 | // set request headers 86 | request.Header.Set("Content-Type", "application/json") 87 | request.Header.Set("Authorization", "GenieKey "+m.apikey) 88 | 89 | response, err := client.Do(request) 90 | if err != nil { 91 | return err 92 | } 93 | defer response.Body.Close() 94 | 95 | if response.StatusCode != 202 { 96 | body, _ := io.ReadAll(response.Body) 97 | return fmt.Errorf( 98 | "call to opsgenie alert returned status code %d: %s", 99 | response.StatusCode, 100 | string(body)) 101 | } 102 | 103 | return nil 104 | } 105 | 106 | func (m *Opsgenie) buildMessage(e *event.Event) []byte { 107 | payload := ogPayload{ 108 | Priority: "P1", 109 | } 110 | 111 | logs := constant.DefaultLogs 112 | if len(e.Logs) > 0 { 113 | logs =
(e.Logs) 114 | } 115 | 116 | events := constant.DefaultEvents 117 | if len(e.Events) > 0 { 118 | events = (e.Events) 119 | } 120 | 121 | // use custom title if it's provided, otherwise use default 122 | title := m.title 123 | if len(title) == 0 { 124 | title = fmt.Sprintf(defaultOpsgenieTitle, e.PodName) 125 | } 126 | payload.Message = title 127 | 128 | // use custom text if it's provided, otherwise use default 129 | text := m.text 130 | if len(text) == 0 { 131 | text = fmt.Sprintf(defaultOpsgenieText, e.ContainerName, e.PodName) 132 | } 133 | 134 | payload.Description = text 135 | payload.Details = map[string]string{ 136 | "Cluster": m.appCfg.ClusterName, 137 | "Name": e.PodName, 138 | "Container": e.ContainerName, 139 | "Namespace": e.Namespace, 140 | "Reason": e.Reason, 141 | "Events": events, 142 | "Logs": logs, 143 | } 144 | 145 | str, _ := json.Marshal(payload) 146 | return str 147 | } 148 | -------------------------------------------------------------------------------- /alertmanager/opsgenie/opsgenie_test.go: -------------------------------------------------------------------------------- 1 | package opsgenie 2 | 3 | import ( 4 | "net/http" 5 | "net/http/httptest" 6 | "testing" 7 | 8 | "github.com/abahmed/kwatch/config" 9 | "github.com/abahmed/kwatch/event" 10 | "github.com/stretchr/testify/assert" 11 | ) 12 | 13 | func TestOpsgenieEmptyConfig(t *testing.T) { 14 | assert := assert.New(t) 15 | 16 | c := NewOpsgenie(map[string]interface{}{}, &config.App{ClusterName: "dev"}) 17 | assert.Nil(c) 18 | } 19 | 20 | func TestOpsgenie(t *testing.T) { 21 | assert := assert.New(t) 22 | 23 | configMap := map[string]interface{}{ 24 | "apiKey": "testtest", 25 | } 26 | c := NewOpsgenie(configMap, &config.App{ClusterName: "dev"}) 27 | assert.NotNil(c) 28 | 29 | assert.Equal(c.Name(), "Opsgenie") 30 | } 31 | 32 | func TestSendMessage(t *testing.T) { 33 | assert := assert.New(t) 34 | 35 | configMap := map[string]interface{}{ 36 | "apiKey": "test", 37 | } 38 | c := 
NewOpsgenie(configMap, &config.App{ClusterName: "dev"}) 39 | assert.NotNil(c) 40 | 41 | assert.Nil(c.SendMessage("test")) 42 | } 43 | 44 | func TestSendEvent(t *testing.T) { 45 | assert := assert.New(t) 46 | 47 | s := httptest.NewServer( 48 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 49 | w.WriteHeader(http.StatusAccepted) 50 | w.Write([]byte(`{"isOk": true}`)) 51 | })) 52 | 53 | defer s.Close() 54 | 55 | configMap := map[string]interface{}{ 56 | "apiKey": "test", 57 | } 58 | c := NewOpsgenie(configMap, &config.App{ClusterName: "dev"}) 59 | assert.NotNil(c) 60 | 61 | c.url = s.URL 62 | 63 | ev := event.Event{ 64 | PodName: "test-pod", 65 | ContainerName: "test-container", 66 | Namespace: "default", 67 | Reason: "OOMKILLED", 68 | Logs: "test\ntestlogs", 69 | Events: "event1-event2-event3-event1-event2-event3-event1-event2-" + 70 | "event3\nevent5\nevent6-event8-event11-event12", 71 | } 72 | assert.Nil(c.SendEvent(&ev)) 73 | } 74 | 75 | func TestSendEventError(t *testing.T) { 76 | assert := assert.New(t) 77 | 78 | s := httptest.NewServer( 79 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 80 | w.WriteHeader(http.StatusBadGateway) 81 | })) 82 | 83 | defer s.Close() 84 | 85 | configMap := map[string]interface{}{ 86 | "apiKey": "test", 87 | } 88 | c := NewOpsgenie(configMap, &config.App{ClusterName: "dev"}) 89 | assert.NotNil(c) 90 | 91 | c.url = s.URL 92 | 93 | ev := event.Event{ 94 | PodName: "test-pod", 95 | ContainerName: "test-container", 96 | Namespace: "default", 97 | Reason: "OOMKILLED", 98 | Logs: "test\ntestlogs", 99 | Events: "event1-event2-event3-event1-event2-event3-event1-event2-" + 100 | "event3\nevent5\nevent6-event8-event11-event12", 101 | } 102 | assert.NotNil(c.SendEvent(&ev)) 103 | } 104 | 105 | func TestInvaildHttpRequest(t *testing.T) { 106 | assert := assert.New(t) 107 | 108 | configMap := map[string]interface{}{ 109 | "apiKey": "test", 110 | } 111 | c := NewOpsgenie(configMap, &config.App{ClusterName: 
"dev"}) 112 | assert.NotNil(c) 113 | c.url = "h ttp://localhost" 114 | 115 | ev := event.Event{ 116 | PodName: "test-pod", 117 | ContainerName: "test-container", 118 | Namespace: "default", 119 | Reason: "OOMKILLED", 120 | Logs: "test\ntestlogs", 121 | Events: "event1-event2-event3-event1-event2-event3-event1-event2-" + 122 | "event3\nevent5\nevent6-event8-event11-event12", 123 | } 124 | assert.NotNil(c.SendEvent(&ev)) 125 | 126 | c = NewOpsgenie(configMap, &config.App{ClusterName: "dev"}) 127 | assert.NotNil(c) 128 | c.url = "http://localhost:132323" 129 | 130 | assert.NotNil(c.SendEvent(&ev)) 131 | } 132 | -------------------------------------------------------------------------------- /alertmanager/pagerduty/pagerduty.go: -------------------------------------------------------------------------------- 1 | package pagerduty 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "net/http" 7 | "strings" 8 | 9 | "github.com/abahmed/kwatch/config" 10 | "github.com/abahmed/kwatch/event" 11 | "github.com/abahmed/kwatch/util" 12 | "github.com/sirupsen/logrus" 13 | ) 14 | 15 | const ( 16 | pagerdutyAPIURL = "https://events.pagerduty.com/v2/enqueue" 17 | defaultEventTitle = "[%s] There is an issue with a container in a pod" 18 | ) 19 | 20 | type Pagerduty struct { 21 | integrationKey string 22 | url string 23 | 24 | // reference for general app configuration 25 | appCfg *config.App 26 | } 27 | 28 | // NewPagerDuty returns new PagerDuty instance 29 | func NewPagerDuty(config map[string]interface{}, appCfg *config.App) *Pagerduty { 30 | integrationKey, ok := config["integrationKey"].(string) 31 | if !ok || len(integrationKey) == 0 { 32 | logrus.Warnf("initializing pagerduty with an empty integration key") 33 | return nil 34 | } 35 | 36 | logrus.Infof("initializing pagerduty with the provided integration key") 37 | 38 | return &Pagerduty{ 39 | integrationKey: integrationKey, 40 | url: pagerdutyAPIURL, 41 | appCfg: appCfg, 42 | } 43 | } 44 | 45 | // Name returns name of the provider 46 
| func (s *Pagerduty) Name() string { 47 | return "PagerDuty" 48 | } 49 | 50 | // SendEvent posts the event to the configured PagerDuty url as a JSON trigger request 51 | func (s *Pagerduty) SendEvent(ev *event.Event) error { 52 | client := &http.Client{} 53 | 54 | reqBody := s.buildRequestBodyPagerDuty(ev, s.integrationKey) 55 | buffer := bytes.NewBuffer([]byte(reqBody)) 56 | 57 | request, err := http.NewRequest(http.MethodPost, s.url, buffer) 58 | if err != nil { 59 | return err 60 | } 61 | 62 | request.Header.Set("Content-Type", "application/json") 63 | 64 | response, err := client.Do(request) 65 | if err != nil { 66 | return err 67 | } 68 | defer response.Body.Close() 69 | // statuses above 202 (Accepted) are reported as failures 70 | if response.StatusCode > 202 { 71 | return fmt.Errorf( 72 | "call to pagerduty alert returned status code %d", 73 | response.StatusCode) 74 | } 75 | 76 | return nil 77 | } 78 | 79 | // SendMessage is a no-op for this provider and always returns nil 80 | func (s *Pagerduty) SendMessage(msg string) error { 81 | return nil 82 | } 83 | 84 | func (s *Pagerduty) buildRequestBodyPagerDuty( 85 | ev *event.Event, 86 | key string) string { 87 | eventsText := "No events captured" 88 | logsText := "No logs captured" 89 | 90 | // add events part if it exists 91 | events := strings.TrimSpace(ev.Events) 92 | if len(events) > 0 { 93 | eventsText = util.JsonEscape(ev.Events) 94 | } 95 | 96 | // add logs part if it exists 97 | logs := strings.TrimSpace(ev.Logs) 98 | if len(logs) > 0 { 99 | logsText = util.JsonEscape(ev.Logs) 100 | } 101 | 102 | reqBody := fmt.Sprintf(`{ 103 | "routing_key": "%s", 104 | "event_action": "trigger", 105 | "payload": { 106 | "summary": "%s", 107 | "source": "%s", 108 | "severity": "critical", 109 | "custom_details": { 110 | "Cluster": "%s", 111 | "Name": "%s", 112 | "Container": "%s", 113 | "Namespace": "%s", 114 | "Reason": "%s", 115 | "Events": "%s", 116 | "Logs": "%s" 117 | } 118 | } 119 | }`, 120 | key, 121 | fmt.Sprintf(defaultEventTitle, ev.ContainerName), 122 | ev.ContainerName, 123 | s.appCfg.ClusterName, 124 | ev.PodName, 125 |
ev.ContainerName, 126 | ev.Namespace, 127 | ev.Reason, 128 | eventsText, 129 | logsText) 130 | 131 | return reqBody 132 | } 133 | -------------------------------------------------------------------------------- /alertmanager/pagerduty/pagerduty_test.go: -------------------------------------------------------------------------------- 1 | package pagerduty 2 | 3 | import ( 4 | "net/http" 5 | "net/http/httptest" 6 | "testing" 7 | 8 | "github.com/abahmed/kwatch/config" 9 | "github.com/abahmed/kwatch/event" 10 | "github.com/stretchr/testify/assert" 11 | ) 12 | 13 | func TestPagerdutyEmptyConfig(t *testing.T) { 14 | assert := assert.New(t) 15 | 16 | c := NewPagerDuty(map[string]interface{}{}, &config.App{ClusterName: "dev"}) 17 | assert.Nil(c) 18 | } 19 | 20 | func TestPagerduty(t *testing.T) { 21 | assert := assert.New(t) 22 | 23 | configMap := map[string]interface{}{ 24 | "integrationKey": "testtest", 25 | } 26 | c := NewPagerDuty(configMap, &config.App{ClusterName: "dev"}) 27 | assert.NotNil(c) 28 | 29 | assert.Equal(c.Name(), "PagerDuty") 30 | } 31 | 32 | func TestSendMessage(t *testing.T) { 33 | assert := assert.New(t) 34 | 35 | configMap := map[string]interface{}{ 36 | "integrationKey": "test", 37 | } 38 | c := NewPagerDuty(configMap, &config.App{ClusterName: "dev"}) 39 | assert.NotNil(c) 40 | 41 | assert.Nil(c.SendMessage("test")) 42 | } 43 | 44 | func TestSendEvent(t *testing.T) { 45 | assert := assert.New(t) 46 | 47 | s := httptest.NewServer(http.HandlerFunc( 48 | func(w http.ResponseWriter, r *http.Request) { 49 | w.Write([]byte(`{"isOk": true}`)) 50 | })) 51 | 52 | defer s.Close() 53 | 54 | configMap := map[string]interface{}{ 55 | "integrationKey": "test", 56 | } 57 | c := NewPagerDuty(configMap, &config.App{ClusterName: "dev"}) 58 | assert.NotNil(c) 59 | c.url = s.URL 60 | 61 | ev := event.Event{ 62 | PodName: "test-pod", 63 | ContainerName: "test-container", 64 | Namespace: "default", 65 | Reason: "OOMKILLED", 66 | Logs: "test\ntestlogs", 67 | Events:
"event1-event2-event3-event1-event2-event3-event1-event2-" + 68 | "event3\nevent5\nevent6-event8-event11-event12", 69 | } 70 | assert.Nil(c.SendEvent(&ev)) 71 | } 72 | 73 | func TestSendEventError(t *testing.T) { 74 | assert := assert.New(t) 75 | 76 | s := httptest.NewServer(http.HandlerFunc( 77 | func(w http.ResponseWriter, r *http.Request) { 78 | w.WriteHeader(http.StatusBadGateway) 79 | })) 80 | 81 | defer s.Close() 82 | 83 | configMap := map[string]interface{}{ 84 | "integrationKey": "test", 85 | } 86 | c := NewPagerDuty(configMap, &config.App{ClusterName: "dev"}) 87 | assert.NotNil(c) 88 | c.url = s.URL 89 | 90 | ev := event.Event{ 91 | PodName: "test-pod", 92 | ContainerName: "test-container", 93 | Namespace: "default", 94 | Reason: "OOMKILLED", 95 | Logs: "test\ntestlogs", 96 | Events: "event1-event2-event3-event1-event2-event3-event1-event2-" + 97 | "event3\nevent5\nevent6-event8-event11-event12", 98 | } 99 | assert.NotNil(c.SendEvent(&ev)) 100 | } 101 | 102 | func TestInvaildHttpRequest(t *testing.T) { 103 | assert := assert.New(t) 104 | 105 | configMap := map[string]interface{}{ 106 | "integrationKey": "test", 107 | } 108 | c := NewPagerDuty(configMap, &config.App{ClusterName: "dev"}) 109 | assert.NotNil(c) 110 | c.url = "h ttp://localhost" 111 | 112 | ev := event.Event{ 113 | PodName: "test-pod", 114 | ContainerName: "test-container", 115 | Namespace: "default", 116 | Reason: "OOMKILLED", 117 | Logs: "test\ntestlogs", 118 | Events: "event1-event2-event3-event1-event2-event3-event1-event2-" + 119 | "event3\nevent5\nevent6-event8-event11-event12", 120 | } 121 | // a malformed url must surface as an error from SendEvent 122 | assert.NotNil(c.SendEvent(&ev)) 123 | 124 | c = NewPagerDuty(configMap, &config.App{ClusterName: "dev"}) 125 | assert.NotNil(c) 126 | c.url = "http://localhost:132323" 127 | 128 | assert.NotNil(c.SendEvent(&ev)) 129 | } 130 | -------------------------------------------------------------------------------- /alertmanager/rocketchat/rocketchat.go:
-------------------------------------------------------------------------------- 1 | package rocketchat 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "fmt" 7 | "io" 8 | "net/http" 9 | 10 | "github.com/abahmed/kwatch/config" 11 | "github.com/abahmed/kwatch/event" 12 | "github.com/sirupsen/logrus" 13 | ) 14 | 15 | type RocketChat struct { 16 | webhook string 17 | text string 18 | 19 | // reference for general app configuration 20 | appCfg *config.App 21 | } 22 | 23 | type rocketChatWebhookPayload struct { 24 | Text string `json:"text"` 25 | } 26 | 27 | // NewRocketChat returns new rocket chat instance, or nil when no webhook is configured 28 | func NewRocketChat(config map[string]interface{}, appCfg *config.App) *RocketChat { 29 | webhook, ok := config["webhook"].(string) 30 | if !ok || len(webhook) == 0 { 31 | logrus.Warnf("initializing Rocket Chat with empty webhook url") 32 | return nil 33 | } 34 | // do not log the webhook url: anyone holding it can post to the channel 35 | logrus.Infof("initializing Rocket Chat with the provided webhook url") 36 | 37 | text, _ := config["text"].(string) 38 | 39 | return &RocketChat{ 40 | webhook: webhook, 41 | text: text, 42 | appCfg: appCfg, 43 | } 44 | } 45 | 46 | // Name returns name of the provider 47 | func (r *RocketChat) Name() string { 48 | return "Rocket Chat" 49 | } 50 | 51 | // SendEvent sends event to the provider 52 | func (r *RocketChat) SendEvent(e *event.Event) error { 53 | formattedMsg := e.FormatMarkdown(r.appCfg.ClusterName, r.text, "") 54 | return r.sendByRocketChatApi(r.buildRequestBodyRocketChat(formattedMsg)) 55 | } 56 | 57 | func (r *RocketChat) sendByRocketChatApi(reqBody string) error { 58 | client := &http.Client{} 59 | buffer := bytes.NewBuffer([]byte(reqBody)) 60 | request, err := http.NewRequest(http.MethodPost, r.webhook, buffer) 61 | if err != nil { 62 | return err 63 | } 64 | 65 | request.Header.Set("Content-Type", "application/json") 66 | 67 | response, err := client.Do(request) 68 | if err != nil { 69 | return err 70 | } 71 | defer response.Body.Close() 72 | 73 | if response.StatusCode != 200 {
74 | body, _ := io.ReadAll(response.Body) 75 | return fmt.Errorf( 76 | "call to rocket chat alert returned status code %d: %s", 77 | response.StatusCode, 78 | string(body)) 79 | } 80 | 81 | return nil 82 | } 83 | 84 | // SendMessage sends text message to the provider 85 | func (r *RocketChat) SendMessage(msg string) error { 86 | return r.sendByRocketChatApi(r.buildRequestBodyRocketChat(msg)) 87 | } 88 | 89 | func (r *RocketChat) buildRequestBodyRocketChat(text string) string { 90 | msgPayload := &rocketChatWebhookPayload{ 91 | Text: text, 92 | } 93 | 94 | jsonBytes, _ := json.Marshal(msgPayload) 95 | return string(jsonBytes) 96 | } 97 | -------------------------------------------------------------------------------- /alertmanager/rocketchat/rocketchat_test.go: -------------------------------------------------------------------------------- 1 | package rocketchat 2 | 3 | import ( 4 | "net/http" 5 | "net/http/httptest" 6 | "testing" 7 | 8 | "github.com/abahmed/kwatch/config" 9 | "github.com/abahmed/kwatch/event" 10 | "github.com/stretchr/testify/assert" 11 | ) 12 | 13 | func TestEmptyConfig(t *testing.T) { 14 | assert := assert.New(t) 15 | 16 | c := NewRocketChat(map[string]interface{}{}, &config.App{ClusterName: "dev"}) 17 | assert.Nil(c) 18 | } 19 | 20 | func TestRocketChat(t *testing.T) { 21 | assert := assert.New(t) 22 | 23 | configMap := map[string]interface{}{ 24 | "webhook": "testtest", 25 | } 26 | c := NewRocketChat(configMap, &config.App{ClusterName: "dev"}) 27 | assert.NotNil(c) 28 | 29 | assert.Equal(c.Name(), "Rocket Chat") 30 | } 31 | 32 | func TestSendMessage(t *testing.T) { 33 | assert := assert.New(t) 34 | 35 | s := httptest.NewServer( 36 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 37 | w.Write([]byte(`{"isOk": true}`)) 38 | })) 39 | 40 | defer s.Close() 41 | 42 | configMap := map[string]interface{}{ 43 | "webhook": s.URL, 44 | } 45 | c := NewRocketChat(configMap, &config.App{ClusterName: "dev"}) 46 | assert.NotNil(c) 47 | 48 | 
assert.Nil(c.SendMessage("test")) 49 | } 50 | 51 | func TestSendMessageError(t *testing.T) { 52 | assert := assert.New(t) 53 | 54 | s := httptest.NewServer( 55 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 56 | w.WriteHeader(http.StatusBadGateway) 57 | })) 58 | 59 | defer s.Close() 60 | 61 | configMap := map[string]interface{}{ 62 | "webhook": s.URL, 63 | } 64 | c := NewRocketChat(configMap, &config.App{ClusterName: "dev"}) 65 | assert.NotNil(c) 66 | 67 | assert.NotNil(c.SendMessage("test")) 68 | } 69 | 70 | func TestSendEvent(t *testing.T) { 71 | assert := assert.New(t) 72 | 73 | s := httptest.NewServer( 74 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 75 | w.Write([]byte(`{"isOk": true}`)) 76 | })) 77 | 78 | defer s.Close() 79 | 80 | configMap := map[string]interface{}{ 81 | "webhook": s.URL, 82 | } 83 | c := NewRocketChat(configMap, &config.App{ClusterName: "dev"}) 84 | assert.NotNil(c) 85 | 86 | ev := event.Event{ 87 | PodName: "test-pod", 88 | ContainerName: "test-container", 89 | Namespace: "default", 90 | Reason: "OOMKILLED", 91 | Logs: "test\ntestlogs", 92 | Events: "event1-event2-event3-event1-event2-event3-event1-event2-" + 93 | "event3\nevent5\nevent6-event8-event11-event12", 94 | } 95 | assert.Nil(c.SendEvent(&ev)) 96 | } 97 | 98 | func TestInvaildHttpRequest(t *testing.T) { 99 | assert := assert.New(t) 100 | 101 | configMap := map[string]interface{}{ 102 | "webhook": "h ttp://localhost", 103 | } 104 | c := NewRocketChat(configMap, &config.App{ClusterName: "dev"}) 105 | assert.NotNil(c) 106 | 107 | assert.NotNil(c.SendMessage("test")) 108 | 109 | configMap = map[string]interface{}{ 110 | "webhook": "http://localhost:132323", 111 | } 112 | c = NewRocketChat(configMap, &config.App{ClusterName: "dev"}) 113 | assert.NotNil(c) 114 | 115 | assert.NotNil(c.SendMessage("test")) 116 | } 117 | -------------------------------------------------------------------------------- /alertmanager/slack/slack.go: 
-------------------------------------------------------------------------------- 1 | package slack 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | 7 | "github.com/abahmed/kwatch/config" 8 | "github.com/abahmed/kwatch/constant" 9 | "github.com/abahmed/kwatch/event" 10 | 11 | "github.com/sirupsen/logrus" 12 | slackClient "github.com/slack-go/slack" 13 | ) 14 | 15 | const ( 16 | chunkSize = 2000 17 | ) 18 | 19 | type Slack struct { 20 | webhook string 21 | title string 22 | text string 23 | 24 | // used by legacy webhook to send messages to specific channel, 25 | // instead of default one 26 | channel string 27 | 28 | // reference for general app configuration 29 | appCfg *config.App 30 | 31 | send func(url string, msg *slackClient.WebhookMessage) error 32 | } 33 | 34 | // NewSlack returns new Slack instance, or nil when no webhook is configured 35 | func NewSlack(config map[string]interface{}, appCfg *config.App) *Slack { 36 | webhook, ok := config["webhook"].(string) 37 | if !ok || len(webhook) == 0 { 38 | logrus.Warnf("initializing slack with empty webhook url") 39 | return nil 40 | } 41 | // do not log the webhook url: anyone holding it can post to the channel 42 | logrus.Infof("initializing slack with the provided webhook url") 43 | 44 | channel, _ := config["channel"].(string) 45 | title, _ := config["title"].(string) 46 | text, _ := config["text"].(string) 47 | 48 | return &Slack{ 49 | webhook: webhook, 50 | channel: channel, 51 | title: title, 52 | text: text, 53 | send: slackClient.PostWebhook, 54 | appCfg: appCfg, 55 | } 56 | } 57 | 58 | // Name returns name of the provider 59 | func (s *Slack) Name() string { 60 | return "Slack" 61 | } 62 | 63 | // SendEvent sends event to the provider 64 | func (s *Slack) SendEvent(ev *event.Event) error { 65 | logrus.Infof("sending to slack event: %v", ev) 66 | 67 | // use custom title if it's provided, otherwise use default 68 | title := s.title 69 | if len(title) == 0 { 70 | title = constant.DefaultTitle 71 | } 72 | 73 | // use custom text if it's provided, otherwise use default 74 | text := s.text 75 | if len(text) == 0 { 76 |
text = constant.DefaultText 77 | } 78 | 79 | blocks := []slackClient.Block{ 80 | markdownSection(title), 81 | plainSection(text), 82 | slackClient.SectionBlock{ 83 | Type: "section", 84 | Fields: []*slackClient.TextBlockObject{ 85 | markdownF("*Cluster*\n%s", s.appCfg.ClusterName), 86 | markdownF("*Name*\n%s", ev.PodName), 87 | markdownF("*Container*\n%s", ev.ContainerName), 88 | markdownF("*Namespace*\n%s", ev.Namespace), 89 | markdownF("*Reason*\n%s", ev.Reason), 90 | }, 91 | }, 92 | } 93 | 94 | // add events part if it exists 95 | events := strings.TrimSpace(ev.Events) 96 | if len(events) > 0 { 97 | blocks = append(blocks, 98 | markdownSection(":mag: *Events*")) 99 | 100 | for _, chunk := range chunks(events, chunkSize) { 101 | blocks = append(blocks, 102 | markdownSectionF("```%s```", chunk)) 103 | } 104 | } 105 | 106 | // add logs part if it exists 107 | logs := strings.TrimSpace(ev.Logs) 108 | if len(logs) > 0 { 109 | blocks = append(blocks, 110 | markdownSection(":memo: *Logs*")) 111 | 112 | for _, chunk := range chunks(logs, chunkSize) { 113 | blocks = append(blocks, 114 | markdownSectionF("```%s```", chunk)) 115 | } 116 | } 117 | 118 | // send message 119 | return s.sendAPI(&slackClient.WebhookMessage{ 120 | Blocks: &slackClient.Blocks{ 121 | BlockSet: append(blocks, markdownSection(constant.Footer)), 122 | }, 123 | }) 124 | } 125 | 126 | // SendMessage sends text message to the provider 127 | func (s *Slack) SendMessage(msg string) error { 128 | return s.sendAPI(&slackClient.WebhookMessage{ 129 | Text: msg, 130 | }) 131 | } 132 | 133 | func (s *Slack) sendAPI(msg *slackClient.WebhookMessage) error { 134 | if len(s.channel) > 0 { 135 | msg.Channel = s.channel 136 | } 137 | return s.send(s.webhook, msg) 138 | } 139 | 140 | func chunks(s string, chunkSize int) []string { 141 | if chunkSize >= len(s) { 142 | return []string{s} 143 | } 144 | var chunks []string = make([]string, 0, (len(s)-1)/chunkSize+1) 145 | currentLen := 0 146 | currentStart := 0 147 | for 
i := range s { 148 | if currentLen == chunkSize { 149 | chunks = append(chunks, s[currentStart:i]) 150 | currentLen = 0 151 | currentStart = i 152 | } 153 | currentLen++ 154 | } 155 | chunks = append(chunks, s[currentStart:]) 156 | return chunks 157 | } 158 | 159 | func plainSection(txt string) slackClient.SectionBlock { 160 | return slackClient.SectionBlock{ 161 | Type: "section", 162 | Text: slackClient.NewTextBlockObject( 163 | slackClient.PlainTextType, 164 | txt, 165 | true, 166 | false), 167 | } 168 | } 169 | 170 | func markdownSection(txt string) slackClient.SectionBlock { 171 | return slackClient.SectionBlock{ 172 | Type: "section", 173 | Text: slackClient.NewTextBlockObject( 174 | slackClient.MarkdownType, 175 | txt, 176 | false, 177 | true), 178 | } 179 | } 180 | 181 | func markdownF(format string, a ...interface{}) *slackClient.TextBlockObject { 182 | return slackClient.NewTextBlockObject( 183 | slackClient.MarkdownType, 184 | fmt.Sprintf(format, a...), 185 | false, 186 | true) 187 | } 188 | 189 | func markdownSectionF( 190 | format string, a ...interface{}) slackClient.SectionBlock { 191 | return slackClient.SectionBlock{ 192 | Type: "section", 193 | Text: markdownF(format, a...), 194 | } 195 | } 196 | -------------------------------------------------------------------------------- /alertmanager/slack/slack_test.go: -------------------------------------------------------------------------------- 1 | package slack 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/abahmed/kwatch/config" 7 | "github.com/abahmed/kwatch/event" 8 | slackClient "github.com/slack-go/slack" 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | func mockedSend(url string, msg *slackClient.WebhookMessage) error { 13 | return nil 14 | } 15 | func TestSlackEmptyConfig(t *testing.T) { 16 | assert := assert.New(t) 17 | 18 | s := NewSlack(map[string]interface{}{}, &config.App{ClusterName: "dev"}) 19 | assert.Nil(s) 20 | } 21 | 22 | func TestSlack(t *testing.T) { 23 | assert := 
assert.New(t) 24 | 25 | configMap := map[string]interface{}{ 26 | "webhook": "testtest", 27 | } 28 | s := NewSlack(configMap, &config.App{ClusterName: "dev"}) 29 | assert.NotNil(s) 30 | 31 | assert.Equal(s.Name(), "Slack") 32 | } 33 | 34 | func TestSendMessage(t *testing.T) { 35 | assert := assert.New(t) 36 | 37 | s := NewSlack(map[string]interface{}{ 38 | "webhook": "testtest", 39 | "channel": "test", 40 | }, &config.App{ClusterName: "dev"}) 41 | assert.NotNil(s) 42 | 43 | s.send = mockedSend 44 | assert.Nil(s.SendMessage("test")) 45 | } 46 | 47 | func TestSendEvent(t *testing.T) { 48 | assert := assert.New(t) 49 | 50 | s := NewSlack(map[string]interface{}{ 51 | "webhook": "testtest", 52 | }, &config.App{ClusterName: "dev"}) 53 | assert.NotNil(s) 54 | 55 | s.send = mockedSend 56 | 57 | ev := event.Event{ 58 | PodName: "test-pod", 59 | ContainerName: "test-container", 60 | Namespace: "default", 61 | Reason: "OOMKILLED", 62 | Logs: "Nam quis nulla. Integer malesuada. In in enim a arcu " + 63 | "imperdiet malesuada. Sed vel lectus. Donec odio urna, tempus " + 64 | "molestie, porttitor ut, iaculis quis, sem. Phasellus rhoncus.\n" + 65 | "Nam quis nulla. Integer malesuada. In in enim a arcu " + 66 | "imperdiet malesuada. Sed vel lectus. Donec odio urna, tempus " + 67 | "molestie, porttitor ut, iaculis quis, sem. Phasellus rhoncus.\n" + 68 | "Nam quis nulla. Integer malesuada. In in enim a arcu " + 69 | "imperdiet malesuada. Sed vel lectus. Donec odio urna, tempus " + 70 | "molestie, porttitor ut, iaculis quis, sem. Phasellus rhoncus.\n" + 71 | "Nam quis nulla. Integer malesuada. In in enim a arcu " + 72 | "imperdiet malesuada. Sed vel lectus. Donec odio urna, tempus " + 73 | "molestie, porttitor ut, iaculis quis, sem. Phasellus rhoncus.\n" + 74 | "Nam quis nulla. Integer malesuada. In in enim a arcu " + 75 | "imperdiet malesuada. Sed vel lectus. Donec odio urna, tempus " + 76 | "molestie, porttitor ut, iaculis quis, sem. Phasellus rhoncus.\n" + 77 | "Nam quis nulla. 
Integer malesuada. In in enim a arcu " + 78 | "imperdiet malesuada. Sed vel lectus. Donec odio urna, tempus " + 79 | "molestie, porttitor ut, iaculis quis, sem. Phasellus rhoncus.\n" + 80 | "Nam quis nulla. Integer malesuada. In in enim a arcu " + 81 | "imperdiet malesuada. Sed vel lectus. Donec odio urna, tempus " + 82 | "molestie, porttitor ut, iaculis quis, sem. Phasellus rhoncus.\n" + 83 | "Nam quis nulla. Integer malesuada. In in enim a arcu " + 84 | "imperdiet malesuada. Sed vel lectus. Donec odio urna, tempus " + 85 | "molestie, porttitor ut, iaculis quis, sem. Phasellus rhoncus.\n" + 86 | "Nam quis nulla. Integer malesuada. In in enim a arcu " + 87 | "imperdiet malesuada. Sed vel lectus. Donec odio urna, tempus " + 88 | "molestie, porttitor ut, iaculis quis, sem. Phasellus rhoncus.\n" + 89 | "Nam quis nulla. Integer malesuada. In in enim a arcu " + 90 | "imperdiet malesuada. Sed vel lectus. Donec odio urna, tempus " + 91 | "molestie, porttitor ut, iaculis quis, sem. Phasellus rhoncus.\n" + 92 | "Nam quis nulla. Integer malesuada. In in enim a arcu " + 93 | "imperdiet malesuada. Sed vel lectus. Donec odio urna, tempus " + 94 | "molestie, porttitor ut, iaculis quis, sem. Phasellus rhoncus.\n" + 95 | "Nam quis nulla. Integer malesuada. In in enim a arcu " + 96 | "imperdiet malesuada. Sed vel lectus. Donec odio urna, tempus " + 97 | "molestie, porttitor ut, iaculis quis, sem. 
Phasellus rhoncus.\n", 98 | Events: "BackOff Back-off restarting failed container\n" + 99 | "event3\nevent5\nevent6-event8-event11-event12", 100 | } 101 | assert.Nil(s.SendEvent(&ev)) 102 | } 103 | -------------------------------------------------------------------------------- /alertmanager/telegram/telegram.go: -------------------------------------------------------------------------------- 1 | package telegram 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "net/http" 7 | "strings" 8 | 9 | "github.com/abahmed/kwatch/config" 10 | "github.com/abahmed/kwatch/event" 11 | "github.com/sirupsen/logrus" 12 | ) 13 | 14 | const ( 15 | telegramAPIURL = "https://api.telegram.org/bot%s/sendMessage" 16 | ) 17 | 18 | type Telegram struct { 19 | token string 20 | chatId string 21 | url string 22 | 23 | // reference for general app configuration 24 | appCfg *config.App 25 | } 26 | 27 | // NewTelegram returns a new Telegram object 28 | func NewTelegram(config map[string]interface{}, appCfg *config.App) *Telegram { 29 | token, ok := config["token"].(string) 30 | if !ok || len(token) == 0 { 31 | logrus.Warnf("initializing telegram with empty token") 32 | return nil 33 | } 34 | 35 | chatId, ok := config["chatId"].(string) 36 | if !ok || len(chatId) == 0 { 37 | logrus.Warnf("initializing telegram with empty chat_id") 38 | return nil 39 | } 40 | 41 | logrus.Infof( 42 | "initializing telegram with token %s and chat_id %s", 43 | token, 44 | chatId) 45 | 46 | // returns a new telegram object 47 | return &Telegram{ 48 | token: token, 49 | chatId: chatId, 50 | url: telegramAPIURL, 51 | appCfg: appCfg, 52 | } 53 | } 54 | 55 | // Name returns name of the provider 56 | func (t *Telegram) Name() string { 57 | return "Telegram" 58 | } 59 | 60 | // SendEvent sends event to the provider 61 | func (t *Telegram) SendEvent(e *event.Event) error { 62 | logrus.Debugf("sending to telegram event: %v", e) 63 | 64 | reqBody := t.buildRequestBodyTelegram(e, t.chatId, "") 65 | return t.sendByTelegramApi(reqBody) 
66 | } 67 | 68 | // SendMessage sends text message to the provider 69 | func (t *Telegram) SendMessage(msg string) error { 70 | logrus.Debugf("sending to telegram msg: %s", msg) 71 | 72 | reqBody := t.buildRequestBodyTelegram(new(event.Event), t.chatId, msg) 73 | return t.sendByTelegramApi(reqBody) 74 | } 75 | 76 | func (t *Telegram) buildRequestBodyTelegram( 77 | e *event.Event, 78 | chatId string, 79 | customMsg string) string { 80 | eventsText := "No events captured" 81 | logsText := "No logs captured" 82 | 83 | // add events part if it exists 84 | events := strings.TrimSpace(e.Events) 85 | if len(events) > 0 { 86 | eventsText = e.Events 87 | } 88 | 89 | // add logs part if it exists 90 | logs := strings.TrimSpace(e.Logs) 91 | if len(logs) > 0 { 92 | logsText = e.Logs 93 | } 94 | 95 | // build text will be sent in the message 96 | txt := "" 97 | if len(customMsg) <= 0 { 98 | txt = fmt.Sprintf( 99 | "An alert for Cluster: *%s* Name: *%s* "+ 100 | "Container: *%s* "+ 101 | "Namespace: *%s* has been triggered:\\n—\\n "+ 102 | "Logs: *%s* \\n "+ 103 | "Events: *%s* ", 104 | t.appCfg.ClusterName, 105 | e.PodName, 106 | e.ContainerName, 107 | e.Namespace, 108 | logsText, 109 | eventsText, 110 | ) 111 | } else { 112 | txt = customMsg 113 | } 114 | 115 | // build the message to be sent 116 | msg := fmt.Sprintf( 117 | "⛑ Kwatch detected a crash in pod \\n%s ", 118 | txt, 119 | ) 120 | 121 | reqBody := fmt.Sprintf( 122 | `{"chat_id": "%s", "text": "%s", "parse_mode": "MARKDOWN"}`, 123 | chatId, 124 | msg, 125 | ) 126 | return reqBody 127 | } 128 | 129 | func (t *Telegram) sendByTelegramApi(reqBody string) error { 130 | client := &http.Client{} 131 | buffer := bytes.NewBuffer([]byte(reqBody)) 132 | url := fmt.Sprintf(t.url, t.token) 133 | 134 | request, err := http.NewRequest(http.MethodPost, url, buffer) 135 | if err != nil { 136 | return err 137 | } 138 | 139 | request.Header.Set("Content-Type", "application/json") 140 | 141 | response, err := client.Do(request) 142 | if 
err != nil { 143 | return err 144 | } 145 | defer response.Body.Close() 146 | 147 | if response.StatusCode > 202 { 148 | return fmt.Errorf( 149 | "call to telegram alert returned status code %d", 150 | response.StatusCode) 151 | } 152 | 153 | return nil 154 | } 155 | -------------------------------------------------------------------------------- /alertmanager/telegram/telegram_test.go: -------------------------------------------------------------------------------- 1 | package telegram 2 | 3 | import ( 4 | "net/http" 5 | "net/http/httptest" 6 | "testing" 7 | 8 | "github.com/abahmed/kwatch/config" 9 | "github.com/abahmed/kwatch/event" 10 | "github.com/stretchr/testify/assert" 11 | ) 12 | 13 | func TestEmptyConfig(t *testing.T) { 14 | assert := assert.New(t) 15 | 16 | c := NewTelegram(map[string]interface{}{}, &config.App{ClusterName: "dev"}) 17 | assert.Nil(c) 18 | } 19 | 20 | func TestTelegram(t *testing.T) { 21 | assert := assert.New(t) 22 | 23 | configMap := map[string]interface{}{ 24 | "token": "testtest", 25 | "chatId": "tessst", 26 | } 27 | c := NewTelegram(configMap, &config.App{ClusterName: "dev"}) 28 | assert.NotNil(c) 29 | 30 | assert.Equal(c.Name(), "Telegram") 31 | } 32 | 33 | func TestTelegramInvalidConfig(t *testing.T) { 34 | assert := assert.New(t) 35 | 36 | configMap := map[string]interface{}{ 37 | "token": "test", 38 | } 39 | c := NewTelegram(configMap, &config.App{ClusterName: "dev"}) 40 | assert.Nil(c) 41 | 42 | configMap = map[string]interface{}{ 43 | "chatId": "test", 44 | } 45 | c = NewTelegram(configMap, &config.App{ClusterName: "dev"}) 46 | assert.Nil(c) 47 | } 48 | 49 | func TestSendMessage(t *testing.T) { 50 | assert := assert.New(t) 51 | 52 | s := httptest.NewServer( 53 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 54 | w.Write([]byte(`{"isOk": true}`)) 55 | })) 56 | 57 | defer s.Close() 58 | 59 | configMap := map[string]interface{}{ 60 | "token": "test", 61 | "chatId": "test", 62 | } 63 | c := NewTelegram(configMap, 
&config.App{ClusterName: "dev"}) 64 | c.url = s.URL + "/%s" 65 | assert.NotNil(c) 66 | 67 | assert.Nil(c.SendMessage("test")) 68 | } 69 | 70 | func TestSendMessageError(t *testing.T) { 71 | assert := assert.New(t) 72 | 73 | s := httptest.NewServer( 74 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 75 | w.WriteHeader(http.StatusBadGateway) 76 | })) 77 | 78 | defer s.Close() 79 | 80 | configMap := map[string]interface{}{ 81 | "token": "test", 82 | "chatId": "test", 83 | } 84 | c := NewTelegram(configMap, &config.App{ClusterName: "dev"}) 85 | c.url = s.URL + "/%s" 86 | assert.NotNil(c) 87 | 88 | assert.NotNil(c.SendMessage("test")) 89 | } 90 | 91 | func TestSendEvent(t *testing.T) { 92 | assert := assert.New(t) 93 | 94 | s := httptest.NewServer( 95 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 96 | w.Write([]byte(`{"isOk": true}`)) 97 | })) 98 | 99 | defer s.Close() 100 | 101 | configMap := map[string]interface{}{ 102 | "token": "test", 103 | "chatId": "test", 104 | } 105 | c := NewTelegram(configMap, &config.App{ClusterName: "dev"}) 106 | c.url = s.URL + "/%s" 107 | assert.NotNil(c) 108 | 109 | ev := event.Event{ 110 | PodName: "test-pod", 111 | ContainerName: "test-container", 112 | Namespace: "default", 113 | Reason: "OOMKILLED", 114 | Logs: "test\ntestlogs", 115 | Events: "event1-event2-event3-event1-event2-event3-event1-event2-" + 116 | "event3\nevent5\nevent6-event8-event11-event12", 117 | } 118 | assert.Nil(c.SendEvent(&ev)) 119 | } 120 | 121 | func TestInvaildHttpRequest(t *testing.T) { 122 | assert := assert.New(t) 123 | 124 | configMap := map[string]interface{}{ 125 | "token": "test", 126 | "chatId": "test", 127 | } 128 | 129 | c := NewTelegram(configMap, &config.App{ClusterName: "dev"}) 130 | assert.NotNil(c) 131 | c.url = "h ttp://localhost/%s" 132 | 133 | assert.NotNil(c.SendMessage("test")) 134 | 135 | c = NewTelegram(configMap, &config.App{ClusterName: "dev"}) 136 | assert.NotNil(c) 137 | c.url = 
"http://localhost:132323/%s" 138 | 139 | assert.NotNil(c.SendMessage("test")) 140 | } 141 | -------------------------------------------------------------------------------- /alertmanager/webhook/webhook.go: -------------------------------------------------------------------------------- 1 | package webhook 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "fmt" 7 | "net/http" 8 | "strings" 9 | 10 | "github.com/abahmed/kwatch/config" 11 | "github.com/abahmed/kwatch/event" 12 | "github.com/abahmed/kwatch/util" 13 | 14 | "github.com/sirupsen/logrus" 15 | ) 16 | 17 | type KeyValue struct { 18 | Name string `json:"name"` 19 | Value string `json:"value"` 20 | } 21 | 22 | type Authentication struct { 23 | UserName string `json:"username"` 24 | Password string `json:"password"` 25 | } 26 | 27 | type Webhook struct { 28 | webhook string 29 | headers []KeyValue 30 | username string 31 | password string 32 | appCfg *config.App 33 | } 34 | 35 | func (w *Webhook) SendMessage(msg string) error { 36 | return nil 37 | } 38 | 39 | // NewSlack returns new Slack instance 40 | func NewWebhook(config map[string]interface{}, appCfg *config.App) *Webhook { 41 | url, ok := config["url"].(string) 42 | if !ok || len(url) == 0 { 43 | logrus.Warnf("initializing with empty webhook url") 44 | return nil 45 | } 46 | rawHeaders, ok := config["headers"] 47 | var headers []KeyValue 48 | if ok { 49 | headerArray := rawHeaders.([]interface{}) 50 | for _, header := range headerArray { 51 | headerJson, _ := json.Marshal(header) 52 | var k KeyValue 53 | json.Unmarshal(headerJson, &k) 54 | headers = append(headers, k) 55 | } 56 | } 57 | 58 | basicAuth, ok := config["basicAuth"] 59 | basicAuthJson, _ := json.Marshal(basicAuth) 60 | 61 | var a Authentication 62 | json.Unmarshal(basicAuthJson, &a) 63 | 64 | logrus.Infof("initializing with webhook url: %s "+ 65 | "with headers: %s and username: %s", url, headers, a.UserName) 66 | 67 | return &Webhook{ 68 | webhook: url, 69 | headers: headers, 70 | 
username: a.UserName, 71 | password: a.Password, 72 | appCfg: appCfg, 73 | } 74 | } 75 | 76 | // Name returns name of the provider 77 | func (w *Webhook) Name() string { 78 | return "Webhook" 79 | } 80 | 81 | // SendEvent sends event to the provider 82 | func (w *Webhook) SendEvent(ev *event.Event) error { 83 | client := &http.Client{} 84 | 85 | reqBody := w.buildRequestBody(ev) 86 | buffer := bytes.NewBuffer(reqBody) 87 | 88 | request, err := http.NewRequest(http.MethodPost, w.webhook, buffer) 89 | if err != nil { 90 | return err 91 | } 92 | 93 | for _, header := range w.headers { 94 | request.Header.Set(header.Name, header.Value) 95 | } 96 | if len(w.username) > 0 && len(w.password) > 0 { 97 | request.SetBasicAuth(w.username, w.password) 98 | } 99 | 100 | response, err := client.Do(request) 101 | if err != nil { 102 | return err 103 | } 104 | defer response.Body.Close() 105 | 106 | if response.StatusCode > 202 { 107 | return fmt.Errorf( 108 | "call to teams alert returned status code %d", 109 | response.StatusCode) 110 | } 111 | 112 | return nil 113 | } 114 | 115 | func (w *Webhook) buildRequestBody( 116 | ev *event.Event, 117 | ) []byte { 118 | eventsText := "No events captured" 119 | logsText := "No logs captured" 120 | 121 | // add events part if it exists 122 | events := strings.TrimSpace(ev.Events) 123 | if len(events) > 0 { 124 | eventsText = util.JsonEscape(ev.Events) 125 | } 126 | 127 | // add logs part if it exists 128 | logs := strings.TrimSpace(ev.Logs) 129 | if len(logs) > 0 { 130 | logsText = util.JsonEscape(ev.Logs) 131 | } 132 | 133 | postBody, _ := json.Marshal(map[string]interface{}{ 134 | "Cluster": w.appCfg.ClusterName, 135 | "Name": ev.PodName, 136 | "Container": ev.ContainerName, 137 | "Namespace": ev.Namespace, 138 | "Reason": ev.Reason, 139 | "Events": eventsText, 140 | "Logs": logsText, 141 | "Labels": ev.Labels, 142 | }) 143 | 144 | return postBody 145 | } 146 | 
-------------------------------------------------------------------------------- /alertmanager/webhook/webhook_test.go: -------------------------------------------------------------------------------- 1 | package webhook 2 | 3 | import ( 4 | "net/http" 5 | "net/http/httptest" 6 | "testing" 7 | 8 | "github.com/abahmed/kwatch/config" 9 | "github.com/abahmed/kwatch/event" 10 | "github.com/stretchr/testify/assert" 11 | ) 12 | 13 | func TestEmptyConfig(t *testing.T) { 14 | assert := assert.New(t) 15 | 16 | c := NewWebhook(map[string]interface{}{}, &config.App{ClusterName: "dev"}) 17 | assert.Nil(c) 18 | } 19 | 20 | func TestWebhook(t *testing.T) { 21 | assert := assert.New(t) 22 | 23 | configMap := map[string]interface{}{ 24 | "url": "testtest", 25 | "headers": []interface{}{ 26 | map[string]string{ 27 | "name": "test", 28 | "value": "test", 29 | }, 30 | }, 31 | } 32 | c := NewWebhook(configMap, &config.App{ClusterName: "dev"}) 33 | assert.NotNil(c) 34 | 35 | assert.Equal(c.Name(), "Webhook") 36 | } 37 | 38 | func TestSendMessage(t *testing.T) { 39 | assert := assert.New(t) 40 | 41 | s := httptest.NewServer( 42 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 43 | w.Write([]byte(`{"isOk": true}`)) 44 | })) 45 | 46 | defer s.Close() 47 | 48 | configMap := map[string]interface{}{ 49 | "url": s.URL, 50 | "headers": []interface{}{ 51 | map[string]string{ 52 | "name": "test", 53 | "value": "test", 54 | }, 55 | }, 56 | } 57 | c := NewWebhook(configMap, &config.App{ClusterName: "dev"}) 58 | assert.NotNil(c) 59 | 60 | assert.Nil(c.SendMessage("test")) 61 | } 62 | 63 | func TestSendMessageError(t *testing.T) { 64 | assert := assert.New(t) 65 | 66 | s := httptest.NewServer( 67 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 68 | w.WriteHeader(http.StatusBadGateway) 69 | })) 70 | 71 | defer s.Close() 72 | 73 | configMap := map[string]interface{}{ 74 | "url": s.URL, 75 | } 76 | c := NewWebhook(configMap, &config.App{ClusterName: "dev"}) 77 | 
assert.NotNil(c) 78 | 79 | assert.Nil(c.SendMessage("test")) 80 | } 81 | 82 | func TestSendEvent(t *testing.T) { 83 | assert := assert.New(t) 84 | 85 | s := httptest.NewServer( 86 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 87 | w.WriteHeader(http.StatusBadGateway) 88 | })) 89 | 90 | defer s.Close() 91 | 92 | configMap := map[string]interface{}{ 93 | "url": s.URL, 94 | } 95 | c := NewWebhook(configMap, &config.App{ClusterName: "dev"}) 96 | assert.NotNil(c) 97 | 98 | ev := event.Event{ 99 | PodName: "test-pod", 100 | ContainerName: "test-container", 101 | Namespace: "default", 102 | Reason: "OOMKILLED", 103 | Logs: "test\ntestlogs", 104 | Events: "event1-event2-event3-event1-event2-event3-event1-event2-" + 105 | "event3\nevent5\nevent6-event8-event11-event12", 106 | } 107 | assert.Error(c.SendEvent(&ev)) 108 | } 109 | 110 | func TestSendEventError(t *testing.T) { 111 | assert := assert.New(t) 112 | 113 | s := httptest.NewServer( 114 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 115 | w.Write([]byte(`{"isOk": true}`)) 116 | })) 117 | 118 | defer s.Close() 119 | 120 | configMap := map[string]interface{}{ 121 | "url": s.URL, 122 | "headers": []interface{}{ 123 | map[string]string{ 124 | "name": "test", 125 | "value": "test", 126 | }, 127 | }, 128 | "basicAuth": map[string]string{ 129 | "username": "test", 130 | "password": "test", 131 | }, 132 | } 133 | c := NewWebhook(configMap, &config.App{ClusterName: "dev"}) 134 | assert.NotNil(c) 135 | 136 | ev := event.Event{ 137 | PodName: "test-pod", 138 | ContainerName: "test-container", 139 | Namespace: "default", 140 | Reason: "OOMKILLED", 141 | Logs: "test\ntestlogs", 142 | Events: "event1-event2-event3-event1-event2-event3-event1-event2-" + 143 | "event3\nevent5\nevent6-event8-event11-event12", 144 | } 145 | assert.Nil(c.SendEvent(&ev)) 146 | } 147 | 148 | func TestInvaildHttpRequest(t *testing.T) { 149 | assert := assert.New(t) 150 | 151 | configMap := map[string]interface{}{ 152 
| "url": "h ttp://localhost", 153 | } 154 | c := NewWebhook(configMap, &config.App{ClusterName: "dev"}) 155 | assert.NotNil(c) 156 | 157 | ev := event.Event{ 158 | PodName: "test-pod", 159 | ContainerName: "test-container", 160 | Namespace: "default", 161 | Reason: "OOMKILLED", 162 | Logs: "test\ntestlogs", 163 | Events: "event1-event2-event3-event1-event2-event3-event1-event2-" + 164 | "event3\nevent5\nevent6-event8-event11-event12", 165 | } 166 | 167 | assert.NotNil(assert.NotNil(c.SendEvent(&ev))) 168 | 169 | c = NewWebhook(configMap, &config.App{ClusterName: "dev"}) 170 | assert.NotNil(c) 171 | c.webhook = "http://localhost:132323" 172 | 173 | assert.NotNil(assert.NotNil(c.SendEvent(&ev))) 174 | } 175 | -------------------------------------------------------------------------------- /alertmanager/zenduty/zenduty.go: -------------------------------------------------------------------------------- 1 | package zenduty 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "fmt" 7 | "io" 8 | "net/http" 9 | "slices" 10 | 11 | "github.com/abahmed/kwatch/config" 12 | "github.com/abahmed/kwatch/constant" 13 | "github.com/abahmed/kwatch/event" 14 | "github.com/sirupsen/logrus" 15 | ) 16 | 17 | const ( 18 | defaultZendutyTitle = "kwatch detected a crash in pod: %s" 19 | defaultZendutyText = "There is an issue with container (%s) in pod (%s)" 20 | zendutyAPIURL = "https://www.zenduty.com/api/events" 21 | ) 22 | 23 | var AlertTypes = []string{ 24 | "critical", 25 | "acknowledged", 26 | "resolved", 27 | "error", 28 | "warning", 29 | "info", 30 | } 31 | 32 | type Zenduty struct { 33 | integrationkey string 34 | url string 35 | alertType string 36 | 37 | // reference for general app configuration 38 | appCfg *config.App 39 | } 40 | 41 | type zendutyPayload struct { 42 | Message string `json:"message"` 43 | Summary string `json:"summary"` 44 | AlertType string `json:"alert_type"` 45 | } 46 | 47 | // NewZenduty returns new zenduty instance 48 | func NewZenduty(config 
map[string]interface{}, appCfg *config.App) *Zenduty { 49 | integrationKey, ok := config["integrationKey"].(string) 50 | if !ok || len(integrationKey) == 0 { 51 | logrus.Warnf("initializing zenduty with empty webhook url") 52 | return nil 53 | } 54 | 55 | logrus.Infof("initializing zenduty with secret apikey") 56 | 57 | // If alert type is not provided, or provided with invalid value 58 | // it will fallback to critical type 59 | alertType, ok := config["alertType"].(string) 60 | if !ok || !slices.Contains(AlertTypes, alertType) { 61 | alertType = "critical" 62 | } 63 | 64 | return &Zenduty{ 65 | integrationkey: integrationKey, 66 | url: zendutyAPIURL, 67 | alertType: alertType, 68 | appCfg: appCfg, 69 | } 70 | } 71 | 72 | // Name returns name of the provider 73 | func (m *Zenduty) Name() string { 74 | return "Zenduty" 75 | } 76 | 77 | // SendMessage sends text message to the provider 78 | func (m *Zenduty) SendMessage(msg string) error { 79 | return nil 80 | } 81 | 82 | // SendEvent sends event to the provider 83 | func (m *Zenduty) SendEvent(e *event.Event) error { 84 | return m.sendAPI(m.buildMessage(e)) 85 | } 86 | 87 | // sendAPI sends http request to Zenduty API 88 | func (m *Zenduty) sendAPI(content []byte) error { 89 | client := &http.Client{} 90 | buffer := bytes.NewBuffer(content) 91 | url := m.url + "/" + m.integrationkey + "/" 92 | request, err := http.NewRequest(http.MethodPost, url, buffer) 93 | if err != nil { 94 | return err 95 | } 96 | 97 | response, err := client.Do(request) 98 | if err != nil { 99 | return err 100 | } 101 | defer response.Body.Close() 102 | 103 | if response.StatusCode != 201 { 104 | body, _ := io.ReadAll(response.Body) 105 | return fmt.Errorf( 106 | "call to zenduty alert returned status code %d: %s", 107 | response.StatusCode, 108 | string(body)) 109 | } 110 | 111 | return nil 112 | } 113 | 114 | func (m *Zenduty) buildMessage(e *event.Event) []byte { 115 | payload := zendutyPayload{ 116 | AlertType: m.alertType, 117 | } 118 | 
119 | logs := constant.DefaultLogs 120 | if len(e.Logs) > 0 { 121 | logs = (e.Logs) 122 | } 123 | 124 | events := constant.DefaultEvents 125 | if len(e.Events) > 0 { 126 | events = (e.Events) 127 | } 128 | 129 | payload.Message = fmt.Sprintf(defaultZendutyTitle, e.PodName) 130 | payload.Summary = fmt.Sprintf( 131 | "An alert has been triggered for\n\n"+ 132 | "cluster: %s\n"+ 133 | "Pod Name: %s\n"+ 134 | "Container: %s\n"+ 135 | "Namespace: %s\n"+ 136 | "Reason: %s\n\n"+ 137 | "Events:\n%s\n\n"+ 138 | "Logs:\n%s\n\n", 139 | m.appCfg.ClusterName, 140 | e.PodName, 141 | e.ContainerName, 142 | e.Namespace, 143 | e.Reason, 144 | events, 145 | logs, 146 | ) 147 | 148 | str, _ := json.Marshal(payload) 149 | return str 150 | } 151 | -------------------------------------------------------------------------------- /alertmanager/zenduty/zenduty_test.go: -------------------------------------------------------------------------------- 1 | package zenduty 2 | 3 | import ( 4 | "net/http" 5 | "net/http/httptest" 6 | "testing" 7 | 8 | "github.com/abahmed/kwatch/config" 9 | "github.com/abahmed/kwatch/event" 10 | "github.com/stretchr/testify/assert" 11 | ) 12 | 13 | func TestZendutyEmptyConfig(t *testing.T) { 14 | assert := assert.New(t) 15 | 16 | c := NewZenduty(map[string]interface{}{}, &config.App{ClusterName: "dev"}) 17 | assert.Nil(c) 18 | } 19 | 20 | func TestZenduty(t *testing.T) { 21 | assert := assert.New(t) 22 | 23 | configMap := map[string]interface{}{ 24 | "integrationKey": "testtest", 25 | } 26 | c := NewZenduty(configMap, &config.App{ClusterName: "dev"}) 27 | assert.NotNil(c) 28 | 29 | assert.Equal(c.Name(), "Zenduty") 30 | } 31 | 32 | func TestSendMessage(t *testing.T) { 33 | assert := assert.New(t) 34 | 35 | configMap := map[string]interface{}{ 36 | "integrationKey": "test", 37 | } 38 | c := NewZenduty(configMap, &config.App{ClusterName: "dev"}) 39 | assert.NotNil(c) 40 | 41 | assert.Nil(c.SendMessage("test")) 42 | } 43 | 44 | func TestSendEvent(t *testing.T) { 45 | 
assert := assert.New(t) 46 | 47 | s := httptest.NewServer( 48 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 49 | w.WriteHeader(http.StatusCreated) 50 | w.Write([]byte(`{"isOk": true}`)) 51 | })) 52 | 53 | defer s.Close() 54 | 55 | configMap := map[string]interface{}{ 56 | "integrationKey": "test", 57 | } 58 | c := NewZenduty(configMap, &config.App{ClusterName: "dev"}) 59 | assert.NotNil(c) 60 | 61 | c.url = s.URL 62 | 63 | ev := event.Event{ 64 | PodName: "test-pod", 65 | ContainerName: "test-container", 66 | Namespace: "default", 67 | Reason: "OOMKILLED", 68 | Logs: "test\ntestlogs", 69 | Events: "event1-event2-event3-event1-event2-event3-event1-event2-" + 70 | "event3\nevent5\nevent6-event8-event11-event12", 71 | } 72 | assert.Nil(c.SendEvent(&ev)) 73 | } 74 | 75 | func TestSendEventError(t *testing.T) { 76 | assert := assert.New(t) 77 | 78 | s := httptest.NewServer( 79 | http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 80 | w.WriteHeader(http.StatusBadGateway) 81 | })) 82 | 83 | defer s.Close() 84 | 85 | configMap := map[string]interface{}{ 86 | "integrationKey": "test", 87 | } 88 | c := NewZenduty(configMap, &config.App{ClusterName: "dev"}) 89 | assert.NotNil(c) 90 | 91 | c.url = s.URL 92 | 93 | ev := event.Event{ 94 | PodName: "test-pod", 95 | ContainerName: "test-container", 96 | Namespace: "default", 97 | Reason: "OOMKILLED", 98 | Logs: "test\ntestlogs", 99 | Events: "event1-event2-event3-event1-event2-event3-event1-event2-" + 100 | "event3\nevent5\nevent6-event8-event11-event12", 101 | } 102 | assert.NotNil(c.SendEvent(&ev)) 103 | } 104 | 105 | func TestInvaildHttpRequest(t *testing.T) { 106 | assert := assert.New(t) 107 | 108 | configMap := map[string]interface{}{ 109 | "integrationKey": "test", 110 | } 111 | c := NewZenduty(configMap, &config.App{ClusterName: "dev"}) 112 | assert.NotNil(c) 113 | c.url = "h ttp://localhost" 114 | 115 | ev := event.Event{ 116 | PodName: "test-pod", 117 | ContainerName: "test-container", 
118 | Namespace: "default", 119 | Reason: "OOMKILLED", 120 | Logs: "test\ntestlogs", 121 | Events: "event1-event2-event3-event1-event2-event3-event1-event2-" + 122 | "event3\nevent5\nevent6-event8-event11-event12", 123 | } 124 | assert.NotNil(c.SendEvent(&ev)) 125 | 126 | c = NewZenduty(configMap, &config.App{ClusterName: "dev"}) 127 | assert.NotNil(c) 128 | c.url = "http://localhost:132323" 129 | 130 | assert.NotNil(c.SendEvent(&ev)) 131 | } 132 | -------------------------------------------------------------------------------- /assets/discord.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abahmed/kwatch/cb26da43468a319069243ef816243f5a50e025df/assets/discord.png -------------------------------------------------------------------------------- /assets/email.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abahmed/kwatch/cb26da43468a319069243ef816243f5a50e025df/assets/email.png -------------------------------------------------------------------------------- /assets/feishu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abahmed/kwatch/cb26da43468a319069243ef816243f5a50e025df/assets/feishu.png -------------------------------------------------------------------------------- /assets/googlechat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abahmed/kwatch/cb26da43468a319069243ef816243f5a50e025df/assets/googlechat.png -------------------------------------------------------------------------------- /assets/highlevelarchitecture.drawio: -------------------------------------------------------------------------------- 1 | 
7Vxbb9s6Ev4t+2CgXaCGrYsdPyZO3NNFuqdoFttz9o2WaIsJLWopKnb665dX3ajECmrLgvYARSpRvIic+YYzH0ceucvd4TMFSfSVhBCPnEl4GLm3I8eZThYO/0+UvKiSxcxXBVuKQl2pKHhAP6FpqUszFMK0UpERghlKqoUBiWMYsEoZoJTsq9U2BFdHTcAWWgUPAcB26Q8UskiXTieT4sFvEG0jPfSVrx+sQfC0pSSL9XgxiaF6sgOmG101jUBI9qUi927kLikhTF3tDkuIxbKaFVPtVq88zV+Zwpi1aQA+3U6uDnf//fPrf9iPn4flH9+f/vFJ9/IMcKaXQr8sezFrA0O+VPqWUBaRLYkBvitKb+T8oRhmyu+KOveEJLrwETL2ouUOMkZ4UcR2WD9NnyALIn0TZPQ578yepH7jlGQ0gG/MTE+EAbqF7I16nqonZlkaQC/hZ0h2kNEXXoFCDBh6rioM0Hq3zesVAuAXWgbvkIdjyeM6YIRaQuHKlIjLbIdVBffmGVKGuErfgzXE30iKGCIxr7ImjJFdqcI1RlvxgAnplMVAMoZRDJc5yCa8EMbZDlLAYJOsqpJ/VVhiZHh4c3n1U1eDRdsRT9/uC1CaGlEJjrPJmcThDhYeXkt4zHsFD8+Sxy3Zx5iAUIwWy79Jgl/UVrFBW7EZIC4FrkIR4mso9gYq1gfGIYrFY74HUJZaYuXVGXxIgFzEPd/1qiJpxlK3WJnOjoJl6nSJFn+waJm3RIvxZ84OF7J+FCbamWBh7VWzLzG/v+bqzxVT2n41Xcx1OOIOEaSNDpaeTRryf+rBmzKeleajdaIm3nv1RmWRAI2SgC88pA3w2aEwVNKHKfoJ1rI/sfskBMVMvrd/M/JveYmcMNcLPp8lwWLnu5VOVxV9lZ3r31ohZ0IhyA4FusIj2m7lSP57sfqm+tcBnLutel4V/68J2J8m4+l0vlBtW2uK7u6bWLFSFbLZpFxn66qUj9qkXfxWK9g70D+3XRdtip/2gEnbG8IEk5cdlC/IcctXBmepUIkBmN/pxe3v1WDtr7Grxw2w2yt/ZfGXRJx+ScS8d0kkCVUWE5Nt4R/yeTFo/MnKhvqrVopRCL+SZ+F9qmodR1gLZ+wfN1xel4ZrOlwawhik45Zr1i+c2EzE/51IzJT7IhKbjfgOZeRbj3XRhv+JCdOBrpgCpQ0s0jDsV2Pg2639snmJwYBl1tZ+LfoFluGSEa1FYnrsi0hmr9mvhHJ7IgkKbsIo2eUm7QMcb8diTTAInkbOcqSOYmiYsRd1yyCGWwp2Hwdq3Fz34sbNDuwHg6RFWyRd9QtJA47024rE7ZlzZof6ywgKozWJ4bNguIQZSGUUOYEHlLKBGizv4gbLsUP8oaAj5+6PGqx+RZOOHeCjOGUgZkgzK5OMITzeEt79Z8i+lRwCdaxHlafAW/HykTMDO6H98TpN8gU9O5TsA4Xx+RE2n1Zp5kaH4MrvEl82NaD9OCUqtEs4XJZ5TKocOhg/I0piTf4/A4rkcUglWNVC1i0SwKLxpUTrX0C0jYFst6K1KYYvUpripSAO+DAjYV41/NTfyta2g2kqkq3KjrwEOn8fcylhfhmxelKsMdGHlPIYcIMOtyCN9J0w5Kmy6eKWvzMDKBanlvI+pCT5l7G+ooAvPLgLkbktEsJ+V9k0uly86orgUL3rpHPdaiR536tberTv4iwz3kr06uH8xfHh3GmDF+DUhgOYQRpzid2IFUktjX7z3LKlkg+XGnBaUwP9yutxbGrg78V+AIE8PX6C4oWI2C5YBHOOYAeSD4YFUJxAiS3gS/yRmyvMBDS5Ez7biivRN6NImCdpd1JpdyynoqlZoJ36YkRexFeo2P4YeYKx2dc2CGNxjNQXf8UZFdkYGG7Y2e1Qngls3Jfp3AQMZUu0aDANxjqdXttsRsPEavsIct0S0dqasKiVnI2ILyVS9+wynE4XLUipeacxns2AGBFmSSjWi2Ofz1DMwvHEW3HT
WTglWcJlKuKIlb66oEsyH0/HnbsDjVF6t67mcHMjDDV1nMPqV/K5O1zipL1I+nWM5drECQg0YzJyr+1Tjw4yMH89UzJXtFOkSk78RcW0mW8oLpE42VKorxIoMdgpZ9Q6CeNe7kjnTAKMyT53QzaErlFX7kfXZLJ/cTLZHe4nIG7bb0DcfpHJrp1tISKzOoQKR11FeuUHOqJcv+TXTbHel02tmU6pyXMD90jGCH8rQLk0gV9zpTA0IFawHfclMrwal3eJTiLDukvqOy3jwvnZsD5cYshtSwy5/SKG3GZi6DpJVF6bhJ2EGZZHqhJcVUjlAL4Wde9ltSa0532aHbVFlytZ9b6709wG4Drj7jmd3iHXZnSk8dYkTqElakdQdtzSnRAGmK+LKEFx3fOKqHa6BDhUNqXOFX/MZGv55bfpQyQuFY+KkUwzrjVdZSw1qIx7fsLB4ox6oCT9yJqBB8T+KF3/KQKtsa/vbg867pI3L6OGs6JTbQltM228k6cR1iIu4xB4dfd/tqhRwGpSut0ZAjObk3oQfhwvWpKYUYKx5IZ/KJeORTCVbibd8ve7Pn6AMBW5jN/sZMcUmph+ZjdyxrlzW+2fWwFC4QogDMPPlFuNTYYfooyFZC+6UscldhBZ798V/f+z7DXX7aLuQ241VnOv3ry+fVZad2LxOt0M3Ul1M5z5DSefTgOzujiXnfOGS+MZU3TcZvXrqzPPpvFu4QZJjyGGewXQwr4AVitZyXBSHlIs8wgSqo9q+dJSlVTDK8mDD3lEmpBQVc6kjQp4lAtNU/BMJEIDojwXUR8kSDymYLNBwaXi0hpyLddlcYmzkiZE595NN4i2CcPBILrtx1ie3y9E27zgX4juK6IX/rzuWV4e08P9ZsxA9Tim+0U2eTb/Z9x/G8wfKmgubjikP4400dBTxDkXCP97ADibShwM4Nr+PpHXr++YPJu2098ry71Obn+ZiJNjwtAm/xWGOqzUOU7+EeEH8SmBSO1RXxR0xbMdx10XtJvntsDdrIF2Ox/ubNrtAeoMulGeMJ6a+/K3oNLQmgI+eIjSgNAw95pKj2TOp3wgcj4vxDQ0yNwTMn9Hvvm5f7PIu6B6PN7cgeRm7Wx+m+PfH+aHG25YG37x0htLnixiLFG02or/2yIWZetxILikFViDaCcYqpX5aanVGpO1GB8I5l7+J5XBvRffStY5K1+OUDUjJxpOCGAi+5B5h/WRZyedm9tAF87Hk3PNTRBL9nCnXUqdj0lLl0aQDZO9Ko+u/l6NndMtsPAZtDR9p2l491xr7bv2cIvzidZvQMnixKJ9JfzlmGnQq4Uim88zeMVbl0CaN73BaYX71hvYY3u/OLbYuoqxi43y7NvixN4Wj+5pDXmA1jb3RnB/9GdQvaaUMWfy/i2N3xY/2K1OnYofRHfv/gc= -------------------------------------------------------------------------------- /assets/highlevelarchitecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abahmed/kwatch/cb26da43468a319069243ef816243f5a50e025df/assets/highlevelarchitecture.png -------------------------------------------------------------------------------- /assets/logo.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/abahmed/kwatch/cb26da43468a319069243ef816243f5a50e025df/assets/logo.png -------------------------------------------------------------------------------- /assets/matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abahmed/kwatch/cb26da43468a319069243ef816243f5a50e025df/assets/matrix.png -------------------------------------------------------------------------------- /assets/mattermost.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abahmed/kwatch/cb26da43468a319069243ef816243f5a50e025df/assets/mattermost.png -------------------------------------------------------------------------------- /assets/opsgenie.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abahmed/kwatch/cb26da43468a319069243ef816243f5a50e025df/assets/opsgenie.png -------------------------------------------------------------------------------- /assets/pagerduty.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abahmed/kwatch/cb26da43468a319069243ef816243f5a50e025df/assets/pagerduty.png -------------------------------------------------------------------------------- /assets/rocketchat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abahmed/kwatch/cb26da43468a319069243ef816243f5a50e025df/assets/rocketchat.png -------------------------------------------------------------------------------- /assets/slack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abahmed/kwatch/cb26da43468a319069243ef816243f5a50e025df/assets/slack.png -------------------------------------------------------------------------------- /assets/teams.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/abahmed/kwatch/cb26da43468a319069243ef816243f5a50e025df/assets/teams.png -------------------------------------------------------------------------------- /assets/telegram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abahmed/kwatch/cb26da43468a319069243ef816243f5a50e025df/assets/telegram.png -------------------------------------------------------------------------------- /assets/users/justwatch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abahmed/kwatch/cb26da43468a319069243ef816243f5a50e025df/assets/users/justwatch.png -------------------------------------------------------------------------------- /assets/users/trella.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abahmed/kwatch/cb26da43468a319069243ef816243f5a50e025df/assets/users/trella.png -------------------------------------------------------------------------------- /assets/zenduty.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/abahmed/kwatch/cb26da43468a319069243ef816243f5a50e025df/assets/zenduty.png -------------------------------------------------------------------------------- /client/client.go: -------------------------------------------------------------------------------- 1 | package client 2 | 3 | import ( 4 | "net/http" 5 | "os" 6 | "path/filepath" 7 | 8 | "github.com/abahmed/kwatch/config" 9 | "github.com/sirupsen/logrus" 10 | "k8s.io/client-go/kubernetes" 11 | "k8s.io/client-go/rest" 12 | "k8s.io/client-go/tools/clientcmd" 13 | "k8s.io/client-go/util/homedir" 14 | ) 15 | 16 | // Create returns kubernetes client after initializing it with in-cluster, or 17 | // out of cluster config 18 | func 
Create(appConfig *config.App) kubernetes.Interface { 19 | // try to use in cluster config 20 | clientConfig, err := rest.InClusterConfig() 21 | if err != nil { 22 | logrus.Warnf("cannot get kubernetes in cluster config: %v", err) 23 | 24 | // try to use out of cluster config 25 | kubeconfigPath := os.Getenv("KUBECONFIG") 26 | if kubeconfigPath == "" { 27 | home := homedir.HomeDir() 28 | kubeconfigPath = filepath.Join(home, ".kube", "config") 29 | } 30 | 31 | clientConfig, err = clientcmd.BuildConfigFromFlags("", kubeconfigPath) 32 | if err != nil { 33 | logrus.Fatalf( 34 | "cannot build kubernetes out of cluster config: %v", 35 | err) 36 | } 37 | } 38 | 39 | // avoid using default app proxy if it's set 40 | if len(appConfig.ProxyURL) > 0 && clientConfig.Proxy == nil { 41 | clientConfig.Proxy = http.ProxyURL(nil) 42 | } 43 | 44 | // creates the clientset 45 | clientset, err := kubernetes.NewForConfig(clientConfig) 46 | if err != nil { 47 | logrus.Fatalf("cannot create kubernetes client: %v", err) 48 | } 49 | 50 | logrus.Debugf("created kubernetes client successfully") 51 | 52 | return clientset 53 | } 54 | -------------------------------------------------------------------------------- /config/config.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "regexp" 5 | ) 6 | 7 | type Config struct { 8 | // App general configuration 9 | App App `yaml:"app"` 10 | 11 | // Upgrader configuration 12 | Upgrader Upgrader `yaml:"upgrader"` 13 | 14 | // PvcMonitor configuration 15 | PvcMonitor PvcMonitor `yaml:"pvcMonitor"` 16 | 17 | // NodeMonitor configuration 18 | NodeMonitor NodeMonitor `yaml:"nodeMonitor"` 19 | 20 | // MaxRecentLogLines optional max tail log lines in messages, 21 | // if it's not provided it will get all log lines 22 | MaxRecentLogLines int64 `yaml:"maxRecentLogLines"` 23 | 24 | // IgnoreFailedGracefulShutdown if set to true, containers which are 25 | // forcefully killed during 
shutdown (as their graceful shutdown failed) 26 | // are not reported as error 27 | IgnoreFailedGracefulShutdown bool `yaml:"ignoreFailedGracefulShutdown"` 28 | 29 | // Namespaces is an optional list of namespaces that you want to watch or 30 | // forbid, if it's not provided it will watch all namespaces. 31 | // If you want to forbid a namespace, configure it with ! 32 | // You can either set forbidden namespaces or allowed, not both 33 | Namespaces []string `yaml:"namespaces"` 34 | 35 | // Reasons is an optional list of reasons that you want to watch or forbid, 36 | // if it's not provided it will watch all reasons. 37 | // If you want to forbid a reason, configure it with ! 38 | // You can either set forbidden reasons or allowed, not both 39 | Reasons []string `yaml:"reasons"` 40 | 41 | // IgnoreContainerNames optional list of container names to ignore 42 | IgnoreContainerNames []string `yaml:"ignoreContainerNames"` 43 | 44 | // IgnorePodNames optional list of pod name regexp patterns to ignore 45 | IgnorePodNames []string `yaml:"ignorePodNames"` 46 | 47 | // IgnoreLogPatterns optional list of regexp patterns to ignore 48 | IgnoreLogPatterns []string `yaml:"ignoreLogPatterns"` 49 | 50 | // Alert is a map contains a map of each provider configuration 51 | // e.g. 
{"slack": {"webhook": "URL"}} 52 | Alert map[string]map[string]interface{} `yaml:"alert"` 53 | 54 | // AllowedNamespaces, ForbiddenNamespaces are calculated internally 55 | // after populating Namespaces configuration 56 | AllowedNamespaces []string 57 | ForbiddenNamespaces []string 58 | 59 | // AllowedReasons, ForbiddenReasons are calculated internally after 60 | // populating Reasons configuration 61 | AllowedReasons []string 62 | ForbiddenReasons []string 63 | 64 | // Patterns are compiled from IgnorePodNames after populating 65 | // IgnorePodNames configuration 66 | IgnorePodNamePatterns []*regexp.Regexp 67 | 68 | // Patterns are compiled from IgnoreLogPatterns after populating 69 | // IgnoreLogPatterns configuration 70 | IgnoreLogPatternsCompiled []*regexp.Regexp 71 | } 72 | 73 | // App config struct 74 | type App struct { 75 | // ProxyURL to be used in outgoing http(s) requests except Kubernetes 76 | // requests to cluster 77 | ProxyURL string `yaml:"proxyURL"` 78 | 79 | // ClusterName to be used in notifications to indicate which cluster has 80 | // issue 81 | ClusterName string `yaml:"clusterName"` 82 | 83 | // DisableStartupMessage if set to true, welcome message will not be 84 | // sent to configured notification channels 85 | DisableStartupMessage bool `yaml:"disableStartupMessage"` 86 | 87 | // LogFormatter used for setting custom formatter when app prints logs 88 | LogFormatter string `yaml:"logFormatter"` 89 | } 90 | 91 | // Upgrader config struct 92 | type Upgrader struct { 93 | // DisableUpdateCheck if set to true, does not check for and 94 | // notify about kwatch updates 95 | DisableUpdateCheck bool `yaml:"disableUpdateCheck"` 96 | } 97 | 98 | // PvcMonitor config struct 99 | type PvcMonitor struct { 100 | // Enabled if set to true, it will check pvc usage periodically 101 | // By default, this value is true 102 | Enabled bool `yaml:"enabled"` 103 | 104 | // Interval is the frequency (in minutes) to check pvc usage in the cluster 105 | // By default, 
this value is 5 106 | Interval int `yaml:"interval"` 107 | 108 | // Threshold is the percentage of accepted pvc usage. If current usage 109 | // exceeds this value, it will send a notification. 110 | // By default, this value is 80 111 | Threshold float64 `yaml:"threshold"` 112 | } 113 | 114 | // NodeMonitor config struct 115 | type NodeMonitor struct { 116 | // Enabled if set to true, it will enable node watcher 117 | // By default, this value is true 118 | Enabled bool `yaml:"enabled"` 119 | } 120 | -------------------------------------------------------------------------------- /config/config_test.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | "gopkg.in/yaml.v3" 9 | ) 10 | 11 | func TestGetAllowForbidSlices(t *testing.T) { 12 | assert := assert.New(t) 13 | 14 | testCases := []map[string][]string{ 15 | { 16 | "input": {}, 17 | "allow": {}, 18 | "forbid": {}, 19 | }, 20 | { 21 | "input": {"hello", "!world"}, 22 | "allow": {"hello"}, 23 | "forbid": {"world"}, 24 | }, 25 | { 26 | "input": {"hello"}, 27 | "allow": {"hello"}, 28 | "forbid": {}, 29 | }, 30 | { 31 | "input": {"!hello"}, 32 | "allow": {}, 33 | "forbid": {"hello"}, 34 | }, 35 | } 36 | 37 | for _, tc := range testCases { 38 | actualAllow, actualForbid := getAllowForbidSlices(tc["input"]) 39 | assert.Equal(actualAllow, tc["allow"]) 40 | assert.Equal(actualForbid, tc["forbid"]) 41 | } 42 | } 43 | 44 | func TestEmptyConfig(t *testing.T) { 45 | assert := assert.New(t) 46 | 47 | os.Setenv("CONFIG_FILE", "config.yaml") 48 | defer os.Unsetenv("CONFIG_FILE") 49 | 50 | os.WriteFile("config.yaml", []byte{}, 0644) 51 | defer os.RemoveAll("config.yaml") 52 | 53 | cfg, _ := LoadConfig() 54 | assert.NotNil(cfg) 55 | } 56 | 57 | func TestConfigInvalidFile(t *testing.T) { 58 | assert := assert.New(t) 59 | cfg, err := LoadConfig() 60 | assert.Nil(cfg) 61 | assert.NotNil(err) 62 
| } 63 | 64 | func TestConfigFromFile(t *testing.T) { 65 | assert := assert.New(t) 66 | 67 | defer os.Unsetenv("CONFIG_FILE") 68 | defer os.RemoveAll("config.yaml") 69 | 70 | os.Setenv("CONFIG_FILE", "config.yaml") 71 | 72 | n := Config{ 73 | MaxRecentLogLines: 20, 74 | Namespaces: []string{"default", "!kwatch"}, 75 | Reasons: []string{"default", "!kwatch"}, 76 | IgnorePodNames: []string{"my-fancy-pod-[.*"}, 77 | IgnoreLogPatterns: []string{"leaderelection lost-[.*"}, 78 | App: App{ 79 | ProxyURL: "https://localhost", 80 | ClusterName: "development", 81 | }, 82 | } 83 | yamlData, _ := yaml.Marshal(&n) 84 | os.WriteFile("config.yaml", yamlData, 0644) 85 | 86 | cfg, _ := LoadConfig() 87 | assert.NotNil(cfg) 88 | 89 | assert.Equal(cfg.App.ClusterName, "development") 90 | assert.Equal(cfg.App.ProxyURL, "https://localhost") 91 | 92 | assert.Equal(cfg.MaxRecentLogLines, int64(20)) 93 | assert.Len(cfg.AllowedNamespaces, 1) 94 | assert.Len(cfg.AllowedReasons, 1) 95 | assert.Len(cfg.ForbiddenNamespaces, 1) 96 | assert.Len(cfg.ForbiddenReasons, 1) 97 | 98 | os.WriteFile("config.yaml", []byte("maxRecentLogLines: test"), 0644) 99 | _, err := LoadConfig() 100 | assert.NotNil(err) 101 | } 102 | 103 | func TestGetCompiledIgnorePatterns(t *testing.T) { 104 | assert := assert.New(t) 105 | 106 | validPatterns := []string{ 107 | "my-fancy-pod-[0-9]", 108 | "leaderelection lost", 109 | } 110 | 111 | compiledPatterns, err := getCompiledIgnorePatterns(validPatterns) 112 | 113 | assert.Nil(err) 114 | assert.True(compiledPatterns[0].MatchString("my-fancy-pod-8")) 115 | assert.True(compiledPatterns[1].MatchString(`controllermanager.go:272] "leaderelection lost"`)) 116 | 117 | invalidPatterns := []string{ 118 | "my-fancy-pod-[.*", 119 | } 120 | 121 | compiledPatterns, err = getCompiledIgnorePatterns(invalidPatterns) 122 | 123 | assert.NotNil(err) 124 | } 125 | -------------------------------------------------------------------------------- /config/defaultConfig.go: 
-------------------------------------------------------------------------------- 1 | package config 2 | 3 | func DefaultConfig() *Config { 4 | return &Config{ 5 | App: App{ 6 | LogFormatter: "text", 7 | }, 8 | IgnoreFailedGracefulShutdown: true, 9 | PvcMonitor: PvcMonitor{ 10 | Enabled: true, 11 | Interval: 5, 12 | Threshold: 80, 13 | }, 14 | NodeMonitor: NodeMonitor{ 15 | Enabled: true, 16 | }, 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /config/loadConfig.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "regexp" 7 | "strings" 8 | 9 | "github.com/sirupsen/logrus" 10 | "gopkg.in/yaml.v3" 11 | ) 12 | 13 | // LoadConfig loads yaml configuration from the file named by CONFIG_FILE 14 | // on top of the defaults; it fails if the file cannot be read or parsed 15 | func LoadConfig() (*Config, error) { 16 | // initialize configuration 17 | configFile := os.Getenv("CONFIG_FILE") 18 | 19 | config := DefaultConfig() 20 | yamlFile, err := os.ReadFile(configFile) 21 | if err != nil { 22 | logrus.Warnf("unable to load config file: %s", err.Error()) 23 | return nil, err 24 | } 25 | 26 | err = yaml.Unmarshal(yamlFile, config) 27 | if err != nil { 28 | logrus.Warnf("unable to parse config file: %s", err.Error()) 29 | return nil, err 30 | } 31 | 32 | // Parse namespace allow/forbid lists 33 | config.AllowedNamespaces, config.ForbiddenNamespaces = 34 | getAllowForbidSlices(config.Namespaces) 35 | if len(config.AllowedNamespaces) > 0 && 36 | len(config.ForbiddenNamespaces) > 0 { 37 | logrus.Error( 38 | "Either allowed or forbidden namespaces must be set. " + 39 | "Can't set both") 40 | } 41 | 42 | // Parse reason allow/forbid lists 43 | config.AllowedReasons, config.ForbiddenReasons = 44 | getAllowForbidSlices(config.Reasons) 45 | if len(config.AllowedReasons) > 0 && 46 | len(config.ForbiddenReasons) > 0 { 47 | logrus.Error("Either allowed or forbidden reasons must be set. 
" + 48 | "Can't set both") 49 | } 50 | 51 | // Prepare ignored pod name patterns 52 | config.IgnorePodNamePatterns, err = 53 | getCompiledIgnorePatterns(config.IgnorePodNames) 54 | if err != nil { 55 | logrus.Errorf("Failed to compile pod name pattern: %s", err.Error()) 56 | } 57 | 58 | // Prepare ignored log patterns 59 | config.IgnoreLogPatternsCompiled, err = 60 | getCompiledIgnorePatterns(config.IgnoreLogPatterns) 61 | if err != nil { 62 | logrus.Errorf("Failed to compile log pattern: %s", err.Error()) 63 | } 64 | 65 | // Parse proxy config 66 | if len(config.App.ProxyURL) > 0 { 67 | os.Setenv("HTTPS_PROXY", config.App.ProxyURL) 68 | } 69 | 70 | return config, nil 71 | } 72 | 73 | // getAllowForbidSlices splits items in two: entries starting with ! go to forbid (prefix removed), the rest to allow 74 | func getAllowForbidSlices(items []string) (allow []string, forbid []string) { 75 | allow = make([]string, 0) 76 | forbid = make([]string, 0) 77 | for _, item := range items { 78 | if clean := strings.TrimPrefix(item, "!"); item != clean { 79 | forbid = append(forbid, clean) 80 | continue 81 | } 82 | allow = append(allow, item) 83 | } 84 | return allow, forbid 85 | } 86 | 87 | func getCompiledIgnorePatterns(patterns []string) (compiledPatterns []*regexp.Regexp, err error) { 88 | compiledPatterns = make([]*regexp.Regexp, 0) 89 | 90 | for _, pattern := range patterns { 91 | compiledPattern, err := regexp.Compile(pattern) 92 | 93 | if err != nil { 94 | return nil, fmt.Errorf("failed to compile pattern '%s'", pattern) 95 | } 96 | 97 | compiledPatterns = append(compiledPatterns, compiledPattern) 98 | } 99 | 100 | return compiledPatterns, nil 101 | } 102 | -------------------------------------------------------------------------------- /constant/constant.go: -------------------------------------------------------------------------------- 1 | package constant 2 | 3 | // WelcomeMsg is sent to all providers when kwatch starts 4 | const WelcomeMsg = ":tada: kwatch@%s just started!" 
5 | 6 | // KwatchUpdateMsg is used to notify all registered providers when a newer 7 | // version is available 8 | const KwatchUpdateMsg = ":tada: A newer version " + 9 | " of Kwatch " + 10 | "is available! Please update to the latest version." 11 | 12 | const ( 13 | Footer = "" 14 | DefaultTitle = ":red_circle: kwatch detected a crash in pod" 15 | DefaultText = "There is an issue with container in a pod!" 16 | DefaultLogs = "No logs captured" 17 | DefaultEvents = "No events captured" 18 | ) 19 | -------------------------------------------------------------------------------- /deploy/chart/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /deploy/chart/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: kwatch 3 | version: "0.10.2" 4 | appVersion: "v0.10.2" 5 | description: monitor all changes in your Kubernetes(K8s) cluster, detects crashes 6 | in your running apps in realtime, and publishes notifications to your channels (Slack, 7 | Discord, etc.) 
instantly 8 | type: application 9 | home: https://kwatch.dev 10 | icon: https://kwatch.dev/img/kwatch-logo.png 11 | sources: 12 | - https://github.com/abahmed/kwatch 13 | keywords: 14 | - kwatch 15 | - kubernetes 16 | - monitoring 17 | - crash-reporting 18 | - event-notifier 19 | maintainers: 20 | - email: a.ahmed1026@gmail.com 21 | name: Abdelrahman Ahmed 22 | url: github.com/abahmed 23 | - email: yaserkalali.work@gmail.com 24 | name: yaser 25 | url: github.com/yaskinny 26 | 27 | -------------------------------------------------------------------------------- /deploy/chart/README.md: -------------------------------------------------------------------------------- 1 | # kwatch Helm Chart 2 | 3 | monitor all changes in your Kubernetes(K8s) cluster, detects crashes in your running apps in realtime, and publishes notifications to your channels (Slack, 4 | Discord, etc.) instantly 5 | 6 | ## Add Repository 7 | 8 | ```console 9 | helm repo add kwatch https://kwatch.dev/charts 10 | helm repo update 11 | ``` 12 | 13 | ## Install Chart 14 | 15 | ```console 16 | helm install [RELEASE_NAME] kwatch/kwatch --version 0.10.2 17 | ``` 18 | 19 | ## Uninstall Chart 20 | 21 | ```console 22 | helm delete --purge [RELEASE_NAME] 23 | ``` 24 | 25 | ## Configuration 26 | 27 | | Parameter | Description | Default | 28 | |-----------|-------------|---------| 29 | | `podAnnotations` | Pod annotations | {} | 30 | | `podLabels` | Pod labels | {} | 31 | | `securityContext.runAsNonRoot` | Container runs as a non-root user | true | 32 | | `securityContext.runAsUser` | Container processes' UID to run the entrypoint | 101 | 33 | | `securityContext.runAsGroup` | Container processes' GID to run the entrypoint | 101 | 34 | | `securityContext.readOnlyRootFilesystem` | Container's root filesystem is read-only | true | 35 | | `resources` | CPU/Memory resource requests/limits | {limits: memory: 128Mi cpu: 100m} | 36 | | `nodeSelector` | Node labels for pod assignment | {} | 37 | | `tolerations` | 
Tolerations for pod assignment | [] | 38 | | `affinity` | affinity for pod | {} | 39 | | `config` | [kwatch configuration](https://github.com/abahmed/kwatch#configuration) | {} | 40 | -------------------------------------------------------------------------------- /deploy/chart/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* 2 | Expand the name of the chart. 3 | */}} 4 | {{- define "kwatch.name" -}} 5 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} 6 | {{- end }} 7 | 8 | {{/* 9 | Create a default fully qualified app name. 10 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 11 | If release name contains chart name it will be used as a full name. 12 | */}} 13 | {{- define "kwatch.fullname" -}} 14 | {{- if .Values.fullnameOverride }} 15 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} 16 | {{- else }} 17 | {{- $name := default .Chart.Name .Values.nameOverride }} 18 | {{- if contains $name .Release.Name }} 19 | {{- .Release.Name | trunc 63 | trimSuffix "-" }} 20 | {{- else }} 21 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} 22 | {{- end }} 23 | {{- end }} 24 | {{- end }} 25 | 26 | {{/* 27 | Create chart name and version as used by the chart label. 28 | */}} 29 | {{- define "kwatch.chart" -}} 30 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} 31 | {{- end }} 32 | 33 | {{/* 34 | Common labels 35 | */}} 36 | {{- define "kwatch.labels" -}} 37 | helm.sh/chart: {{ include "kwatch.chart" . }} 38 | {{ include "kwatch.selectorLabels" . 
}} 39 | {{- if .Chart.AppVersion }} 40 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 41 | {{- end }} 42 | app.kubernetes.io/managed-by: {{ .Release.Service }} 43 | {{- end }} 44 | 45 | {{/* 46 | Selector labels 47 | */}} 48 | {{- define "kwatch.selectorLabels" -}} 49 | app.kubernetes.io/name: {{ include "kwatch.name" . }} 50 | app.kubernetes.io/instance: {{ .Release.Name }} 51 | {{- end }} 52 | 53 | {{/* 54 | Create the name of the service account to use 55 | */}} 56 | {{- define "kwatch.serviceAccountName" -}} 57 | {{- if .Values.serviceAccount.create }} 58 | {{- default (include "kwatch.fullname" .) .Values.serviceAccount.name }} 59 | {{- else }} 60 | {{- default "default" .Values.serviceAccount.name }} 61 | {{- end }} 62 | {{- end }} 63 | -------------------------------------------------------------------------------- /deploy/chart/templates/configmap.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: {{ .Release.Name }} 5 | namespace: {{ .Release.Namespace }} 6 | data: 7 | config.yaml: | 8 | {{- toYaml .Values.config | nindent 4 }} -------------------------------------------------------------------------------- /deploy/chart/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: {{ include "kwatch.fullname" . }} 5 | namespace: {{ .Release.Namespace }} 6 | labels: 7 | {{- include "kwatch.labels" . | nindent 4 }} 8 | spec: 9 | replicas: {{ default 1 }} 10 | selector: 11 | matchLabels: 12 | {{- include "kwatch.selectorLabels" . | nindent 6 }} 13 | template: 14 | metadata: 15 | annotations: 16 | checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} 17 | {{- with .Values.podAnnotations }} 18 | {{- toYaml . | nindent 8 }} 19 | {{- end }} 20 | labels: 21 | {{- include "kwatch.selectorLabels" . 
| nindent 8 }} 22 | {{- with .Values.podLabels }} 23 | {{- toYaml . | nindent 8 }} 24 | {{- end }} 25 | spec: 26 | restartPolicy: Always 27 | serviceAccountName: {{ .Release.Name }} 28 | containers: 29 | - name: {{ .Chart.Name }} 30 | securityContext: 31 | {{- toYaml .Values.securityContext | nindent 12 }} 32 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" 33 | imagePullPolicy: {{ .Values.image.pullPolicy }} 34 | volumeMounts: 35 | - name: config-volume 36 | mountPath: /config 37 | env: 38 | - name: CONFIG_FILE 39 | value: "/config/config.yaml" 40 | resources: 41 | {{- toYaml .Values.resources | nindent 12 }} 42 | {{- with .Values.nodeSelector }} 43 | nodeSelector: 44 | {{- toYaml . | nindent 8 }} 45 | {{- end }} 46 | {{- with .Values.affinity }} 47 | affinity: 48 | {{- toYaml . | nindent 8 }} 49 | {{- end }} 50 | {{- with .Values.tolerations }} 51 | tolerations: 52 | {{- toYaml . | nindent 8 }} 53 | {{- end }} 54 | volumes: 55 | - name: config-volume 56 | configMap: 57 | name: {{ .Release.Name }} 58 | -------------------------------------------------------------------------------- /deploy/chart/templates/rbac.yaml: -------------------------------------------------------------------------------- 1 | kind: ClusterRole 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | metadata: 4 | name: {{ .Release.Name }} 5 | rules: 6 | - apiGroups: [""] 7 | resources: ["events", "nodes", "nodes/proxy", "pods", "pods/log", "persistentvolumeclaims"] 8 | verbs: ["get", "watch", "list"] 9 | - apiGroups: ["apps"] 10 | resources: ["daemonsets", "statefulsets", "deployments", "replicasets"] 11 | verbs: ["get", "watch", "list"] 12 | --- 13 | apiVersion: rbac.authorization.k8s.io/v1 14 | kind: ClusterRoleBinding 15 | metadata: 16 | name: {{ .Release.Name }} 17 | roleRef: 18 | apiGroup: rbac.authorization.k8s.io 19 | kind: ClusterRole 20 | name: {{ .Release.Name }} 21 | subjects: 22 | - kind: ServiceAccount 23 | name: {{ .Release.Name }} 24 | 
namespace: {{ .Release.Namespace }} -------------------------------------------------------------------------------- /deploy/chart/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: {{ .Release.Name }} 5 | namespace: {{ .Release.Namespace }} 6 | annotations: 7 | checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} 8 | {{- with .Values.sacAnnotations }} 9 | {{- toYaml . | nindent 4 }} 10 | {{- end }} 11 | 12 | -------------------------------------------------------------------------------- /deploy/chart/values.schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "object", 3 | "$schema": "http://json-schema.org/draft-07/schema", 4 | "required": [ 5 | "config" 6 | ], 7 | "properties": { 8 | "image": { 9 | "type": "object", 10 | "required": [], 11 | "properties": { 12 | "repository": { 13 | "type": [ 14 | "string", 15 | "boolean", 16 | "number", 17 | "object", 18 | "array" 19 | ], 20 | "default": "ghcr.io/abahmed/kwatch" 21 | }, 22 | "pullPolicy": { 23 | "type": [ 24 | "string", 25 | "boolean", 26 | "number", 27 | "object", 28 | "array" 29 | ], 30 | "default": "Always" 31 | } 32 | } 33 | }, 34 | "securityContext": { 35 | "type": "object", 36 | "required": [], 37 | "properties": { 38 | "runAsUser": { 39 | "type": [ 40 | "string", 41 | "boolean", 42 | "number", 43 | "object", 44 | "array" 45 | ], 46 | "default": "101" 47 | }, 48 | "runAsGroup": { 49 | "type": [ 50 | "string", 51 | "boolean", 52 | "number", 53 | "object", 54 | "array" 55 | ], 56 | "default": "101" 57 | }, 58 | "runAsNonRoot": { 59 | "type": [ 60 | "string", 61 | "boolean", 62 | "number", 63 | "object", 64 | "array" 65 | ], 66 | "default": "true" 67 | }, 68 | "readOnlyRootFilesystem": { 69 | "type": [ 70 | "string", 71 | "boolean", 72 | "number", 73 | "object", 74 | "array" 75 | ], 76 
| "default": "true" 77 | } 78 | } 79 | }, 80 | "resources": { 81 | "type": "object", 82 | "required": [], 83 | "properties": { 84 | "limits": { 85 | "type": "object", 86 | "required": [], 87 | "properties": { 88 | "memory": { 89 | "type": [ 90 | "string", 91 | "boolean", 92 | "number", 93 | "object", 94 | "array" 95 | ], 96 | "default": "128Mi" 97 | }, 98 | "cpu": { 99 | "type": [ 100 | "string", 101 | "boolean", 102 | "number", 103 | "object", 104 | "array" 105 | ], 106 | "default": "100m" 107 | } 108 | } 109 | } 110 | } 111 | }, 112 | "nodeSelector": { 113 | "type": [ 114 | "string", 115 | "boolean", 116 | "number", 117 | "object", 118 | "array" 119 | ] 120 | }, 121 | "tolerations": { 122 | "type": [ 123 | "string", 124 | "boolean", 125 | "number", 126 | "object", 127 | "array" 128 | ] 129 | }, 130 | "affinity": { 131 | "type": [ 132 | "string", 133 | "boolean", 134 | "number", 135 | "object", 136 | "array" 137 | ] 138 | }, 139 | "config": { 140 | "type": [ 141 | "string", 142 | "boolean", 143 | "number", 144 | "object", 145 | "array" 146 | ], 147 | "description": "kwatch configuration" 148 | } 149 | } 150 | } -------------------------------------------------------------------------------- /deploy/chart/values.yaml: -------------------------------------------------------------------------------- 1 | # Default values for kwatch. 2 | # This is a YAML-formatted file. 3 | # Declare variables to be passed into your templates. 
4 | 5 | image: 6 | repository: ghcr.io/abahmed/kwatch 7 | pullPolicy: Always 8 | 9 | securityContext: 10 | runAsUser: 101 11 | runAsGroup: 101 12 | runAsNonRoot: true 13 | readOnlyRootFilesystem: true 14 | 15 | resources: 16 | limits: 17 | memory: 128Mi 18 | cpu: 100m 19 | 20 | nodeSelector: {} 21 | 22 | tolerations: [] 23 | 24 | affinity: {} 25 | 26 | podAnnotations: {} 27 | 28 | sacAnnotations: {} 29 | 30 | podLabels: {} 31 | 32 | # kwatch configuration 33 | config: {} 34 | 35 | 36 | -------------------------------------------------------------------------------- /deploy/config.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: kwatch 5 | --- 6 | apiVersion: v1 7 | kind: ConfigMap 8 | metadata: 9 | name: kwatch 10 | namespace: kwatch 11 | data: 12 | config.yaml: | 13 | maxRecentLogLines: 14 | ignoreFailedGracefulShutdown: 15 | alert: 16 | slack: 17 | webhook: 18 | pagerduty: 19 | integrationKey: 20 | discord: 21 | webhook: 22 | telegram: 23 | token: 24 | chatId: 25 | email: 26 | form: 27 | to: 28 | password: 29 | host: 30 | port: 31 | teams: 32 | webhook: 33 | rocketchat: 34 | webhook: 35 | mattermost: 36 | webhook: 37 | opsgenie: 38 | apiKey: 39 | namespaces: 40 | - 41 | reasons: 42 | - 43 | ignoreContainerNames: 44 | - 45 | 46 | -------------------------------------------------------------------------------- /deploy/deploy.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: kwatch 5 | --- 6 | kind: ClusterRole 7 | apiVersion: rbac.authorization.k8s.io/v1 8 | metadata: 9 | name: kwatch 10 | rules: 11 | - apiGroups: [""] 12 | resources: ["events", "nodes", "nodes/proxy", "pods", "pods/log", "persistentvolumeclaims"] 13 | verbs: ["get", "watch", "list"] 14 | - apiGroups: ["apps"] 15 | resources: ["daemonsets", "statefulsets", "deployments", "replicasets"] 16 | verbs: 
["get", "watch", "list"] 17 | --- 18 | apiVersion: v1 19 | kind: ServiceAccount 20 | metadata: 21 | name: kwatch 22 | namespace: kwatch 23 | --- 24 | apiVersion: rbac.authorization.k8s.io/v1 25 | kind: ClusterRoleBinding 26 | metadata: 27 | name: kwatch 28 | roleRef: 29 | apiGroup: rbac.authorization.k8s.io 30 | kind: ClusterRole 31 | name: kwatch 32 | subjects: 33 | - kind: ServiceAccount 34 | name: kwatch 35 | namespace: kwatch 36 | --- 37 | apiVersion: apps/v1 38 | kind: Deployment 39 | metadata: 40 | name: kwatch 41 | namespace: kwatch 42 | spec: 43 | selector: 44 | matchLabels: 45 | app: kwatch 46 | template: 47 | metadata: 48 | labels: 49 | app: kwatch 50 | spec: 51 | restartPolicy: Always 52 | serviceAccountName: kwatch 53 | containers: 54 | - name: kwatch 55 | image: ghcr.io/abahmed/kwatch:v0.10.2 56 | imagePullPolicy: Always 57 | volumeMounts: 58 | - name: config-volume 59 | mountPath: /config 60 | env: 61 | - name: CONFIG_FILE 62 | value: "/config/config.yaml" 63 | resources: 64 | limits: 65 | memory: "128Mi" 66 | cpu: "100m" 67 | volumes: 68 | - name: config-volume 69 | configMap: 70 | name: kwatch -------------------------------------------------------------------------------- /event/event.go: -------------------------------------------------------------------------------- 1 | package event 2 | 3 | // Event used to represent info needed by providers to send messages 4 | type Event struct { 5 | PodName string 6 | ContainerName string 7 | Namespace string 8 | Reason string 9 | Events string 10 | Logs string 11 | Labels map[string]string 12 | } 13 | -------------------------------------------------------------------------------- /event/format.go: -------------------------------------------------------------------------------- 1 | package event 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | 7 | "github.com/abahmed/kwatch/constant" 8 | ) 9 | 10 | func (e *Event) FormatMarkdown(clusterName, text, delimiter string) string { 11 | // add events part if it exists 
12 | eventsText := constant.DefaultEvents 13 | events := strings.TrimSpace(e.Events) 14 | if len(events) > 0 { 15 | eventsText = e.Events 16 | } 17 | 18 | // add logs part if it exist 19 | logsText := constant.DefaultLogs 20 | logs := strings.TrimSpace(e.Logs) 21 | if len(logs) > 0 { 22 | logsText = e.Logs 23 | } 24 | 25 | // use custom text if it's provided, otherwise use default 26 | if len(text) == 0 { 27 | text = constant.DefaultText 28 | } 29 | 30 | if len(delimiter) == 0 { 31 | delimiter = "\n" 32 | } 33 | 34 | msg := fmt.Sprintf( 35 | "%s"+delimiter+ 36 | "**Cluster:** %s"+delimiter+ 37 | "**Pod:** %s"+delimiter+ 38 | "**Container:** %s"+delimiter+ 39 | "**Namespace:** %s"+delimiter+ 40 | "**Reason:** %s"+delimiter+ 41 | "**Events:**\n```\n%s\n```"+delimiter+ 42 | "**Logs:**\n```\n%s\n```", 43 | text, 44 | clusterName, e.PodName, 45 | e.ContainerName, 46 | e.Namespace, 47 | e.Reason, 48 | eventsText, 49 | logsText, 50 | ) 51 | 52 | return msg 53 | } 54 | 55 | func (e *Event) FormatHtml(clusterName, text string) string { 56 | eventsText := constant.DefaultEvents 57 | logsText := constant.DefaultLogs 58 | 59 | // add events part if it exists 60 | events := strings.TrimSpace(e.Events) 61 | if len(events) > 0 { 62 | eventsText = e.Events 63 | } 64 | 65 | // add logs part if it exists 66 | logs := strings.TrimSpace(e.Logs) 67 | if len(logs) > 0 { 68 | logsText = e.Logs 69 | } 70 | 71 | // use custom text if it's provided, otherwise use default 72 | if len(text) == 0 { 73 | text = constant.DefaultText 74 | } 75 | 76 | msg := fmt.Sprintf( 77 | "%s
"+ 78 | "Cluster: %s
"+ 79 | "Pod: %s
"+ 80 | "Container: %s
"+ 81 | "Namespace: %s
"+ 82 | "Reason: %s
"+ 83 | "Events:
%s
"+ 84 | "Logs:
%s
", 85 | text, 86 | clusterName, 87 | e.PodName, 88 | e.ContainerName, 89 | e.Namespace, 90 | e.Reason, 91 | strings.ReplaceAll(eventsText, "\n", "
"), 92 | strings.ReplaceAll(logsText, "\n", "
"), 93 | ) 94 | 95 | return msg 96 | } 97 | 98 | func (e *Event) FormatText(clusterName, text string) string { 99 | eventsText := constant.DefaultEvents 100 | logsText := constant.DefaultLogs 101 | 102 | // add events part if it exists 103 | events := strings.TrimSpace(e.Events) 104 | if len(events) > 0 { 105 | eventsText = e.Events 106 | } 107 | 108 | // add logs part if it exists 109 | logs := strings.TrimSpace(e.Logs) 110 | if len(logs) > 0 { 111 | logsText = e.Logs 112 | } 113 | 114 | // use custom text if it's provided, otherwise use default 115 | if len(text) == 0 { 116 | text = constant.DefaultText 117 | } 118 | 119 | msg := fmt.Sprintf( 120 | "There is an issue with container in a pod!\n\n"+ 121 | "cluster: %s\n"+ 122 | "Pod Name: %s\n"+ 123 | "Container: %s\n"+ 124 | "Namespace: %s\n"+ 125 | "Reason: %s\n\n"+ 126 | "Events:\n%s\n\n"+ 127 | "Logs:\n%s\n\n", 128 | clusterName, 129 | e.PodName, 130 | e.ContainerName, 131 | e.Namespace, 132 | e.Reason, 133 | eventsText, 134 | logsText, 135 | ) 136 | 137 | return msg 138 | } 139 | -------------------------------------------------------------------------------- /filter/containerKillingFilter.go: -------------------------------------------------------------------------------- 1 | package filter 2 | 3 | import ( 4 | "strings" 5 | ) 6 | 7 | type ContainerKillingFilter struct{} 8 | 9 | func (f ContainerKillingFilter) Execute(ctx *Context) bool { 10 | if !ctx.Config.IgnoreFailedGracefulShutdown || ctx.Events == nil { 11 | return false 12 | } 13 | container := ctx.Container.Container 14 | 15 | isOk := false 16 | if container.State.Waiting != nil { 17 | return isOk 18 | } 19 | 20 | for _, ev := range *ctx.Events { 21 | // Graceful shutdown did not work and container was killed during 22 | // shutdown. 
Not really an error 23 | if ev.Reason == "Killing" && 24 | strings.Contains( 25 | ev.Message, 26 | "Stopping container "+container.Name) { 27 | isOk = true 28 | } 29 | } 30 | 31 | return isOk 32 | } 33 | -------------------------------------------------------------------------------- /filter/containerLogsFilter.go: -------------------------------------------------------------------------------- 1 | package filter 2 | 3 | import ( 4 | "github.com/abahmed/kwatch/util" 5 | "github.com/sirupsen/logrus" 6 | ) 7 | 8 | type ContainerLogsFilter struct{} 9 | 10 | func (f ContainerLogsFilter) Execute(ctx *Context) bool { 11 | container := ctx.Container.Container 12 | 13 | if container.RestartCount == 0 && container.State.Waiting != nil { 14 | return false 15 | } 16 | 17 | previousLogs := false 18 | if ctx.Container.HasRestarts && container.State.Running != nil { 19 | previousLogs = true 20 | } 21 | 22 | logs := util.GetPodContainerLogs( 23 | ctx.Client, 24 | ctx.Pod.Name, 25 | container.Name, 26 | ctx.Pod.Namespace, 27 | previousLogs, 28 | ctx.Config.MaxRecentLogLines) 29 | 30 | for _, pattern := range ctx.Config.IgnoreLogPatternsCompiled { 31 | if pattern.MatchString(logs) { 32 | logrus.Infof( 33 | "skipping container %s logs as it matches the ignore log pattern", 34 | container.Name) 35 | return true 36 | } 37 | } 38 | 39 | ctx.Container.Logs = logs 40 | return false 41 | } 42 | -------------------------------------------------------------------------------- /filter/containerNameFilter.go: -------------------------------------------------------------------------------- 1 | package filter 2 | 3 | import ( 4 | "github.com/sirupsen/logrus" 5 | "golang.org/x/exp/slices" 6 | ) 7 | 8 | type ContainerNameFilter struct{} 9 | 10 | func (f ContainerNameFilter) Execute(ctx *Context) bool { 11 | container := ctx.Container.Container 12 | if len(ctx.Config.IgnoreContainerNames) > 0 && 13 | slices.Contains(ctx.Config.IgnoreContainerNames, container.Name) { 14 | logrus.Infof( 15 | 
"skipping container %s as it is in the container ignore list", 16 | container.Name) 17 | return true 18 | } 19 | 20 | return false 21 | } 22 | -------------------------------------------------------------------------------- /filter/containerReasonsFilter.go: -------------------------------------------------------------------------------- 1 | package filter 2 | 3 | import ( 4 | "github.com/sirupsen/logrus" 5 | "golang.org/x/exp/slices" 6 | ) 7 | 8 | type ContainerReasonsFilter struct{} 9 | 10 | func (f ContainerReasonsFilter) Execute(ctx *Context) bool { 11 | container := ctx.Container.Container 12 | 13 | if container.State.Waiting != nil { 14 | ctx.Container.Reason = container.State.Waiting.Reason 15 | ctx.Container.Msg = container.State.Waiting.Message 16 | } else if container.State.Terminated != nil { 17 | ctx.Container.Reason = container.State.Terminated.Reason 18 | ctx.Container.Msg = container.State.Terminated.Message 19 | ctx.Container.ExitCode = container.State.Terminated.ExitCode 20 | ctx.Container.LastTerminatedOn = container.State.Terminated.StartedAt.Time 21 | } 22 | 23 | if (ctx.Container.Reason == "CrashLoopBackOff" || 24 | ctx.Container.HasRestarts) && 25 | container.LastTerminationState.Terminated != nil { 26 | ctx.Container.Reason = 27 | container.LastTerminationState.Terminated.Reason 28 | ctx.Container.Msg = 29 | container.LastTerminationState.Terminated.Message 30 | ctx.Container.ExitCode = 31 | container.LastTerminationState.Terminated.ExitCode 32 | ctx.Container.LastTerminatedOn = 33 | container.LastTerminationState.Terminated.StartedAt.Time 34 | } 35 | 36 | if len(ctx.Config.AllowedReasons) > 0 && 37 | !slices.Contains(ctx.Config.AllowedReasons, ctx.Container.Reason) { 38 | logrus.Infof( 39 | "skipping reason %s as it is not in the reason allow list", 40 | ctx.Container.Reason) 41 | return true 42 | } 43 | 44 | if len(ctx.Config.ForbiddenReasons) > 0 && 45 | slices.Contains(ctx.Config.ForbiddenReasons, ctx.Container.Reason) { 46 | 
logrus.Infof( 47 | "skipping reason %s as it is in the reason forbid list", 48 | ctx.Container.Reason) 49 | return true 50 | } 51 | 52 | lastState := ctx.Memory.GetPodContainer(ctx.Pod.Namespace, 53 | ctx.Pod.Name, 54 | container.Name) 55 | 56 | if lastState != nil { 57 | if lastState.LastTerminatedOn == ctx.Container.LastTerminatedOn { 58 | return true 59 | } 60 | 61 | if lastState.Reason == ctx.Container.Reason && 62 | lastState.Msg == ctx.Container.Msg && 63 | lastState.ExitCode == ctx.Container.ExitCode { 64 | return true 65 | } 66 | } 67 | 68 | return false 69 | } 70 | -------------------------------------------------------------------------------- /filter/containerRestartsFilter.go: -------------------------------------------------------------------------------- 1 | package filter 2 | 3 | type ContainerRestartsFilter struct{} 4 | 5 | func (f ContainerRestartsFilter) Execute(ctx *Context) bool { 6 | container := ctx.Container.Container 7 | 8 | lastState := ctx.Memory.GetPodContainer(ctx.Pod.Namespace, 9 | ctx.Pod.Name, 10 | container.Name) 11 | 12 | ctx.Container.HasRestarts = false 13 | if lastState == nil { 14 | return false 15 | } 16 | 17 | if container.RestartCount > lastState.RestartCount { 18 | ctx.Container.HasRestarts = true 19 | } 20 | 21 | return false 22 | } 23 | -------------------------------------------------------------------------------- /filter/containerStateFilter.go: -------------------------------------------------------------------------------- 1 | package filter 2 | 3 | type ContainerStateFilter struct{} 4 | 5 | func (f ContainerStateFilter) Execute(ctx *Context) bool { 6 | container := ctx.Container.Container 7 | 8 | if container.State.Running != nil { 9 | ctx.Container.Status = "running" 10 | } else if container.State.Waiting != nil { 11 | ctx.Container.Status = "waiting" 12 | } else if container.State.Terminated != nil { 13 | ctx.Container.Status = "terminated" 14 | } 15 | 16 | if !ctx.Container.HasRestarts && container.State.Running 
!= nil { 17 | return true 18 | } 19 | 20 | if container.State.Waiting != nil && 21 | (container.State.Waiting.Reason == "ContainerCreating" || 22 | container.State.Waiting.Reason == "PodInitializing") { 23 | return true 24 | } 25 | 26 | if container.State.Terminated != nil && 27 | (container.State.Terminated.Reason == "Completed" || 28 | // 143 is the exit code for graceful termination 29 | container.State.Terminated.ExitCode == 143 || 30 | // 0 is the exit code for purpose stop 31 | container.State.Terminated.ExitCode == 0) { 32 | return true 33 | } 34 | 35 | return false 36 | } 37 | -------------------------------------------------------------------------------- /filter/eventFilter.go: -------------------------------------------------------------------------------- 1 | package filter 2 | 3 | type EventFilter struct{} 4 | 5 | func (f EventFilter) Execute(ctx *Context) bool { 6 | if ctx.EvType == "DELETED" { 7 | ctx.Memory.DelPod(ctx.Pod.Namespace, ctx.Pod.Name) 8 | return true 9 | } 10 | 11 | return false 12 | } 13 | -------------------------------------------------------------------------------- /filter/filter.go: -------------------------------------------------------------------------------- 1 | package filter 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/abahmed/kwatch/config" 7 | "github.com/abahmed/kwatch/storage" 8 | corev1 "k8s.io/api/core/v1" 9 | apiv1 "k8s.io/apimachinery/pkg/apis/meta/v1" 10 | "k8s.io/client-go/kubernetes" 11 | ) 12 | 13 | type Filter interface { 14 | Execute(ctx *Context) (ShouldStop bool) 15 | } 16 | 17 | type FilterResult struct { 18 | ShouldStop bool 19 | } 20 | 21 | type Context struct { 22 | Client kubernetes.Interface 23 | Config *config.Config 24 | Memory storage.Storage 25 | 26 | Pod *corev1.Pod 27 | EvType string 28 | 29 | Owner *apiv1.OwnerReference 30 | Events *[]corev1.Event 31 | 32 | PodHasIssues bool 33 | ContainersHasIssues bool 34 | PodReason string 35 | PodMsg string 36 | 37 | // Container 38 | Container 
*ContainerContext 39 | } 40 | 41 | type ContainerContext struct { 42 | Container *corev1.ContainerStatus 43 | Reason string 44 | Msg string 45 | ExitCode int32 46 | Logs string 47 | HasRestarts bool 48 | LastTerminatedOn time.Time 49 | State string 50 | Status string 51 | } 52 | -------------------------------------------------------------------------------- /filter/namespaceFilter.go: -------------------------------------------------------------------------------- 1 | package filter 2 | 3 | import ( 4 | "github.com/sirupsen/logrus" 5 | "golang.org/x/exp/slices" 6 | ) 7 | 8 | type NamespaceFilter struct{} 9 | 10 | func (f NamespaceFilter) Execute(ctx *Context) bool { 11 | // filter by namespaces in config if specified 12 | if len(ctx.Config.AllowedNamespaces) > 0 && 13 | !slices.Contains(ctx.Config.AllowedNamespaces, ctx.Pod.Namespace) { 14 | logrus.Infof( 15 | "skipping namespace %s as it is not in the namespace allow list", 16 | ctx.Pod.Namespace) 17 | return true 18 | } 19 | 20 | if len(ctx.Config.ForbiddenNamespaces) > 0 && 21 | slices.Contains(ctx.Config.ForbiddenNamespaces, ctx.Pod.Namespace) { 22 | logrus.Infof( 23 | "skipping namespace %s as it is in the namespace forbid list", 24 | ctx.Pod.Namespace) 25 | return true 26 | } 27 | 28 | return false 29 | } 30 | -------------------------------------------------------------------------------- /filter/podEventsFilter.go: -------------------------------------------------------------------------------- 1 | package filter 2 | 3 | import ( 4 | "strings" 5 | 6 | corev1 "k8s.io/api/core/v1" 7 | ) 8 | 9 | type PodEventsFilter struct{} 10 | 11 | func (f PodEventsFilter) Execute(ctx *Context) bool { 12 | if !ctx.PodHasIssues { 13 | return false 14 | } 15 | 16 | if ctx.Events == nil { 17 | return false 18 | } 19 | 20 | for _, ev := range *ctx.Events { 21 | if ev.Type == corev1.EventTypeWarning { 22 | if strings.Contains(ev.Message, "deleting pod") { 23 | ctx.PodHasIssues = false 24 | ctx.ContainersHasIssues = false 25 | 
return true 26 | } 27 | } 28 | } 29 | return false 30 | } 31 | -------------------------------------------------------------------------------- /filter/podNameFilter.go: -------------------------------------------------------------------------------- 1 | package filter 2 | 3 | import ( 4 | "github.com/sirupsen/logrus" 5 | ) 6 | 7 | type PodNameFilter struct{} 8 | 9 | func (f PodNameFilter) Execute(ctx *Context) bool { 10 | for _, pattern := range ctx.Config.IgnorePodNamePatterns { 11 | if pattern.MatchString(ctx.Pod.Name) { 12 | logrus.Infof( 13 | "skipping pod %s as it is in the ignore pod name list", 14 | ctx.Pod.Name) 15 | return true 16 | } 17 | } 18 | 19 | return false 20 | } 21 | -------------------------------------------------------------------------------- /filter/podOwnersFilter.go: -------------------------------------------------------------------------------- 1 | package filter 2 | 3 | import ( 4 | "context" 5 | 6 | apiv1 "k8s.io/apimachinery/pkg/apis/meta/v1" 7 | ) 8 | 9 | type PodOwnersFilter struct{} 10 | 11 | func (f PodOwnersFilter) Execute(ctx *Context) bool { 12 | if ctx.Owner != nil { 13 | return false 14 | } 15 | 16 | if len(ctx.Pod.OwnerReferences) == 0 { 17 | return false 18 | } 19 | 20 | owner := ctx.Pod.OwnerReferences[0] 21 | if owner.Kind == "ReplicaSet" { 22 | rs, _ := 23 | ctx.Client.AppsV1().ReplicaSets(ctx.Pod.Namespace).Get( 24 | context.TODO(), 25 | owner.Name, 26 | apiv1.GetOptions{}) 27 | 28 | if rs != nil && len(rs.ObjectMeta.OwnerReferences) > 0 { 29 | owner = rs.ObjectMeta.OwnerReferences[0] 30 | } 31 | } else if owner.Kind == "DaemonSet" { 32 | ds, _ := 33 | ctx.Client.AppsV1().DaemonSets(ctx.Pod.Namespace).Get( 34 | context.TODO(), 35 | owner.Name, 36 | apiv1.GetOptions{}) 37 | if ds != nil && len(ds.ObjectMeta.OwnerReferences) > 0 { 38 | owner = ds.ObjectMeta.OwnerReferences[0] 39 | } 40 | } else if owner.Kind == "StatefulSet" { 41 | ss, _ := 42 | ctx.Client.AppsV1().StatefulSets(ctx.Pod.Namespace).Get( 43 | context.TODO(), 
44 | owner.Name, 45 | apiv1.GetOptions{}) 46 | if ss != nil && len(ss.ObjectMeta.OwnerReferences) > 0 { 47 | owner = ss.ObjectMeta.OwnerReferences[0] 48 | } 49 | } 50 | 51 | ctx.Owner = &owner 52 | 53 | return false 54 | } 55 | -------------------------------------------------------------------------------- /filter/podStatusFilter.go: -------------------------------------------------------------------------------- 1 | package filter 2 | 3 | import ( 4 | "github.com/sirupsen/logrus" 5 | "golang.org/x/exp/slices" 6 | corev1 "k8s.io/api/core/v1" 7 | ) 8 | 9 | type PodStatusFilter struct{} 10 | 11 | func (f PodStatusFilter) Execute(ctx *Context) bool { 12 | if ctx.Pod.Status.Phase == corev1.PodSucceeded { 13 | ctx.PodHasIssues = false 14 | ctx.ContainersHasIssues = false 15 | return true 16 | } 17 | 18 | if ctx.EvType == "Added" && len(ctx.Pod.Status.Conditions) == 0 { 19 | ctx.PodHasIssues = false 20 | ctx.ContainersHasIssues = false 21 | return true 22 | } 23 | 24 | issueInContainers := true 25 | issueInPod := true 26 | for _, c := range ctx.Pod.Status.Conditions { 27 | if c.Type == corev1.PodReady { 28 | if c.Status == corev1.ConditionFalse && c.Reason == "PodCompleted" { 29 | ctx.PodHasIssues = false 30 | ctx.ContainersHasIssues = false 31 | return true 32 | } 33 | 34 | issueInPod = false 35 | issueInContainers = false 36 | if c.Status != corev1.ConditionTrue { 37 | issueInContainers = true 38 | } 39 | } else if c.Type == corev1.PodScheduled && c.Status == corev1.ConditionFalse { 40 | issueInPod = true 41 | issueInContainers = false 42 | ctx.PodReason = c.Reason 43 | ctx.PodMsg = c.Message 44 | } else if c.Type == corev1.ContainersReady && c.Status == corev1.ConditionFalse { 45 | issueInContainers = true 46 | issueInPod = false 47 | } 48 | } 49 | 50 | ctx.PodHasIssues = issueInPod 51 | ctx.ContainersHasIssues = issueInContainers 52 | 53 | if len(ctx.PodReason) > 0 && 54 | len(ctx.Config.AllowedReasons) > 0 && 55 | !slices.Contains(ctx.Config.AllowedReasons, 
ctx.PodReason) { 56 | logrus.Infof( 57 | "skipping reason %s for pod %s as it is not in the reason allow list", 58 | ctx.PodReason, 59 | ctx.Pod.Name) 60 | return true 61 | } 62 | 63 | if len(ctx.PodReason) > 0 && 64 | len(ctx.Config.ForbiddenReasons) > 0 && 65 | slices.Contains(ctx.Config.ForbiddenReasons, ctx.PodReason) { 66 | logrus.Infof( 67 | "skipping reason %s for pod %s as it is in the reason forbid list", 68 | ctx.PodReason, 69 | ctx.Pod.Name) 70 | return true 71 | } 72 | 73 | lastState := ctx.Memory.GetPodContainer(ctx.Pod.Namespace, 74 | ctx.Pod.Name, 75 | ".") 76 | 77 | if ctx.PodHasIssues && lastState != nil { 78 | return true 79 | } 80 | 81 | return false 82 | } 83 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/abahmed/kwatch 2 | 3 | go 1.24.0 4 | 5 | require ( 6 | github.com/bwmarrin/discordgo v0.28.1 7 | github.com/google/go-github/v41 v41.0.1-0.20211227215900-a899e0fadbec 8 | github.com/sirupsen/logrus v1.9.3 9 | github.com/slack-go/slack v0.16.0 10 | github.com/stretchr/testify v1.10.0 11 | gopkg.in/mail.v2 v2.3.1 12 | gopkg.in/yaml.v3 v3.0.1 13 | k8s.io/api v0.33.1 14 | k8s.io/apimachinery v0.33.1 15 | k8s.io/client-go v0.33.1 16 | ) 17 | 18 | require ( 19 | github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect 20 | github.com/emicklei/go-restful/v3 v3.12.2 // indirect 21 | github.com/fxamacker/cbor/v2 v2.8.0 // indirect 22 | github.com/go-logr/logr v1.4.2 // indirect 23 | github.com/go-openapi/jsonpointer v0.21.1 // indirect 24 | github.com/go-openapi/jsonreference v0.21.0 // indirect 25 | github.com/go-openapi/swag v0.23.1 // indirect 26 | github.com/gogo/protobuf v1.3.2 // indirect 27 | github.com/google/gnostic-models v0.6.9 // indirect 28 | github.com/google/go-cmp v0.7.0 // indirect 29 | github.com/google/go-querystring v1.1.0 // indirect 30 | github.com/google/uuid v1.6.0 
// indirect 31 | github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 // indirect 32 | github.com/josharian/intern v1.0.0 // indirect 33 | github.com/json-iterator/go v1.1.12 // indirect 34 | github.com/mailru/easyjson v0.9.0 // indirect 35 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 36 | github.com/modern-go/reflect2 v1.0.2 // indirect 37 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect 38 | github.com/pkg/errors v0.9.1 // indirect 39 | github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect 40 | github.com/spf13/pflag v1.0.6 // indirect 41 | github.com/x448/float16 v0.8.4 // indirect 42 | golang.org/x/crypto v0.37.0 // indirect 43 | golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0 44 | golang.org/x/net v0.39.0 // indirect 45 | golang.org/x/oauth2 v0.29.0 // indirect 46 | golang.org/x/sys v0.32.0 // indirect 47 | golang.org/x/term v0.31.0 // indirect 48 | golang.org/x/text v0.24.0 // indirect 49 | golang.org/x/time v0.11.0 // indirect 50 | google.golang.org/protobuf v1.36.6 // indirect 51 | gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc // indirect 52 | gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect 53 | gopkg.in/inf.v0 v0.9.1 // indirect 54 | k8s.io/klog/v2 v2.130.1 // indirect 55 | k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff // indirect 56 | k8s.io/utils v0.0.0-20250321185631-1f6e0b77f77e // indirect 57 | sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect 58 | sigs.k8s.io/randfill v1.0.0 // indirect 59 | sigs.k8s.io/structured-merge-diff/v4 v4.7.0 // indirect 60 | sigs.k8s.io/yaml v1.4.0 // indirect 61 | ) 62 | -------------------------------------------------------------------------------- /handler/executeContainersFilters.go: -------------------------------------------------------------------------------- 1 | package handler 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/abahmed/kwatch/event" 7 | 
"github.com/abahmed/kwatch/filter" 8 | "github.com/abahmed/kwatch/storage" 9 | "github.com/abahmed/kwatch/util" 10 | "github.com/sirupsen/logrus" 11 | corev1 "k8s.io/api/core/v1" 12 | ) 13 | 14 | func (h *handler) executeContainersFilters(ctx *filter.Context) { 15 | containers := make([]*corev1.ContainerStatus, 0) 16 | for idx := range ctx.Pod.Status.InitContainerStatuses { 17 | containers = append(containers, &ctx.Pod.Status.InitContainerStatuses[idx]) 18 | } 19 | for idx := range ctx.Pod.Status.ContainerStatuses { 20 | containers = append(containers, &ctx.Pod.Status.ContainerStatuses[idx]) 21 | } 22 | 23 | for _, container := range containers { 24 | ctx.Container = &filter.ContainerContext{ 25 | Container: container, 26 | HasRestarts: false, 27 | LastTerminatedOn: time.Time{}, 28 | } 29 | 30 | isContainerOk := false 31 | for i := range h.containerFilters { 32 | if shouldStop := h.containerFilters[i].Execute(ctx); shouldStop { 33 | isContainerOk = true 34 | break 35 | } 36 | } 37 | 38 | ctx.Memory.AddPodContainer( 39 | ctx.Pod.Namespace, 40 | ctx.Pod.Name, 41 | ctx.Container.Container.Name, 42 | &storage.ContainerState{ 43 | RestartCount: ctx.Container.Container.RestartCount, 44 | LastTerminatedOn: ctx.Container.LastTerminatedOn, 45 | Reason: ctx.Container.Reason, 46 | Msg: ctx.Container.Msg, 47 | ExitCode: ctx.Container.ExitCode, 48 | Status: ctx.Container.Status, 49 | }) 50 | 51 | if !isContainerOk { 52 | ownerName := "" 53 | if ctx.Owner != nil { 54 | ownerName = ctx.Owner.Name 55 | } 56 | 57 | logrus.Printf( 58 | "container only issue %s %s %s %s %s %d", 59 | ctx.Container.Container.Name, 60 | ctx.Pod.Name, 61 | ownerName, 62 | ctx.Container.Reason, 63 | ctx.Container.Msg, 64 | ctx.Container.ExitCode) 65 | 66 | h.alertManager.NotifyEvent(event.Event{ 67 | PodName: ctx.Pod.Name, 68 | ContainerName: ctx.Container.Container.Name, 69 | Namespace: ctx.Pod.Namespace, 70 | Reason: ctx.Container.Reason, 71 | Events: util.GetPodEventsStr(ctx.Events), 72 | Logs: 
ctx.Container.Logs, 73 | Labels: ctx.Pod.Labels, 74 | }) 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /handler/executePodFilters.go: -------------------------------------------------------------------------------- 1 | package handler 2 | 3 | import ( 4 | "github.com/abahmed/kwatch/event" 5 | "github.com/abahmed/kwatch/filter" 6 | "github.com/abahmed/kwatch/storage" 7 | "github.com/abahmed/kwatch/util" 8 | "github.com/sirupsen/logrus" 9 | ) 10 | 11 | func (h *handler) executePodFilters(ctx *filter.Context) { 12 | isPodOk := false 13 | for i := range h.podFilters { 14 | if shouldStop := h.podFilters[i].Execute(ctx); shouldStop { 15 | isPodOk = true 16 | break 17 | } 18 | } 19 | 20 | if isPodOk || 21 | ctx.ContainersHasIssues || 22 | !ctx.PodHasIssues { 23 | return 24 | } 25 | 26 | ownerName := "" 27 | if ctx.Owner != nil { 28 | ownerName = ctx.Owner.Name 29 | } 30 | 31 | ctx.Memory.AddPodContainer( 32 | ctx.Pod.Namespace, 33 | ctx.Pod.Name, 34 | ".", 35 | &storage.ContainerState{ 36 | Reason: ctx.PodReason, 37 | Msg: ctx.PodMsg, 38 | Status: "", 39 | }, 40 | ) 41 | 42 | logrus.Printf("pod only issue %s %s %s %s", ctx.Pod.Name, ownerName, ctx.PodReason, ctx.PodMsg) 43 | 44 | h.alertManager.NotifyEvent(event.Event{ 45 | PodName: ctx.Pod.Name, 46 | ContainerName: "", 47 | Namespace: ctx.Pod.Namespace, 48 | Reason: ctx.PodReason, 49 | Events: util.GetPodEventsStr(ctx.Events), 50 | Logs: "", 51 | Labels: ctx.Pod.Labels, 52 | }) 53 | } 54 | -------------------------------------------------------------------------------- /handler/handler.go: -------------------------------------------------------------------------------- 1 | package handler 2 | 3 | import ( 4 | "github.com/abahmed/kwatch/alertmanager" 5 | "github.com/abahmed/kwatch/config" 6 | "github.com/abahmed/kwatch/filter" 7 | "github.com/abahmed/kwatch/storage" 8 | "k8s.io/apimachinery/pkg/runtime" 9 | "k8s.io/client-go/kubernetes" 10 | ) 11 | 12 | type 
Handler interface { 13 | ProcessPod(evType string, obj runtime.Object) 14 | ProcessNode(evType string, obj runtime.Object) 15 | } 16 | 17 | type handler struct { 18 | kclient kubernetes.Interface 19 | config *config.Config 20 | memory storage.Storage 21 | podFilters []filter.Filter 22 | containerFilters []filter.Filter 23 | alertManager *alertmanager.AlertManager 24 | } 25 | 26 | func NewHandler( 27 | cli kubernetes.Interface, 28 | cfg *config.Config, 29 | mem storage.Storage, 30 | alertManager *alertmanager.AlertManager) Handler { 31 | // Order is important 32 | podFilters := []filter.Filter{ 33 | filter.NamespaceFilter{}, 34 | filter.PodNameFilter{}, 35 | filter.PodStatusFilter{}, 36 | filter.PodEventsFilter{}, 37 | filter.PodOwnersFilter{}, 38 | } 39 | 40 | containersFilters := []filter.Filter{ 41 | filter.NamespaceFilter{}, 42 | filter.PodNameFilter{}, 43 | filter.ContainerNameFilter{}, 44 | filter.ContainerRestartsFilter{}, 45 | filter.ContainerStateFilter{}, 46 | filter.ContainerKillingFilter{}, 47 | filter.ContainerReasonsFilter{}, 48 | filter.ContainerLogsFilter{}, 49 | filter.PodOwnersFilter{}, 50 | } 51 | 52 | return &handler{ 53 | kclient: cli, 54 | config: cfg, 55 | podFilters: podFilters, 56 | containerFilters: containersFilters, 57 | memory: mem, 58 | alertManager: alertManager, 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /handler/processNode.go: -------------------------------------------------------------------------------- 1 | package handler 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/sirupsen/logrus" 7 | corev1 "k8s.io/api/core/v1" 8 | "k8s.io/apimachinery/pkg/runtime" 9 | ) 10 | 11 | func (h *handler) ProcessNode(eventType string, obj runtime.Object) { 12 | if obj == nil { 13 | return 14 | } 15 | 16 | node, ok := obj.(*corev1.Node) 17 | if !ok { 18 | logrus.Warnf("failed to cast event to node object: %v", obj) 19 | return 20 | } 21 | 22 | if eventType == "DELETED" { 23 | 
h.memory.DelNode(node.Name) 24 | return 25 | } 26 | 27 | for _, c := range node.Status.Conditions { 28 | if c.Type == corev1.NodeReady { 29 | if c.Status == corev1.ConditionFalse && !h.memory.HasNode(node.Name) { 30 | logrus.Printf("node %s is not ready: %s", node.Name, c.Reason) 31 | h.alertManager.Notify(fmt.Sprintf("Node %s is not ready: %s - %s", 32 | node.Name, 33 | c.Reason, 34 | c.Message, 35 | )) 36 | h.memory.AddNode(node.Name) 37 | } else if c.Status == corev1.ConditionTrue { 38 | h.memory.DelNode(node.Name) 39 | } 40 | } 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /handler/processPod.go: -------------------------------------------------------------------------------- 1 | package handler 2 | 3 | import ( 4 | "github.com/abahmed/kwatch/filter" 5 | "github.com/abahmed/kwatch/util" 6 | "github.com/sirupsen/logrus" 7 | corev1 "k8s.io/api/core/v1" 8 | "k8s.io/apimachinery/pkg/runtime" 9 | ) 10 | 11 | func (h *handler) ProcessPod(eventType string, obj runtime.Object) { 12 | if obj == nil { 13 | return 14 | } 15 | 16 | pod, ok := obj.(*corev1.Pod) 17 | if !ok { 18 | logrus.Warnf("failed to cast event to pod object: %v", obj) 19 | return 20 | } 21 | 22 | if eventType == "DELETED" { 23 | h.memory.DelPod(pod.Namespace, pod.Name) 24 | return 25 | } 26 | 27 | ctx := filter.Context{ 28 | Client: h.kclient, 29 | Config: h.config, 30 | Memory: h.memory, 31 | Pod: pod, 32 | EvType: eventType, 33 | } 34 | 35 | podEvents, err := util.GetPodEvents(ctx.Client, ctx.Pod.Name, ctx.Pod.Namespace) 36 | if err != nil { 37 | logrus.Errorf( 38 | "failed to get events for pod %s(%s): %s", 39 | ctx.Pod.Name, 40 | ctx.Pod.Namespace, 41 | err.Error()) 42 | } 43 | 44 | if podEvents != nil { 45 | ctx.Events = &podEvents.Items 46 | } 47 | 48 | h.executePodFilters(&ctx) 49 | h.executeContainersFilters(&ctx) 50 | } 51 | -------------------------------------------------------------------------------- /main.go: 
-------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/abahmed/kwatch/alertmanager" 7 | "github.com/abahmed/kwatch/client" 8 | "github.com/abahmed/kwatch/config" 9 | "github.com/abahmed/kwatch/constant" 10 | "github.com/abahmed/kwatch/handler" 11 | "github.com/abahmed/kwatch/pvcmonitor" 12 | "github.com/abahmed/kwatch/storage/memory" 13 | "github.com/abahmed/kwatch/upgrader" 14 | "github.com/abahmed/kwatch/version" 15 | "github.com/abahmed/kwatch/watcher" 16 | "github.com/sirupsen/logrus" 17 | ) 18 | 19 | func main() { 20 | config, err := config.LoadConfig() 21 | if err != nil { 22 | logrus.Fatalf("failed to load config: %s", err.Error()) 23 | } 24 | setLogFormatter(config.App.LogFormatter) 25 | 26 | logrus.Info(fmt.Sprintf(constant.WelcomeMsg, version.Short())) 27 | 28 | // create kubernetes client 29 | client := client.Create(&config.App) 30 | 31 | alertManager := alertmanager.AlertManager{} 32 | alertManager.Init(config.Alert, &config.App) 33 | 34 | if !config.App.DisableStartupMessage { 35 | // send notification to providers 36 | alertManager.Notify(fmt.Sprintf(constant.WelcomeMsg, version.Short())) 37 | } 38 | 39 | // check and notify if newer versions are available 40 | upgrader := upgrader.NewUpgrader(&config.Upgrader, &alertManager) 41 | go upgrader.CheckUpdates() 42 | 43 | // start monitoring Persistent Volume Claims 44 | pvcMonitor := 45 | pvcmonitor.NewPvcMonitor(client, &config.PvcMonitor, &alertManager) 46 | go pvcMonitor.Start() 47 | 48 | // Create handler 49 | h := handler.NewHandler( 50 | client, 51 | config, 52 | memory.NewMemory(), 53 | &alertManager, 54 | ) 55 | 56 | // start watcher 57 | watcher.Start(client, config, h) 58 | } 59 | 60 | func setLogFormatter(formatter string) { 61 | switch formatter { 62 | case "json": 63 | logrus.SetFormatter(&logrus.JSONFormatter{}) 64 | default: 65 | logrus.SetFormatter(&logrus.TextFormatter{}) 66 | } 67 | } 68 | 
-------------------------------------------------------------------------------- /pvcmonitor/checkUsage.go: -------------------------------------------------------------------------------- 1 | package pvcmonitor 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/abahmed/kwatch/util" 7 | "github.com/sirupsen/logrus" 8 | ) 9 | 10 | type PvcUsage struct { 11 | Name string 12 | PVName string 13 | Namespace string 14 | PodName string 15 | UsagePercentage float64 16 | } 17 | 18 | func (p *PvcMonitor) checkUsage() { 19 | // getting nodes 20 | nodes, err := util.GetNodes(p.client) 21 | if err != nil { 22 | logrus.Errorf("pvc monitor: failed to get nodes %s", err.Error()) 23 | return 24 | } 25 | 26 | nodeNames := make([]string, 0) 27 | for _, node := range nodes.Items { 28 | nodeNames = append(nodeNames, node.Name) 29 | } 30 | 31 | var pvcUsages []*PvcUsage 32 | 33 | for _, nodeName := range nodeNames { 34 | nodePvcUsage, _ := p.getNodeUsage(nodeName) 35 | pvcUsages = append(pvcUsages, nodePvcUsage...) 
36 | } 37 | 38 | for _, pvc := range pvcUsages { 39 | if pvc.UsagePercentage >= p.config.Threshold { 40 | // ignore notified pv 41 | if _, ok := p.notifiedPvc[pvc.PVName]; ok { 42 | continue 43 | } 44 | 45 | msg := fmt.Sprintf("Volume Usage for %s (%s) attached to pod %s "+ 46 | "in namespace %s is %.2f%% (higher than %.0f%%)", 47 | pvc.Name, 48 | pvc.PVName, 49 | pvc.PodName, 50 | pvc.Namespace, 51 | pvc.UsagePercentage, 52 | p.config.Threshold, 53 | ) 54 | p.alertManager.Notify(msg) 55 | p.notifiedPvc[pvc.PVName] = true 56 | } 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /pvcmonitor/getUsage.go: -------------------------------------------------------------------------------- 1 | package pvcmonitor 2 | 3 | import ( 4 | "encoding/json" 5 | 6 | "github.com/abahmed/kwatch/util" 7 | "github.com/sirupsen/logrus" 8 | ) 9 | 10 | type SummaryResponse struct { 11 | Pods []*Pod `json:"pods"` 12 | } 13 | 14 | type Pod struct { 15 | PodRef *Ref `json:"podRef"` 16 | Volume []*Volume `json:"volume"` 17 | } 18 | 19 | type Volume struct { 20 | UsedBytes int64 `json:"usedBytes"` 21 | CapacityBytes int64 `json:"capacityBytes"` 22 | Name string `json:"name"` 23 | PvcRef *Ref `json:"pvcRef"` 24 | } 25 | 26 | type Ref struct { 27 | Name string `json:"name"` 28 | Namespace string `json:"namespace"` 29 | } 30 | 31 | // getNodeUsage gets list of pvc usage for specific node 32 | func (p *PvcMonitor) getNodeUsage(nodeName string) ([]*PvcUsage, error) { 33 | result := make([]*PvcUsage, 0) 34 | 35 | summaryResponse, err := util.GetNodeSummary(p.client, nodeName) 36 | if err != nil { 37 | return result, err 38 | 39 | } 40 | 41 | var summaryObj SummaryResponse 42 | err = json.Unmarshal(summaryResponse, &summaryObj) 43 | if err != nil { 44 | return result, err 45 | } 46 | 47 | for _, pod := range summaryObj.Pods { 48 | for _, vol := range pod.Volume { 49 | if vol.PvcRef == nil || len(vol.PvcRef.Name) == 0 { 50 | continue 51 | } 52 | 53 | 
pvName, err := 54 | util.GetPVNameFromPVC( 55 | p.client, 56 | pod.PodRef.Namespace, 57 | vol.PvcRef.Name) 58 | if err != nil { 59 | logrus.Errorf( 60 | "failed to get pv name for pvc %s: %s", 61 | vol.PvcRef.Name, 62 | err.Error()) 63 | continue 64 | } 65 | 66 | percentage := 67 | (float64(vol.UsedBytes) / float64(vol.CapacityBytes)) * 100.0 68 | 69 | result = append(result, &PvcUsage{ 70 | Name: vol.PvcRef.Name, 71 | PVName: pvName, 72 | Namespace: pod.PodRef.Namespace, 73 | PodName: pod.PodRef.Name, 74 | UsagePercentage: percentage, 75 | }) 76 | } 77 | } 78 | 79 | return result, nil 80 | } 81 | -------------------------------------------------------------------------------- /pvcmonitor/pvc.go: -------------------------------------------------------------------------------- 1 | package pvcmonitor 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/abahmed/kwatch/alertmanager" 7 | "github.com/abahmed/kwatch/config" 8 | "k8s.io/client-go/kubernetes" 9 | ) 10 | 11 | type PvcMonitor struct { 12 | client kubernetes.Interface 13 | config *config.PvcMonitor 14 | alertManager *alertmanager.AlertManager 15 | notifiedPvc map[string]bool 16 | } 17 | 18 | // NewPvcMonitor returns new instance of pvc monitor 19 | func NewPvcMonitor( 20 | client kubernetes.Interface, 21 | config *config.PvcMonitor, 22 | alertManager *alertmanager.AlertManager) *PvcMonitor { 23 | return &PvcMonitor{ 24 | client: client, 25 | config: config, 26 | alertManager: alertManager, 27 | notifiedPvc: make(map[string]bool), 28 | } 29 | } 30 | 31 | func (p *PvcMonitor) Start() { 32 | if !p.config.Enabled { 33 | return 34 | } 35 | 36 | // check at startup 37 | p.checkUsage() 38 | 39 | ticker := time.NewTicker(time.Duration(p.config.Interval) * time.Minute) 40 | defer ticker.Stop() 41 | 42 | for range ticker.C { 43 | p.checkUsage() 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /storage/memory/memory.go: 
-------------------------------------------------------------------------------- 1 | package memory 2 | 3 | import ( 4 | "sync" 5 | 6 | storage "github.com/abahmed/kwatch/storage" 7 | ) 8 | 9 | type memory struct { 10 | smap sync.Map 11 | nmap sync.Map 12 | } 13 | 14 | // NewMemory returns new Memory object 15 | func NewMemory() storage.Storage { 16 | return &memory{ 17 | smap: sync.Map{}, 18 | nmap: sync.Map{}, 19 | } 20 | } 21 | 22 | // AddPodContainer attaches container to pod to mark it has an error 23 | func (m *memory) AddPodContainer(namespace, podKey, containerKey string, state *storage.ContainerState) { 24 | key := m.getKey(namespace, podKey) 25 | if v, ok := m.smap.Load(key); ok { 26 | containers := v.(map[string]*storage.ContainerState) 27 | containers[containerKey] = state 28 | 29 | m.smap.Store(key, containers) 30 | return 31 | } 32 | m.smap.Store(key, map[string]*storage.ContainerState{containerKey: state}) 33 | } 34 | 35 | // Delete deletes pod with all its containers 36 | func (m *memory) DelPod(namespace, podKey string) { 37 | key := m.getKey(namespace, podKey) 38 | m.smap.Delete(key) 39 | } 40 | 41 | // DelPodContainer detaches container from pod to mark error is resolved 42 | func (m *memory) DelPodContainer(namespace, podKey, containerKey string) { 43 | key := m.getKey(namespace, podKey) 44 | 45 | v, ok := m.smap.Load(key) 46 | if !ok { 47 | return 48 | } 49 | 50 | containers := v.(map[string]*storage.ContainerState) 51 | delete(containers, containerKey) 52 | 53 | m.smap.Store(key, containers) 54 | } 55 | 56 | // HasPodContainer checks if container is attached to given pod or not 57 | func (m *memory) HasPodContainer(namespace, podKey, containerKey string) bool { 58 | key := m.getKey(namespace, podKey) 59 | 60 | v, ok := m.smap.Load(key) 61 | if !ok { 62 | return false 63 | } 64 | 65 | containers := v.(map[string]*storage.ContainerState) 66 | if _, ok := containers[containerKey]; ok { 67 | return true 68 | } 69 | 70 | return false 71 | } 72 | 73 
| func (m *memory) GetPodContainer(namespace, podKey, containerKey string) *storage.ContainerState { 74 | key := m.getKey(namespace, podKey) 75 | 76 | v, ok := m.smap.Load(key) 77 | if !ok { 78 | return nil 79 | } 80 | 81 | containers := v.(map[string]*storage.ContainerState) 82 | if val, ok := containers[containerKey]; ok { 83 | return val 84 | } 85 | 86 | return nil 87 | } 88 | 89 | func (*memory) getKey(namespace, pod string) string { 90 | return namespace + "/" + pod 91 | } 92 | 93 | // AddNode stores node with key 94 | func (m *memory) AddNode(nodeKey string) { 95 | m.nmap.Store(nodeKey, true) 96 | } 97 | 98 | // HasNode checks if node is stored 99 | func (m *memory) HasNode(nodeKey string) bool { 100 | _, ok := m.nmap.Load(nodeKey) 101 | return ok 102 | } 103 | 104 | // AddNode deletes node with key 105 | func (m *memory) DelNode(nodeKey string) { 106 | m.nmap.Delete(nodeKey) 107 | } 108 | -------------------------------------------------------------------------------- /storage/memory/memory_test.go: -------------------------------------------------------------------------------- 1 | package memory 2 | 3 | import ( 4 | "sync" 5 | "testing" 6 | 7 | storage "github.com/abahmed/kwatch/storage" 8 | ) 9 | 10 | func TestMemory(t *testing.T) { 11 | m := NewMemory() 12 | _, ok := m.(storage.Storage) 13 | if !ok { 14 | t.Errorf("expected to return Storage interface") 15 | } 16 | } 17 | 18 | func TestAddPodContainer(t *testing.T) { 19 | mem := &memory{ 20 | smap: sync.Map{}, 21 | } 22 | 23 | mem.AddPodContainer("default", "test", "container1", &storage.ContainerState{}) 24 | mem.AddPodContainer("default", "test", "container2", &storage.ContainerState{}) 25 | 26 | if v, ok := mem.smap.Load(mem.getKey("default", "test")); !ok { 27 | t.Errorf("expected to find value in pod test") 28 | } else { 29 | containers := v.(map[string]*storage.ContainerState) 30 | if _, ok = containers["container1"]; !ok { 31 | t.Errorf("expected to find container container1 in pod test") 32 | } 
33 | 34 | if _, ok = containers["container2"]; !ok { 35 | t.Errorf("expected to find container container2 in pod test") 36 | } 37 | } 38 | } 39 | 40 | func TestHasPodContainer(t *testing.T) { 41 | mem := &memory{ 42 | smap: sync.Map{}, 43 | } 44 | 45 | mem.AddPodContainer("default", "test", "test", &storage.ContainerState{}) 46 | mem.AddPodContainer("default", "test", "test2", &storage.ContainerState{}) 47 | 48 | mem.DelPodContainer("default", "test", "test") 49 | mem.DelPodContainer("default", "test3", "test") 50 | 51 | if !mem.HasPodContainer("default", "test", "test2") { 52 | t.Errorf("expected to find container test2 in pod test") 53 | } 54 | 55 | if mem.HasPodContainer("default", "test", "test") { 56 | t.Errorf("expected not to find container test in pod test") 57 | } 58 | 59 | if mem.HasPodContainer("default", "test", "test6") { 60 | t.Errorf("expected not to find container test6 in pod test") 61 | } 62 | 63 | if mem.HasPodContainer("default", "test4", "test") { 64 | t.Errorf("expected to not find container test in pod test4") 65 | } 66 | } 67 | 68 | func TestDelPodContainer(t *testing.T) { 69 | mem := &memory{ 70 | smap: sync.Map{}, 71 | } 72 | 73 | mem.AddPodContainer("default", "test", "test", &storage.ContainerState{}) 74 | mem.AddPodContainer("default", "test", "test2", &storage.ContainerState{}) 75 | 76 | mem.DelPodContainer("default", "test", "test") 77 | mem.DelPodContainer("default", "test3", "test") 78 | 79 | if v, ok := mem.smap.Load(mem.getKey("default", "test")); !ok { 80 | t.Errorf("expected to find value in pod test") 81 | } else { 82 | containers := v.(map[string]*storage.ContainerState) 83 | if _, ok = containers["test"]; ok { 84 | t.Errorf("expected not to find container test in pod test") 85 | } 86 | } 87 | } 88 | 89 | func TestGetPodContainer(t *testing.T) { 90 | mem := &memory{ 91 | smap: sync.Map{}, 92 | } 93 | 94 | mem.AddPodContainer("default", "test", "test1", &storage.ContainerState{}) 95 | mem.AddPodContainer("default", "test", 
"test2", &storage.ContainerState{}) 96 | 97 | state := mem.GetPodContainer("default", "test", "test1") 98 | if state == nil { 99 | t.Errorf("expected to find value in pod test") 100 | } 101 | 102 | state2 := mem.GetPodContainer("default", "test", "test3") 103 | if state2 != nil { 104 | t.Errorf("expected to be nil as container does not exist") 105 | } 106 | 107 | state3 := mem.GetPodContainer("default", "test3", "test1") 108 | if state3 != nil { 109 | t.Errorf("expected to be nil as pod does not exist") 110 | } 111 | } 112 | 113 | func TestDelPod(t *testing.T) { 114 | mem := &memory{ 115 | smap: sync.Map{}, 116 | } 117 | 118 | mem.AddPodContainer("default", "test", "test1", &storage.ContainerState{}) 119 | mem.AddPodContainer("default", "test", "test2", &storage.ContainerState{}) 120 | 121 | mem.DelPod("default", "test") 122 | mem.DelPod("default", "test3") 123 | 124 | if _, ok := mem.smap.Load(mem.getKey("default", "test")); ok { 125 | t.Errorf("expected not to find pod test") 126 | } 127 | } 128 | 129 | func TestAddNode(t *testing.T) { 130 | mem := &memory{ 131 | nmap: sync.Map{}, 132 | } 133 | 134 | mem.AddNode("default-node-1") 135 | mem.AddNode("default-node-2") 136 | 137 | if _, ok := mem.nmap.Load("default-node-1"); !ok { 138 | t.Errorf("expected to find node default-node-1") 139 | } 140 | } 141 | 142 | func TestHasNode(t *testing.T) { 143 | mem := &memory{ 144 | nmap: sync.Map{}, 145 | } 146 | 147 | mem.AddNode("default-node-1") 148 | mem.AddNode("default-node-2") 149 | 150 | if !mem.HasNode(("default-node-1")) { 151 | t.Errorf("expected to find node default-node-1") 152 | } 153 | 154 | if mem.HasNode("default-node-3") { 155 | t.Errorf("expected not to find node default-node-3") 156 | } 157 | } 158 | 159 | func TestDelNode(t *testing.T) { 160 | mem := &memory{ 161 | smap: sync.Map{}, 162 | } 163 | 164 | mem.AddNode("default-node-1") 165 | mem.AddNode("default-node-2") 166 | 167 | mem.DelNode("default-node-1") 168 | mem.DelNode("default-node-2") 169 | 170 | 
if _, ok := mem.nmap.Load("default-node-1"); ok { 171 | t.Errorf("expected not to find node default-node-1") 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /storage/storage.go: -------------------------------------------------------------------------------- 1 | package storage 2 | 3 | import "time" 4 | 5 | type ContainerState struct { 6 | RestartCount int32 7 | LastTerminatedOn time.Time 8 | Reason string 9 | Msg string 10 | ExitCode int32 11 | Status string 12 | Reported bool 13 | } 14 | 15 | // Storage interface 16 | type Storage interface { 17 | AddPodContainer(namespace, podKey, containerKey string, state *ContainerState) 18 | DelPodContainer(namespace, podKey, containerKey string) 19 | DelPod(namespace, podKey string) 20 | HasPodContainer(namespace, podKey, containerKey string) bool 21 | GetPodContainer(namespace, podKey, containerKey string) *ContainerState 22 | 23 | AddNode(nodeKey string) 24 | HasNode(nodeKey string) bool 25 | DelNode(nodeKey string) 26 | } 27 | -------------------------------------------------------------------------------- /upgrader/upgrader.go: -------------------------------------------------------------------------------- 1 | package upgrader 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "time" 7 | 8 | "github.com/abahmed/kwatch/alertmanager" 9 | "github.com/abahmed/kwatch/config" 10 | "github.com/abahmed/kwatch/constant" 11 | "github.com/abahmed/kwatch/version" 12 | "github.com/google/go-github/v41/github" 13 | "github.com/sirupsen/logrus" 14 | ) 15 | 16 | type Upgrader struct { 17 | config *config.Upgrader 18 | alertManager *alertmanager.AlertManager 19 | } 20 | 21 | // NewUpgrader returns new instance of upgrader 22 | func NewUpgrader(config *config.Upgrader, 23 | alertManager *alertmanager.AlertManager) *Upgrader { 24 | return &Upgrader{ 25 | config: config, 26 | alertManager: alertManager, 27 | } 28 | } 29 | 30 | // CheckUpdates checks every 24 hours if a newer version of 
Kwatch is available 31 | func (u *Upgrader) CheckUpdates() { 32 | if u.config.DisableUpdateCheck || 33 | version.Short() == "dev" { 34 | return 35 | } 36 | 37 | // check at startup 38 | u.checkRelease() 39 | 40 | ticker := time.NewTicker(24 * time.Hour) 41 | defer ticker.Stop() 42 | 43 | for range ticker.C { 44 | u.checkRelease() 45 | } 46 | } 47 | 48 | func (u *Upgrader) checkRelease() { 49 | client := github.NewClient(nil) 50 | 51 | r, _, err := client.Repositories.GetLatestRelease( 52 | context.TODO(), 53 | "abahmed", 54 | "kwatch") 55 | if err != nil { 56 | logrus.Warnf("failed to get latest release: %s", err.Error()) 57 | return 58 | } 59 | 60 | if r.TagName == nil { 61 | logrus.Warnf("failed to get release tag: %+v", r) 62 | return 63 | } 64 | 65 | if version.Short() == *r.TagName { 66 | return 67 | } 68 | 69 | u.alertManager.Notify(fmt.Sprintf(constant.KwatchUpdateMsg, *r.TagName)) 70 | } 71 | -------------------------------------------------------------------------------- /util/util.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "fmt" 7 | "math/rand" 8 | "strings" 9 | "time" 10 | 11 | "github.com/sirupsen/logrus" 12 | v1 "k8s.io/api/core/v1" 13 | 14 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 15 | 16 | "k8s.io/client-go/kubernetes" 17 | ) 18 | 19 | // GetPodEventsStr returns formatted events as a string for specified pod 20 | func GetPodEventsStr(events *[]v1.Event) string { 21 | if events == nil { 22 | return "" 23 | } 24 | 25 | eventsString := "" 26 | 27 | for _, ev := range *events { 28 | eventsString += 29 | fmt.Sprintf( 30 | "[%s] %s %s\n", 31 | ev.LastTimestamp.String(), 32 | ev.Reason, 33 | ev.Message) 34 | } 35 | 36 | return strings.TrimSpace(eventsString) 37 | } 38 | 39 | // ContainsKillingStoppingContainerEvents checks if the events contain an event 40 | // with "Killing Stopping container" which indicates that a container could not 41 
| // be gracefully shutdown 42 | func ContainsKillingStoppingContainerEvents( 43 | c kubernetes.Interface, 44 | name, 45 | namespace string) bool { 46 | events, err := GetPodEvents(c, name, namespace) 47 | if err != nil { 48 | return false 49 | } 50 | 51 | for _, ev := range events.Items { 52 | if strings.ToLower(ev.Reason) == "killing" && 53 | strings.Contains( 54 | strings.ToLower(ev.Message), 55 | "stopping container") { 56 | return true 57 | } 58 | } 59 | 60 | return false 61 | } 62 | 63 | // GetPodContainerLogs returns logs for specified container in pod 64 | func GetPodContainerLogs( 65 | c kubernetes.Interface, name, container, namespace string, 66 | previous bool, 67 | maxRecentLogLines int64) string { 68 | options := v1.PodLogOptions{ 69 | Container: container, 70 | Previous: previous, 71 | } 72 | 73 | // get max recent log lines 74 | if maxRecentLogLines != 0 { 75 | options.TailLines = &maxRecentLogLines 76 | } 77 | 78 | // get logs 79 | logs, err := getContainerLogs(c, name, namespace, &options) 80 | if err != nil { 81 | logrus.Warnf( 82 | "failed to get logs for container %s in pod %s@%s: %s", 83 | name, 84 | container, 85 | namespace, 86 | err.Error()) 87 | 88 | // try to decode response 89 | var status metav1.Status 90 | parseErr := json.Unmarshal(logs, &status) 91 | if parseErr == nil { 92 | return status.Message 93 | } 94 | 95 | logrus.Warnf( 96 | "failed to parse logs for container %s in pod %s@%s: %s", 97 | name, 98 | container, 99 | namespace, 100 | parseErr.Error()) 101 | } 102 | 103 | return string(logs) 104 | } 105 | 106 | func getContainerLogs( 107 | c kubernetes.Interface, 108 | name string, 109 | namespace string, 110 | options *v1.PodLogOptions) ([]byte, error) { 111 | return c.CoreV1(). 112 | Pods(namespace). 113 | GetLogs(name, options). 114 | DoRaw(context.TODO()) 115 | } 116 | 117 | func GetPodEvents( 118 | c kubernetes.Interface, 119 | name, 120 | namespace string) (*v1.EventList, error) { 121 | return c.CoreV1(). 
122 | Events(namespace). 123 | List(context.TODO(), metav1.ListOptions{ 124 | FieldSelector: "involvedObject.name=" + name, 125 | }) 126 | } 127 | 128 | // GetNodes gets a list of nodes 129 | func GetNodes(c kubernetes.Interface) (*v1.NodeList, error) { 130 | return c.CoreV1(). 131 | Nodes(). 132 | List(context.TODO(), metav1.ListOptions{}) 133 | } 134 | 135 | // // GetNodeSummary gets a list of nodes 136 | func GetNodeSummary(c kubernetes.Interface, name string) ([]byte, error) { 137 | return c.CoreV1(). 138 | RESTClient(). 139 | Get(). 140 | Resource("nodes"). 141 | Name(name). 142 | SubResource("proxy"). 143 | Suffix("stats/summary"). 144 | DoRaw(context.TODO()) 145 | } 146 | 147 | // GetPVNameFromPVC returns the name of persistent volume given a namespace and 148 | // persistent volume claim name 149 | func GetPVNameFromPVC( 150 | c kubernetes.Interface, 151 | namespace, pvcName string) (string, error) { 152 | pvc, err := 153 | c.CoreV1(). 154 | PersistentVolumeClaims(namespace). 155 | Get(context.TODO(), pvcName, metav1.GetOptions{}) 156 | if err != nil { 157 | return "", err 158 | } 159 | 160 | return pvc.Spec.VolumeName, nil 161 | } 162 | 163 | // JsonEscape escapes the json special characters in a string 164 | func JsonEscape(i string) string { 165 | jm, _ := json.Marshal(i) 166 | 167 | s := string(jm) 168 | return s[1 : len(s)-1] 169 | } 170 | 171 | // RandomString generates random string with provided n size 172 | func RandomString(n int) string { 173 | const availableCharacterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLM" + 174 | "NOPQRSTUVWXYZ0123456789" 175 | 176 | b := make([]byte, n) 177 | r := rand.New(rand.NewSource(time.Now().UnixNano())) 178 | for i := range b { 179 | b[i] = availableCharacterBytes[r.Intn(len(availableCharacterBytes))] 180 | } 181 | 182 | return string(b) 183 | } 184 | -------------------------------------------------------------------------------- /util/util_test.go: 
-------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "errors" 5 | "math/rand" 6 | "testing" 7 | 8 | "github.com/stretchr/testify/assert" 9 | v1 "k8s.io/api/core/v1" 10 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 11 | "k8s.io/apimachinery/pkg/runtime" 12 | 13 | "k8s.io/client-go/kubernetes/fake" 14 | 15 | k8stesting "k8s.io/client-go/testing" 16 | ) 17 | 18 | func TestGetPodContainerLogs(t *testing.T) { 19 | assert := assert.New(t) 20 | 21 | client := fake.NewSimpleClientset() 22 | logs := GetPodContainerLogs( 23 | client, 24 | "test", 25 | "test", 26 | "default", 27 | false, 28 | 20) 29 | 30 | assert.Equal(logs, "fake logs") 31 | } 32 | 33 | func TestJsonEscape(t *testing.T) { 34 | assert := assert.New(t) 35 | 36 | testCases := []struct { 37 | Input string 38 | Output string 39 | }{ 40 | { 41 | Input: "test", 42 | Output: "test", 43 | }, 44 | { 45 | Input: "te\bst", 46 | Output: "te\\bst", 47 | }, 48 | { 49 | Input: "\b", 50 | Output: "\\b", 51 | }, 52 | { 53 | Input: "\"", 54 | Output: "\\\"", 55 | }, 56 | } 57 | 58 | for _, tc := range testCases { 59 | assert.Equal(JsonEscape(tc.Input), tc.Output) 60 | } 61 | } 62 | 63 | func TestGetPodEventsStr(t *testing.T) { 64 | assert := assert.New(t) 65 | 66 | event := v1.Event{ 67 | Reason: "test reason", 68 | Message: "test message", 69 | LastTimestamp: metav1.Now(), 70 | } 71 | 72 | result := GetPodEventsStr(&[]v1.Event{event}) 73 | expectedOutput := 74 | "[" + event.LastTimestamp.String() + "] " + event.Reason + " " + 75 | event.Message 76 | assert.Equal(result, expectedOutput) 77 | } 78 | 79 | func TestGetPodEventsStrNil(t *testing.T) { 80 | assert := assert.New(t) 81 | 82 | result := GetPodEventsStr(nil) 83 | expectedOutput := "" 84 | assert.Equal(result, expectedOutput) 85 | } 86 | 87 | func TestContainsKillingStoppingContainerEvents(t *testing.T) { 88 | assert := assert.New(t) 89 | 90 | cli := fake.NewSimpleClientset() 91 | cli.PrependReactor( 92 
| "list", 93 | "events", 94 | func(action k8stesting.Action) (bool, runtime.Object, error) { 95 | return true, &v1.EventList{ 96 | Items: []v1.Event{{ 97 | Reason: "killing", 98 | Message: "test stopping container", 99 | LastTimestamp: metav1.Now(), 100 | }}, 101 | }, nil 102 | }) 103 | 104 | result := 105 | ContainsKillingStoppingContainerEvents( 106 | cli, 107 | "dummy-app-579f7cd745-t6fdg", 108 | "test") 109 | 110 | assert.True(result) 111 | } 112 | 113 | func TestContainsKillingStoppingContainerEventsError(t *testing.T) { 114 | assert := assert.New(t) 115 | 116 | cli := fake.NewSimpleClientset() 117 | cli.PrependReactor( 118 | "list", 119 | "events", 120 | func(action k8stesting.Action) (bool, runtime.Object, error) { 121 | return true, nil, errors.New("ssss") 122 | }) 123 | 124 | result := 125 | ContainsKillingStoppingContainerEvents( 126 | cli, 127 | "dummy-app-579f7cd745-t6fdg", 128 | "test") 129 | 130 | assert.False(result) 131 | } 132 | 133 | func TestContainsKillingStoppingContainerEmpty(t *testing.T) { 134 | assert := assert.New(t) 135 | 136 | cli := fake.NewSimpleClientset() 137 | cli.PrependReactor( 138 | "list", 139 | "events", 140 | func(action k8stesting.Action) (bool, runtime.Object, error) { 141 | return true, &v1.EventList{ 142 | Items: []v1.Event{}, 143 | }, nil 144 | }) 145 | 146 | result := 147 | ContainsKillingStoppingContainerEvents( 148 | cli, 149 | "dummy-app-579f7cd745-t6fdg", 150 | "test") 151 | 152 | assert.False(result) 153 | } 154 | 155 | func TestRandomString(t *testing.T) { 156 | assert := assert.New(t) 157 | 158 | randLen := rand.Intn(300) 159 | result := RandomString(randLen) 160 | 161 | assert.Len(result, randLen) 162 | } 163 | 164 | func TestGetNodes(t *testing.T) { 165 | assert := assert.New(t) 166 | 167 | cli := fake.NewSimpleClientset() 168 | node := v1.Node{} 169 | cli.PrependReactor( 170 | "list", 171 | "nodes", 172 | func(action k8stesting.Action) (bool, runtime.Object, error) { 173 | return true, &v1.NodeList{ 174 | 
Items: []v1.Node{node}, 175 | }, nil 176 | }) 177 | 178 | result, err := GetNodes(cli) 179 | assert.NoError(err) 180 | assert.NotNil(result) 181 | assert.Equal(len(result.Items), 1) 182 | } 183 | 184 | func TestGetPVNameFromPVC(t *testing.T) { 185 | assert := assert.New(t) 186 | 187 | cli := fake.NewSimpleClientset() 188 | cli.PrependReactor( 189 | "get", 190 | "persistentvolumeclaims", 191 | func(action k8stesting.Action) (bool, runtime.Object, error) { 192 | return true, &v1.PersistentVolumeClaim{ 193 | Spec: v1.PersistentVolumeClaimSpec{ 194 | VolumeName: "test", 195 | }, 196 | }, nil 197 | }) 198 | 199 | result, err := GetPVNameFromPVC(cli, "test", "test") 200 | assert.NoError(err) 201 | assert.Equal(result, "test") 202 | } 203 | 204 | func TestGetPVNameFromPVCError(t *testing.T) { 205 | assert := assert.New(t) 206 | 207 | cli := fake.NewSimpleClientset() 208 | cli.PrependReactor( 209 | "get", 210 | "persistentvolumeclaims", 211 | func(action k8stesting.Action) (bool, runtime.Object, error) { 212 | return true, nil, errors.New("failed") 213 | }) 214 | 215 | result, err := GetPVNameFromPVC(cli, "test", "test") 216 | assert.Error(err, "failed") 217 | assert.Equal(result, "") 218 | } 219 | -------------------------------------------------------------------------------- /version/version.go: -------------------------------------------------------------------------------- 1 | package version 2 | 3 | import ( 4 | "encoding/json" 5 | ) 6 | 7 | // Version is the current versions of kwatch 8 | const version = "dev" 9 | 10 | // GitCommitID git commit id of the release 11 | const gitCommitID = "none" 12 | 13 | // BuildDate date for the release 14 | const buildDate = "unknown" 15 | 16 | type Info struct { 17 | Version string 18 | GitCommit string 19 | BuildDate string 20 | } 21 | 22 | func Short() string { 23 | return version 24 | } 25 | 26 | func Version() string { 27 | ver, _ := json.Marshal(Info{ 28 | Version: version, 29 | GitCommit: gitCommitID, 30 | BuildDate: 
buildDate, 31 | }) 32 | 33 | return string(ver) 34 | } 35 | -------------------------------------------------------------------------------- /watcher/start.go: -------------------------------------------------------------------------------- 1 | package watcher 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/abahmed/kwatch/config" 7 | "github.com/abahmed/kwatch/handler" 8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 9 | "k8s.io/apimachinery/pkg/runtime" 10 | 11 | "k8s.io/apimachinery/pkg/watch" 12 | "k8s.io/client-go/kubernetes" 13 | "k8s.io/client-go/tools/cache" 14 | toolsWatch "k8s.io/client-go/tools/watch" 15 | "k8s.io/client-go/util/workqueue" 16 | ) 17 | 18 | // Start creates an instance of watcher after initialization and runs it 19 | func Start( 20 | client kubernetes.Interface, 21 | config *config.Config, 22 | handler handler.Handler) { 23 | 24 | watchers := []*Watcher{ 25 | newPodWatcher(client, config, handler.ProcessPod), 26 | } 27 | 28 | if config.NodeMonitor.Enabled { 29 | watchers = append(watchers, newNodeWatcher(client, handler.ProcessNode)) 30 | } 31 | 32 | stopCh := make(chan struct{}) 33 | defer close(stopCh) 34 | 35 | for idx := range watchers { 36 | go watchers[idx].run(stopCh) 37 | } 38 | 39 | <-stopCh 40 | } 41 | 42 | // newNodeWatcher creates watcher for nodes 43 | func newNodeWatcher( 44 | client kubernetes.Interface, 45 | handler func(evType string, obj runtime.Object), 46 | ) *Watcher { 47 | watchFunc := 48 | func(options metav1.ListOptions) (watch.Interface, error) { 49 | return client.CoreV1().Nodes().Watch( 50 | context.Background(), 51 | metav1.ListOptions{}, 52 | ) 53 | } 54 | 55 | return newWatcher( 56 | "node", 57 | watchFunc, 58 | handler, 59 | ) 60 | } 61 | 62 | // newPodWatcher creates watcher for pods 63 | func newPodWatcher( 64 | client kubernetes.Interface, 65 | config *config.Config, 66 | handler func(evType string, obj runtime.Object), 67 | ) *Watcher { 68 | namespace := metav1.NamespaceAll 69 | if 
len(config.AllowedNamespaces) == 1 { 70 | namespace = config.AllowedNamespaces[0] 71 | } 72 | 73 | watchFunc := 74 | func(options metav1.ListOptions) (watch.Interface, error) { 75 | return client.CoreV1().Pods(namespace).Watch( 76 | context.Background(), 77 | metav1.ListOptions{}, 78 | ) 79 | } 80 | 81 | return newWatcher( 82 | "pod", 83 | watchFunc, 84 | handler, 85 | ) 86 | } 87 | 88 | // newWatcher creates watcher with provided name, watch, and handle functions 89 | func newWatcher( 90 | name string, 91 | watchFunc func(options metav1.ListOptions) (watch.Interface, error), 92 | handleFunc func(string, runtime.Object), 93 | ) *Watcher { 94 | watcher, _ := 95 | toolsWatch.NewRetryWatcher( 96 | "1", 97 | &cache.ListWatch{WatchFunc: watchFunc}, 98 | ) 99 | 100 | return &Watcher{ 101 | name: name, 102 | watcher: watcher, 103 | queue: workqueue.New(), 104 | handlerFunc: handleFunc, 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /watcher/watcher.go: -------------------------------------------------------------------------------- 1 | package watcher 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/sirupsen/logrus" 7 | "k8s.io/apimachinery/pkg/runtime" 8 | utilruntime "k8s.io/apimachinery/pkg/util/runtime" 9 | "k8s.io/apimachinery/pkg/util/wait" 10 | toolsWatch "k8s.io/client-go/tools/watch" 11 | "k8s.io/client-go/util/workqueue" 12 | ) 13 | 14 | type watcherEvent struct { 15 | eventType string 16 | obj runtime.Object 17 | } 18 | 19 | type Watcher struct { 20 | name string 21 | watcher *toolsWatch.RetryWatcher 22 | queue *workqueue.Type 23 | handlerFunc func(string, runtime.Object) 24 | } 25 | 26 | // run starts the watcher 27 | func (w *Watcher) run(stopCh chan struct{}) { 28 | defer utilruntime.HandleCrash() 29 | defer w.queue.ShutDown() 30 | 31 | logrus.Infof("starting %s watcher", w.name) 32 | 33 | go wait.Until(w.processEvents, time.Second, stopCh) 34 | go wait.Until(w.runWorker, time.Second, stopCh) 35 | 36 | 
<-stopCh 37 | } 38 | 39 | func (w *Watcher) processEvents() { 40 | if w.watcher == nil { 41 | return 42 | } 43 | 44 | for event := range w.watcher.ResultChan() { 45 | w.queue.Add(watcherEvent{ 46 | eventType: string(event.Type), 47 | obj: event.Object.DeepCopyObject(), 48 | }) 49 | } 50 | } 51 | 52 | func (w *Watcher) runWorker() { 53 | for w.processNextItem() { 54 | // continue looping 55 | } 56 | } 57 | 58 | func (w *Watcher) processNextItem() bool { 59 | newEvent, quit := w.queue.Get() 60 | if quit { 61 | return false 62 | } 63 | 64 | defer w.queue.Done(newEvent) 65 | 66 | ev, ok := newEvent.(watcherEvent) 67 | if !ok { 68 | logrus.Errorf("failed to cast watcher event: %v", ev) 69 | return true 70 | } 71 | 72 | w.handlerFunc(ev.eventType, ev.obj) 73 | 74 | return true 75 | } 76 | --------------------------------------------------------------------------------