├── .circleci └── config.yml ├── .codacy.yml ├── .gitignore ├── .pre-commit-config.yaml ├── Dockerfile ├── LICENSE ├── README.md ├── cachet_url_monitor ├── __init__.py ├── client.py ├── configuration.py ├── exceptions.py ├── expectation.py ├── latency_unit.py ├── plugins │ ├── __init__.py │ └── token_provider.py ├── scheduler.py ├── status.py └── webhook.py ├── config.yml ├── dev_requirements.txt ├── requirements.txt ├── setup.cfg ├── setup.py ├── tests ├── configs │ ├── config.yml │ ├── config_default_latency_unit.yml │ ├── config_header.yml │ ├── config_insecure.yml │ ├── config_invalid_type.yml │ ├── config_metric.yml │ ├── config_missing_name.yml │ ├── config_multiple_urls.yml │ └── config_webhooks.yml ├── plugins │ └── test_token_provider.py ├── test_client.py ├── test_configuration.py ├── test_expectation.py ├── test_latency_unit.py └── test_scheduler.py └── tox.ini /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | # Python CircleCI 2.0 configuration file 2 | # 3 | # Check https://circleci.com/docs/2.0/language-python/ for more details 4 | # 5 | version: 2 6 | jobs: 7 | build: 8 | docker: 9 | # specify the version you desire here 10 | # use `-browsers` prefix for selenium tests, e.g. 
`3.6.1-browsers` 11 | - image: circleci/python:3.7.2 12 | 13 | working_directory: ~/repo 14 | 15 | steps: 16 | - checkout 17 | 18 | # Download and cache dependencies 19 | - restore_cache: 20 | keys: 21 | - v1-dependencies-{{ checksum "dev_requirements.txt" }} 22 | # fallback to using the latest cache if no exact match is found 23 | - v1-dependencies- 24 | 25 | - run: 26 | name: install dependencies 27 | command: | 28 | sudo pip3 install virtualenv 29 | virtualenv venv 30 | source venv/bin/activate 31 | pip3 install -r dev_requirements.txt 32 | pip3 install -r requirements.txt 33 | pip3 install coveralls 34 | pip3 install tox 35 | 36 | - save_cache: 37 | paths: 38 | - ./venv 39 | key: v1-dependencies-{{ checksum "dev_requirements.txt" }} 40 | 41 | - run: 42 | name: run tests 43 | command: | 44 | source venv/bin/activate 45 | tox -e circleci 46 | 47 | - store_test_results: 48 | path: test-reports 49 | 50 | - store_artifacts: 51 | path: test-reports 52 | destination: test-reports 53 | -------------------------------------------------------------------------------- /.codacy.yml: -------------------------------------------------------------------------------- 1 | --- 2 | engines: 3 | pylint: 4 | enabled: true 5 | python_version: 3 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | bin/ 2 | include/ 3 | lib/ 4 | share/ 5 | .Python 6 | *.swp 7 | *.pyc 8 | .cache 9 | .coverage 10 | *.egg-info 11 | MANIFEST 12 | dist/ 13 | .idea 14 | .pytest_cache/ 15 | pip-selfcheck.json 16 | .eggs 17 | test-reports/ 18 | .tox/ 19 | venv 20 | coverage.xml 21 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v2.4.0 4 | hooks: 5 | - id: check-yaml 
6 | - id: end-of-file-fixer 7 | - id: trailing-whitespace 8 | - id: mixed-line-ending 9 | - repo: https://gitlab.com/pycqa/flake8 10 | rev: 3.7.4 11 | hooks: 12 | - id: flake8 13 | - repo: https://github.com/psf/black 14 | rev: 19.3b0 15 | hooks: 16 | - id: black 17 | args: 18 | - "-l 120" 19 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7.2-alpine 2 | MAINTAINER Mitsuo Takaki 3 | 4 | WORKDIR /usr/src/app 5 | 6 | RUN python3.7 -m pip install --upgrade pip 7 | COPY requirements.txt ./ 8 | RUN pip3 install --no-cache-dir -r requirements.txt 9 | 10 | COPY cachet_url_monitor /usr/src/app/cachet_url_monitor 11 | COPY setup.py /usr/src/app/ 12 | RUN python3.7 setup.py install 13 | 14 | COPY config.yml /usr/src/app/config/ 15 | VOLUME /usr/src/app/config/ 16 | 17 | CMD ["python3.7", "./cachet_url_monitor/scheduler.py", "config/config.yml"] 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Status 2 | [![CircleCI](https://circleci.com/gh/mtakaki/cachet-url-monitor/tree/master.svg?style=svg)](https://circleci.com/gh/mtakaki/cachet-url-monitor/tree/master) 3 | [![Coverage Status](https://coveralls.io/repos/github/mtakaki/cachet-url-monitor/badge.svg?branch=master)](https://coveralls.io/github/mtakaki/cachet-url-monitor?branch=master) 4 | [![Codacy Badge](https://api.codacy.com/project/badge/Grade/7ef4123130ef4140b8ea7b94d460ba64)](https://www.codacy.com/app/mitsuotakaki/cachet-url-monitor?utm_source=github.com&utm_medium=referral&utm_content=mtakaki/cachet-url-monitor&utm_campaign=Badge_Grade) 5 | ![Docker Pulls](https://img.shields.io/docker/pulls/mtakaki/cachet-url-monitor) 6 | [![Docker stars](https://img.shields.io/docker/stars/mtakaki/cachet-url-monitor.svg)](https://hub.docker.com/r/mtakaki/cachet-url-monitor/) 7 | ![License](https://img.shields.io/github/license/mtakaki/cachet-url-monitor.svg) 8 | [![Latest release](https://img.shields.io/pypi/v/cachet-url-monitor.svg)](https://pypi.python.org/pypi/cachet-url-monitor) 9 | [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit) 10 | 11 | cachet-url-monitor 12 | ======================== 13 | Python plugin for [cachet](cachethq.io) 
that monitors an URL, verifying it's response status and latency. The 14 | frequency the URL is tested is configurable, along with the assertion applied to the request response. 15 | 16 | This project is available at PyPI: [https://pypi.python.org/pypi/cachet-url-monitor](https://pypi.python.org/pypi/cachet-url-monitor) 17 | 18 | ## Configuration 19 | 20 | ```yaml 21 | endpoints: 22 | - name: Google 23 | url: http://www.google.com 24 | method: GET 25 | header: 26 | SOME-HEADER: SOME-VALUE 27 | timeout: 1 # seconds 28 | expectation: 29 | - type: HTTP_STATUS 30 | status_range: 200-205 31 | - type: LATENCY 32 | threshold: 1 33 | - type: REGEX 34 | regex: ".*.*" 35 | allowed_fails: 0 36 | component_id: 1 37 | metric_id: 1 38 | action: 39 | - UPDATE_STATUS 40 | public_incidents: true 41 | latency_unit: ms 42 | frequency: 5 43 | - name: Amazon 44 | url: http://www.amazon.com 45 | method: GET 46 | header: 47 | SOME-HEADER: SOME-VALUE 48 | timeout: 1 # seconds 49 | expectation: 50 | - type: HTTP_STATUS 51 | status_range: 200-205 52 | incident: MAJOR 53 | - type: LATENCY 54 | threshold: 1 55 | - type: REGEX 56 | regex: ".*.*" 57 | threshold: 10 58 | allowed_fails: 0 59 | component_id: 2 60 | action: 61 | - CREATE_INCIDENT 62 | public_incidents: true 63 | latency_unit: ms 64 | frequency: 5 65 | - name: Insecure-site 66 | url: https://www.Insecure-site-internal.com 67 | method: GET 68 | header: 69 | SOME-HEADER: SOME-VALUE 70 | insecure: true 71 | timeout: 1 # seconds 72 | expectation: 73 | - type: HTTP_STATUS 74 | status_range: 200-205 75 | allowed_fails: 0 76 | component_id: 2 77 | action: 78 | - CREATE_INCIDENT 79 | public_incidents: true 80 | frequency: 5 81 | cachet: 82 | api_url: http://status.cachethq.io/api/v1 83 | token: 84 | - type: ENVIRONMENT_VARIABLE 85 | value: CACHET_TOKEN 86 | - type: AWS_SECRETS_MANAGER 87 | secret_name: cachethq 88 | secret_key: token 89 | region: us-west-2 90 | - type: TOKEN 91 | value: my_token 92 | webhooks: 93 | - url: 
"https://push.example.com/message?token=" 94 | params: 95 | title: "{title}" 96 | message: "{message}" 97 | priority: 5 98 | messages: 99 | incident_outage: "{name} is unavailable" 100 | incident_operational: "{name} is operational" 101 | incident_performance: "{name} has degraded performance" 102 | ``` 103 | 104 | - **endpoints**, the configuration about the URL/URLs that will be monitored. 105 | - **name**, the name of the component. This is now mandatory (since 0.6.0) so we can distinguish the logs for 106 | each URL being monitored. 107 | - **url**, the URL that is going to be monitored. *mandatory* 108 | - **method**, the HTTP method that will be used by the monitor. *mandatory* 109 | - **header**, client header passed to the request. Remove if you do not want to pass a header. 110 | - **insecure**, for URLs which have self-signed/invalid SSL certs OR you wish to disable SSL check, use this key. Default is false, so by default we validate SSL certs. 111 | - **timeout**, how long we'll wait to consider the request failed. The unit of it is seconds. *mandatory* 112 | - **expectation**, the list of expectations set for the URL. *mandatory* 113 | - **HTTP_STATUS**, we will verify if the response status code falls into the expected range. Please keep in 114 | mind the range is inclusive on the first number and exclusive on the second number. If just one value is 115 | specified, it will default to only the given value, for example `200` will be converted to `200-201`. 116 | - **LATENCY**, we measure how long the request took to get a response and fail if it's above the threshold 117 | . The unit is in seconds. 118 | - **REGEX**, we verify if the response body matches the given regex. 119 | - **allowed_fails**, create incident/update component status only after specified amount of failed connection trials. 120 | - **component_id**, the id of the component we're monitoring. This will be used to update the status of the 121 | component. 
*mandatory* 122 | - **metric_id**, this will be used to store the latency of the API. If this is not set, it will be ignored. 123 | - **action**, the action to be done when one of the expectations fails. This is optional and if left blank 124 | , nothing will be done to the component. 125 | - **CREATE_INCIDENT**, we will create an incident when the expectation fails. 126 | - **UPDATE_STATUS**, updates the component status. 127 | - **PUSH_METRICS**, uploads response latency metrics. 128 | - **public_incidents**, boolean to decide if created incidents should be visible to everyone or only to logged in 129 | users. Important only if `CREATE_INCIDENT` or `UPDATE_STATUS` are set. 130 | - **latency_unit**, the latency unit used when reporting the metrics. It will automatically convert to the 131 | specified unit. It's not mandatory and it will default to **seconds**. Available units: `ms`, `s`, `m`, `h`. 132 | - **frequency**, how often we'll send a request to the given URL. The unit is in seconds. 133 | - **cachet**, this is the settings for our cachet server. 134 | - **api_url**, the cachet API endpoint. *mandatory* 135 | - **token**, the API token. It can either be a string (backwards compatible with old configuration) or a list of 136 | token providers. It will read in the specified order and fallback to the next option if no token could be found 137 | . (since 0.6.10) *mandatory* 138 | - **ENVIRONMENT_VARIABLE**, it will read the token from the specified environment variable. 139 | - **TOKEN**, it's a string and it will be read directly from the configuration. 140 | - **AWS_SECRETS_MANAGER**, it will attempt reading the token from 141 | [AWS Secrets Manager](https://aws.amazon.com/secrets-manager/). It requires setting up the AWS credentials 142 | into the docker container. More instructions below. It takes these parameters: 143 | - **secret_name**, the name of the secret. 144 | - **secret_key**, the key under which the token is stored. 
145 | - **region**, the AWS region. 146 | - **webhooks**, generic webhooks to be notified about incident updates 147 | - **url**, webhook URL, will be interpolated 148 | - **params**, POST parameters, will be interpolated 149 | - **messages**, customize text for generated events, use any of the **endpoint** parameters in interpolation 150 | - **incident_outage**, title of incident in case of outage 151 | - **incident_performance**, title of incident in case of performance issues 152 | - **incident_operational**, title of incident in case service is operational 153 | 154 | Each `expectation` has its own default incident status. It can be overridden by setting the `incident` property to 155 | any of the following values: 156 | - `PARTIAL` 157 | - `MAJOR` 158 | - `PERFORMANCE` 159 | 160 | By choosing any of the aforementioned statuses, it will let you control the kind of incident it should be considered 161 | . These are the default incident statuses for each `expectation` type: 162 | 163 | | Expectation | Incident status | 164 | | ----------- | --------------- | 165 | | HTTP_STATUS | PARTIAL | 166 | | LATENCY | PERFORMANCE | 167 | | REGEX | PARTIAL | 168 | 169 | The following parameters are available in webhook interpolation 170 | 171 | | Parameter | Description | 172 | | --------- | ----------- | 173 | | `{title}` | Event title, includes endpoint name and short status | 174 | | `{message}` | Event message, same as sent to Cachet | 175 | 176 | ### AWS Secrets Manager 177 | This tool can integrate with AWS Secrets Manager, where the token is fetched directly from the service. In order to 178 | get this functionality working, you will need to set up the AWS credentials into the container. The easiest way would 
The easiest way would 179 | be setting the environment variables: 180 | ```bash 181 | $ docker run --rm -it -e AWS_ACCESS_KEY_ID=xyz -e AWS_SECRET_ACCESS_KEY=aaa -v "$PWD"/my_config.yml:/usr/src/app/config/config.yml:ro mtakaki/cachet-url-monitor 182 | ``` 183 | 184 | ## Setting up 185 | 186 | The application should be installed using **virtualenv**, through the following command: 187 | 188 | ```bash 189 | $ git clone https://github.com/mtakaki/cachet-url-monitor.git 190 | $ cd cachet-url-monitor 191 | $ virtualenv venv 192 | $ source venv/bin/activate 193 | $ pip install -r requirements.txt 194 | $ python3 setup.py install 195 | ``` 196 | 197 | To start the agent: 198 | 199 | ```bash 200 | $ python3 cachet_url_monitor/scheduler.py config.yml 201 | ``` 202 | 203 | ## Docker 204 | 205 | You can run the agent in docker, so you won't need to worry about installing python, virtualenv, or any other 206 | dependency into your OS. The `Dockerfile` is already checked in and it's ready to be used. 207 | 208 | You have two choices, checking this repo out and building the docker image or it can be pulled directly from 209 | [dockerhub](https://hub.docker.com/r/mtakaki/cachet-url-monitor/). You will need to create your own custom `config 210 | .yml` file and run (it will pull latest): 211 | 212 | ```bash 213 | $ docker pull mtakaki/cachet-url-monitor 214 | $ docker run --rm -it -v "$PWD":/usr/src/app/config/ mtakaki/cachet-url-monitor 215 | ``` 216 | 217 | If you're going to use a file with a name other than `config.yml`, you will need to map the local file, like this: 218 | 219 | ```bash 220 | $ docker run --rm -it -v "$PWD"/my_config.yml:/usr/src/app/config/config.yml:ro mtakaki/cachet-url-monitor 221 | ``` 222 | 223 | ### Docker compose 224 | 225 | Docker compose has been removed from this repo as it had a dependency on PostgreSQL and it slightly complicated how it works. 
This has been kindly handled on: https://github.com/boonisz/cachet-url-monitor-dc It facilitates spawning CachetHQ with its dependencies and cachet-url-monitor alongside to it. 226 | 227 | ## Generating configuration from existing CachetHQ instance (since 0.6.2) 228 | 229 | In order to expedite the creation of your configuration file, you can use the client to automatically scrape the 230 | CachetHQ instance and spit out a YAML file. It can be used like this: 231 | ```bash 232 | $ python cachet_url_monitor/client.py http://localhost/api/v1 my-token test.yml 233 | ``` 234 | Or from docker (you will end up with a `test.yml` in your `$PWD/tmp` folder): 235 | ```bash 236 | $ docker run --rm -it -v $PWD/tmp:/home/tmp/ mtakaki/cachet-url-monitor python3.7 ./cachet_url_monitor/client.py http://localhost/api/v1 my-token /home/tmp/test.yml 237 | ``` 238 | The arguments are: 239 | - **URL**, the CachetHQ API URL, so that means appending `/api/v1` to your hostname. 240 | - **token**, the token that has access to your CachetHQ instance. 241 | - **filename**, the file where it should write the configuration. 242 | 243 | ### Caveats 244 | Because we can't predict what expectations will be needed, it will default to these behavior: 245 | - Verify a [200-300[ HTTP status range. 246 | - If status fail, make the incident major and public. 247 | - Frequency of 30 seconds. 248 | - `GET` request. 249 | - Timeout of 1s. 250 | - We'll read the `link` field from the components and use it as the URL. 
251 | 252 | ## Troubleshooting 253 | 254 | ### SSLERROR 255 | If it's throwing the following exception: 256 | ```python 257 | raise SSLError(e, request=request) 258 | requests.exceptions.SSLError: HTTPSConnectionPool(host='redacted', port=443): Max retries exceeded with url: /api/v1/components/19 (Caused by SSLError(SSLError(1, u'[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:579)'),)) 259 | ``` 260 | 261 | It can be resolved by setting the CA bundle environment variable `REQUESTS_CA_BUNDLE` pointing at your certificate 262 | file. It can either be set in your python environment, before running this tool, or in your docker container. 263 | 264 | # Development 265 | If you want to contribute to this project, feel free to fork this repo and post PRs with any improvements or bug 266 | fixes. This is highly appreciated, as it's been hard to deal with numerous requests coming my end. 267 | 268 | This repo is setup with [pre-commit hooks](https://pre-commit.com/) and it should ensure code style is consistent 269 | . 
def normalize_url(url: str) -> str:
    """Return ``url`` with an explicit scheme, defaulting to plain HTTP.

    Only a leading ``http://`` or ``https://`` (compared case-insensitively) counts as an existing
    scheme. A bare host that merely begins with the letters "http" (for example ``httpbin.org``)
    therefore still receives the default ``http://`` prefix instead of being returned without one.

    :param url: the URL, or bare host, to normalize.
    :return: ``url`` unchanged when it already carries an HTTP(S) scheme, otherwise ``http://`` + ``url``.
    """
    if url.lower().startswith(("http://", "https://")):
        return url
    return f"http://{url}"
def generate_config(self):
    """Builds a starter monitoring configuration out of the components returned by get_components().

    Components whose ``enabled`` field is falsy are left out. Every remaining component becomes one
    endpoint entry that uses the component's ``link`` as URL and a fixed set of defaults (GET request,
    1 second timeout, HTTP status range 200-300 reported as a MAJOR incident, 30 second frequency).
    :return: dictionary holding the ``cachet`` connection settings and the generated ``endpoints`` list.
    """
    endpoints = []
    for entry in self.get_components():
        if not entry["enabled"]:
            continue
        endpoints.append(
            {
                "name": entry["name"],
                "url": entry["link"],
                "method": "GET",
                "timeout": 1,
                "expectation": [{"type": "HTTP_STATUS", "status_range": "200-300", "incident": "MAJOR"}],
                "allowed_fails": 0,
                "frequency": 30,
                "component_id": entry["id"],
                "action": ["CREATE_INCIDENT", "UPDATE_STATUS"],
                "public_incidents": True,
            }
        )

    cachet_settings = {"api_url": self.url, "token": self.token}
    return {"cachet": cachet_settings, "endpoints": endpoints}
def push_metrics(self, metric_id: int, latency_time_unit: str, elapsed_time_in_seconds: int, timestamp: int):
    """Posts one data point for the given metric to the cachet server.

    The elapsed time is passed through latency_unit.convert_to_unit() together with the requested
    unit, and the result is sent as the point value along with the timestamp.
    :return: the response of the POST request.
    """
    payload = {
        "id": metric_id,
        "value": latency_unit.convert_to_unit(latency_time_unit, elapsed_time_in_seconds),
        "timestamp": timestamp,
    }
    points_url = f"{self.url}/metrics/{metric_id}/points"
    return requests.post(points_url, params=payload, headers=self.headers)
@click.group()
def cli():
    """Empty command group, kept so existing references to ``cli`` keep resolving."""


@click.command()
@click.argument("url")
@click.argument("token")
@click.argument("output")
def run_client(url, token, output):
    """Generates a configuration file from an existing CachetHQ instance.

    URL is the CachetHQ API endpoint, TOKEN is the API token used to query it and OUTPUT is the file
    the generated YAML configuration is written to.
    """
    client = CachetClient(url, token)
    config = client.generate_config()
    save_config(config, output)


if __name__ == "__main__":
    # `cli` has no sub-commands registered, so invoking it with the positional arguments would fail
    # with "No such command". Run the command itself, matching the usage
    # `client.py <url> <token> <output>`.
    run_client()
def __init__(self, config, endpoint_index: int, client: CachetClient, webhooks: Optional[List[Webhook]] = None):
    """Loads the settings of a single endpoint, validates them and reads the component's current status.

    :param config: the parsed configuration; must hold an ``endpoints`` list and may hold ``messages``.
    :param endpoint_index: index, within ``config["endpoints"]``, of the endpoint this instance handles.
    :param client: client used to talk to the cachet server.
    :param webhooks: optional list of webhooks; an empty list is used when omitted.
    :raises ConfigurationValidationError: when the endpoint has no ``name`` or validate() rejects it.
    """
    self.endpoint_index = endpoint_index
    self.data = config
    self.endpoint = self.data["endpoints"][endpoint_index]
    self.messages = config.get("messages", default_messages)
    self.client = client
    self.webhooks = webhooks or []

    self.current_fails = 0
    self.trigger_update = True

    if "name" not in self.endpoint:
        # We have to make this mandatory, otherwise the logs are confusing when there are multiple URLs.
        raise ConfigurationValidationError("name")

    self.logger = logging.getLogger(f'cachet_url_monitor.configuration.Configuration.{self.endpoint["name"]}')

    # Exposing the configuration to confirm it's parsed as expected.
    self.print_out()

    # We need to validate the configuration is correct and then validate the component actually exists.
    self.validate()

    # We store the main information from the configuration file, so we don't keep reading from the data dictionary.

    self.endpoint_method = self.endpoint["method"]
    self.endpoint_url = normalize_url(self.endpoint["url"])
    self.endpoint_timeout = self.endpoint.get("timeout") or 1
    self.endpoint_header = self.endpoint.get("header") or None
    self.allowed_fails = self.endpoint.get("allowed_fails") or 0

    self.component_id = self.endpoint["component_id"]
    self.metric_id = self.endpoint.get("metric_id")

    if self.metric_id is not None:
        # default_metric_value is only set when a metric is configured.
        self.default_metric_value = self.client.get_default_metric_value(self.metric_id)

    # The latency_unit configuration is not mandatory and we fallback to seconds, by default.
    self.latency_unit = self.endpoint.get("latency_unit") or "s"

    # We need the current status so we monitor the status changes. This is necessary for creating incidents.
    self.status = self.client.get_component_status(self.component_id)
    self.previous_status = self.status
    self.logger.info(f"Component current status: {self.status}")

    # Get remaining settings. `public_incidents` is not a mandatory field, so a missing (or empty) value
    # falls back to non-public incidents instead of raising a bare KeyError after validation passed.
    self.public_incidents = int(self.endpoint.get("public_incidents") or False)

    self.logger.info("Monitoring URL: %s %s" % (self.endpoint_method, self.endpoint_url))
    self.expectations = [Expectation.create(expectation) for expectation in self.endpoint["expectation"]]
    for expectation in self.expectations:
        self.logger.info("Registered expectation: %s" % (expectation,))
def evaluate(self):
    """Sends the request to the URL set in the configuration and executes
    each one of the expectations, one by one. The status will be updated
    according to the expectation results.

    When the request itself fails no expectation is evaluated: a connection error or an HTTP error
    sets the status to PARTIAL_OUTAGE and a timeout sets it to PERFORMANCE_ISSUES. Otherwise the
    status starts as OPERATIONAL and is replaced by the worst status reported by the expectations,
    with ``self.message`` holding the message of the expectation that set it.
    """
    try:
        # One call covers endpoints with and without a custom header (headers=None is the requests
        # default). The `insecure` option is honoured in both cases; it used to be ignored when no
        # header was configured.
        self.request = requests.request(
            self.endpoint_method,
            self.endpoint_url,
            timeout=self.endpoint_timeout,
            headers=self.endpoint_header,
            verify=not self.endpoint.get("insecure", False),
        )

        self.current_timestamp = int(time.time())
    except requests.ConnectionError:
        # NOTE(review): requests.ConnectTimeout also derives from ConnectionError, so connect
        # timeouts are reported through this branch rather than the timeout branch below.
        self.message = "The URL is unreachable: %s %s" % (self.endpoint_method, self.endpoint_url)
        self.logger.warning(self.message)
        self.status = st.ComponentStatus.PARTIAL_OUTAGE
        return
    except requests.HTTPError:
        self.message = "Unexpected HTTP response"
        self.logger.exception(self.message)
        self.status = st.ComponentStatus.PARTIAL_OUTAGE
        return
    except (requests.Timeout, requests.ConnectTimeout):
        self.message = "Request timed out"
        self.logger.warning(self.message)
        self.status = st.ComponentStatus.PERFORMANCE_ISSUES
        return

    # We initially assume the API is healthy.
    self.status = st.ComponentStatus.OPERATIONAL
    self.message = ""
    for expectation in self.expectations:
        status: ComponentStatus = expectation.get_status(self.request)

        # The greater the status is, the worse the state of the API is.
        if status.value > self.status.value:
            self.status = status
            self.message = expectation.get_message(self.request)
            self.logger.info(self.message)
227 | self.logger.info(f"No changes to component status.") 228 | self.trigger_update = False 229 | return 230 | 231 | self.previous_status = self.status 232 | 233 | if not self.trigger_update: 234 | return 235 | 236 | api_component_status = self.client.get_component_status(self.component_id) 237 | 238 | if self.status == api_component_status: 239 | return 240 | 241 | component_request = self.client.push_status(self.component_id, self.status) 242 | if component_request.ok: 243 | # Successful update 244 | self.logger.info(f"Component update: status [{self.status}]") 245 | else: 246 | # Failed to update the API status 247 | self.logger.warning( 248 | f"Component update failed with HTTP status: {component_request.status_code}. API" 249 | f" status: {self.status}" 250 | ) 251 | 252 | def push_metrics(self): 253 | """Pushes the total amount of seconds the request took to get a response from the URL. 254 | It only will send a request if the metric id was set in the configuration. 255 | In case of failed connection trial pushes the default metric value. 256 | """ 257 | if self.metric_id and hasattr(self, "request"): 258 | # We convert the elapsed time from the request, in seconds, to the configured unit. 
259 | metrics_request = self.client.push_metrics( 260 | self.metric_id, self.latency_unit, self.request.elapsed.total_seconds(), self.current_timestamp 261 | ) 262 | if metrics_request.ok: 263 | # Successful metrics upload 264 | self.logger.info("Metric uploaded: %.6f %s" % (self.request.elapsed.total_seconds(), self.latency_unit)) 265 | else: 266 | self.logger.warning(f"Metric upload failed with status [{metrics_request.status_code}]") 267 | 268 | def trigger_webhooks(self): 269 | """Trigger webhooks.""" 270 | if self.status == st.ComponentStatus.PERFORMANCE_ISSUES: 271 | title = f'{self.endpoint["name"]} degraded' 272 | elif self.status == st.ComponentStatus.OPERATIONAL: 273 | title = f'{self.endpoint["name"]} OK' 274 | else: 275 | title = f'{self.endpoint["name"]} unavailable' 276 | for webhook in self.webhooks: 277 | webhook_request = webhook.push_incident(self.get_incident_title(), self.message) 278 | if webhook_request.ok: 279 | self.logger.info(f"Webhook {webhook.url} triggered with {title}") 280 | else: 281 | self.logger.warning(f"Webhook {webhook.url} failed with status [{webhook_request.status_code}]") 282 | 283 | def push_incident(self): 284 | """If the component status has changed, we create a new incident (if this is the first time it becomes unstable) 285 | or updates the existing incident once it becomes healthy again. 
286 | """ 287 | if not self.trigger_update: 288 | return 289 | if hasattr(self, "incident_id") and self.status == st.ComponentStatus.OPERATIONAL: 290 | incident_request = self.client.push_incident( 291 | self.status, 292 | self.public_incidents, 293 | self.component_id, 294 | self.get_incident_title(), 295 | previous_incident_id=self.incident_id, 296 | ) 297 | 298 | if incident_request.ok: 299 | # Successful metrics upload 300 | self.logger.info( 301 | f'Incident updated, API healthy again: component status [{self.status}], message: "{self.message}"' 302 | ) 303 | del self.incident_id 304 | else: 305 | self.logger.warning( 306 | f'Incident update failed with status [{incident_request.status_code}], message: "{self.message}"' 307 | ) 308 | 309 | self.trigger_webhooks() 310 | elif not hasattr(self, "incident_id") and self.status != st.ComponentStatus.OPERATIONAL: 311 | incident_request = self.client.push_incident( 312 | self.status, self.public_incidents, self.component_id, self.get_incident_title(), message=self.message 313 | ) 314 | if incident_request.ok: 315 | # Successful incident upload. 
class Expectation(abc.ABC):
    """Base class for URL result expectations. Any new expectation should extend
    this class and have its name added to the create() method.

    Deriving from ``abc.ABC`` is what makes the ``@abc.abstractmethod`` markers below
    effective: a subclass that forgets one of them can no longer be instantiated.
    """

    @staticmethod
    def create(configuration):
        """Creates the expectation matching the ``type`` entry of the given configuration.

        :param configuration: The settings of a single expectation.
        :return: An instance of the expectation class registered for that type.
        :raises ConfigurationValidationError: when the type is missing or unknown.
        """
        # If a new expectation is created, this is where we need to add it.
        expectations = {"HTTP_STATUS": HttpStatus, "LATENCY": Latency, "REGEX": Regex}
        expectation_type = configuration.get("type")
        if expectation_type not in expectations:
            raise ConfigurationValidationError(f"Invalid type: {expectation_type}")

        return expectations[expectation_type](configuration)

    def __init__(self, configuration):
        self.incident_status = self.parse_incident_status(configuration)

    @abc.abstractmethod
    def get_status(self, response) -> ComponentStatus:
        """Returns the status of the API, following cachet's component status
        documentation: https://docs.cachethq.io/docs/component-statuses
        """

    @abc.abstractmethod
    def get_message(self, response) -> str:
        """Gets the error message."""

    @abc.abstractmethod
    def get_default_incident(self):
        """Returns the default status when this incident happens."""

    def parse_incident_status(self, configuration) -> ComponentStatus:
        """Maps the optional ``incident`` setting to a component status, falling back to the
        expectation's default incident status when it is absent or not a known name.
        """
        return st.INCIDENT_MAP.get(configuration.get("incident", None), self.get_default_incident())


class HttpStatus(Expectation):
    """Expects the HTTP status code to fall inside a half-open range ``[start, end[``."""

    def __init__(self, configuration):
        self.status_range = HttpStatus.parse_range(configuration["status_range"])
        super(HttpStatus, self).__init__(configuration)

    @staticmethod
    def parse_range(range_string):
        """Parses ``"200-300"``, ``"200"`` or the integer ``200`` into a ``(start, end)`` tuple.

        A single status ``n`` becomes ``(n, n + 1)`` so that only that status matches.
        """
        if isinstance(range_string, int):
            # This happens when there's no range and no dash character, it will be parsed as int already.
            return range_string, range_string + 1

        statuses = range_string.split("-")
        if len(statuses) == 1:
            # When there was no range given, we should treat the first number as a single status check.
            return int(statuses[0]), int(statuses[0]) + 1
        else:
            # We shouldn't look into more than one value, as this is a range value.
            return int(statuses[0]), int(statuses[1])

    def get_status(self, response) -> ComponentStatus:
        if self.status_range[0] <= response.status_code < self.status_range[1]:
            return st.ComponentStatus.OPERATIONAL
        else:
            return self.incident_status

    def get_default_incident(self):
        return st.ComponentStatus.PARTIAL_OUTAGE

    def get_message(self, response):
        return f"Unexpected HTTP status ({response.status_code})"

    def __str__(self):
        return repr(f"HTTP status range: [{self.status_range[0]}, {self.status_range[1]}[")


class Latency(Expectation):
    """Expects the response time, in seconds, to be at most the configured threshold."""

    def __init__(self, configuration):
        self.threshold = configuration["threshold"]
        super(Latency, self).__init__(configuration)

    def get_status(self, response) -> ComponentStatus:
        if response.elapsed.total_seconds() <= self.threshold:
            return st.ComponentStatus.OPERATIONAL
        else:
            return self.incident_status

    def get_default_incident(self):
        return st.ComponentStatus.PERFORMANCE_ISSUES

    def get_message(self, response):
        return "Latency above threshold: %.4f seconds" % (response.elapsed.total_seconds(),)

    def __str__(self):
        return repr("Latency threshold: %.4f seconds" % (self.threshold,))


class Regex(Expectation):
    """Expects the response body to match the configured regular expression.

    ``re.match`` is used, so the pattern is anchored at the beginning of the body;
    ``DOTALL`` lets ``.`` span line breaks.
    """

    def __init__(self, configuration):
        self.regex_string = configuration["regex"]
        self.regex = re.compile(configuration["regex"], re.UNICODE | re.DOTALL)
        super(Regex, self).__init__(configuration)

    def get_status(self, response) -> ComponentStatus:
        if self.regex.match(response.text):
            return st.ComponentStatus.OPERATIONAL
        else:
            return self.incident_status

    def get_default_incident(self):
        return st.ComponentStatus.PARTIAL_OUTAGE

    def get_message(self, response):
        return "Regex did not match anything in the body"

    def __str__(self):
        return repr(f"Regex: {self.regex_string}")
class TokenProvider:
    """Base class for the strategies that can supply the cachet API token."""

    def __init__(self):
        pass

    def get_token(self) -> Optional[str]:
        """Returns the token, or None when this provider has none to offer."""
        pass


class EnvironmentVariableTokenProvider(TokenProvider):
    """Reads the token from the environment variable named by the ``value`` setting."""

    variable_name: str

    def __init__(self, config_data: Dict[str, Any]):
        self.variable_name = config_data["value"]

    def get_token(self) -> Optional[str]:
        return os.environ.get(self.variable_name)


class ConfigurationFileTokenProvider(TokenProvider):
    """Uses the token written directly in the configuration (``value`` setting)."""

    def __init__(self, config_data: Dict[str, Any]):
        self.token = config_data["value"]

    def get_token(self) -> Optional[str]:
        return self.token


class AwsSecretsManagerTokenRetrievalException(Exception):
    """Raised when the token cannot be obtained from AWS Secrets Manager."""

    def __init__(self, message):
        self.message = message

    def __repr__(self):
        return self.message


class AwsSecretsManagerTokenProvider(TokenProvider):
    """Reads the token from one key of a JSON secret stored in AWS Secrets Manager."""

    def __init__(self, config_data: Dict[str, Any]):
        self.secret_name = config_data["secret_name"]
        self.region = config_data["region"]
        self.secret_key = config_data["secret_key"]

    def get_token(self) -> Optional[str]:
        """Fetches the secret and returns the value stored under ``secret_key``.

        :raises AwsSecretsManagerTokenRetrievalException: when the AWS call fails, the secret
            is binary or not valid JSON, or it does not contain ``secret_key``.
        """
        session = Session()
        client = session.client(service_name="secretsmanager", region_name=self.region)
        try:
            get_secret_value_response = client.get_secret_value(SecretId=self.secret_name)
        except ClientError as e:
            error_code = e.response["Error"]["Code"]
            known_errors = {
                "ResourceNotFoundException": f"The requested secret {self.secret_name} was not found",
                "InvalidRequestException": "The request was invalid",
                "InvalidParameterException": "The request had invalid params",
            }
            # Any other error code used to be swallowed here, making the provider silently
            # return None; report it instead so the real cause is visible.
            message = known_errors.get(
                error_code, f"Unexpected error retrieving secret {self.secret_name}: {error_code}"
            )
            raise AwsSecretsManagerTokenRetrievalException(message) from e

        if "SecretString" not in get_secret_value_response:
            raise AwsSecretsManagerTokenRetrievalException(
                "Invalid secret format. It should be a SecretString, instead of binary."
            )

        try:
            secret = json.loads(get_secret_value_response["SecretString"])
        except ValueError as e:
            raise AwsSecretsManagerTokenRetrievalException(
                f"The secret {self.secret_name} is not a valid JSON document"
            ) from e

        try:
            return secret[self.secret_key]
        except KeyError:
            raise AwsSecretsManagerTokenRetrievalException(f"Invalid secret_key parameter: {self.secret_key}")


# Maps the `type` of a token configuration entry to the provider class (not an instance)
# that handles it.
TYPE_NAME_TO_CLASS: Dict[str, type] = {
    "ENVIRONMENT_VARIABLE": EnvironmentVariableTokenProvider,
    "TOKEN": ConfigurationFileTokenProvider,
    "AWS_SECRETS_MANAGER": AwsSecretsManagerTokenProvider,
}


class InvalidTokenProviderTypeException(Exception):
    """Raised when a token configuration entry names an unknown provider type."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return f"Invalid token provider type: {self.name}"

    def __str__(self):
        # Keep tracebacks and log lines as descriptive as repr().
        return self.__repr__()


def get_token_provider_by_name(name: str) -> type:
    """Returns the provider class registered for the given type name.

    :raises InvalidTokenProviderTypeException: when no provider is registered under ``name``.
    """
    try:
        return TYPE_NAME_TO_CLASS[name]
    except KeyError:
        raise InvalidTokenProviderTypeException(name)


class TokenNotFoundException(Exception):
    """Raised when none of the configured providers returned a token."""

    def __repr__(self):
        return "Token could not be found"

    def __str__(self):
        return self.__repr__()


def get_token(token_config: Any) -> str:
    """Resolves the cachet API token.

    :param token_config: Either a list of provider entries (each a dict with at least a
        ``type`` key), tried in order until one yields a non-empty token, or the plain
        token value itself.
    :return: The first token found. For the plain form the ``CACHET_TOKEN`` environment
        variable, when set, takes precedence over the configured value.
    :raises TokenNotFoundException: when the list form is used and no provider has a token.
    """
    if isinstance(token_config, list):
        for token_provider in token_config:
            provider = get_token_provider_by_name(token_provider["type"])(token_provider)
            token = provider.get_token()
            if token:
                return token
        raise TokenNotFoundException()
    return os.environ.get("CACHET_TOKEN") or token_config
class UpdateStatusDecorator(Decorator):
    """Updates the component status when an incident happens."""

    def execute(self, configuration: Configuration):
        configuration.push_status()


class CreateIncidentDecorator(Decorator):
    """Creates an incident entry on cachet when an incident happens."""

    def execute(self, configuration: Configuration):
        configuration.push_incident()


class PushMetricsDecorator(Decorator):
    """Updates the URL latency metric."""

    def execute(self, configuration: Configuration):
        configuration.push_metrics()


# Maps the action names accepted in an endpoint's `action` list to their implementation.
ACTION_NAMES_DECORATOR_MAP = {
    "CREATE_INCIDENT": CreateIncidentDecorator,
    "UPDATE_STATUS": UpdateStatusDecorator,
    "PUSH_METRICS": PushMetricsDecorator,
}


class Agent(object):
    """Monitor agent that will be constantly verifying if the URL is healthy
    and updating the component.
    """

    configuration: Configuration
    decorators: List[Decorator]

    def __init__(self, configuration: Configuration, decorators: List[Decorator] = None):
        self.configuration = configuration
        if decorators is None:
            decorators = []
        self.decorators = decorators

    def execute(self):
        """Will verify the API status and push the status and metrics to the
        cachet server.
        """
        self.configuration.evaluate()
        self.configuration.if_trigger_update()

        # The decorators run in the order in which the actions were configured.
        for decorator in self.decorators:
            decorator.execute(self.configuration)


class Scheduler(object):
    """Runs an agent in a loop, sleeping the endpoint's ``frequency`` between executions."""

    logger: logging.Logger
    configuration: Configuration
    agent: Agent
    stop: bool

    def __init__(self, configuration: Configuration, agent):
        self.logger = logging.getLogger("cachet_url_monitor.scheduler.Scheduler")
        self.configuration = configuration
        self.agent = agent
        self.stop = False

    def start(self):
        """Blocks, executing the agent until ``self.stop`` becomes true."""
        self.logger.info("Starting monitor agent...")
        while not self.stop:
            self.agent.execute()
            time.sleep(self.configuration.endpoint["frequency"])


class NewThread(threading.Thread):
    """Thread that runs one scheduler."""

    scheduler: Scheduler

    def __init__(self, scheduler: Scheduler):
        threading.Thread.__init__(self)
        self.scheduler = scheduler

    def run(self):
        self.scheduler.start()


def build_agent(configuration: Configuration, logger: logging.Logger):
    """Creates the agent for a configuration, with one decorator per configured action.

    :raises KeyError: when an action name is not in ACTION_NAMES_DECORATOR_MAP.
    """
    actions: List[Decorator] = []
    for action in configuration.get_action():
        logger.info(f"Registering action {action}")
        actions.append(ACTION_NAMES_DECORATOR_MAP[action]())
    return Agent(configuration, decorators=actions)


def validate_config():
    """Validates the top-level structure of the module-level ``config_data``.

    Exits the process through fatal_error() when the endpoints are missing or empty, or
    when the ``cachet`` section is missing or lacks one of the mandatory fields.
    """
    if not isinstance(config_data, dict) or "endpoints" not in config_data.keys():
        fatal_error("Endpoints is a mandatory field")

    # Covers both `endpoints:` with no value (None) and an explicitly empty list.
    if not config_data["endpoints"]:
        fatal_error("Endpoints array can not be empty")

    cachet_config = config_data.get("cachet")
    if not isinstance(cachet_config, dict):
        fatal_error("Missing cachet mandatory fields")

    for key in cachet_mandatory_fields:
        if key not in cachet_config:
            fatal_error("Missing cachet mandatory fields")


def fatal_error(message: str):
    """Logs the message at FATAL level and terminates the process with exit code 1."""
    logging.getLogger("cachet_url_monitor.scheduler").fatal("%s", message)
    sys.exit(1)


if __name__ == "__main__":
    FORMAT = "%(levelname)9s [%(asctime)-15s] %(name)s - %(message)s"
    logging.basicConfig(format=FORMAT, level=logging.INFO)
    # Only records from this package's loggers are emitted.
    for handler in logging.root.handlers:
        handler.addFilter(logging.Filter("cachet_url_monitor"))

    if len(sys.argv) <= 1:
        fatal_error("Missing configuration file argument")

    try:
        # The context manager closes the file once the YAML has been parsed.
        with open(sys.argv[1], "r") as config_file:
            config_data = load(config_file, SafeLoader)
    except FileNotFoundError:
        fatal_error(f"File not found: {sys.argv[1]}")

    validate_config()

    webhooks: List[Webhook] = []
    # `or []` tolerates a `webhooks:` key that is present but has no value.
    for webhook in config_data.get("webhooks") or []:
        webhooks.append(Webhook(webhook["url"], webhook.get("params", {})))

    token: str = get_token(config_data["cachet"]["token"])
    # The CACHET_API_URL environment variable overrides the configured API URL.
    api_url: str = os.environ.get("CACHET_API_URL") or config_data["cachet"]["api_url"]
    client: CachetClient = CachetClient(api_url, token)
    # One monitoring thread per configured endpoint.
    for endpoint_index in range(len(config_data["endpoints"])):
        configuration = Configuration(config_data, endpoint_index, client, webhooks)
        NewThread(
            Scheduler(configuration, build_agent(configuration, logging.getLogger("cachet_url_monitor.scheduler")))
        ).start()
class Webhook:
    """A webhook endpoint that is notified about incidents.

    Both the URL and the query parameter values may contain ``{title}`` and ``{message}``
    placeholders, which are filled in when an incident is pushed.
    """

    url: str
    params: Dict[str, str]

    def __init__(self, url: str, params: Dict[str, str]):
        self.url = url
        self.params = params

    def push_incident(self, title: str, message: str):
        """Sends a POST request to the webhook for the given incident.

        :param title: The incident title.
        :param message: The incident message; the title is used in its place when it is empty.
        :return: The response returned by ``requests.post``.
        """
        substitutions = {"title": title, "message": message or title}

        # Interpolate URL and params
        target_url = self.url.format(**substitutions)
        query = {}
        for name, value in self.params.items():
            query[name] = str(value).format(**substitutions)

        # NOTE(review): no timeout is passed, so this call may block for as long as the
        # remote side keeps the connection open - confirm whether that is intended.
        return requests.post(target_url, params=query)
-------------------------------------------------------------------------------- /tests/plugins/test_token_provider.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import mock 3 | import pytest 4 | 5 | from cachet_url_monitor.plugins.token_provider import get_token 6 | from cachet_url_monitor.plugins.token_provider import get_token_provider_by_name 7 | from cachet_url_monitor.plugins.token_provider import AwsSecretsManagerTokenProvider 8 | from cachet_url_monitor.plugins.token_provider import ConfigurationFileTokenProvider 9 | from cachet_url_monitor.plugins.token_provider import EnvironmentVariableTokenProvider 10 | from cachet_url_monitor.plugins.token_provider import InvalidTokenProviderTypeException 11 | from cachet_url_monitor.plugins.token_provider import TokenNotFoundException 12 | from cachet_url_monitor.plugins.token_provider import AwsSecretsManagerTokenRetrievalException 13 | 14 | from botocore.exceptions import ClientError 15 | 16 | 17 | @pytest.fixture() 18 | def mock_boto3(): 19 | with mock.patch("cachet_url_monitor.plugins.token_provider.Session") as _mock_session: 20 | mock_session = mock.Mock() 21 | _mock_session.return_value = mock_session 22 | 23 | mock_client = mock.Mock() 24 | mock_session.client.return_value = mock_client 25 | yield mock_client 26 | 27 | 28 | def test_configuration_file_token_provider(): 29 | token_provider = ConfigurationFileTokenProvider({"value": "my_token", "type": "TOKEN"}) 30 | assert token_provider.get_token() == "my_token" 31 | 32 | 33 | @mock.patch("cachet_url_monitor.plugins.token_provider.os") 34 | def test_environment_variable_token_provider(mock_os): 35 | mock_os.environ.get.return_value = "my_token" 36 | token_provider = EnvironmentVariableTokenProvider({"value": "HQ_TOKEN", "type": "ENVIRONMENT_VARIABLE"}) 37 | assert token_provider.get_token() == "my_token" 38 | mock_os.environ.get.assert_called_with("HQ_TOKEN") 39 | 40 | 41 | def 
test_get_token_provider_by_name_token_type(): 42 | assert get_token_provider_by_name("TOKEN") == ConfigurationFileTokenProvider 43 | 44 | 45 | def test_get_token_provider_by_name_environment_variable_type(): 46 | assert get_token_provider_by_name("ENVIRONMENT_VARIABLE") == EnvironmentVariableTokenProvider 47 | 48 | 49 | def test_get_token_provider_by_name_aws_secrets_manager_type(): 50 | assert get_token_provider_by_name("AWS_SECRETS_MANAGER") == AwsSecretsManagerTokenProvider 51 | 52 | 53 | def test_get_token_provider_by_name_invalid_type(): 54 | with pytest.raises(InvalidTokenProviderTypeException) as exception_info: 55 | get_token_provider_by_name("WRONG") 56 | 57 | assert exception_info.value.__repr__() == "Invalid token provider type: WRONG" 58 | 59 | 60 | @mock.patch("cachet_url_monitor.plugins.token_provider.os") 61 | def test_get_token_first_succeeds(mock_os): 62 | mock_os.environ.get.return_value = "my_token_env_var" 63 | token = get_token([{"value": "HQ_TOKEN", "type": "ENVIRONMENT_VARIABLE"}, {"value": "my_token", "type": "TOKEN"}]) 64 | assert token == "my_token_env_var" 65 | mock_os.environ.get.assert_called_with("HQ_TOKEN") 66 | 67 | 68 | @mock.patch("cachet_url_monitor.plugins.token_provider.os") 69 | def test_get_token_second_succeeds(mock_os): 70 | mock_os.environ.get.return_value = None 71 | token = get_token([{"value": "HQ_TOKEN", "type": "ENVIRONMENT_VARIABLE"}, {"value": "my_token", "type": "TOKEN"}]) 72 | assert token == "my_token" 73 | mock_os.environ.get.assert_called_with("HQ_TOKEN") 74 | 75 | 76 | @mock.patch("cachet_url_monitor.plugins.token_provider.os") 77 | def test_get_token_no_token_found(mock_os): 78 | mock_os.environ.get.return_value = None 79 | with pytest.raises(TokenNotFoundException): 80 | get_token([{"value": "HQ_TOKEN", "type": "ENVIRONMENT_VARIABLE"}]) 81 | mock_os.environ.get.assert_called_with("HQ_TOKEN") 82 | 83 | 84 | def test_get_token_string_configuration(): 85 | token = get_token("my_token") 86 | assert token == 
"my_token" 87 | 88 | 89 | def test_get_aws_secrets_manager(mock_boto3): 90 | mock_boto3.get_secret_value.return_value = {"SecretString": '{"token": "my_token"}'} 91 | token = get_token( 92 | [{"secret_name": "hq_token", "type": "AWS_SECRETS_MANAGER", "region": "us-west-2", "secret_key": "token"}] 93 | ) 94 | assert token == "my_token" 95 | mock_boto3.get_secret_value.assert_called_with(SecretId="hq_token") 96 | 97 | 98 | def test_get_aws_secrets_manager_incorrect_secret_key(mock_boto3): 99 | mock_boto3.get_secret_value.return_value = {"SecretString": '{"token": "my_token"}'} 100 | with pytest.raises(AwsSecretsManagerTokenRetrievalException): 101 | get_token( 102 | [ 103 | { 104 | "secret_name": "hq_token", 105 | "type": "AWS_SECRETS_MANAGER", 106 | "region": "us-west-2", 107 | "secret_key": "wrong_key", 108 | } 109 | ] 110 | ) 111 | mock_boto3.get_secret_value.assert_called_with(SecretId="hq_token") 112 | 113 | 114 | def test_get_aws_secrets_manager_binary_secret(mock_boto3): 115 | mock_boto3.get_secret_value.return_value = {"binary": "it_will_fail"} 116 | with pytest.raises(AwsSecretsManagerTokenRetrievalException): 117 | get_token( 118 | [{"secret_name": "hq_token", "type": "AWS_SECRETS_MANAGER", "region": "us-west-2", "secret_key": "token"}] 119 | ) 120 | mock_boto3.get_secret_value.assert_called_with(SecretId="hq_token") 121 | 122 | 123 | def test_get_aws_secrets_manager_resource_not_found_exception(mock_boto3): 124 | mock_boto3.get_secret_value.side_effect = ClientError( 125 | error_response={"Error": {"Code": "ResourceNotFoundException"}}, operation_name="get_secret_value" 126 | ) 127 | with pytest.raises(AwsSecretsManagerTokenRetrievalException): 128 | get_token( 129 | [{"secret_name": "hq_token", "type": "AWS_SECRETS_MANAGER", "region": "us-west-2", "secret_key": "token"}] 130 | ) 131 | mock_boto3.get_secret_value.assert_called_with(SecretId="hq_token") 132 | 133 | 134 | def test_get_aws_secrets_manager_invalid_request_exception(mock_boto3): 135 | 
mock_boto3.get_secret_value.side_effect = ClientError( 136 | error_response={"Error": {"Code": "InvalidRequestException"}}, operation_name="get_secret_value" 137 | ) 138 | with pytest.raises(AwsSecretsManagerTokenRetrievalException): 139 | get_token( 140 | [{"secret_name": "hq_token", "type": "AWS_SECRETS_MANAGER", "region": "us-west-2", "secret_key": "token"}] 141 | ) 142 | mock_boto3.get_secret_value.assert_called_with(SecretId="hq_token") 143 | 144 | 145 | def test_get_aws_secrets_manager_invalid_parameter_exception(mock_boto3): 146 | mock_boto3.get_secret_value.side_effect = ClientError( 147 | error_response={"Error": {"Code": "InvalidParameterException"}}, operation_name="get_secret_value" 148 | ) 149 | with pytest.raises(AwsSecretsManagerTokenRetrievalException): 150 | get_token( 151 | [{"secret_name": "hq_token", "type": "AWS_SECRETS_MANAGER", "region": "us-west-2", "secret_key": "token"}] 152 | ) 153 | mock_boto3.get_secret_value.assert_called_with(SecretId="hq_token") 154 | -------------------------------------------------------------------------------- /tests/test_client.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import unittest 3 | from typing import Dict, List 4 | 5 | import requests_mock 6 | 7 | from cachet_url_monitor.client import CachetClient 8 | from cachet_url_monitor.exceptions import MetricNonexistentError 9 | from cachet_url_monitor.status import ComponentStatus 10 | 11 | TOKEN: str = "token_123" 12 | CACHET_URL: str = "http://foo.localhost" 13 | JSON: Dict[str, List[Dict[str, int]]] = {"data": [{"id": 1}]} 14 | 15 | 16 | class ClientTest(unittest.TestCase): 17 | def setUp(self): 18 | self.client = CachetClient("foo.localhost", TOKEN) 19 | 20 | def test_init(self): 21 | self.assertEqual(self.client.headers, {"X-Cachet-Token": TOKEN}, "Header was not set correctly") 22 | self.assertEqual(self.client.url, CACHET_URL, "Cachet API URL was set incorrectly") 23 | 24 | 
@requests_mock.mock() 25 | def test_get_components(self, m): 26 | m.get(f"{CACHET_URL}/components", json=JSON, headers={"X-Cachet-Token": TOKEN}) 27 | components = self.client.get_components() 28 | 29 | self.assertEqual(components, [{"id": 1}], "Getting components list is incorrect.") 30 | 31 | @requests_mock.mock() 32 | def test_get_metrics(self, m): 33 | m.get(f"{CACHET_URL}/metrics", json=JSON) 34 | metrics = self.client.get_metrics() 35 | 36 | self.assertEqual(metrics, [{"id": 1}], "Getting metrics list is incorrect.") 37 | 38 | @requests_mock.mock() 39 | def test_generate_config(self, m): 40 | def components(): 41 | return { 42 | "data": [ 43 | {"id": "1", "name": "apache", "link": "http://abc.def", "enabled": True}, 44 | {"id": "2", "name": "haproxy", "link": "http://ghi.jkl", "enabled": False}, 45 | {"id": "3", "name": "nginx", "link": "http://mno.pqr", "enabled": True}, 46 | ] 47 | } 48 | 49 | m.get(f"{CACHET_URL}/components", json=components(), headers={"X-Cachet-Token": TOKEN}) 50 | config = self.client.generate_config() 51 | 52 | self.assertEqual( 53 | config, 54 | { 55 | "cachet": {"api_url": CACHET_URL, "token": TOKEN}, 56 | "endpoints": [ 57 | { 58 | "name": "apache", 59 | "url": "http://abc.def", 60 | "method": "GET", 61 | "timeout": 1, 62 | "expectation": [{"type": "HTTP_STATUS", "status_range": "200-300", "incident": "MAJOR"}], 63 | "allowed_fails": 0, 64 | "frequency": 30, 65 | "component_id": "1", 66 | "action": ["CREATE_INCIDENT", "UPDATE_STATUS"], 67 | "public_incidents": True, 68 | }, 69 | { 70 | "name": "nginx", 71 | "url": "http://mno.pqr", 72 | "method": "GET", 73 | "timeout": 1, 74 | "expectation": [{"type": "HTTP_STATUS", "status_range": "200-300", "incident": "MAJOR"}], 75 | "allowed_fails": 0, 76 | "frequency": 30, 77 | "component_id": "3", 78 | "action": ["CREATE_INCIDENT", "UPDATE_STATUS"], 79 | "public_incidents": True, 80 | }, 81 | ], 82 | }, 83 | "Generated config is incorrect.", 84 | ) 85 | 86 | @requests_mock.mock() 87 | def 
test_get_default_metric_value(self, m): 88 | m.get(f"{CACHET_URL}/metrics/123", json={"data": {"default_value": 0.456}}, headers={"X-Cachet-Token": TOKEN}) 89 | default_metric_value = self.client.get_default_metric_value(123) 90 | 91 | self.assertEqual(default_metric_value, 0.456, "Getting default metric value is incorrect.") 92 | 93 | @requests_mock.mock() 94 | def test_get_default_metric_value_invalid_id(self, m): 95 | m.get(f"{CACHET_URL}/metrics/123", headers={"X-Cachet-Token": TOKEN}, status_code=400) 96 | with self.assertRaises(MetricNonexistentError): 97 | self.client.get_default_metric_value(123) 98 | 99 | @requests_mock.mock() 100 | def test_get_component_status(self, m): 101 | def json(): 102 | return {"data": {"status": ComponentStatus.OPERATIONAL.value}} 103 | 104 | m.get(f"{CACHET_URL}/components/123", json=json(), headers={"X-Cachet-Token": TOKEN}) 105 | status = self.client.get_component_status(123) 106 | 107 | self.assertEqual(status, ComponentStatus.OPERATIONAL, "Getting component status value is incorrect.") 108 | 109 | @requests_mock.mock() 110 | def test_push_status(self, m): 111 | m.put( 112 | f"{CACHET_URL}/components/123?id=123&status={ComponentStatus.PARTIAL_OUTAGE.value}", 113 | headers={"X-Cachet-Token": TOKEN}, 114 | ) 115 | response = self.client.push_status(123, ComponentStatus.PARTIAL_OUTAGE) 116 | 117 | self.assertTrue(response.ok, "Pushing status value is failed.") 118 | -------------------------------------------------------------------------------- /tests/test_configuration.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import mock 3 | import pytest 4 | import requests 5 | import requests_mock 6 | from yaml import load, SafeLoader 7 | 8 | import cachet_url_monitor.exceptions 9 | import cachet_url_monitor.status 10 | from cachet_url_monitor.webhook import Webhook 11 | 12 | from cachet_url_monitor.configuration import Configuration 13 | import os 14 | 15 | 16 | 
def _load_config(filename):
    """Parse one YAML file from the ``configs`` directory beside this module.

    The file handle is closed before the parsed data is returned, so no
    descriptor stays open while a test is running.

    :param filename: File name inside ``configs`` (e.g. ``"config.yml"``).
    :return: Whatever ``yaml.load`` produces for that file with ``SafeLoader``.
    """
    path = os.path.join(os.path.dirname(__file__), "configs", filename)
    with open(path, "rt") as yaml_file:
        return load(yaml_file, SafeLoader)


@pytest.fixture()
def mock_client():
    """Mocked client whose ``get_component_status`` reports ``OPERATIONAL``."""
    client = mock.Mock()
    client.get_component_status.return_value = cachet_url_monitor.status.ComponentStatus.OPERATIONAL
    return client


@pytest.fixture()
def config_file():
    """Parsed contents of ``configs/config.yml``."""
    return _load_config("config.yml")


@pytest.fixture()
def header_config_file():
    """Parsed contents of ``configs/config_header.yml``."""
    return _load_config("config_header.yml")


@pytest.fixture()
def multiple_urls_config_file():
    """Parsed contents of ``configs/config_multiple_urls.yml``."""
    return _load_config("config_multiple_urls.yml")


@pytest.fixture()
def invalid_config_file():
    """Parsed contents of ``configs/config_invalid_type.yml``."""
    return _load_config("config_invalid_type.yml")


@pytest.fixture()
def webhooks_config_file():
    """Parsed contents of ``configs/config_webhooks.yml``."""
    return _load_config("config_webhooks.yml")


@pytest.fixture()
def insecure_config_file():
    """Parsed contents of ``configs/config_insecure.yml``."""
    return _load_config("config_insecure.yml")


@pytest.fixture()
def missing_name_config_file():
    """Parsed contents of ``configs/config_missing_name.yml``."""
    return _load_config("config_missing_name.yml")


@pytest.fixture()
def metric_config_file():
    """Parsed contents of ``configs/config_metric.yml``."""
    return _load_config("config_metric.yml")


@pytest.fixture()
def missing_latency_unit_config_file():
    """Parsed contents of ``configs/config_default_latency_unit.yml``."""
    return _load_config("config_default_latency_unit.yml")


@pytest.fixture()
def mock_logger_module():
    """Patch ``logging`` as seen by ``cachet_url_monitor.configuration`` for the test's duration."""
    # Must stay a generator fixture: the patch is undone when the ``with`` exits at teardown.
    with mock.patch("cachet_url_monitor.configuration.logging") as _mock_logger:
        yield _mock_logger


@pytest.fixture()
def mock_logger(mock_logger_module):
    """Mock returned by the patched ``logging.getLogger``; tests assert on its calls."""
    _mock_logger = mock.Mock()
    mock_logger_module.getLogger.return_value = _mock_logger
    return _mock_logger


# The fixtures below request ``mock_logger`` without using it directly: asking for it
# is what activates the logging patch before ``Configuration`` is constructed.


@pytest.fixture()
def configuration(config_file, mock_client, mock_logger):
    """``Configuration`` built from ``config.yml`` with index 0 and the mocked client."""
    return Configuration(config_file, 0, mock_client)


@pytest.fixture()
def insecure_configuration(insecure_config_file, mock_client, mock_logger):
    """``Configuration`` built from ``config_insecure.yml`` with index 0."""
    return Configuration(insecure_config_file, 0, mock_client)


@pytest.fixture()
def header_configuration(header_config_file, mock_client, mock_logger):
    """``Configuration`` built from ``config_header.yml`` with index 0."""
    return Configuration(header_config_file, 0, mock_client)


@pytest.fixture()
def webhooks_configuration(webhooks_config_file, mock_client, mock_logger):
    """``Configuration`` built from ``config_webhooks.yml`` plus one ``Webhook`` per configured entry."""
    webhooks = [
        Webhook(webhook["url"], webhook.get("params", {}))
        for webhook in webhooks_config_file.get("webhooks", [])
    ]
    return Configuration(webhooks_config_file, 0, mock_client, webhooks)


@pytest.fixture()
def multiple_urls_configuration(multiple_urls_config_file, mock_client, mock_logger):
    """List with one ``Configuration`` per entry under ``endpoints`` in ``config_multiple_urls.yml``."""
    endpoint_count = len(multiple_urls_config_file["endpoints"])
    return [Configuration(multiple_urls_config_file, index, mock_client) for index in range(endpoint_count)]
129 | 130 | def test_init(configuration, mock_client): 131 | assert len(configuration.data) == 2, "Number of root elements in config.yml is incorrect" 132 | assert len(configuration.expectations) == 3, "Number of expectations read from file is incorrect" 133 | assert configuration.latency_unit == "ms" 134 | mock_client.get_default_metric_value.assert_not_called() 135 | 136 | 137 | def test_init_with_header(header_configuration): 138 | assert len(header_configuration.data) == 2, "Number of root elements in config.yml is incorrect" 139 | assert len(header_configuration.expectations) == 3, "Number of expectations read from file is incorrect" 140 | assert header_configuration.endpoint_header == {"SOME-HEADER": "SOME-VALUE"}, "Header is incorrect" 141 | assert header_configuration.latency_unit == "ms" 142 | 143 | 144 | def test_init_missing_latency_unit(missing_latency_unit_config_file, mock_client): 145 | configuration = Configuration(missing_latency_unit_config_file, 0, mock_client) 146 | assert configuration.latency_unit == "s" 147 | 148 | 149 | def test_init_unknown_status(config_file, mock_client): 150 | mock_client.get_component_status.return_value = cachet_url_monitor.status.ComponentStatus.UNKNOWN 151 | configuration = Configuration(config_file, 0, mock_client) 152 | 153 | assert configuration.previous_status == cachet_url_monitor.status.ComponentStatus.UNKNOWN 154 | 155 | 156 | def test_init_missing_name(missing_name_config_file, mock_client): 157 | with pytest.raises(cachet_url_monitor.configuration.ConfigurationValidationError): 158 | Configuration(missing_name_config_file, 0, mock_client) 159 | 160 | 161 | def test_init_with_metric_id(metric_config_file, mock_client): 162 | mock_client.get_default_metric_value.return_value = 0.456 163 | configuration = Configuration(metric_config_file, 0, mock_client) 164 | 165 | assert ( 166 | configuration.default_metric_value == 0.456 167 | ), "Default metric was not set during init" 168 | 
mock_client.get_default_metric_value.assert_called_once_with(3) 169 | 170 | 171 | def test_evaluate(configuration): 172 | with requests_mock.mock() as m: 173 | m.get("http://localhost:8080/swagger", text="") 174 | configuration.evaluate() 175 | 176 | assert ( 177 | configuration.status == cachet_url_monitor.status.ComponentStatus.OPERATIONAL 178 | ), "Component status set incorrectly" 179 | assert ( 180 | m.last_request.verify == True 181 | ) 182 | 183 | 184 | def test_evaluate_insecure(insecure_configuration): 185 | with requests_mock.mock() as m: 186 | m.get("http://localhost:8080/swagger", text="") 187 | insecure_configuration.evaluate() 188 | 189 | assert ( 190 | insecure_configuration.status == cachet_url_monitor.status.ComponentStatus.OPERATIONAL 191 | ), "Component status set incorrectly" 192 | assert ( 193 | m.last_request.verify == False 194 | ) 195 | 196 | 197 | def test_evaluate_without_header(configuration): 198 | with requests_mock.mock() as m: 199 | m.get("http://localhost:8080/swagger", text="") 200 | configuration.evaluate() 201 | 202 | assert ( 203 | configuration.status == cachet_url_monitor.status.ComponentStatus.OPERATIONAL 204 | ), "Component status set incorrectly" 205 | 206 | 207 | def test_evaluate_with_header(header_configuration): 208 | with requests_mock.mock() as m: 209 | m.get("http://localhost:8080/swagger", text="", headers={'SOME-HEADER': 'SOME-VALUE'}) 210 | header_configuration.evaluate() 211 | 212 | assert ( 213 | header_configuration.status == cachet_url_monitor.status.ComponentStatus.OPERATIONAL 214 | ), "Component status set incorrectly" 215 | 216 | 217 | def test_evaluate_with_failure(configuration): 218 | with requests_mock.mock() as m: 219 | m.get("http://localhost:8080/swagger", text="", status_code=400) 220 | configuration.evaluate() 221 | 222 | assert ( 223 | configuration.status == cachet_url_monitor.status.ComponentStatus.MAJOR_OUTAGE 224 | ), "Component status set incorrectly or custom incident status is incorrectly 
parsed" 225 | 226 | 227 | def test_evaluate_with_timeout(configuration, mock_logger): 228 | with requests_mock.mock() as m: 229 | m.get("http://localhost:8080/swagger", exc=requests.Timeout) 230 | configuration.evaluate() 231 | 232 | assert ( 233 | configuration.status == cachet_url_monitor.status.ComponentStatus.PERFORMANCE_ISSUES 234 | ), "Component status set incorrectly" 235 | mock_logger.warning.assert_called_with("Request timed out") 236 | 237 | 238 | def test_evaluate_with_connection_error(configuration, mock_logger): 239 | with requests_mock.mock() as m: 240 | m.get("http://localhost:8080/swagger", exc=requests.ConnectionError) 241 | configuration.evaluate() 242 | 243 | assert ( 244 | configuration.status == cachet_url_monitor.status.ComponentStatus.PARTIAL_OUTAGE 245 | ), "Component status set incorrectly" 246 | mock_logger.warning.assert_called_with("The URL is unreachable: GET http://localhost:8080/swagger") 247 | 248 | 249 | def test_evaluate_with_http_error(configuration, mock_logger): 250 | with requests_mock.mock() as m: 251 | m.get("http://localhost:8080/swagger", exc=requests.HTTPError) 252 | configuration.evaluate() 253 | 254 | assert ( 255 | configuration.status == cachet_url_monitor.status.ComponentStatus.PARTIAL_OUTAGE 256 | ), "Component status set incorrectly" 257 | mock_logger.exception.assert_called_with("Unexpected HTTP response") 258 | 259 | 260 | def test_webhooks(webhooks_configuration, mock_logger, mock_client): 261 | assert len(webhooks_configuration.webhooks) == 2 262 | push_incident_response = mock.Mock() 263 | push_incident_response.ok = False 264 | mock_client.push_incident.return_value = push_incident_response 265 | with requests_mock.mock() as m: 266 | m.get("http://localhost:8080/swagger", exc=requests.HTTPError) 267 | m.post("https://push.example.com/foo%20is%20unavailable", text="") 268 | m.post("https://push.example.com/message?token=%3Capptoken%3E&title=foo+is+unavailable", text="") 269 | webhooks_configuration.evaluate() 
270 | 271 | assert ( 272 | webhooks_configuration.status == cachet_url_monitor.status.ComponentStatus.PARTIAL_OUTAGE 273 | ), "Component status set incorrectly" 274 | mock_logger.exception.assert_called_with("Unexpected HTTP response") 275 | webhooks_configuration.push_incident() 276 | mock_logger.info.assert_called_with( 277 | "Webhook https://push.example.com/message?token= triggered with foo unavailable" 278 | ) 279 | 280 | 281 | def test_push_status(configuration, mock_client): 282 | mock_client.get_component_status.return_value = cachet_url_monitor.status.ComponentStatus.PARTIAL_OUTAGE 283 | push_status_response = mock.Mock() 284 | mock_client.push_status.return_value = push_status_response 285 | push_status_response.ok = True 286 | configuration.previous_status = cachet_url_monitor.status.ComponentStatus.PARTIAL_OUTAGE 287 | configuration.status = cachet_url_monitor.status.ComponentStatus.OPERATIONAL 288 | 289 | configuration.push_status() 290 | 291 | mock_client.push_status.assert_called_once_with(1, cachet_url_monitor.status.ComponentStatus.OPERATIONAL) 292 | 293 | 294 | def test_push_status_with_new_failure(configuration, mock_client): 295 | mock_client.get_component_status.return_value = cachet_url_monitor.status.ComponentStatus.OPERATIONAL 296 | push_status_response = mock.Mock() 297 | mock_client.push_status.return_value = push_status_response 298 | push_status_response.ok = False 299 | configuration.status = cachet_url_monitor.status.ComponentStatus.PARTIAL_OUTAGE 300 | 301 | configuration.push_status() 302 | 303 | mock_client.push_status.assert_called_once_with(1, cachet_url_monitor.status.ComponentStatus.PARTIAL_OUTAGE) 304 | 305 | 306 | def test_push_status_same_status(configuration, mock_client): 307 | mock_client.get_component_status.return_value = cachet_url_monitor.status.ComponentStatus.OPERATIONAL 308 | configuration.status = cachet_url_monitor.status.ComponentStatus.OPERATIONAL 309 | 310 | configuration.push_status() 311 | 312 | 
mock_client.push_status.assert_not_called() 313 | 314 | 315 | def test_init_multiple_urls(multiple_urls_configuration): 316 | expected_method = ["GET", "POST"] 317 | expected_url = ["http://localhost:8080/swagger", "http://localhost:8080/bar"] 318 | 319 | assert len(multiple_urls_configuration) == 2 320 | for index in range(len(multiple_urls_configuration)): 321 | config = multiple_urls_configuration[index] 322 | assert len(config.data) == 2, "Number of root elements in config.yml is incorrect" 323 | assert len(config.expectations) == 1, "Number of expectations read from file is incorrect" 324 | 325 | assert expected_method[index] == config.endpoint_method 326 | assert expected_url[index] == config.endpoint_url 327 | 328 | 329 | def test_init_invalid_configuration(invalid_config_file, mock_client): 330 | with pytest.raises(cachet_url_monitor.configuration.ConfigurationValidationError): 331 | Configuration(invalid_config_file, 0, mock_client) 332 | -------------------------------------------------------------------------------- /tests/test_expectation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import re 3 | import unittest 4 | 5 | import mock 6 | import pytest 7 | 8 | from cachet_url_monitor.expectation import HttpStatus, Regex, Latency 9 | from cachet_url_monitor.status import ComponentStatus 10 | 11 | 12 | class LatencyTest(unittest.TestCase): 13 | def setUp(self): 14 | self.expectation = Latency({"type": "LATENCY", "threshold": 1}) 15 | 16 | def test_init(self): 17 | assert self.expectation.threshold == 1 18 | 19 | def test_get_status_healthy(self): 20 | def total_seconds(): 21 | return 0.1 22 | 23 | request = mock.Mock() 24 | elapsed = mock.Mock() 25 | request.elapsed = elapsed 26 | elapsed.total_seconds = total_seconds 27 | 28 | assert self.expectation.get_status(request) == ComponentStatus.OPERATIONAL 29 | 30 | def test_get_status_unhealthy(self): 31 | def total_seconds(): 32 | return 2 33 
| 34 | request = mock.Mock() 35 | elapsed = mock.Mock() 36 | request.elapsed = elapsed 37 | elapsed.total_seconds = total_seconds 38 | 39 | assert self.expectation.get_status(request) == ComponentStatus.PERFORMANCE_ISSUES 40 | 41 | def test_get_message(self): 42 | def total_seconds(): 43 | return 0.1 44 | 45 | request = mock.Mock() 46 | elapsed = mock.Mock() 47 | request.elapsed = elapsed 48 | elapsed.total_seconds = total_seconds 49 | 50 | assert self.expectation.get_message(request) == ("Latency above " "threshold: 0.1000 seconds") 51 | 52 | 53 | class HttpStatusTest(unittest.TestCase): 54 | def setUp(self): 55 | self.expectation = HttpStatus({"type": "HTTP_STATUS", "status_range": "200-300"}) 56 | 57 | def test_init(self): 58 | assert self.expectation.status_range == (200, 300) 59 | 60 | def test_init_with_one_status(self): 61 | """With only one value, we still expect a valid tuple""" 62 | self.expectation = HttpStatus({"type": "HTTP_STATUS", "status_range": "200"}) 63 | 64 | assert self.expectation.status_range == (200, 201) 65 | 66 | def test_init_with_invalid_number(self): 67 | """Invalid values should just fail with a ValueError, as we can't convert it to int.""" 68 | with pytest.raises(ValueError): 69 | self.expectation = HttpStatus({"type": "HTTP_STATUS", "status_range": "foo"}) 70 | 71 | def test_get_status_healthy(self): 72 | request = mock.Mock() 73 | request.status_code = 200 74 | 75 | assert self.expectation.get_status(request) == ComponentStatus.OPERATIONAL 76 | 77 | def test_get_status_healthy_boundary(self): 78 | request = mock.Mock() 79 | request.status_code = 299 80 | 81 | assert self.expectation.get_status(request) == ComponentStatus.OPERATIONAL 82 | 83 | def test_get_status_unhealthy(self): 84 | request = mock.Mock() 85 | request.status_code = 400 86 | 87 | assert self.expectation.get_status(request) == ComponentStatus.PARTIAL_OUTAGE 88 | 89 | def test_get_status_unhealthy_boundary(self): 90 | request = mock.Mock() 91 | request.status_code = 
300 92 | 93 | assert self.expectation.get_status(request) == ComponentStatus.PARTIAL_OUTAGE 94 | 95 | def test_get_message(self): 96 | request = mock.Mock() 97 | request.status_code = 400 98 | 99 | assert self.expectation.get_message(request) == ("Unexpected HTTP " "status (400)") 100 | 101 | 102 | class RegexTest(unittest.TestCase): 103 | def setUp(self): 104 | self.expectation = Regex({"type": "REGEX", "regex": ".*(find stuff).*"}) 105 | 106 | def test_init(self): 107 | assert self.expectation.regex == re.compile(".*(find stuff).*", re.UNICODE + re.DOTALL) 108 | 109 | def test_get_status_healthy(self): 110 | request = mock.Mock() 111 | request.text = "We could find stuff\n in this body." 112 | 113 | assert self.expectation.get_status(request) == ComponentStatus.OPERATIONAL 114 | 115 | def test_get_status_unhealthy(self): 116 | request = mock.Mock() 117 | request.text = "We will not find it here" 118 | 119 | assert self.expectation.get_status(request) == ComponentStatus.PARTIAL_OUTAGE 120 | 121 | def test_get_message(self): 122 | request = mock.Mock() 123 | request.text = "We will not find it here" 124 | 125 | assert self.expectation.get_message(request) == ("Regex did not match " "anything in the body") 126 | -------------------------------------------------------------------------------- /tests/test_latency_unit.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from cachet_url_monitor.latency_unit import convert_to_unit 4 | 5 | 6 | def test_convert_to_unit_ms(): 7 | assert convert_to_unit("ms", 1) == 1000 8 | 9 | 10 | def test_convert_to_unit_s(): 11 | assert convert_to_unit("s", 20) == 20 12 | 13 | 14 | def test_convert_to_unit_m(): 15 | assert convert_to_unit("m", 3) == float(3) / 60 16 | 17 | 18 | def test_convert_to_unit_h(): 19 | assert convert_to_unit("h", 7200) == 2 20 | -------------------------------------------------------------------------------- /tests/test_scheduler.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import unittest 3 | 4 | import mock 5 | 6 | from cachet_url_monitor.scheduler import Agent, Scheduler 7 | 8 | 9 | class AgentTest(unittest.TestCase): 10 | def setUp(self): 11 | self.configuration = mock.Mock() 12 | self.agent = Agent(self.configuration) 13 | 14 | def test_init(self): 15 | assert self.agent.configuration == self.configuration 16 | 17 | def test_execute(self): 18 | evaluate = self.configuration.evaluate 19 | push_status = self.configuration.push_status 20 | self.agent.execute() 21 | 22 | evaluate.assert_called_once() 23 | push_status.assert_not_called() 24 | 25 | 26 | class SchedulerTest(unittest.TestCase): 27 | @mock.patch("requests.get") 28 | def setUp(self, mock_requests): 29 | def get(url, headers): 30 | get_return = mock.Mock() 31 | get_return.ok = True 32 | get_return.json = mock.Mock() 33 | get_return.json.return_value = {"data": {"status": 1}} 34 | return get_return 35 | 36 | mock_requests.get = get 37 | 38 | self.agent = mock.MagicMock() 39 | 40 | self.scheduler = Scheduler( 41 | { 42 | "endpoints": [ 43 | { 44 | "name": "foo", 45 | "url": "http://localhost:8080/swagger", 46 | "method": "GET", 47 | "expectation": [{"type": "HTTP_STATUS", "status_range": "200 - 300", "incident": "MAJOR"}], 48 | "allowed_fails": 0, 49 | "component_id": 1, 50 | "action": ["CREATE_INCIDENT", "UPDATE_STATUS"], 51 | "public_incidents": True, 52 | "latency_unit": "ms", 53 | "frequency": 30, 54 | } 55 | ], 56 | "cachet": {"api_url": "https: // demo.cachethq.io / api / v1", "token": "my_token"}, 57 | }, 58 | self.agent, 59 | ) 60 | 61 | def test_init(self): 62 | self.assertFalse(self.scheduler.stop) 63 | 64 | def test_start(self): 65 | # TODO(mtakaki|2016-05-01): We need a better way of testing this method. 66 | # Leaving it as a placeholder. 
67 | self.scheduler.stop = True 68 | self.scheduler.start() 69 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | # content of: tox.ini , put in same dir as setup.py 2 | [tox] 3 | envlist = py37,pytest,black,flake8,linters,docs 4 | 5 | [testenv] 6 | passenv = SSH_AUTH_SOCK CODACY_PROJECT_TOKEN COVERALLS_REPO_TOKEN 7 | deps = 8 | -rdev_requirements.txt 9 | -rrequirements.txt 10 | 11 | [testenv:pytest] 12 | commands = 13 | python -m pytest tests --cov=cachet_url_monitor 14 | 15 | 16 | # Autoformatter 17 | [testenv:black] 18 | basepython = python3 19 | skip_install = true 20 | deps = 21 | black>=19.10b0 22 | commands = 23 | black --line-length 120 --check --diff cachet_url_monitor/ tests/ 24 | 25 | # Linters 26 | [flake8] 27 | filename = *.py 28 | exclude = 29 | .tox, 30 | .git, 31 | __pycache__, 32 | docs/source/conf.py, 33 | build, 34 | dist, 35 | tests/fixtures/*, 36 | *.pyc, 37 | *.egg-info, 38 | .cache, 39 | .eggs 40 | max-line-length = 120 41 | skip_install = true 42 | deps = 43 | wheel 44 | flake8-colors 45 | commands = 46 | python setup.py -qq bdist_wheel 47 | pip install --force-reinstall -U --pre --find-links ./dist/ flake8 48 | flake8 cachet_url_monitor tests/ setup.py 49 | 50 | [testenv:pylint] 51 | basepython = python3 52 | skip_install = true 53 | deps = 54 | pyflakes 55 | pylint!=2.5.0 56 | commands = 57 | pylint cachet_url_monitor/ 58 | 59 | [testenv:doc8] 60 | basepython = python3 61 | skip_install = true 62 | deps = 63 | sphinx 64 | doc8 65 | commands = 66 | doc8 docs/source/ 67 | 68 | [testenv:pre-commit] 69 | basepython = python3 70 | skip_install = true 71 | deps = pre-commit 72 | commands = 73 | pre-commit run --all-files --show-diff-on-failure 74 | 75 | [testenv:build] 76 | basepython = python3 77 | skip_install = true 78 | deps = 79 | wheel 80 | setuptools 81 | commands = 82 | python setup.py -q sdist bdist_wheel 83 | 
84 | [testenv:release] 85 | basepython = python3 86 | skip_install = true 87 | deps = 88 | {[testenv:build]deps} 89 | twine >= 1.5.0 90 | commands = 91 | {[testenv:build]commands} 92 | twine upload --skip-existing dist/* 93 | 94 | [testenv:circleci] 95 | basepython = python3 96 | deps = 97 | -rdev_requirements.txt 98 | -rrequirements.txt 99 | commands = 100 | python -m pytest tests --junitxml=test-reports/junit.xml --cov=cachet_url_monitor 101 | coveralls 102 | coverage xml 103 | python-codacy-coverage -r coverage.xml 104 | --------------------------------------------------------------------------------