├── LICENSE ├── README.md └── datadog ├── .gitignore ├── Makefile ├── README.md ├── dashboard-docker.tf.json ├── dashboard └── docker.json ├── main.tf ├── monitors.tf ├── policy ├── dashboard.sentinel ├── monitor.sentinel ├── sentinel.hcl └── test │ ├── dashboard │ ├── fail.json │ └── good.json │ ├── monitor │ ├── fail.json │ └── good.json │ └── testdata │ ├── mock-tfplan-fail.sentinel │ └── mock-tfplan-v2.sentinel ├── setup ├── compute_host.tf ├── main.tf ├── packer │ ├── Makefile │ ├── bootstrap.sh │ ├── build.pkr.hcl │ └── resources │ │ ├── google-startup-scripts.service │ │ └── gor.service └── variables.tf ├── store-frontend-anomaly-p90-latency.tf ├── terraform.auto.tfvars └── variables.tf /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2020 HashiCorp, Inc. 2 | 3 | Mozilla Public License Version 2.0 4 | ================================== 5 | 6 | 1. Definitions 7 | -------------- 8 | 9 | 1.1. "Contributor" 10 | means each individual or legal entity that creates, contributes to 11 | the creation of, or owns Covered Software. 12 | 13 | 1.2. "Contributor Version" 14 | means the combination of the Contributions of others (if any) used 15 | by a Contributor and that particular Contributor's Contribution. 16 | 17 | 1.3. "Contribution" 18 | means Covered Software of a particular Contributor. 19 | 20 | 1.4. "Covered Software" 21 | means Source Code Form to which the initial Contributor has attached 22 | the notice in Exhibit A, the Executable Form of such Source Code 23 | Form, and Modifications of such Source Code Form, in each case 24 | including portions thereof. 25 | 26 | 1.5. 
"Incompatible With Secondary Licenses" 27 | means 28 | 29 | (a) that the initial Contributor has attached the notice described 30 | in Exhibit B to the Covered Software; or 31 | 32 | (b) that the Covered Software was made available under the terms of 33 | version 1.1 or earlier of the License, but not also under the 34 | terms of a Secondary License. 35 | 36 | 1.6. "Executable Form" 37 | means any form of the work other than Source Code Form. 38 | 39 | 1.7. "Larger Work" 40 | means a work that combines Covered Software with other material, in 41 | a separate file or files, that is not Covered Software. 42 | 43 | 1.8. "License" 44 | means this document. 45 | 46 | 1.9. "Licensable" 47 | means having the right to grant, to the maximum extent possible, 48 | whether at the time of the initial grant or subsequently, any and 49 | all of the rights conveyed by this License. 50 | 51 | 1.10. "Modifications" 52 | means any of the following: 53 | 54 | (a) any file in Source Code Form that results from an addition to, 55 | deletion from, or modification of the contents of Covered 56 | Software; or 57 | 58 | (b) any new file in Source Code Form that contains any Covered 59 | Software. 60 | 61 | 1.11. "Patent Claims" of a Contributor 62 | means any patent claim(s), including without limitation, method, 63 | process, and apparatus claims, in any patent Licensable by such 64 | Contributor that would be infringed, but for the grant of the 65 | License, by the making, using, selling, offering for sale, having 66 | made, import, or transfer of either its Contributions or its 67 | Contributor Version. 68 | 69 | 1.12. "Secondary License" 70 | means either the GNU General Public License, Version 2.0, the GNU 71 | Lesser General Public License, Version 2.1, the GNU Affero General 72 | Public License, Version 3.0, or any later versions of those 73 | licenses. 74 | 75 | 1.13. "Source Code Form" 76 | means the form of the work preferred for making modifications. 77 | 78 | 1.14. 
"You" (or "Your") 79 | means an individual or a legal entity exercising rights under this 80 | License. For legal entities, "You" includes any entity that 81 | controls, is controlled by, or is under common control with You. For 82 | purposes of this definition, "control" means (a) the power, direct 83 | or indirect, to cause the direction or management of such entity, 84 | whether by contract or otherwise, or (b) ownership of more than 85 | fifty percent (50%) of the outstanding shares or beneficial 86 | ownership of such entity. 87 | 88 | 2. License Grants and Conditions 89 | -------------------------------- 90 | 91 | 2.1. Grants 92 | 93 | Each Contributor hereby grants You a world-wide, royalty-free, 94 | non-exclusive license: 95 | 96 | (a) under intellectual property rights (other than patent or trademark) 97 | Licensable by such Contributor to use, reproduce, make available, 98 | modify, display, perform, distribute, and otherwise exploit its 99 | Contributions, either on an unmodified basis, with Modifications, or 100 | as part of a Larger Work; and 101 | 102 | (b) under Patent Claims of such Contributor to make, use, sell, offer 103 | for sale, have made, import, and otherwise transfer either its 104 | Contributions or its Contributor Version. 105 | 106 | 2.2. Effective Date 107 | 108 | The licenses granted in Section 2.1 with respect to any Contribution 109 | become effective for each Contribution on the date the Contributor first 110 | distributes such Contribution. 111 | 112 | 2.3. Limitations on Grant Scope 113 | 114 | The licenses granted in this Section 2 are the only rights granted under 115 | this License. No additional rights or licenses will be implied from the 116 | distribution or licensing of Covered Software under this License. 
117 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 118 | Contributor: 119 | 120 | (a) for any code that a Contributor has removed from Covered Software; 121 | or 122 | 123 | (b) for infringements caused by: (i) Your and any other third party's 124 | modifications of Covered Software, or (ii) the combination of its 125 | Contributions with other software (except as part of its Contributor 126 | Version); or 127 | 128 | (c) under Patent Claims infringed by Covered Software in the absence of 129 | its Contributions. 130 | 131 | This License does not grant any rights in the trademarks, service marks, 132 | or logos of any Contributor (except as may be necessary to comply with 133 | the notice requirements in Section 3.4). 134 | 135 | 2.4. Subsequent Licenses 136 | 137 | No Contributor makes additional grants as a result of Your choice to 138 | distribute the Covered Software under a subsequent version of this 139 | License (see Section 10.2) or under the terms of a Secondary License (if 140 | permitted under the terms of Section 3.3). 141 | 142 | 2.5. Representation 143 | 144 | Each Contributor represents that the Contributor believes its 145 | Contributions are its original creation(s) or it has sufficient rights 146 | to grant the rights to its Contributions conveyed by this License. 147 | 148 | 2.6. Fair Use 149 | 150 | This License is not intended to limit any rights You have under 151 | applicable copyright doctrines of fair use, fair dealing, or other 152 | equivalents. 153 | 154 | 2.7. Conditions 155 | 156 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted 157 | in Section 2.1. 158 | 159 | 3. Responsibilities 160 | ------------------- 161 | 162 | 3.1. Distribution of Source Form 163 | 164 | All distribution of Covered Software in Source Code Form, including any 165 | Modifications that You create or to which You contribute, must be under 166 | the terms of this License. 
You must inform recipients that the Source 167 | Code Form of the Covered Software is governed by the terms of this 168 | License, and how they can obtain a copy of this License. You may not 169 | attempt to alter or restrict the recipients' rights in the Source Code 170 | Form. 171 | 172 | 3.2. Distribution of Executable Form 173 | 174 | If You distribute Covered Software in Executable Form then: 175 | 176 | (a) such Covered Software must also be made available in Source Code 177 | Form, as described in Section 3.1, and You must inform recipients of 178 | the Executable Form how they can obtain a copy of such Source Code 179 | Form by reasonable means in a timely manner, at a charge no more 180 | than the cost of distribution to the recipient; and 181 | 182 | (b) You may distribute such Executable Form under the terms of this 183 | License, or sublicense it under different terms, provided that the 184 | license for the Executable Form does not attempt to limit or alter 185 | the recipients' rights in the Source Code Form under this License. 186 | 187 | 3.3. Distribution of a Larger Work 188 | 189 | You may create and distribute a Larger Work under terms of Your choice, 190 | provided that You also comply with the requirements of this License for 191 | the Covered Software. If the Larger Work is a combination of Covered 192 | Software with a work governed by one or more Secondary Licenses, and the 193 | Covered Software is not Incompatible With Secondary Licenses, this 194 | License permits You to additionally distribute such Covered Software 195 | under the terms of such Secondary License(s), so that the recipient of 196 | the Larger Work may, at their option, further distribute the Covered 197 | Software under the terms of either this License or such Secondary 198 | License(s). 199 | 200 | 3.4. 
Notices 201 | 202 | You may not remove or alter the substance of any license notices 203 | (including copyright notices, patent notices, disclaimers of warranty, 204 | or limitations of liability) contained within the Source Code Form of 205 | the Covered Software, except that You may alter any license notices to 206 | the extent required to remedy known factual inaccuracies. 207 | 208 | 3.5. Application of Additional Terms 209 | 210 | You may choose to offer, and to charge a fee for, warranty, support, 211 | indemnity or liability obligations to one or more recipients of Covered 212 | Software. However, You may do so only on Your own behalf, and not on 213 | behalf of any Contributor. You must make it absolutely clear that any 214 | such warranty, support, indemnity, or liability obligation is offered by 215 | You alone, and You hereby agree to indemnify every Contributor for any 216 | liability incurred by such Contributor as a result of warranty, support, 217 | indemnity or liability terms You offer. You may include additional 218 | disclaimers of warranty and limitations of liability specific to any 219 | jurisdiction. 220 | 221 | 4. Inability to Comply Due to Statute or Regulation 222 | --------------------------------------------------- 223 | 224 | If it is impossible for You to comply with any of the terms of this 225 | License with respect to some or all of the Covered Software due to 226 | statute, judicial order, or regulation then You must: (a) comply with 227 | the terms of this License to the maximum extent possible; and (b) 228 | describe the limitations and the code they affect. Such description must 229 | be placed in a text file included with all distributions of the Covered 230 | Software under this License. Except to the extent prohibited by statute 231 | or regulation, such description must be sufficiently detailed for a 232 | recipient of ordinary skill to be able to understand it. 233 | 234 | 5. 
Termination 235 | -------------- 236 | 237 | 5.1. The rights granted under this License will terminate automatically 238 | if You fail to comply with any of its terms. However, if You become 239 | compliant, then the rights granted under this License from a particular 240 | Contributor are reinstated (a) provisionally, unless and until such 241 | Contributor explicitly and finally terminates Your grants, and (b) on an 242 | ongoing basis, if such Contributor fails to notify You of the 243 | non-compliance by some reasonable means prior to 60 days after You have 244 | come back into compliance. Moreover, Your grants from a particular 245 | Contributor are reinstated on an ongoing basis if such Contributor 246 | notifies You of the non-compliance by some reasonable means, this is the 247 | first time You have received notice of non-compliance with this License 248 | from such Contributor, and You become compliant prior to 30 days after 249 | Your receipt of the notice. 250 | 251 | 5.2. If You initiate litigation against any entity by asserting a patent 252 | infringement claim (excluding declaratory judgment actions, 253 | counter-claims, and cross-claims) alleging that a Contributor Version 254 | directly or indirectly infringes any patent, then the rights granted to 255 | You by any and all Contributors for the Covered Software under Section 256 | 2.1 of this License shall terminate. 257 | 258 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all 259 | end user license agreements (excluding distributors and resellers) which 260 | have been validly granted by You or Your distributors under this License 261 | prior to termination shall survive termination. 262 | 263 | ************************************************************************ 264 | * * 265 | * 6. 
Disclaimer of Warranty * 266 | * ------------------------- * 267 | * * 268 | * Covered Software is provided under this License on an "as is" * 269 | * basis, without warranty of any kind, either expressed, implied, or * 270 | * statutory, including, without limitation, warranties that the * 271 | * Covered Software is free of defects, merchantable, fit for a * 272 | * particular purpose or non-infringing. The entire risk as to the * 273 | * quality and performance of the Covered Software is with You. * 274 | * Should any Covered Software prove defective in any respect, You * 275 | * (not any Contributor) assume the cost of any necessary servicing, * 276 | * repair, or correction. This disclaimer of warranty constitutes an * 277 | * essential part of this License. No use of any Covered Software is * 278 | * authorized under this License except under this disclaimer. * 279 | * * 280 | ************************************************************************ 281 | 282 | ************************************************************************ 283 | * * 284 | * 7. Limitation of Liability * 285 | * -------------------------- * 286 | * * 287 | * Under no circumstances and under no legal theory, whether tort * 288 | * (including negligence), contract, or otherwise, shall any * 289 | * Contributor, or anyone who distributes Covered Software as * 290 | * permitted above, be liable to You for any direct, indirect, * 291 | * special, incidental, or consequential damages of any character * 292 | * including, without limitation, damages for lost profits, loss of * 293 | * goodwill, work stoppage, computer failure or malfunction, or any * 294 | * and all other commercial damages or losses, even if such party * 295 | * shall have been informed of the possibility of such damages. 
This * 296 | * limitation of liability shall not apply to liability for death or * 297 | * personal injury resulting from such party's negligence to the * 298 | * extent applicable law prohibits such limitation. Some * 299 | * jurisdictions do not allow the exclusion or limitation of * 300 | * incidental or consequential damages, so this exclusion and * 301 | * limitation may not apply to You. * 302 | * * 303 | ************************************************************************ 304 | 305 | 8. Litigation 306 | ------------- 307 | 308 | Any litigation relating to this License may be brought only in the 309 | courts of a jurisdiction where the defendant maintains its principal 310 | place of business and such litigation shall be governed by laws of that 311 | jurisdiction, without reference to its conflict-of-law provisions. 312 | Nothing in this Section shall prevent a party's ability to bring 313 | cross-claims or counter-claims. 314 | 315 | 9. Miscellaneous 316 | ---------------- 317 | 318 | This License represents the complete agreement concerning the subject 319 | matter hereof. If any provision of this License is held to be 320 | unenforceable, such provision shall be reformed only to the extent 321 | necessary to make it enforceable. Any law or regulation which provides 322 | that the language of a contract shall be construed against the drafter 323 | shall not be used to construe this License against a Contributor. 324 | 325 | 10. Versions of the License 326 | --------------------------- 327 | 328 | 10.1. New Versions 329 | 330 | Mozilla Foundation is the license steward. Except as provided in Section 331 | 10.3, no one other than the license steward has the right to modify or 332 | publish new versions of this License. Each version will be given a 333 | distinguishing version number. 334 | 335 | 10.2. 
Effect of New Versions 336 | 337 | You may distribute the Covered Software under the terms of the version 338 | of the License under which You originally received the Covered Software, 339 | or under the terms of any subsequent version published by the license 340 | steward. 341 | 342 | 10.3. Modified Versions 343 | 344 | If you create software not governed by this License, and you want to 345 | create a new license for such software, you may create and use a 346 | modified version of this License if you rename the license and remove 347 | any references to the name of the license steward (except to note that 348 | such modified license differs from this License). 349 | 350 | 10.4. Distributing Source Code Form that is Incompatible With Secondary 351 | Licenses 352 | 353 | If You choose to distribute Source Code Form that is Incompatible With 354 | Secondary Licenses under the terms of this version of the License, the 355 | notice described in Exhibit B of this License must be attached. 356 | 357 | Exhibit A - Source Code Form License Notice 358 | ------------------------------------------- 359 | 360 | This Source Code Form is subject to the terms of the Mozilla Public 361 | License, v. 2.0. If a copy of the MPL was not distributed with this 362 | file, You can obtain one at http://mozilla.org/MPL/2.0/. 363 | 364 | If it is not possible or desirable to put the notice in a particular 365 | file, then You may include the notice in a location (such as a LICENSE 366 | file in a relevant directory) where a recipient would be likely to look 367 | for such a notice. 368 | 369 | You may add additional accurate notices of copyright ownership. 370 | 371 | Exhibit B - "Incompatible With Secondary Licenses" Notice 372 | --------------------------------------------------------- 373 | 374 | This Source Code Form is "Incompatible With Secondary Licenses", as 375 | defined by the Mozilla Public License, v. 2.0. 
376 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Observability as Code 2 | 3 | Examples for Observability as Code with Terraform 4 | 5 | ## Configurations 6 | 7 | * Datadog 8 | -------------------------------------------------------------------------------- /datadog/.gitignore: -------------------------------------------------------------------------------- 1 | # Local .terraform directories 2 | **/.terraform/* 3 | 4 | # .tfstate files 5 | *.tfstate 6 | *.tfstate.* 7 | -------------------------------------------------------------------------------- /datadog/Makefile: -------------------------------------------------------------------------------- 1 | docker-dashboard: 2 | jq '.resource.datadog_dashboard.docker = \ 3 | (.widget = .widgets | .template_variable = .template_variables | \ 4 | .widget |= map((. | .definition.request = .definition.requests | del(.definition.requests) | \ 5 | del(.definition.tags_execution) | \ 6 | del(.definition.request | select(. == null)) | \ 7 | del(.definition.show_legend | select(. == false)) | \ 8 | ."\(.definition.type)_definition" = .definition ) | \ 9 | del(."\(.definition.type)_definition".type, ."\(.definition.type)_definition".legend_size) // .) 
| \ 10 | del(.widget[].definition, .widget[].id, .widgets, .template_variables, .id)) | \ 11 | del(.title, .description, .layout_type, .is_read_only, .notify_list, .id, .widgets, .template_variables)' \ 12 | dashboard/docker.json > dashboard-docker.tf.json -------------------------------------------------------------------------------- /datadog/README.md: -------------------------------------------------------------------------------- 1 | # Datadog eCommerce Application 2 | 3 | This example demonstrates how to use the [Terraform Datadog 4 | Provider](https://www.terraform.io/docs/providers/datadog/index.html) 5 | for configuring dashboards, monitors, and tracing. 6 | 7 | ## Prerequisites 8 | 9 | * [Datadog Account](https://app.datadoghq.com/signup) 10 | * [HashiCorp Terraform 0.12+](https://www.terraform.io/downloads.html) 11 | * `docker-compose` 12 | 13 | ### Optional: Remote Instance of eCommerce Application 14 | 15 | If you would like to deploy an instance of the eCommerce application that 16 | is not on your laptop, you will need: 17 | 18 | * Google Cloud Platform account 19 | * [HashiCorp Packer](https://www.packer.io/downloads.html) 20 | 21 | ## Deploy eCommerce Application 22 | 23 | To deploy the eCommerce application locally: 24 | 25 | 1. Clone the eCommerce example. 26 | 27 | ```shell 28 | git clone https://github.com/datadog/ecommerce-workshop 29 | ``` 30 | 31 | 1. Follow the instructions in that repository's README to run it locally. 32 | 33 | To deploy the eCommerce application remotely: 34 | 35 | 1. Clone this repository. 36 | 37 | 1. Build a GCP image with Docker, docker-compose, and dependencies. 38 | 39 | ```shell 40 | cd datadog/setup/packer 41 | ZONE=${GOOGLE_ZONE} PROJECT_ID=${GOOGLE_PROJECT_ID} make build 42 | ``` 43 | 44 | Pre-building the image makes the instance faster to spin up later. 45 | 46 | 1. After the GCP image has been created, you can spin up the instance 47 | with the Terraform configuration in `datadog/setup`. Make sure to define the Terraform variables. 
48 | 49 | ```shell 50 | export TF_VAR_datadog_api_key=${DATADOG_API_KEY} 51 | export TF_VAR_project_id=${GCP_PROJECT} 52 | 53 | terraform init 54 | terraform plan 55 | terraform apply 56 | ``` 57 | 58 | 1. This will create a publicly accessible GCP instance that replays 59 | traffic against the application running on it. You can access the public 60 | endpoint of the instance by retrieving the public IP from the Terraform 61 | output. 62 | 63 | ```shell 64 | open http://$(terraform output ecommerce):3000 65 | ``` 66 | 67 | ## Deploy Monitors & Dashboards for the eCommerce application 68 | 69 | You can deploy the monitors and dashboards for the application 70 | using Terraform. 71 | 72 | 1. Define the Datadog API and application keys. 73 | 74 | ```shell 75 | export TF_VAR_datadog_api_key=${DATADOG_API_KEY} 76 | export TF_VAR_datadog_app_key=${DATADOG_APP_KEY} 77 | ``` 78 | 79 | 1. Check out `terraform.auto.tfvars` for the variable 80 | definitions for the eCommerce application. 81 | 82 | 1. Dry run the changes to the monitors and dashboards. 83 | 84 | ```shell 85 | cd datadog/ 86 | terraform init 87 | terraform plan 88 | ``` 89 | 90 | 1. Apply the changes to the monitors and dashboards. 91 | 92 | ```shell 93 | cd datadog/ 94 | terraform apply 95 | ``` 96 | 97 | This will create a fake PagerDuty integration, 98 | some monitors, and a dashboard in Datadog. 
99 | -------------------------------------------------------------------------------- /datadog/dashboard-docker.tf.json: -------------------------------------------------------------------------------- 1 | { 2 | "resource": { 3 | "datadog_dashboard": { 4 | "docker": { 5 | "title": "Docker - ${var.application} Containers", 6 | "description": "For containers related to ${var.application} service", 7 | "layout_type": "free", 8 | "is_read_only": false, 9 | "notify_list": [], 10 | "widget": [ 11 | { 12 | "layout": { 13 | "x": 68, 14 | "y": 10, 15 | "width": 50, 16 | "height": 29 17 | }, 18 | "timeseries_definition": { 19 | "title": "Running containers by image", 20 | "title_size": "16", 21 | "title_align": "left", 22 | "time": { 23 | "live_span": "1h" 24 | }, 25 | "request": [ 26 | { 27 | "q": "sum:docker.containers.running{$scope} by {docker_image}.fill(0)", 28 | "display_type": "bars" 29 | } 30 | ] 31 | } 32 | }, 33 | { 34 | "layout": { 35 | "x": 52, 36 | "y": 98, 37 | "width": 51, 38 | "height": 15 39 | }, 40 | "toplist_definition": { 41 | "title": "Most RAM-intensive containers", 42 | "title_size": "16", 43 | "title_align": "left", 44 | "time": { 45 | "live_span": "1h" 46 | }, 47 | "request": [ 48 | { 49 | "q": "top(avg:docker.mem.rss{$scope} by {container_name}, 5, 'max', 'desc')", 50 | "style": { 51 | "palette": "dog_classic" 52 | } 53 | } 54 | ] 55 | } 56 | }, 57 | { 58 | "layout": { 59 | "x": 0, 60 | "y": 82, 61 | "width": 51, 62 | "height": 15 63 | }, 64 | "toplist_definition": { 65 | "title": "Most CPU-intensive containers", 66 | "title_size": "16", 67 | "title_align": "left", 68 | "time": { 69 | "live_span": "1h" 70 | }, 71 | "request": [ 72 | { 73 | "q": "top(avg:docker.cpu.user{$scope} by {container_name}, 5, 'max', 'desc')", 74 | "style": { 75 | "palette": "cool" 76 | } 77 | } 78 | ] 79 | } 80 | }, 81 | { 82 | "layout": { 83 | "x": 52, 84 | "y": 114, 85 | "width": 51, 86 | "height": 15 87 | }, 88 | "heatmap_definition": { 89 | "title": "Memory by 
container", 90 | "title_size": "16", 91 | "title_align": "left", 92 | "time": { 93 | "live_span": "1h" 94 | }, 95 | "request": [ 96 | { 97 | "q": "avg:docker.mem.rss{$scope} by {container_name}" 98 | } 99 | ] 100 | } 101 | }, 102 | { 103 | "layout": { 104 | "x": 89, 105 | "y": 0, 106 | "width": 14, 107 | "height": 9 108 | }, 109 | "query_value_definition": { 110 | "title": "Running containers", 111 | "title_size": "16", 112 | "title_align": "center", 113 | "time": { 114 | "live_span": "1m" 115 | }, 116 | "autoscale": true, 117 | "precision": 0, 118 | "text_align": "center", 119 | "request": [ 120 | { 121 | "q": "sum:docker.containers.running{$scope}", 122 | "aggregator": "last" 123 | } 124 | ] 125 | } 126 | }, 127 | { 128 | "layout": { 129 | "x": 104, 130 | "y": 0, 131 | "width": 14, 132 | "height": 9 133 | }, 134 | "query_value_definition": { 135 | "title": "Stopped containers", 136 | "title_size": "16", 137 | "title_align": "center", 138 | "time": { 139 | "live_span": "1m" 140 | }, 141 | "autoscale": true, 142 | "precision": 0, 143 | "text_align": "center", 144 | "request": [ 145 | { 146 | "q": "sum:docker.containers.stopped{$scope}", 147 | "aggregator": "last" 148 | } 149 | ] 150 | } 151 | }, 152 | { 153 | "layout": { 154 | "x": 0, 155 | "y": 98, 156 | "width": 51, 157 | "height": 15 158 | }, 159 | "heatmap_definition": { 160 | "title": "CPU by container", 161 | "title_size": "16", 162 | "title_align": "left", 163 | "time": { 164 | "live_span": "1h" 165 | }, 166 | "request": [ 167 | { 168 | "q": "avg:docker.cpu.user{$scope} by {container_name}" 169 | } 170 | ] 171 | } 172 | }, 173 | { 174 | "layout": { 175 | "x": 0, 176 | "y": 50, 177 | "width": 51, 178 | "height": 15 179 | }, 180 | "timeseries_definition": { 181 | "title": "CPU user by image", 182 | "title_size": "16", 183 | "title_align": "left", 184 | "time": { 185 | "live_span": "1h" 186 | }, 187 | "request": [ 188 | { 189 | "q": "avg:docker.cpu.user{$scope} by {docker_image}.fill(0)", 190 | "display_type": 
"line", 191 | "style": { 192 | "palette": "cool" 193 | } 194 | } 195 | ] 196 | } 197 | }, 198 | { 199 | "layout": { 200 | "x": 52, 201 | "y": 50, 202 | "width": 51, 203 | "height": 15 204 | }, 205 | "timeseries_definition": { 206 | "title": "RSS memory by image", 207 | "title_size": "16", 208 | "title_align": "left", 209 | "time": { 210 | "live_span": "1h" 211 | }, 212 | "request": [ 213 | { 214 | "q": "avg:docker.mem.rss{$scope} by {docker_image}.fill(0)", 215 | "display_type": "line" 216 | } 217 | ] 218 | } 219 | }, 220 | { 221 | "layout": { 222 | "x": 0, 223 | "y": 17, 224 | "width": 51, 225 | "height": 24 226 | }, 227 | "event_stream_definition": { 228 | "query": "sources:docker", 229 | "event_size": "s", 230 | "time": { 231 | "live_span": "1d" 232 | } 233 | } 234 | }, 235 | { 236 | "layout": { 237 | "x": 0, 238 | "y": 8, 239 | "width": 51, 240 | "height": 9 241 | }, 242 | "event_timeline_definition": { 243 | "query": "sources:docker", 244 | "time": { 245 | "live_span": "1d" 246 | } 247 | } 248 | }, 249 | { 250 | "layout": { 251 | "x": 68, 252 | "y": 0, 253 | "width": 20, 254 | "height": 9 255 | }, 256 | "query_value_definition": { 257 | "title": "Running container change", 258 | "title_size": "16", 259 | "title_align": "center", 260 | "time": { 261 | "live_span": "5m" 262 | }, 263 | "autoscale": false, 264 | "custom_unit": "%", 265 | "precision": 0, 266 | "text_align": "center", 267 | "request": [ 268 | { 269 | "q": "100*(sum:docker.containers.running{$scope}/timeshift(sum:docker.containers.running{$scope}, -300))", 270 | "aggregator": "last", 271 | "conditional_formats": [ 272 | { 273 | "comparator": ">=", 274 | "value": 80, 275 | "palette": "white_on_green", 276 | "hide_value": false 277 | }, 278 | { 279 | "comparator": ">", 280 | "value": 50, 281 | "palette": "white_on_yellow", 282 | "hide_value": false 283 | }, 284 | { 285 | "comparator": ">=", 286 | "value": 0, 287 | "palette": "white_on_red", 288 | "hide_value": false 289 | } 290 | ] 291 | } 292 | ] 293 
| } 294 | }, 295 | { 296 | "layout": { 297 | "x": 0, 298 | "y": 66, 299 | "width": 51, 300 | "height": 15 301 | }, 302 | "timeseries_definition": { 303 | "title": "CPU system by image", 304 | "title_size": "16", 305 | "title_align": "left", 306 | "time": { 307 | "live_span": "1h" 308 | }, 309 | "request": [ 310 | { 311 | "q": "avg:docker.cpu.system{$scope} by {docker_image}.fill(0)", 312 | "display_type": "line", 313 | "style": { 314 | "palette": "cool" 315 | } 316 | } 317 | ] 318 | } 319 | }, 320 | { 321 | "layout": { 322 | "x": 52, 323 | "y": 0, 324 | "width": 14, 325 | "height": 39 326 | }, 327 | "note_definition": { 328 | "content": "\n\nContainers", 329 | "background_color": "blue", 330 | "font_size": "24", 331 | "text_align": "center", 332 | "show_tick": true, 333 | "tick_pos": "50%", 334 | "tick_edge": "right" 335 | } 336 | }, 337 | { 338 | "layout": { 339 | "x": 52, 340 | "y": 82, 341 | "width": 51, 342 | "height": 15 343 | }, 344 | "timeseries_definition": { 345 | "title": "Cache memory by image", 346 | "title_size": "16", 347 | "title_align": "left", 348 | "time": { 349 | "live_span": "1h" 350 | }, 351 | "request": [ 352 | { 353 | "q": "sum:docker.mem.cache{$scope} by {docker_image}", 354 | "display_type": "line" 355 | } 356 | ] 357 | } 358 | }, 359 | { 360 | "layout": { 361 | "x": 0, 362 | "y": 0, 363 | "width": 51, 364 | "height": 8 365 | }, 366 | "image_definition": { 367 | "url": "/static/images/screenboard/integrations/docker-logo-792x269.png", 368 | "sizing": "fit" 369 | } 370 | }, 371 | { 372 | "layout": { 373 | "x": 0, 374 | "y": 42, 375 | "width": 51, 376 | "height": 6 377 | }, 378 | "note_definition": { 379 | "content": "[CPU Core Load](https://www.datadoghq.com/blog/how-to-monitor-docker-resource-metrics/#toc2)", 380 | "background_color": "blue", 381 | "font_size": "24", 382 | "text_align": "center", 383 | "show_tick": true, 384 | "tick_pos": "50%", 385 | "tick_edge": "bottom" 386 | } 387 | }, 388 | { 389 | "layout": { 390 | "x": 52, 391 | "y": 
42, 392 | "width": 51, 393 | "height": 6 394 | }, 395 | "note_definition": { 396 | "content": "Memory", 397 | "background_color": "blue", 398 | "font_size": "24", 399 | "text_align": "center", 400 | "show_tick": true, 401 | "tick_pos": "50%", 402 | "tick_edge": "bottom" 403 | } 404 | }, 405 | { 406 | "layout": { 407 | "x": 52, 408 | "y": 66, 409 | "width": 51, 410 | "height": 15 411 | }, 412 | "timeseries_definition": { 413 | "title": "Swap by image", 414 | "title_size": "16", 415 | "title_align": "left", 416 | "time": { 417 | "live_span": "1h" 418 | }, 419 | "request": [ 420 | { 421 | "q": "avg:docker.mem.swap{$scope} by {docker_image}", 422 | "display_type": "line" 423 | } 424 | ] 425 | } 426 | }, 427 | { 428 | "layout": { 429 | "x": 156, 430 | "y": 42, 431 | "width": 51, 432 | "height": 6 433 | }, 434 | "note_definition": { 435 | "content": "I/O", 436 | "background_color": "blue", 437 | "font_size": "24", 438 | "text_align": "center", 439 | "show_tick": true, 440 | "tick_pos": "50%", 441 | "tick_edge": "bottom" 442 | } 443 | }, 444 | { 445 | "layout": { 446 | "x": 156, 447 | "y": 50, 448 | "width": 51, 449 | "height": 15 450 | }, 451 | "timeseries_definition": { 452 | "title": "Avg. I/O bytes read by image", 453 | "title_size": "16", 454 | "title_align": "left", 455 | "time": { 456 | "live_span": "1h" 457 | }, 458 | "request": [ 459 | { 460 | "q": "avg:docker.io.read_bytes{$scope} by {docker_image}", 461 | "display_type": "area", 462 | "style": { 463 | "palette": "dog_classic" 464 | } 465 | } 466 | ] 467 | } 468 | }, 469 | { 470 | "layout": { 471 | "x": 156, 472 | "y": 66, 473 | "width": 51, 474 | "height": 15 475 | }, 476 | "timeseries_definition": { 477 | "title": "Avg. 
I/O bytes written by image", 478 | "title_size": "16", 479 | "title_align": "left", 480 | "time": { 481 | "live_span": "1h" 482 | }, 483 | "request": [ 484 | { 485 | "q": "avg:docker.io.write_bytes{$scope} by {docker_image}", 486 | "display_type": "area", 487 | "style": { 488 | "palette": "dog_classic" 489 | } 490 | } 491 | ] 492 | } 493 | }, 494 | { 495 | "layout": { 496 | "x": 104, 497 | "y": 42, 498 | "width": 51, 499 | "height": 6 500 | }, 501 | "note_definition": { 502 | "content": "Network", 503 | "background_color": "blue", 504 | "font_size": "24", 505 | "text_align": "center", 506 | "show_tick": true, 507 | "tick_pos": "50%", 508 | "tick_edge": "bottom" 509 | } 510 | }, 511 | { 512 | "layout": { 513 | "x": 104, 514 | "y": 50, 515 | "width": 51, 516 | "height": 15 517 | }, 518 | "timeseries_definition": { 519 | "title": "Avg. rx bytes by image", 520 | "title_size": "16", 521 | "title_align": "left", 522 | "time": { 523 | "live_span": "1h" 524 | }, 525 | "request": [ 526 | { 527 | "q": "avg:docker.net.bytes_rcvd{$scope} by {docker_image}", 528 | "display_type": "area", 529 | "style": { 530 | "palette": "cool" 531 | } 532 | } 533 | ] 534 | } 535 | }, 536 | { 537 | "layout": { 538 | "x": 104, 539 | "y": 66, 540 | "width": 51, 541 | "height": 15 542 | }, 543 | "timeseries_definition": { 544 | "title": "Avg. 
tx bytes by image", 545 | "title_size": "16", 546 | "title_align": "left", 547 | "time": { 548 | "live_span": "1h" 549 | }, 550 | "request": [ 551 | { 552 | "q": "avg:docker.net.bytes_sent{$scope} by {docker_image}", 553 | "display_type": "area", 554 | "style": { 555 | "palette": "purple" 556 | } 557 | } 558 | ] 559 | } 560 | }, 561 | { 562 | "layout": { 563 | "x": 104, 564 | "y": 82, 565 | "width": 51, 566 | "height": 15 567 | }, 568 | "toplist_definition": { 569 | "title": "Most tx-intensive containers", 570 | "title_size": "16", 571 | "title_align": "left", 572 | "time": { 573 | "live_span": "1h" 574 | }, 575 | "request": [ 576 | { 577 | "q": "top(avg:docker.net.bytes_sent{$scope} by {container_name}, 5, 'max', 'desc')", 578 | "style": { 579 | "palette": "purple" 580 | } 581 | } 582 | ] 583 | } 584 | }, 585 | { 586 | "layout": { 587 | "x": 104, 588 | "y": 98, 589 | "width": 51, 590 | "height": 15 591 | }, 592 | "heatmap_definition": { 593 | "title": "tx by container", 594 | "title_size": "16", 595 | "title_align": "left", 596 | "time": { 597 | "live_span": "1h" 598 | }, 599 | "request": [ 600 | { 601 | "q": "avg:docker.net.bytes_sent{$scope} by {container_name}" 602 | } 603 | ] 604 | } 605 | }, 606 | { 607 | "layout": { 608 | "x": 119, 609 | "y": 0, 610 | "width": 36, 611 | "height": 39 612 | }, 613 | "toplist_definition": { 614 | "title": "Running containers by image", 615 | "title_size": "16", 616 | "title_align": "left", 617 | "time": { 618 | "live_span": "1h" 619 | }, 620 | "request": [ 621 | { 622 | "q": "timeshift(top(sum:docker.containers.running{$scope} by {docker_image}.fill(60), 20, 'last', 'desc'), 40)", 623 | "style": { 624 | "palette": "dog_classic" 625 | } 626 | } 627 | ] 628 | } 629 | } 630 | ], 631 | "template_variable": [ 632 | { 633 | "name": "scope", 634 | "default": "*", 635 | "prefix": "short_image" 636 | } 637 | ] 638 | } 639 | } 640 | } 641 | } 642 | -------------------------------------------------------------------------------- 
/datadog/dashboard/docker.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "Docker - eCommerce", 3 | "description": "For containers related to eCommerce", 4 | "widgets": [ 5 | { 6 | "id": 0, 7 | "definition": { 8 | "type": "timeseries", 9 | "requests": [ 10 | { 11 | "q": "sum:docker.containers.running{$scope} by {docker_image}.fill(0)", 12 | "display_type": "bars" 13 | } 14 | ], 15 | "title": "Running containers by image", 16 | "title_size": "16", 17 | "title_align": "left", 18 | "time": { 19 | "live_span": "1h" 20 | }, 21 | "show_legend": false 22 | }, 23 | "layout": { 24 | "x": 68, 25 | "y": 10, 26 | "width": 50, 27 | "height": 29 28 | } 29 | }, 30 | { 31 | "id": 1, 32 | "definition": { 33 | "type": "toplist", 34 | "requests": [ 35 | { 36 | "q": "top(avg:docker.mem.rss{$scope} by {container_name}, 5, 'max', 'desc')", 37 | "style": { 38 | "palette": "dog_classic" 39 | } 40 | } 41 | ], 42 | "title": "Most RAM-intensive containers", 43 | "title_size": "16", 44 | "title_align": "left", 45 | "time": { 46 | "live_span": "1h" 47 | } 48 | }, 49 | "layout": { 50 | "x": 52, 51 | "y": 98, 52 | "width": 51, 53 | "height": 15 54 | } 55 | }, 56 | { 57 | "id": 2, 58 | "definition": { 59 | "type": "toplist", 60 | "requests": [ 61 | { 62 | "q": "top(avg:docker.cpu.user{$scope} by {container_name}, 5, 'max', 'desc')", 63 | "style": { 64 | "palette": "cool" 65 | } 66 | } 67 | ], 68 | "title": "Most CPU-intensive containers", 69 | "title_size": "16", 70 | "title_align": "left", 71 | "time": { 72 | "live_span": "1h" 73 | } 74 | }, 75 | "layout": { 76 | "x": 0, 77 | "y": 82, 78 | "width": 51, 79 | "height": 15 80 | } 81 | }, 82 | { 83 | "id": 3, 84 | "definition": { 85 | "type": "heatmap", 86 | "requests": [ 87 | { 88 | "q": "avg:docker.mem.rss{$scope} by {container_name}" 89 | } 90 | ], 91 | "title": "Memory by container", 92 | "title_size": "16", 93 | "title_align": "left", 94 | "time": { 95 | "live_span": "1h" 96 | } 97 | }, 
98 | "layout": { 99 | "x": 52, 100 | "y": 114, 101 | "width": 51, 102 | "height": 15 103 | } 104 | }, 105 | { 106 | "id": 4, 107 | "definition": { 108 | "type": "query_value", 109 | "requests": [ 110 | { 111 | "q": "sum:docker.containers.running{$scope}", 112 | "aggregator": "last" 113 | } 114 | ], 115 | "title": "Running containers", 116 | "title_size": "16", 117 | "title_align": "center", 118 | "time": { 119 | "live_span": "1m" 120 | }, 121 | "autoscale": true, 122 | "precision": 0, 123 | "text_align": "center" 124 | }, 125 | "layout": { 126 | "x": 89, 127 | "y": 0, 128 | "width": 14, 129 | "height": 9 130 | } 131 | }, 132 | { 133 | "id": 5, 134 | "definition": { 135 | "type": "query_value", 136 | "requests": [ 137 | { 138 | "q": "sum:docker.containers.stopped{$scope}", 139 | "aggregator": "last" 140 | } 141 | ], 142 | "title": "Stopped containers", 143 | "title_size": "16", 144 | "title_align": "center", 145 | "time": { 146 | "live_span": "1m" 147 | }, 148 | "autoscale": true, 149 | "precision": 0, 150 | "text_align": "center" 151 | }, 152 | "layout": { 153 | "x": 104, 154 | "y": 0, 155 | "width": 14, 156 | "height": 9 157 | } 158 | }, 159 | { 160 | "id": 6, 161 | "definition": { 162 | "type": "heatmap", 163 | "requests": [ 164 | { 165 | "q": "avg:docker.cpu.user{$scope} by {container_name}" 166 | } 167 | ], 168 | "title": "CPU by container", 169 | "title_size": "16", 170 | "title_align": "left", 171 | "time": { 172 | "live_span": "1h" 173 | } 174 | }, 175 | "layout": { 176 | "x": 0, 177 | "y": 98, 178 | "width": 51, 179 | "height": 15 180 | } 181 | }, 182 | { 183 | "id": 7, 184 | "definition": { 185 | "type": "timeseries", 186 | "requests": [ 187 | { 188 | "q": "avg:docker.cpu.user{$scope} by {docker_image}.fill(0)", 189 | "display_type": "line", 190 | "style": { 191 | "palette": "cool" 192 | } 193 | } 194 | ], 195 | "title": "CPU user by image", 196 | "title_size": "16", 197 | "title_align": "left", 198 | "time": { 199 | "live_span": "1h" 200 | }, 201 | 
"show_legend": false 202 | }, 203 | "layout": { 204 | "x": 0, 205 | "y": 50, 206 | "width": 51, 207 | "height": 15 208 | } 209 | }, 210 | { 211 | "id": 8, 212 | "definition": { 213 | "type": "timeseries", 214 | "requests": [ 215 | { 216 | "q": "avg:docker.mem.rss{$scope} by {docker_image}.fill(0)", 217 | "display_type": "line" 218 | } 219 | ], 220 | "title": "RSS memory by image", 221 | "title_size": "16", 222 | "title_align": "left", 223 | "time": { 224 | "live_span": "1h" 225 | }, 226 | "show_legend": false 227 | }, 228 | "layout": { 229 | "x": 52, 230 | "y": 50, 231 | "width": 51, 232 | "height": 15 233 | } 234 | }, 235 | { 236 | "id": 9, 237 | "definition": { 238 | "type": "event_stream", 239 | "query": "sources:docker", 240 | "tags_execution": "and", 241 | "event_size": "s", 242 | "time": { 243 | "live_span": "1d" 244 | } 245 | }, 246 | "layout": { 247 | "x": 0, 248 | "y": 17, 249 | "width": 51, 250 | "height": 24 251 | } 252 | }, 253 | { 254 | "id": 10, 255 | "definition": { 256 | "type": "event_timeline", 257 | "query": "sources:docker", 258 | "tags_execution": "and", 259 | "time": { 260 | "live_span": "1d" 261 | } 262 | }, 263 | "layout": { 264 | "x": 0, 265 | "y": 8, 266 | "width": 51, 267 | "height": 9 268 | } 269 | }, 270 | { 271 | "id": 11, 272 | "definition": { 273 | "type": "query_value", 274 | "requests": [ 275 | { 276 | "q": "100*(sum:docker.containers.running{$scope}/timeshift(sum:docker.containers.running{$scope}, -300))", 277 | "aggregator": "last", 278 | "conditional_formats": [ 279 | { 280 | "comparator": ">=", 281 | "value": 80, 282 | "palette": "white_on_green", 283 | "hide_value": false 284 | }, 285 | { 286 | "comparator": ">", 287 | "value": 50, 288 | "palette": "white_on_yellow", 289 | "hide_value": false 290 | }, 291 | { 292 | "comparator": ">=", 293 | "value": 0, 294 | "palette": "white_on_red", 295 | "hide_value": false 296 | } 297 | ] 298 | } 299 | ], 300 | "title": "Running container change", 301 | "title_size": "16", 302 | 
"title_align": "center", 303 | "time": { 304 | "live_span": "5m" 305 | }, 306 | "autoscale": false, 307 | "custom_unit": "%", 308 | "precision": 0, 309 | "text_align": "center" 310 | }, 311 | "layout": { 312 | "x": 68, 313 | "y": 0, 314 | "width": 20, 315 | "height": 9 316 | } 317 | }, 318 | { 319 | "id": 12, 320 | "definition": { 321 | "type": "timeseries", 322 | "requests": [ 323 | { 324 | "q": "avg:docker.cpu.system{$scope} by {docker_image}.fill(0)", 325 | "display_type": "line", 326 | "style": { 327 | "palette": "cool" 328 | } 329 | } 330 | ], 331 | "title": "CPU system by image", 332 | "title_size": "16", 333 | "title_align": "left", 334 | "time": { 335 | "live_span": "1h" 336 | }, 337 | "show_legend": false 338 | }, 339 | "layout": { 340 | "x": 0, 341 | "y": 66, 342 | "width": 51, 343 | "height": 15 344 | } 345 | }, 346 | { 347 | "id": 13, 348 | "definition": { 349 | "type": "note", 350 | "content": "\n\nContainers", 351 | "background_color": "blue", 352 | "font_size": "24", 353 | "text_align": "center", 354 | "show_tick": true, 355 | "tick_pos": "50%", 356 | "tick_edge": "right" 357 | }, 358 | "layout": { 359 | "x": 52, 360 | "y": 0, 361 | "width": 14, 362 | "height": 39 363 | } 364 | }, 365 | { 366 | "id": 14, 367 | "definition": { 368 | "type": "timeseries", 369 | "requests": [ 370 | { 371 | "q": "sum:docker.mem.cache{$scope} by {docker_image}", 372 | "display_type": "line" 373 | } 374 | ], 375 | "title": "Cache memory by image", 376 | "title_size": "16", 377 | "title_align": "left", 378 | "time": { 379 | "live_span": "1h" 380 | }, 381 | "show_legend": false 382 | }, 383 | "layout": { 384 | "x": 52, 385 | "y": 82, 386 | "width": 51, 387 | "height": 15 388 | } 389 | }, 390 | { 391 | "id": 15, 392 | "definition": { 393 | "type": "image", 394 | "url": "/static/images/screenboard/integrations/docker-logo-792x269.png", 395 | "sizing": "fit" 396 | }, 397 | "layout": { 398 | "x": 0, 399 | "y": 0, 400 | "width": 51, 401 | "height": 8 402 | } 403 | }, 404 | { 405 
| "id": 16, 406 | "definition": { 407 | "type": "note", 408 | "content": "[CPU Core Load](https://www.datadoghq.com/blog/how-to-monitor-docker-resource-metrics/#toc2)", 409 | "background_color": "blue", 410 | "font_size": "24", 411 | "text_align": "center", 412 | "show_tick": true, 413 | "tick_pos": "50%", 414 | "tick_edge": "bottom" 415 | }, 416 | "layout": { 417 | "x": 0, 418 | "y": 42, 419 | "width": 51, 420 | "height": 6 421 | } 422 | }, 423 | { 424 | "id": 17, 425 | "definition": { 426 | "type": "note", 427 | "content": "Memory", 428 | "background_color": "blue", 429 | "font_size": "24", 430 | "text_align": "center", 431 | "show_tick": true, 432 | "tick_pos": "50%", 433 | "tick_edge": "bottom" 434 | }, 435 | "layout": { 436 | "x": 52, 437 | "y": 42, 438 | "width": 51, 439 | "height": 6 440 | } 441 | }, 442 | { 443 | "id": 18, 444 | "definition": { 445 | "type": "timeseries", 446 | "requests": [ 447 | { 448 | "q": "avg:docker.mem.swap{$scope} by {docker_image}", 449 | "display_type": "line" 450 | } 451 | ], 452 | "title": "Swap by image", 453 | "title_size": "16", 454 | "title_align": "left", 455 | "time": { 456 | "live_span": "1h" 457 | }, 458 | "show_legend": false 459 | }, 460 | "layout": { 461 | "x": 52, 462 | "y": 66, 463 | "width": 51, 464 | "height": 15 465 | } 466 | }, 467 | { 468 | "id": 19, 469 | "definition": { 470 | "type": "note", 471 | "content": "I/O", 472 | "background_color": "blue", 473 | "font_size": "24", 474 | "text_align": "center", 475 | "show_tick": true, 476 | "tick_pos": "50%", 477 | "tick_edge": "bottom" 478 | }, 479 | "layout": { 480 | "x": 156, 481 | "y": 42, 482 | "width": 51, 483 | "height": 6 484 | } 485 | }, 486 | { 487 | "id": 20, 488 | "definition": { 489 | "type": "timeseries", 490 | "requests": [ 491 | { 492 | "q": "avg:docker.io.read_bytes{$scope} by {docker_image}", 493 | "display_type": "area", 494 | "style": { 495 | "palette": "dog_classic" 496 | } 497 | } 498 | ], 499 | "title": "Avg. 
I/O bytes read by image", 500 | "title_size": "16", 501 | "title_align": "left", 502 | "time": { 503 | "live_span": "1h" 504 | }, 505 | "show_legend": false 506 | }, 507 | "layout": { 508 | "x": 156, 509 | "y": 50, 510 | "width": 51, 511 | "height": 15 512 | } 513 | }, 514 | { 515 | "id": 21, 516 | "definition": { 517 | "type": "timeseries", 518 | "requests": [ 519 | { 520 | "q": "avg:docker.io.write_bytes{$scope} by {docker_image}", 521 | "display_type": "area", 522 | "style": { 523 | "palette": "dog_classic" 524 | } 525 | } 526 | ], 527 | "title": "Avg. I/O bytes written by image", 528 | "title_size": "16", 529 | "title_align": "left", 530 | "time": { 531 | "live_span": "1h" 532 | }, 533 | "show_legend": false 534 | }, 535 | "layout": { 536 | "x": 156, 537 | "y": 66, 538 | "width": 51, 539 | "height": 15 540 | } 541 | }, 542 | { 543 | "id": 22, 544 | "definition": { 545 | "type": "note", 546 | "content": "Network", 547 | "background_color": "blue", 548 | "font_size": "24", 549 | "text_align": "center", 550 | "show_tick": true, 551 | "tick_pos": "50%", 552 | "tick_edge": "bottom" 553 | }, 554 | "layout": { 555 | "x": 104, 556 | "y": 42, 557 | "width": 51, 558 | "height": 6 559 | } 560 | }, 561 | { 562 | "id": 23, 563 | "definition": { 564 | "type": "timeseries", 565 | "requests": [ 566 | { 567 | "q": "avg:docker.net.bytes_rcvd{$scope} by {docker_image}", 568 | "display_type": "area", 569 | "style": { 570 | "palette": "cool" 571 | } 572 | } 573 | ], 574 | "title": "Avg. 
rx bytes by image", 575 | "title_size": "16", 576 | "title_align": "left", 577 | "time": { 578 | "live_span": "1h" 579 | }, 580 | "show_legend": false 581 | }, 582 | "layout": { 583 | "x": 104, 584 | "y": 50, 585 | "width": 51, 586 | "height": 15 587 | } 588 | }, 589 | { 590 | "id": 24, 591 | "definition": { 592 | "type": "timeseries", 593 | "requests": [ 594 | { 595 | "q": "avg:docker.net.bytes_sent{$scope} by {docker_image}", 596 | "display_type": "area", 597 | "style": { 598 | "palette": "purple" 599 | } 600 | } 601 | ], 602 | "title": "Avg. tx bytes by image", 603 | "title_size": "16", 604 | "title_align": "left", 605 | "time": { 606 | "live_span": "1h" 607 | }, 608 | "show_legend": false 609 | }, 610 | "layout": { 611 | "x": 104, 612 | "y": 66, 613 | "width": 51, 614 | "height": 15 615 | } 616 | }, 617 | { 618 | "id": 25, 619 | "definition": { 620 | "type": "toplist", 621 | "requests": [ 622 | { 623 | "q": "top(avg:docker.net.bytes_sent{$scope} by {container_name}, 5, 'max', 'desc')", 624 | "style": { 625 | "palette": "purple" 626 | } 627 | } 628 | ], 629 | "title": "Most tx-intensive containers", 630 | "title_size": "16", 631 | "title_align": "left", 632 | "time": { 633 | "live_span": "1h" 634 | } 635 | }, 636 | "layout": { 637 | "x": 104, 638 | "y": 82, 639 | "width": 51, 640 | "height": 15 641 | } 642 | }, 643 | { 644 | "id": 26, 645 | "definition": { 646 | "type": "heatmap", 647 | "requests": [ 648 | { 649 | "q": "avg:docker.net.bytes_sent{$scope} by {container_name}" 650 | } 651 | ], 652 | "title": "tx by container", 653 | "title_size": "16", 654 | "title_align": "left", 655 | "time": { 656 | "live_span": "1h" 657 | } 658 | }, 659 | "layout": { 660 | "x": 104, 661 | "y": 98, 662 | "width": 51, 663 | "height": 15 664 | } 665 | }, 666 | { 667 | "id": 27, 668 | "definition": { 669 | "type": "toplist", 670 | "requests": [ 671 | { 672 | "q": "timeshift(top(sum:docker.containers.running{$scope} by {docker_image}.fill(60), 20, 'last', 'desc'), 40)", 673 | 
"style": { 674 | "palette": "dog_classic" 675 | } 676 | } 677 | ], 678 | "title": "Running containers by image", 679 | "title_size": "16", 680 | "title_align": "left", 681 | "time": { 682 | "live_span": "1h" 683 | } 684 | }, 685 | "layout": { 686 | "x": 119, 687 | "y": 0, 688 | "width": 36, 689 | "height": 39 690 | } 691 | } 692 | ], 693 | "template_variables": [ 694 | { 695 | "name": "scope", 696 | "default": "*", 697 | "prefix": "short_image" 698 | } 699 | ], 700 | "layout_type": "free", 701 | "is_read_only": false, 702 | "notify_list": [], 703 | "id": "dki-wm7-kd3" 704 | } 705 | -------------------------------------------------------------------------------- /datadog/main.tf: -------------------------------------------------------------------------------- 1 | # Copyright (c) HashiCorp, Inc. 2 | # SPDX-License-Identifier: MPL-2.0 3 | 4 | terraform { 5 | required_providers { 6 | datadog = { 7 | source = "DataDog/datadog" 8 | version = "~> 3.25.0" 9 | } 10 | } 11 | 12 | required_version = "~> 1.0" 13 | } 14 | 15 | provider "datadog" { 16 | api_key = var.datadog_api_key 17 | app_key = var.datadog_app_key 18 | api_url = var.datadog_api_url 19 | } 20 | -------------------------------------------------------------------------------- /datadog/monitors.tf: -------------------------------------------------------------------------------- 1 | # Copyright (c) HashiCorp, Inc. 2 | # SPDX-License-Identifier: MPL-2.0 3 | 4 | resource "datadog_monitor" "apm_service_high_error_rate" { 5 | for_each = var.services 6 | name = "Service ${each.key} has a high error rate on ${each.value.environment}" 7 | type = "query alert" 8 | message = "Service ${each.key} has a high error rate. @pagerduty-${each.key}" 9 | escalation_message = "Service ${each.key} has a high error rate!! 
@pagerduty-${each.key}" 10 | 11 | query = "avg(last_10m):(sum:trace.${each.value.framework}.request.errors{env:${each.value.environment},service:${each.key}} / sum:trace.${each.value.framework}.request.hits{env:${each.value.environment},service:${each.key}}) > ${each.value.high_error_rate_critical}" 12 | 13 | monitor_thresholds { 14 | warning = each.value.high_error_rate_warning 15 | critical = each.value.high_error_rate_critical 16 | } 17 | 18 | notify_no_data = false 19 | renotify_interval = 0 20 | 21 | notify_audit = false 22 | timeout_h = 0 23 | include_tags = true 24 | 25 | tags = ["service:${each.key}", "env:${each.value.environment}"] 26 | } 27 | 28 | resource "datadog_monitor" "apm_service_high_avg_latency" { 29 | for_each = var.services 30 | name = "Service ${each.key} has a high average latency on ${each.value.environment}" 31 | type = "query alert" 32 | message = "Service ${each.key} has a high average latency. @pagerduty-${each.key}" 33 | escalation_message = "Service ${each.key} has a high average latency!! @pagerduty-${each.key}" 34 | # Average latency = summed request duration / summed request hits; both metrics must use the service's own framework. 35 | query = "avg(last_10m):(sum:trace.${each.value.framework}.request.duration{env:${each.value.environment},service:${each.key}} / sum:trace.${each.value.framework}.request.hits{env:${each.value.environment},service:${each.key}}) > ${each.value.high_avg_latency_critical}" 36 | 37 | monitor_thresholds { 38 | warning = each.value.high_avg_latency_warning 39 | critical = each.value.high_avg_latency_critical 40 | } 41 | 42 | notify_no_data = false 43 | renotify_interval = 0 44 | 45 | notify_audit = false 46 | timeout_h = 0 47 | include_tags = true 48 | 49 | tags = ["service:${each.key}", "env:${each.value.environment}"] 50 | } 51 | 52 | resource "datadog_monitor" "apm_service_high_p90_latency" { 53 | for_each = var.services 54 | name = "Service ${each.key} has a high p90 latency on ${each.value.environment}" 55 | type = "query alert" 56 | message = "Service ${each.key} has a high p90 latency. 
@pagerduty-${each.key}" 57 | escalation_message = "Service ${each.key} has a high p90 latency!! @pagerduty-${each.key}" 58 | 59 | query = "avg(last_10m):trace.${each.value.framework}.request.duration.by.service.90p{service:${each.key},env:${each.value.environment}} > ${each.value.high_p90_latency_critical}" 60 | 61 | monitor_thresholds { 62 | warning = each.value.high_p90_latency_warning 63 | critical = each.value.high_p90_latency_critical 64 | } 65 | 66 | notify_no_data = false 67 | renotify_interval = 0 68 | 69 | notify_audit = false 70 | timeout_h = 0 71 | include_tags = true 72 | 73 | tags = ["service:${each.key}", "env:${each.value.environment}"] 74 | } 75 | -------------------------------------------------------------------------------- /datadog/policy/dashboard.sentinel: -------------------------------------------------------------------------------- 1 | import "tfplan/v2" as tfplan 2 | 3 | resources = values(tfplan.planned_values.resources) 4 | 5 | dashboards = filter resources as _, v { v.type is "datadog_dashboard" } 6 | 7 | all_dashboards_are_read_only = rule { 8 | all dashboards as dashboard { 9 | dashboard.values.is_read_only 10 | } 11 | } 12 | 13 | main = rule { 14 | all_dashboards_are_read_only 15 | } -------------------------------------------------------------------------------- /datadog/policy/monitor.sentinel: -------------------------------------------------------------------------------- 1 | import "tfplan/v2" as tfplan 2 | 3 | resources = values(tfplan.planned_values.resources) 4 | 5 | monitors = filter resources as _, v { v.type is "datadog_monitor" } 6 | 7 | all_monitors_notify_pagerduty = rule { 8 | all monitors as monitor { 9 | "@pagerduty" in monitor.values.message 10 | } 11 | } 12 | 13 | main = rule { 14 | all_monitors_notify_pagerduty 15 | } -------------------------------------------------------------------------------- /datadog/policy/sentinel.hcl: -------------------------------------------------------------------------------- 1 | # 
Copyright (c) HashiCorp, Inc. 2 | # SPDX-License-Identifier: MPL-2.0 3 | 4 | policy "dashboard" { 5 | enforcement_level = "advisory" 6 | } 7 | 8 | policy "monitor" { 9 | enforcement_level = "advisory" 10 | } -------------------------------------------------------------------------------- /datadog/policy/test/dashboard/fail.json: -------------------------------------------------------------------------------- 1 | { 2 | "mock": { 3 | "tfplan/v2": "../testdata/mock-tfplan-fail.sentinel" 4 | }, 5 | "test": { 6 | "main": false 7 | } 8 | } -------------------------------------------------------------------------------- /datadog/policy/test/dashboard/good.json: -------------------------------------------------------------------------------- 1 | { 2 | "mock": { 3 | "tfplan/v2": "../testdata/mock-tfplan-v2.sentinel" 4 | } 5 | } -------------------------------------------------------------------------------- /datadog/policy/test/monitor/fail.json: -------------------------------------------------------------------------------- 1 | { 2 | "mock": { 3 | "tfplan/v2": "../testdata/mock-tfplan-fail.sentinel" 4 | }, 5 | "test": { 6 | "main": false 7 | } 8 | } -------------------------------------------------------------------------------- /datadog/policy/test/monitor/good.json: -------------------------------------------------------------------------------- 1 | { 2 | "mock": { 3 | "tfplan/v2": "../testdata/mock-tfplan-v2.sentinel" 4 | } 5 | } -------------------------------------------------------------------------------- /datadog/setup/compute_host.tf: -------------------------------------------------------------------------------- 1 | # Copyright (c) HashiCorp, Inc. 2 | # SPDX-License-Identifier: MPL-2.0 3 | 4 | data "google_compute_network" "default" { 5 | name = "default" 6 | } 7 | 8 | resource "google_compute_firewall" "ecommerce" { 9 | count = var.enable_firewall_rule ? 
1 : 0 10 | 11 | name = "allow-ecommerce" 12 | network = data.google_compute_network.default.name 13 | 14 | allow { 15 | protocol = "tcp" 16 | ports = ["3000"] 17 | } 18 | 19 | source_ranges = ["0.0.0.0/0"] 20 | } 21 | 22 | locals { 23 | docker_compose = var.fix_frontend ? "fixed" : "broken" 24 | } 25 | 26 | resource "google_compute_address" "ecommerce" { 27 | name = "datadog-webinar-ecommerce" 28 | } 29 | 30 | resource "google_compute_instance" "ecommerce" { 31 | name = "datadog-webinar-ecommerce" 32 | machine_type = "n1-standard-2" 33 | zone = var.zone 34 | 35 | tags = ["datadog", "webinar"] 36 | 37 | boot_disk { 38 | initialize_params { 39 | image = "datadog-ecommerce" 40 | } 41 | } 42 | 43 | network_interface { 44 | network = data.google_compute_network.default.name 45 | 46 | access_config { 47 | nat_ip = google_compute_address.ecommerce.address 48 | } 49 | } 50 | 51 | metadata = { 52 | partner = "datadog" 53 | purpose = "webinar" 54 | } 55 | 56 | metadata_startup_script = </dev/null; do echo waiting ...; sleep 1; done' 13 | 14 | # Install packages. 
15 | export DEBIAN_FRONTEND=noninteractive 16 | 17 | apt-get -y install apt-transport-https ca-certificates curl software-properties-common 18 | 19 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - 20 | 21 | add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" 22 | 23 | apt-get update 24 | 25 | apt-get -y install docker-ce 26 | 27 | curl -L "https://github.com/docker/compose/releases/download/1.25.4/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose 28 | chmod +x /usr/local/bin/docker-compose 29 | 30 | apt-get -y install git wget 31 | 32 | git clone https://github.com/DataDog/ecommerce-workshop.git 33 | 34 | curl -L https://github.com/buger/goreplay/releases/download/v1.0.0/gor_1.0.0_x64.tar.gz -o gor_1.0.0_x64.tar.gz 35 | tar -xf gor_1.0.0_x64.tar.gz 36 | mv gor /usr/local/bin/gor 37 | rm -rf gor_1.0.0_x64.tar.gz 38 | 39 | systemctl disable gor 40 | -------------------------------------------------------------------------------- /datadog/setup/packer/build.pkr.hcl: -------------------------------------------------------------------------------- 1 | # Copyright (c) HashiCorp, Inc. 2 | # SPDX-License-Identifier: MPL-2.0 3 | 4 | variable "project_id" { 5 | type = string 6 | default = "" 7 | } 8 | 9 | variable "zone" { 10 | type = string 11 | default = "" 12 | } 13 | 14 | # "timestamp" template function replacement 15 | locals { timestamp = regex_replace(timestamp(), "[- TZ:]", "") } 16 | 17 | # source blocks are generated from your builders; a source can be referenced in 18 | # build blocks. A build block runs provisioner and post-processors on a 19 | # source. 
Read the documentation for source blocks here: 20 | # https://www.packer.io/docs/templates/hcl_templates/blocks/source 21 | source "googlecompute" "ubuntu" { 22 | image_labels = { 23 | created = "${local.timestamp}" 24 | } 25 | image_name = "datadog-ecommerce" 26 | project_id = "${var.project_id}" 27 | source_image_family = "ubuntu-1804-lts" 28 | ssh_username = "root" 29 | zone = "${var.zone}" 30 | } 31 | 32 | # a build block invokes sources and runs provisioning steps on them. The 33 | # documentation for build blocks can be found here: 34 | # https://www.packer.io/docs/templates/hcl_templates/blocks/build 35 | build { 36 | sources = ["source.googlecompute.ubuntu"] 37 | 38 | provisioner "file" { 39 | destination = "/tmp/google-startup-scripts.service" 40 | source = "resources/google-startup-scripts.service" 41 | } 42 | 43 | provisioner "file" { 44 | destination = "/lib/systemd/system/gor.service" 45 | source = "resources/gor.service" 46 | } 47 | 48 | provisioner "shell" { 49 | script = "bootstrap.sh" 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /datadog/setup/packer/resources/google-startup-scripts.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Google Compute Engine Startup Scripts 3 | After=local-fs.target network-online.target network.target rsyslog.service 4 | After=google-instance-setup.service google-network-setup.service 5 | Wants=local-fs.target network-online.target network.target 6 | 7 | [Service] 8 | ExecStart=/usr/bin/google_metadata_script_runner --script-type startup 9 | KillMode=process 10 | Type=simple 11 | 12 | [Install] 13 | WantedBy=multi-user.target 14 | -------------------------------------------------------------------------------- /datadog/setup/packer/resources/gor.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=GOR for Datadog 3 | 
After=local-fs.target network-online.target network.target rsyslog.service 4 | After=google-instance-setup.service google-network-setup.service 5 | Wants=local-fs.target network-online.target network.target 6 | 7 | [Service] 8 | WorkingDirectory=/root/ecommerce-workshop 9 | ExecStart=/usr/local/bin/gor --input-file-loop --input-file requests_0.gor --output-http http://localhost:3000 10 | KillMode=process 11 | Type=simple 12 | 13 | [Install] 14 | WantedBy=multi-user.target 15 | -------------------------------------------------------------------------------- /datadog/setup/variables.tf: -------------------------------------------------------------------------------- 1 | # Copyright (c) HashiCorp, Inc. 2 | # SPDX-License-Identifier: MPL-2.0 3 | 4 | variable "datadog_api_key" { 5 | type = string 6 | sensitive = true 7 | description = "Datadog API key" 8 | } 9 | 10 | variable "project_id" { 11 | type = string 12 | description = "GCP default project" 13 | } 14 | 15 | variable "zone" { 16 | type = string 17 | description = "GCP Zone to deploy" 18 | default = "us-east1-b" 19 | } 20 | 21 | variable "enable_firewall_rule" { 22 | type = bool 23 | description = "Creates firewall rule to allow public traffic" 24 | default = true 25 | } 26 | 27 | variable "fix_frontend" { 28 | type = bool 29 | description = "Toggle to fix frontend application" 30 | default = true 31 | } 32 | -------------------------------------------------------------------------------- /datadog/store-frontend-anomaly-p90-latency.tf: -------------------------------------------------------------------------------- 1 | # Copyright (c) HashiCorp, Inc. 
2 | # SPDX-License-Identifier: MPL-2.0 3 | 4 | locals { 5 | critical = 0.75 6 | warning = 0.66 7 | critical_recovery = 0 8 | } 9 | # Anomaly monitor on the store-frontend p90 latency; thresholds come from the locals above. 10 | resource "datadog_monitor" "apm_store_frontend_anomalous_p90_latency" { 11 | name = "Service store-frontend has an anomalous p90 latency on ruby-shop" 12 | type = "query alert" 13 | message = "Service store-frontend has an anomalous p90 latency on ruby-shop." 14 | escalation_message = "Service store-frontend has an anomalous p90 latency on ruby-shop!!" 15 | 16 | query = "avg(last_1h):anomalies(avg:trace.rack.request.duration.by.service.90p{service:store-frontend,env:ruby-shop}, 'basic', 2, direction='above', interval=20) >= ${local.critical}" 17 | 18 | monitor_thresholds { 19 | critical = local.critical 20 | warning = local.warning 21 | critical_recovery = local.critical_recovery 22 | } 23 | 24 | monitor_threshold_windows { 25 | trigger_window = "last_5m" 26 | recovery_window = "last_10m" 27 | } 28 | 29 | notify_no_data = false 30 | renotify_interval = 0 31 | 32 | notify_audit = false 33 | timeout_h = 0 34 | include_tags = true 35 | 36 | tags = ["service:store-frontend", "env:ruby-shop"] 37 | } 38 | -------------------------------------------------------------------------------- /datadog/terraform.auto.tfvars: -------------------------------------------------------------------------------- 1 | # Copyright (c) HashiCorp, Inc. 
2 | # SPDX-License-Identifier: MPL-2.0
3 |
4 | # Values for the "application" and "services" variables declared in variables.tf.
5 | application = "eCommerce"
6 |
7 | # One entry per monitored service; each entry must supply every attribute of
8 | # the object type declared for var.services.
9 | # NOTE(review): the pd_service_key values look like placeholders - confirm, and
10 | # keep real integration keys out of version control.
11 | services = {
12 |   store-frontend = {
13 |     pd_service_key            = "54321098765432109876"
14 |     environment               = "development"
15 |     framework                 = "rack"
16 |     high_error_rate_critical  = 10
17 |     high_error_rate_warning   = 8
18 |     high_avg_latency_critical = 3
19 |     high_avg_latency_warning  = 1
20 |     high_p90_latency_critical = 6
21 |     high_p90_latency_warning  = 4
22 |   }
23 |   advertisements-service = {
24 |     pd_service_key            = "54321098765432109877"
25 |     environment               = "development"
26 |     framework                 = "flask"
27 |     high_error_rate_critical  = 10
28 |     high_error_rate_warning   = 8
29 |     high_avg_latency_critical = 3
30 |     high_avg_latency_warning  = 2
31 |     high_p90_latency_critical = 6
32 |     high_p90_latency_warning  = 4
33 |   }
34 |   discounts-service = {
35 |     pd_service_key            = "54321098765432109878"
36 |     environment               = "development"
37 |     framework                 = "flask"
38 |     high_error_rate_critical  = 10
39 |     high_error_rate_warning   = 8
40 |     high_avg_latency_critical = 4
41 |     high_avg_latency_warning  = 3
42 |     high_p90_latency_critical = 6
43 |     high_p90_latency_warning  = 4
44 |   }
45 | }
46 |
--------------------------------------------------------------------------------
/datadog/variables.tf:
--------------------------------------------------------------------------------
1 | # Copyright (c) HashiCorp, Inc.
2 | # SPDX-License-Identifier: MPL-2.0
3 |
4 | # Datadog API key. Required (no default) and marked sensitive.
5 | variable "datadog_api_key" {
6 |   type        = string
7 |   sensitive   = true
8 |   description = "Datadog API key"
9 | }
10 |
11 | # Datadog application key. Required (no default) and marked sensitive.
12 | variable "datadog_app_key" {
13 |   type        = string
14 |   sensitive   = true
15 |   description = "Datadog APP key"
16 | }
17 |
18 | # Datadog site URL. Restricted by the validation block to the listed sites;
19 | # any other value is rejected with the error message below.
20 | variable "datadog_api_url" {
21 |   type        = string
22 |   description = "Datadog API URL. See https://docs.datadoghq.com/getting_started/site/ for all available regions."
23 |   default     = "https://app.datadoghq.com"
24 |
25 |   validation {
26 |     condition = contains([
27 |       "https://app.datadoghq.com",
28 |       "https://us3.datadoghq.com",
29 |       "https://us5.datadoghq.com",
30 |       "https://app.datadoghq.eu",
31 |       "https://app.ddog-gov.com",
32 |       "https://ap1.datadoghq.com",
33 |     ], var.datadog_api_url)
34 |     # Names the variable that actually failed (the API URL, not the APP key).
35 |     error_message = "The configured Datadog API URL is invalid. It must be one of the supported Datadog site URLs."
36 |   }
37 | }
38 |
39 | # Application name. Required (no default).
40 | variable "application" {
41 |   type        = string
42 |   description = "Name of application"
43 | }
44 |
45 | # Per-service settings keyed by service name: a service key, environment,
46 | # framework, and warning/critical thresholds for error rate, average latency
47 | # and p90 latency.
48 | # NOTE(review): "pd" presumably stands for PagerDuty - confirm against the monitors that use it.
49 | variable "services" {
50 |   type = map(object({
51 |     pd_service_key            = string,
52 |     environment               = string,
53 |     framework                 = string,
54 |     high_error_rate_warning   = number,
55 |     high_error_rate_critical  = number,
56 |     high_avg_latency_warning  = number,
57 |     high_avg_latency_critical = number,
58 |     high_p90_latency_warning  = number,
59 |     high_p90_latency_critical = number,
60 |   }))
61 |   description = "Services and query alert thresholds"
62 | }
63 |
--------------------------------------------------------------------------------