├── .circleci └── config.yml ├── .github └── workflows │ └── clabot.yml ├── Dockerfile ├── LICENSE ├── Makefile ├── NURD.png ├── README.md ├── cluster.go ├── cluster_test.go ├── config.go ├── config_test.go ├── config_test.json ├── db.go ├── db_test.go ├── docker-compose.yml ├── etc └── nurd │ └── config.json ├── go.mod ├── go.sum ├── grafana.json ├── main.go ├── main_test.go └── run_tests.sh /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | jobs: 3 | build: 4 | machine: true 5 | working_directory: ~/go/src/github.com/Roblox/rblx_nurd 6 | steps: 7 | - checkout 8 | - run: make test -------------------------------------------------------------------------------- /.github/workflows/clabot.yml: -------------------------------------------------------------------------------- 1 | name: "CLA Signature Bot" 2 | on: 3 | issue_comment: 4 | types: [created] 5 | pull_request: 6 | types: [opened,closed,synchronize] 7 | 8 | jobs: 9 | call-clabot-workflow: 10 | uses: Roblox/cla-signature-bot/.github/workflows/clabot-workflow.yml@master 11 | with: 12 | whitelist: "shishir-a412ed,austinmacrbx,ShuangW" 13 | use-remote-repo: true 14 | remote-repo-name: "roblox/cla-bot-store" 15 | secrets: inherit 16 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:latest 2 | 3 | LABEL maintainer="Austin Mac " 4 | 5 | ENV CONNECTION_STRING="Server=mssql;Database=master;User Id=sa;Password=yourStrong(!)Password;" 6 | 7 | RUN mkdir -p /go/src/nurd 8 | 9 | WORKDIR /go/src/nurd 10 | 11 | COPY . . 
12 | 13 | RUN apt-get update 14 | RUN apt-get install -y vim 15 | RUN go mod download 16 | 17 | RUN make install 18 | 19 | EXPOSE 8080 20 | 21 | CMD ["nurd"] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | Version 2.0, January 2004 176 | http://www.apache.org/licenses/ 177 | 178 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 179 | 180 | 1. Definitions. 181 | 182 | "License" shall mean the terms and conditions for use, reproduction, 183 | and distribution as defined by Sections 1 through 9 of this document. 184 | 185 | "Licensor" shall mean the copyright owner or entity authorized by 186 | the copyright owner that is granting the License. 
187 | 188 | "Legal Entity" shall mean the union of the acting entity and all 189 | other entities that control, are controlled by, or are under common 190 | control with that entity. For the purposes of this definition, 191 | "control" means (i) the power, direct or indirect, to cause the 192 | direction or management of such entity, whether by contract or 193 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 194 | outstanding shares, or (iii) beneficial ownership of such entity. 195 | 196 | "You" (or "Your") shall mean an individual or Legal Entity 197 | exercising permissions granted by this License. 198 | 199 | "Source" form shall mean the preferred form for making modifications, 200 | including but not limited to software source code, documentation 201 | source, and configuration files. 202 | 203 | "Object" form shall mean any form resulting from mechanical 204 | transformation or translation of a Source form, including but 205 | not limited to compiled object code, generated documentation, 206 | and conversions to other media types. 207 | 208 | "Work" shall mean the work of authorship, whether in Source or 209 | Object form, made available under the License, as indicated by a 210 | copyright notice that is included in or attached to the work 211 | (an example is provided in the Appendix below). 212 | 213 | "Derivative Works" shall mean any work, whether in Source or Object 214 | form, that is based on (or derived from) the Work and for which the 215 | editorial revisions, annotations, elaborations, or other modifications 216 | represent, as a whole, an original work of authorship. For the purposes 217 | of this License, Derivative Works shall not include works that remain 218 | separable from, or merely link (or bind by name) to the interfaces of, 219 | the Work and Derivative Works thereof. 
220 | 221 | "Contribution" shall mean any work of authorship, including 222 | the original version of the Work and any modifications or additions 223 | to that Work or Derivative Works thereof, that is intentionally 224 | submitted to Licensor for inclusion in the Work by the copyright owner 225 | or by an individual or Legal Entity authorized to submit on behalf of 226 | the copyright owner. For the purposes of this definition, "submitted" 227 | means any form of electronic, verbal, or written communication sent 228 | to the Licensor or its representatives, including but not limited to 229 | communication on electronic mailing lists, source code control systems, 230 | and issue tracking systems that are managed by, or on behalf of, the 231 | Licensor for the purpose of discussing and improving the Work, but 232 | excluding communication that is conspicuously marked or otherwise 233 | designated in writing by the copyright owner as "Not a Contribution." 234 | 235 | "Contributor" shall mean Licensor and any individual or Legal Entity 236 | on behalf of whom a Contribution has been received by Licensor and 237 | subsequently incorporated within the Work. 238 | 239 | 2. Grant of Copyright License. Subject to the terms and conditions of 240 | this License, each Contributor hereby grants to You a perpetual, 241 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 242 | copyright license to reproduce, prepare Derivative Works of, 243 | publicly display, publicly perform, sublicense, and distribute the 244 | Work and such Derivative Works in Source or Object form. 245 | 246 | 3. Grant of Patent License. 
Subject to the terms and conditions of 247 | this License, each Contributor hereby grants to You a perpetual, 248 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 249 | (except as stated in this section) patent license to make, have made, 250 | use, offer to sell, sell, import, and otherwise transfer the Work, 251 | where such license applies only to those patent claims licensable 252 | by such Contributor that are necessarily infringed by their 253 | Contribution(s) alone or by combination of their Contribution(s) 254 | with the Work to which such Contribution(s) was submitted. If You 255 | institute patent litigation against any entity (including a 256 | cross-claim or counterclaim in a lawsuit) alleging that the Work 257 | or a Contribution incorporated within the Work constitutes direct 258 | or contributory patent infringement, then any patent licenses 259 | granted to You under this License for that Work shall terminate 260 | as of the date such litigation is filed. 261 | 262 | 4. Redistribution. 
You may reproduce and distribute copies of the 263 | Work or Derivative Works thereof in any medium, with or without 264 | modifications, and in Source or Object form, provided that You 265 | meet the following conditions: 266 | 267 | (a) You must give any other recipients of the Work or 268 | Derivative Works a copy of this License; and 269 | 270 | (b) You must cause any modified files to carry prominent notices 271 | stating that You changed the files; and 272 | 273 | (c) You must retain, in the Source form of any Derivative Works 274 | that You distribute, all copyright, patent, trademark, and 275 | attribution notices from the Source form of the Work, 276 | excluding those notices that do not pertain to any part of 277 | the Derivative Works; and 278 | 279 | (d) If the Work includes a "NOTICE" text file as part of its 280 | distribution, then any Derivative Works that You distribute must 281 | include a readable copy of the attribution notices contained 282 | within such NOTICE file, excluding those notices that do not 283 | pertain to any part of the Derivative Works, in at least one 284 | of the following places: within a NOTICE text file distributed 285 | as part of the Derivative Works; within the Source form or 286 | documentation, if provided along with the Derivative Works; or, 287 | within a display generated by the Derivative Works, if and 288 | wherever such third-party notices normally appear. The contents 289 | of the NOTICE file are for informational purposes only and 290 | do not modify the License. You may add Your own attribution 291 | notices within Derivative Works that You distribute, alongside 292 | or as an addendum to the NOTICE text from the Work, provided 293 | that such additional attribution notices cannot be construed 294 | as modifying the License. 
295 | 296 | You may add Your own copyright statement to Your modifications and 297 | may provide additional or different license terms and conditions 298 | for use, reproduction, or distribution of Your modifications, or 299 | for any such Derivative Works as a whole, provided Your use, 300 | reproduction, and distribution of the Work otherwise complies with 301 | the conditions stated in this License. 302 | 303 | 5. Submission of Contributions. Unless You explicitly state otherwise, 304 | any Contribution intentionally submitted for inclusion in the Work 305 | by You to the Licensor shall be under the terms and conditions of 306 | this License, without any additional terms or conditions. 307 | Notwithstanding the above, nothing herein shall supersede or modify 308 | the terms of any separate license agreement you may have executed 309 | with Licensor regarding such Contributions. 310 | 311 | 6. Trademarks. This License does not grant permission to use the trade 312 | names, trademarks, service marks, or product names of the Licensor, 313 | except as required for reasonable and customary use in describing the 314 | origin of the Work and reproducing the content of the NOTICE file. 315 | 316 | 7. Disclaimer of Warranty. Unless required by applicable law or 317 | agreed to in writing, Licensor provides the Work (and each 318 | Contributor provides its Contributions) on an "AS IS" BASIS, 319 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 320 | implied, including, without limitation, any warranties or conditions 321 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 322 | PARTICULAR PURPOSE. You are solely responsible for determining the 323 | appropriateness of using or redistributing the Work and assume any 324 | risks associated with Your exercise of permissions under this License. 325 | 326 | 8. Limitation of Liability. 
In no event and under no legal theory, 327 | whether in tort (including negligence), contract, or otherwise, 328 | unless required by applicable law (such as deliberate and grossly 329 | negligent acts) or agreed to in writing, shall any Contributor be 330 | liable to You for damages, including any direct, indirect, special, 331 | incidental, or consequential damages of any character arising as a 332 | result of this License or out of the use or inability to use the 333 | Work (including but not limited to damages for loss of goodwill, 334 | work stoppage, computer failure or malfunction, or any and all 335 | other commercial damages or losses), even if such Contributor 336 | has been advised of the possibility of such damages. 337 | 338 | 9. Accepting Warranty or Additional Liability. While redistributing 339 | the Work or Derivative Works thereof, You may choose to offer, 340 | and charge a fee for, acceptance of support, warranty, indemnity, 341 | or other liability obligations and/or rights consistent with this 342 | License. However, in accepting such obligations, You may act only 343 | on Your own behalf and on Your sole responsibility, not on behalf 344 | of any other Contributor, and only if You agree to indemnify, 345 | defend, and hold each Contributor harmless for any liability 346 | incurred by, or claims asserted against, such Contributor by reason 347 | of your accepting any such warranty or additional liability. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ifndef $(GOLANG) 2 | GOLANG=$(shell which go) 3 | export GOLANG 4 | endif 5 | BINARY ?= nurd 6 | BINDIR ?= $(DESTDIR)/usr/local/bin 7 | SYSCONFDIR ?= $(DESTDIR)/etc/nurd 8 | 9 | build: 10 | $(GOLANG) build -o $(BINARY) cluster.go config.go db.go main.go 11 | 12 | install: 13 | mkdir -p $(SYSCONFDIR) 14 | 15 | if [ ! 
-f "$(SYSCONFDIR)/config.json" ]; then \ 16 | install -m 644 etc/nurd/config.json $(SYSCONFDIR)/config.json; \ 17 | fi 18 | 19 | 20 | $(GOLANG) build -o $(BINARY) cluster.go config.go db.go main.go 21 | install -m 755 $(BINARY) $(BINDIR) 22 | 23 | test: 24 | ./run_tests.sh 25 | 26 | clean: 27 | rm -f $(BINARY) -------------------------------------------------------------------------------- /NURD.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Roblox/rblx_nurd/9e6723757f0cc1a49d3a41af911d28db477238c3/NURD.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # :NURD: Nomad Usage Resource Dashboard (NURD) 2 | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/Roblox/rblx_nurd/blob/master/LICENSE) 3 | [![CircleCI](https://circleci-github.rcs.simulpong.com/gh/Roblox/rblx_nurd/tree/master.svg?style=shield&circle-token=638e19f15c88268832a4f2a7bfee4f081df8d65d)](https://circleci-github.rcs.simulpong.com/gh/Roblox/rblx_nurd/tree/master) 4 | 5 | NURD is a dashboard which aggregates and displays CPU and memory resource usage for each job running through specified Hashicorp Nomad servers. The dashboard also displays resources requested by each job, which can be used with resource usage to calculate waste and aid capacity planning. 
6 | 7 | ## Prerequisites 8 | * Docker Version: >=19.03.8 9 | * **Required:** At least one active Nomad server 10 | * **Optional:** A VictoriaMetrics server containing allocation level resource statistics 11 | 12 | ## Setup 13 | The user can configure NURD to connect to a containerized SQL Server instance with [docker-compose.yml](https://github.com/Roblox/rblx_nurd/blob/master/docker-compose.yml) or point to another SQL Server instance with [Dockerfile](https://github.com/Roblox/rblx_nurd/blob/master/Dockerfile). See options below for details. By default, NURD collects data every 15 minutes. To modify the frequency, edit [Dockerfile](https://github.com/Roblox/rblx_nurd/blob/master/Dockerfile#L21) with the following formatting style before startup:<br
14 | `CMD ["nurd", "--aggregate-frequency", "15m"]` 15 | 16 | ### Containerized SQL Server Instance 17 | 1. `$ git clone git@github.com:Roblox/nurd.git` 18 | 2. **Configuration**
19 | * **[docker-compose.yml](https://github.com/Roblox/rblx_nurd/blob/master/docker-compose.yml)**
20 | This file contains the necessary login information to create a SQL Server instance. 21 | * **[etc/nurd/config.json](https://github.com/Roblox/rblx_nurd/blob/master/etc/nurd/config.json)**
22 | This file contains the configuration information for the Nomad server(s) and the VictoriaMetrics server. The default URLs and ports must be overwritten. If no VictoriaMetrics server exists, the VictoriaMetrics stanza must be removed. Note: any number of servers can be added to the `Nomad` array. 23 | 3. `$ docker-compose build` 24 | 4. `$ docker-compose up -d` 25 | 5. **Grafana Dashboard**<br
26 | a. Navigate to [localhost:3000](http://localhost:3000)
27 | b. Login with 28 | 29 | username: admin 30 | password: admin 31 | c. Change the password
32 | d. Navigate to [localhost:3000/datasources/new](http://localhost:3000/datasources/new) and select `Microsoft SQL Server`
33 | e. Input the following connection data 34 | 35 | Host: mssql 36 | Database: master 37 | User: sa 38 | Password: yourStrong(!)Password 39 | f. Select `Save & Test`
40 | g. Navigate to [localhost:3000/dashboard/import](http://localhost:3000/dashboard/import) and select `Upload JSON file`
41 | h. Upload [grafana.json](https://github.com/Roblox/rblx_nurd/blob/master/grafana.json) and select `import`
42 | 43 | 44 | ### Another SQL Server Instance 45 | 1. `$ git clone git@github.com:Roblox/nurd.git` 46 | 2. **Configuration**
47 | * **[Dockerfile](https://github.com/Roblox/rblx_nurd/blob/master/Dockerfile)**
48 | This file contains the necessary login information to connect to a separate SQL Server instance. It is necessary to configure the [connection string](https://github.com/Roblox/rblx_nurd/blob/master/Dockerfile#L5) environment variable. 49 | * **[etc/nurd/config.json](https://github.com/Roblox/rblx_nurd/blob/master/etc/nurd/config.json)**
50 | This file contains the configuration information for the Nomad server(s) and the VictoriaMetrics server. The default URLs and ports must be overwritten. If no VictoriaMetrics server exists, the VictoriaMetrics stanza must be removed. Note: any number of servers can be added to the `Nomad` array. 51 | 3. `$ cd nurd` 52 | 4. `$ docker build -t nurd .` 53 | 5. `$ docker run -dp 8080:8080 nurd` 54 | 55 | ## Exit 56 | 1. `$ docker-compose down` __or__ `$ docker stop <container_id>` 57 | 58 | ## Usage 59 | ### Grafana Dashboard 60 | From [localhost:3000](http://localhost:3000), or an alternative NURD host address, the user can access the Grafana dashboard. The following parameters are available to query through the dropdown menu.<br
61 | **Note:** No time series will display until NURD has inserted data into the database.
62 | * `JobID`: ID of a job 63 | * `Metrics` 64 | * `UsedMemory`: the memory currently in use by the selected jobs in MiB 65 | * `RequestedMemory`: the memory requested by the selected jobs in MiB 66 | * `UsedCPU`: the CPU currently in use by the selected jobs in MHz 67 | * `RequestedCPU`: the CPU requested by the selected jobs in MHz 68 | * `Total`: toggle to aggregate metrics over the current selection 69 | 70 | ### API 71 | From [localhost:8080](http://localhost:8080), or an alternative NURD host address, the user can access several endpoints: 72 | 73 | #### Home Page 74 | * **`/`**
75 | The home page for NURD. 76 | * **Sample Request**
77 | `http://localhost:8080/` 78 | 79 | #### List All Jobs 80 | * **`/v1/jobs`**
81 | Lists all job data in NURD. 82 | * **Sample Request**
83 | `http://localhost:8080/v1/jobs` 84 | 85 | #### List Specified Job(s) 86 | * **`/v1/job/:job_id`**
87 | Lists the latest recorded job data for the specified job_id.
88 | **Optional Parameters**
89 | `begin`: Specifies the earliest datetime from which to query.
90 | `end`: Specifies the latest datetime from which to query.
91 | * **Sample Request**
92 | * `http://localhost:8080/v1/job/sample_job_id`
93 | * `http://localhost:8080/v1/job/sample_job_id?begin=2020-07-07%2017:34:53&end=2020-07-08%2017:42:19` 94 | * **Sample Response**
95 | ``` 96 | [ 97 | { 98 | "JobID":"sample-job", 99 | "Name":"sample-job", 100 | "UTicks":7318.394561709347, 101 | "RCPU":1500, 102 | "URSS":21.542070543374642, 103 | "UCache":0.4997979027645376, 104 | "RMemoryMB":768, 105 | "RdiskMB":900, 106 | "RIOPS":0, 107 | "Namespace":"default", 108 | "DataCenters":"DC0,DC1", 109 | "CurrentTime":"", 110 | "InsertTime":"2020-07-07T11:49:34Z" 111 | } 112 | ] 113 | ``` 114 | ### Reload Config File 115 | NURD supports hot reloading to point NURD to different Nomad clusters and/or a VictoriaMetrics server. 116 | 117 | 1. `Exec` into the container running NURD
118 | `$ docker exec -it nurd /bin/bash` 119 | 2. Edit the contents of [/etc/nurd/config.json](https://github.com/Roblox/rblx_nurd/blob/master/etc/nurd/config.json)
120 | `$ vim /etc/nurd/config.json` 121 | 3. Exit the container
122 | `$ exit` 123 | 4. Send a SIGHUP signal to the container running NURD.<br
124 | `$ docker kill --signal=HUP nurd` 125 | 126 | Once SIGHUP has been sent to NURD, NURD will complete resource aggregation of the addresses in the previous cycle before aggregating on the new addresses. 127 | -------------------------------------------------------------------------------- /cluster.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Roblox Corporation 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 
16 | */ 17 | 18 | package main 19 | 20 | import ( 21 | "encoding/json" 22 | "fmt" 23 | "net/http" 24 | "strconv" 25 | "time" 26 | 27 | log "github.com/sirupsen/logrus" 28 | ) 29 | 30 | type JobData struct { 31 | JobID string 32 | Name string 33 | UTicks float64 34 | RCPU float64 35 | URSS float64 36 | UCache float64 37 | RMemoryMB float64 38 | RdiskMB float64 39 | RIOPS float64 40 | Namespace string 41 | DataCenters string 42 | CurrentTime string 43 | } 44 | 45 | type RawAlloc struct { 46 | Status string 47 | Data DataMap 48 | } 49 | 50 | type DataMap struct { 51 | ResultType string 52 | Result []MetVal 53 | } 54 | 55 | type MetVal struct { 56 | Metric MetricType 57 | Value []interface{} 58 | } 59 | 60 | type MetricType struct { 61 | Alloc_id string 62 | } 63 | 64 | type NomadAlloc struct { 65 | ResourceUsage MemCPU 66 | } 67 | 68 | type MemCPU struct { 69 | MemoryStats Memory 70 | CpuStats CPU 71 | } 72 | 73 | type Memory struct { 74 | RSS float64 75 | Cache float64 76 | Swap float64 77 | Usage float64 78 | MaxUsage float64 79 | KernelUsage float64 80 | KernelMaxUsage float64 81 | } 82 | 83 | type CPU struct { 84 | TotalTicks float64 85 | } 86 | 87 | type JobSpec struct { 88 | TaskGroups []TaskGroup 89 | } 90 | 91 | type TaskGroup struct { 92 | Name string 93 | Count float64 94 | Tasks []Task 95 | EphemeralDisk Disk 96 | } 97 | 98 | type Task struct { 99 | Resources Resource 100 | } 101 | 102 | type Disk struct { 103 | SizeMB float64 104 | } 105 | 106 | type Resource struct { 107 | CPU float64 108 | MemoryMB float64 109 | DiskMB float64 110 | IOPS float64 111 | } 112 | 113 | type JobDesc struct { 114 | ID string 115 | Name string 116 | Datacenters []string 117 | Type string 118 | JobSummary JobSum 119 | } 120 | 121 | type JobSum struct { 122 | Namespace string 123 | } 124 | 125 | type Alloc struct { 126 | ID string 127 | TaskGroup string 128 | } 129 | 130 | func getVMAllocs(metricsAddress, query string) map[string]struct{} { 131 | m := make(map[string]struct{}) 
132 | 133 | log.SetReportCaller(true) 134 | 135 | api := "http://" + metricsAddress + "/api/v1/query?query=" + query 136 | response, err := http.Get(api) 137 | if err != nil { 138 | log.Error(fmt.Sprintf("Error in getting API response: %v", err)) 139 | return nil 140 | } 141 | defer response.Body.Close() 142 | 143 | var allocs RawAlloc 144 | err = json.NewDecoder(response.Body).Decode(&allocs) 145 | if err != nil { 146 | log.Error(fmt.Sprintf("Error in decoding JSON: %v", err)) 147 | return nil 148 | } 149 | 150 | var empty struct{} 151 | for _, val := range allocs.Data.Result { 152 | m[val.Metric.Alloc_id] = empty 153 | } 154 | 155 | return m 156 | } 157 | 158 | func getNomadAllocs(clusterAddress, jobID string) map[string]struct{} { 159 | m := make(map[string]struct{}) 160 | 161 | log.SetReportCaller(true) 162 | 163 | api := "http://" + clusterAddress + "/v1/job/" + jobID + "/allocations" 164 | response, err := http.Get(api) 165 | if err != nil { 166 | log.Error(fmt.Sprintf("Error in getting API response: %v", err)) 167 | return nil 168 | } 169 | defer response.Body.Close() 170 | 171 | var allocs []Alloc 172 | err = json.NewDecoder(response.Body).Decode(&allocs) 173 | if err != nil { 174 | log.Error(fmt.Sprintf("Error in decoding JSON: %v", err)) 175 | return nil 176 | } 177 | 178 | var empty struct{} 179 | for _, alloc := range allocs { 180 | m[alloc.ID] = empty 181 | } 182 | 183 | return m 184 | } 185 | 186 | func getRSS(clusterAddress, metricsAddress, jobID, jobName string, remainders map[string][]string) float64 { 187 | var rss float64 188 | 189 | log.SetReportCaller(true) 190 | 191 | api := "http://" + metricsAddress + "/api/v1/query?query=sum(nomad_client_allocs_memory_rss_value%7Bjob%3D%22" + jobName + "%22%7D)%20by%20(job)" 192 | response, err := http.Get(api) 193 | if err != nil { 194 | log.Error(fmt.Sprintf("Error in getting API response: %v", err)) 195 | nomadAllocs := getNomadAllocs(clusterAddress, jobID) 196 | for allocID := range nomadAllocs { 197 | 
remainders[allocID] = append(remainders[allocID], "rss") 198 | } 199 | return rss 200 | } 201 | defer response.Body.Close() 202 | 203 | var VMStats RawAlloc 204 | err = json.NewDecoder(response.Body).Decode(&VMStats) 205 | if err != nil { 206 | log.Error(fmt.Sprintf("Error in decoding JSON: %v", err)) 207 | return rss 208 | } 209 | 210 | if len(VMStats.Data.Result) != 0 { 211 | num, err := strconv.ParseFloat(VMStats.Data.Result[0].Value[1].(string), 64) 212 | if err != nil { 213 | log.Error(fmt.Sprintf("Error in parsing float: %v", err)) 214 | return rss 215 | } 216 | rss += num / 1.049e6 217 | } 218 | 219 | nomadAllocs := getNomadAllocs(clusterAddress, jobID) 220 | VMAllocs := getVMAllocs(metricsAddress, "nomad_client_allocs_memory_rss_value") 221 | for allocID := range nomadAllocs { 222 | if _, ok := VMAllocs[allocID]; !ok { 223 | remainders[allocID] = append(remainders[allocID], "rss") 224 | } 225 | } 226 | 227 | return rss 228 | } 229 | 230 | func getCache(clusterAddress, metricsAddress, jobID, jobName string, remainders map[string][]string) float64 { 231 | var cache float64 232 | 233 | log.SetReportCaller(true) 234 | 235 | api := "http://" + metricsAddress + "/api/v1/query?query=sum(nomad_client_allocs_memory_cache_value%7Bjob%3D%22" + jobName + "%22%7D)%20by%20(job)" 236 | response, err := http.Get(api) 237 | if err != nil { 238 | log.Error(fmt.Sprintf("Error in getting API response: %v", err)) 239 | nomadAllocs := getNomadAllocs(clusterAddress, jobID) 240 | for allocID := range nomadAllocs { 241 | remainders[allocID] = append(remainders[allocID], "cache") 242 | } 243 | return cache 244 | } 245 | defer response.Body.Close() 246 | 247 | var VMStats RawAlloc 248 | err = json.NewDecoder(response.Body).Decode(&VMStats) 249 | if err != nil { 250 | log.Error(fmt.Sprintf("Error in decoding JSON: %v", err)) 251 | return cache 252 | } 253 | 254 | if len(VMStats.Data.Result) != 0 { 255 | num, err := strconv.ParseFloat(VMStats.Data.Result[0].Value[1].(string), 64) 256 | 
if err != nil { 257 | log.Error(fmt.Sprintf("Error in parsing float: %v", err)) 258 | return cache 259 | } 260 | cache += num / 1.049e6 261 | } 262 | 263 | nomadAllocs := getNomadAllocs(clusterAddress, jobID) 264 | VMAllocs := getVMAllocs(metricsAddress, "nomad_client_allocs_memory_cache_value") 265 | for allocID := range nomadAllocs { 266 | if _, ok := VMAllocs[allocID]; !ok { 267 | remainders[allocID] = append(remainders[allocID], "cache") 268 | } 269 | } 270 | 271 | return cache 272 | } 273 | 274 | func getTicks(clusterAddress, metricsAddress, jobID, jobName string, remainders map[string][]string) float64 { 275 | var ticks float64 276 | 277 | log.SetReportCaller(true) 278 | 279 | api := "http://" + metricsAddress + "/api/v1/query?query=sum(nomad_client_allocs_cpu_total_ticks_value%7Bjob%3D%22" + jobName + "%22%7D)%20by%20(job)" 280 | response, err := http.Get(api) 281 | if err != nil { 282 | log.Error(fmt.Sprintf("Error in getting API response: %v", err)) 283 | nomadAllocs := getNomadAllocs(clusterAddress, jobID) 284 | for allocID := range nomadAllocs { 285 | remainders[allocID] = append(remainders[allocID], "ticks") 286 | } 287 | return ticks 288 | } 289 | defer response.Body.Close() 290 | 291 | var VMStats RawAlloc 292 | err = json.NewDecoder(response.Body).Decode(&VMStats) 293 | if err != nil { 294 | log.Error(fmt.Sprintf("Error in decoding JSON: %v", err)) 295 | return ticks 296 | } 297 | 298 | if len(VMStats.Data.Result) != 0 { 299 | num, err := strconv.ParseFloat(VMStats.Data.Result[0].Value[1].(string), 64) 300 | if err != nil { 301 | log.Error(fmt.Sprintf("Error in parsing float: %v", err)) 302 | return ticks 303 | } 304 | ticks += num 305 | } 306 | 307 | nomadAllocs := getNomadAllocs(clusterAddress, jobID) 308 | VMAllocs := getVMAllocs(metricsAddress, "nomad_client_allocs_cpu_total_ticks_value") 309 | for allocID := range nomadAllocs { 310 | if _, ok := VMAllocs[allocID]; !ok { 311 | remainders[allocID] = append(remainders[allocID], "ticks") 312 | } 313 
| } 314 | 315 | return ticks 316 | } 317 | 318 | func getRemainderNomad(clusterAddress string, remainders map[string][]string) (float64, float64, float64) { 319 | var rss, cache, ticks float64 320 | 321 | log.SetReportCaller(true) 322 | 323 | for allocID, slice := range remainders { 324 | api := "http://" + clusterAddress + "/v1/client/allocation/" + allocID + "/stats" 325 | response, err := http.Get(api) 326 | if err != nil { 327 | log.Error(fmt.Sprintf("Error in getting API response: %v", err)) 328 | continue 329 | } 330 | defer response.Body.Close() 331 | 332 | var nomadAlloc NomadAlloc 333 | err = json.NewDecoder(response.Body).Decode(&nomadAlloc) 334 | if err != nil { 335 | log.Error(fmt.Sprintf("Error in decoding JSON: %v", err)) 336 | continue 337 | } 338 | 339 | for _, val := range slice { 340 | if nomadAlloc.ResourceUsage != (MemCPU{}) { 341 | resourceUsage := nomadAlloc.ResourceUsage 342 | memoryStats := resourceUsage.MemoryStats 343 | cpuStats := resourceUsage.CpuStats 344 | switch val { 345 | case "rss": 346 | rss += memoryStats.RSS / 1.049e6 347 | case "cache": 348 | cache += memoryStats.Cache / 1.049e6 349 | case "ticks": 350 | ticks += cpuStats.TotalTicks 351 | } 352 | } 353 | } 354 | } 355 | 356 | return rss, cache, ticks 357 | } 358 | 359 | func aggUsed(clusterAddress, metricsAddress, jobID, jobName string) (float64, float64, float64) { 360 | remainders := make(map[string][]string) 361 | 362 | rss := getRSS(clusterAddress, metricsAddress, jobID, jobName, remainders) 363 | cache := getCache(clusterAddress, metricsAddress, jobID, jobName, remainders) 364 | ticks := getTicks(clusterAddress, metricsAddress, jobID, jobName, remainders) 365 | 366 | rssRemainder, cacheRemainder, ticksRemainder := getRemainderNomad(clusterAddress, remainders) 367 | rss += rssRemainder 368 | cache += cacheRemainder 369 | ticks += ticksRemainder 370 | 371 | return rss, ticks, cache 372 | } 373 | 374 | func aggRequested(clusterAddress, jobID, jobType string) (float64, 
float64, float64, float64) { 375 | var cpu, memoryMB, diskMB, iops, count float64 376 | 377 | log.SetLevel(log.TraceLevel) 378 | log.SetReportCaller(true) 379 | 380 | api := "http://" + clusterAddress + "/v1/job/" + jobID 381 | response, err := http.Get(api) 382 | if err != nil { 383 | log.Error(fmt.Sprintf("Error in getting API response: %v", err)) 384 | return cpu, memoryMB, diskMB, iops 385 | } 386 | defer response.Body.Close() 387 | 388 | var jobSpec JobSpec 389 | err = json.NewDecoder(response.Body).Decode(&jobSpec) 390 | if err != nil { 391 | log.Error(fmt.Sprintf("Error in decoding JSON: %v", err)) 392 | return cpu, memoryMB, diskMB, iops 393 | } 394 | 395 | if jobSpec.TaskGroups == nil { 396 | return cpu, memoryMB, diskMB, iops 397 | } 398 | 399 | mapTaskGroupCount := make(map[string]float64) 400 | if jobType == "system" { 401 | api = "http://" + clusterAddress + "/v1/job/" + jobID + "/allocations" 402 | response, err := http.Get(api) 403 | if err != nil { 404 | log.Error(fmt.Sprintf("Error in getting API response: %v", err)) 405 | return cpu, memoryMB, diskMB, iops 406 | } 407 | defer response.Body.Close() 408 | 409 | var allocs []Alloc 410 | err = json.NewDecoder(response.Body).Decode(&allocs) 411 | if err != nil { 412 | log.Error(fmt.Sprintf("Error in decoding JSON: %v", err)) 413 | return cpu, memoryMB, diskMB, iops 414 | } 415 | 416 | for _, alloc := range allocs { 417 | mapTaskGroupCount[alloc.TaskGroup] += 1 418 | } 419 | } 420 | 421 | for _, taskGroup := range jobSpec.TaskGroups { 422 | switch jobType { 423 | case "service": 424 | count = taskGroup.Count 425 | case "system": 426 | count = mapTaskGroupCount[taskGroup.Name] 427 | } 428 | 429 | for _, task := range taskGroup.Tasks { 430 | resources := task.Resources 431 | cpu += count * resources.CPU 432 | memoryMB += count * resources.MemoryMB 433 | iops += count * resources.IOPS 434 | } 435 | diskMB += count * taskGroup.EphemeralDisk.SizeMB 436 | } 437 | 438 | return cpu, memoryMB, diskMB, iops 439 | 
} 440 | 441 | func reachCluster(clusterAddress, metricsAddress string, c chan<- []JobData) { 442 | var jobData []JobData 443 | var rss, ticks, cache, CPUTotal, memoryMBTotal, diskMBTotal, IOPSTotal float64 444 | 445 | log.SetLevel(log.TraceLevel) 446 | log.SetReportCaller(true) 447 | 448 | api := "http://" + clusterAddress + "/v1/jobs" 449 | response, err := http.Get(api) 450 | if err != nil { 451 | log.Error(fmt.Sprintf("Error in getting API response: %v", err)) 452 | wg.Done() 453 | return 454 | } 455 | defer response.Body.Close() 456 | 457 | var jobs []JobDesc 458 | err = json.NewDecoder(response.Body).Decode(&jobs) 459 | if err != nil { 460 | log.Error(fmt.Sprintf("Error in decoding JSON: %v", err)) 461 | return 462 | } 463 | 464 | for _, job := range jobs { 465 | log.Trace(job.ID) 466 | 467 | if job.Type != "system" && job.Type != "service" { 468 | continue 469 | } 470 | rss, ticks, cache = aggUsed(clusterAddress, metricsAddress, job.ID, job.Name) 471 | CPUTotal, memoryMBTotal, diskMBTotal, IOPSTotal = aggRequested(clusterAddress, job.ID, job.Type) 472 | 473 | var dataCenters string 474 | for i, val := range job.Datacenters { 475 | dataCenters += val 476 | if i != len(job.Datacenters)-1 { 477 | dataCenters += "," 478 | } 479 | } 480 | 481 | currentTime := time.Now().Format("2006-01-02 15:04:05") 482 | jobStruct := JobData{ 483 | job.ID, 484 | job.Name, 485 | ticks, 486 | CPUTotal, 487 | rss, 488 | cache, 489 | memoryMBTotal, 490 | diskMBTotal, 491 | IOPSTotal, 492 | job.JobSummary.Namespace, 493 | dataCenters, 494 | currentTime, 495 | } 496 | jobData = append(jobData, jobStruct) 497 | } 498 | 499 | c <- jobData 500 | wg.Done() 501 | } 502 | -------------------------------------------------------------------------------- /cluster_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Roblox Corporation 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this 
file except in compliance with the License.
You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0


Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
	"io/ioutil"
	"testing"

	"github.com/jarcoal/httpmock"
	log "github.com/sirupsen/logrus"
	"github.com/stretchr/testify/assert"
)

// TestGetVMAllocs covers getVMAllocs against: an empty result set, a
// populated result set, malformed JSON, an unregistered query, and an
// unregistered host. httpmock intercepts all outbound HTTP; any URL
// without a responder fails with a transport error, which is how the
// "bad*" cases below exercise the error paths.
func TestGetVMAllocs(t *testing.T) {
	// Silence logrus for the whole test binary; the functions under test
	// log their error paths.
	log.SetOutput(ioutil.Discard)
	httpmock.Activate()
	defer httpmock.DeactivateAndReset()

	// Empty result list -> empty (non-nil) set.
	httpmock.RegisterResponder("GET", "http://goodAddress/api/v1/query?query=query1",
		httpmock.NewStringResponder(200, `
		{
			"status":"successTest",
			"data":{
				"resultType":"vectorTest",
				"result":[

				]
			}
		}`,
		),
	)
	expectedVMAllocs := map[string]struct{}{}
	actualVMAllocs := getVMAllocs("goodAddress", "query1")
	assert.Empty(t, actualVMAllocs)
	assert.Equal(t, expectedVMAllocs, actualVMAllocs)

	// Two series -> set of both alloc IDs.
	httpmock.RegisterResponder("GET", "http://goodAddress/api/v1/query?query=query2",
		httpmock.NewStringResponder(200, `
		{
			"status":"successTest",
			"data":{
				"resultType":"vectorTest",
				"result":[
					{
						"metric":{
							"alloc_id":"alloc_id1"
						}
					},
					{
						"metric":{
							"alloc_id":"alloc_id2"
						}
					}
				]
			}
		}`,
		),
	)
	expectedVMAllocs = map[string]struct{}{
		"alloc_id1": {},
		"alloc_id2": {},
	}
	actualVMAllocs = getVMAllocs("goodAddress", "query2")
	assert.NotNil(t, actualVMAllocs)
	assert.Equal(t, expectedVMAllocs, actualVMAllocs)

	// Malformed JSON -> nil.
	httpmock.RegisterResponder("GET", "http://goodAddress/api/v1/query?query=query3",
		httpmock.NewStringResponder(200, `
		{
			invalid JSON
		}`,
		),
	)
	expectedVMAllocs = nil
	actualVMAllocs = getVMAllocs("goodAddress", "query3")
	assert.Empty(t, actualVMAllocs)
	assert.Equal(t, expectedVMAllocs, actualVMAllocs)

	// Unregistered query -> transport error -> nil.
	expectedVMAllocs = nil
	actualVMAllocs = getVMAllocs("goodAddress", "badQuery")
	assert.Empty(t, actualVMAllocs)
	assert.Equal(t, expectedVMAllocs, actualVMAllocs)

	// Unregistered host -> transport error -> nil.
	expectedVMAllocs = nil
	actualVMAllocs = getVMAllocs("badAddress", "query2")
	assert.Empty(t, actualVMAllocs)
	assert.Equal(t, expectedVMAllocs, actualVMAllocs)
}

// TestGetNomadAllocs covers getNomadAllocs against: an empty allocation
// list, a populated list, malformed JSON, an unregistered job ID, and an
// unregistered host.
func TestGetNomadAllocs(t *testing.T) {
	httpmock.Activate()
	defer httpmock.DeactivateAndReset()

	// Empty allocation list -> empty (non-nil) set.
	httpmock.RegisterResponder("GET", "http://goodAddress/v1/job/job1/allocations",
		httpmock.NewStringResponder(200, `
		[
		]`,
		),
	)
	expectedNomadAllocs := map[string]struct{}{}
	actualNomadAllocs := getNomadAllocs("goodAddress", "job1")
	assert.Empty(t, actualNomadAllocs)
	assert.Equal(t, expectedNomadAllocs, actualNomadAllocs)

	// Two allocations -> set of both IDs.
	httpmock.RegisterResponder("GET", "http://goodAddress/v1/job/job2/allocations",
		httpmock.NewStringResponder(200, `
		[
			{
				"ID": "ID1"
			},
			{
				"ID": "ID2"
			}
		]`,
		),
	)
	expectedNomadAllocs = map[string]struct{}{
		"ID1": {},
		"ID2": {},
	}
	actualNomadAllocs = getNomadAllocs("goodAddress", "job2")
	assert.NotNil(t, actualNomadAllocs)
	assert.Equal(t, expectedNomadAllocs, actualNomadAllocs)

	// Malformed JSON -> nil.
	httpmock.RegisterResponder("GET", "http://goodAddress/v1/job/job3/allocations",
		httpmock.NewStringResponder(200, `
		[
			invalid JSON
		]`,
		),
	)
	expectedNomadAllocs = nil
	actualNomadAllocs = getNomadAllocs("goodAddress", "job3")
	assert.Empty(t, actualNomadAllocs)
	assert.Equal(t, expectedNomadAllocs, actualNomadAllocs)

	// Unregistered job ID -> transport error -> nil.
	expectedNomadAllocs = nil
	actualNomadAllocs = getNomadAllocs("goodAddress", "badJobID")
	assert.Empty(t, actualNomadAllocs)
	assert.Equal(t, expectedNomadAllocs, actualNomadAllocs)

	// Unregistered host -> transport error -> nil.
	expectedNomadAllocs = nil
	actualNomadAllocs = getNomadAllocs("badAddress", "job2")
	assert.Empty(t, actualNomadAllocs)
	assert.Equal(t, expectedNomadAllocs, actualNomadAllocs)
}

// TestGetRSS exercises getRSS. Fixture setup: the metrics backend reports a
// job-wide RSS sum of 13459456 bytes; Nomad lists allocations alloc_id1-4;
// the metrics backend only knows alloc_id1-2, so alloc_id3/4 must land in
// the remainders map.
func TestGetRSS(t *testing.T) {
	httpmock.Activate()
	defer httpmock.DeactivateAndReset()

	httpmock.RegisterResponder("GET", "http://metricsAddress/api/v1/query?query=sum(nomad_client_allocs_memory_rss_value%7Bjob%3D%22jobName%22%7D)%20by%20(job)",
		httpmock.NewStringResponder(200, `
		{
			"status": "success",
			"data": {
				"resultType": "vector",
				"result": [
					{
						"metric": {
							"job": "jobName"
						},
						"value": [
							1597365496,
							"13459456"
						]
					}
				]
			}
		}`,
		),
	)
	httpmock.RegisterResponder("GET", "http://clusterAddress/v1/job/jobID/allocations",
		httpmock.NewStringResponder(200, `
		[
			{
				"ID": "alloc_id1"
			},
			{
				"ID": "alloc_id2"
			},
			{
				"ID": "alloc_id3"
			},
			{
				"ID": "alloc_id4"
			}
		]`,
		),
	)
	httpmock.RegisterResponder("GET", "http://metricsAddress/api/v1/query?query=nomad_client_allocs_memory_rss_value",
		httpmock.NewStringResponder(200, `
		{
			"status":"successTest",
			"data":{
				"resultType":"vectorTest",
				"result":[
					{
						"metric":{
							"alloc_id":"alloc_id1"
						}
					},
					{
						"metric":{
							"alloc_id":"alloc_id2"
						}
					}
				]
			}
		}`,
		),
	)
	// Happy path: summed value converted to MiB; allocs unknown to the
	// metrics backend recorded as remainders.
	expectedRSS := 13459456 / 1.049e6
	expectedRemainders := map[string][]string{
		"alloc_id3": {"rss"},
		"alloc_id4": {"rss"},
	}
	actualRemainders := map[string][]string{}
	actualRSS := getRSS("clusterAddress", "metricsAddress", "jobID", "jobName", actualRemainders)
	assert.NotNil(t, actualRSS)
	assert.Equal(t, expectedRSS, actualRSS)
	assert.NotNil(t, actualRemainders)
	assert.Equal(t, expectedRemainders, actualRemainders)

	// Nomad unreachable: metrics sum still returned, but no allocation
	// listing means no remainders.
	expectedRSS = 13459456 / 1.049e6
	expectedRemainders = map[string][]string{}
	actualRemainders = map[string][]string{}
	actualRSS = getRSS("badAddress", "metricsAddress", "jobID", "jobName", actualRemainders)
	assert.NotNil(t, actualRSS)
	assert.Equal(t, expectedRSS, actualRSS)
	assert.NotNil(t, actualRemainders)
	assert.Equal(t, expectedRemainders, actualRemainders)

	// Metrics backend unreachable: zero result; every Nomad alloc becomes
	// a remainder.
	expectedRSS = 0.0
	expectedRemainders = map[string][]string{
		"alloc_id1": {"rss"},
		"alloc_id2": {"rss"},
		"alloc_id3": {"rss"},
		"alloc_id4": {"rss"},
	}
	actualRemainders = map[string][]string{}
	actualRSS = getRSS("clusterAddress", "badAddress", "jobID", "jobName", actualRemainders)
	assert.NotNil(t, actualRSS)
	assert.Equal(t, expectedRSS, actualRSS)
	assert.NotNil(t, actualRemainders)
	assert.Equal(t, expectedRemainders, actualRemainders)

	// Both unreachable: zero result, no remainders.
	expectedRSS = 0.0
	expectedRemainders = map[string][]string{}
	actualRemainders = map[string][]string{}
	actualRSS = getRSS("badAddress", "badAddress", "jobID", "jobName", actualRemainders)
	assert.NotNil(t, actualRSS)
	assert.Equal(t, expectedRSS, actualRSS)
	assert.NotNil(t, actualRemainders)
	assert.Equal(t, expectedRemainders, actualRemainders)

	// Malformed metrics JSON: zero result, no remainders.
	httpmock.RegisterResponder("GET", "http://metricsAddress2/api/v1/query?query=sum(nomad_client_allocs_memory_rss_value%7Bjob%3D%22jobName%22%7D)%20by%20(job)",
		httpmock.NewStringResponder(200, `
		{
			invalid JSON
		}`,
		),
	)
	expectedRSS = 0.0
	expectedRemainders = map[string][]string{}
	actualRemainders = map[string][]string{}
	actualRSS = getRSS("clusterAddress", "metricsAddress2", "jobID", "jobName", actualRemainders)
	assert.NotNil(t, actualRSS)
	assert.Equal(t, expectedRSS, actualRSS)
	assert.NotNil(t, actualRemainders)
	assert.Equal(t, expectedRemainders, actualRemainders)

	// Unparseable metric value: zero result, no remainders.
	httpmock.RegisterResponder("GET", "http://metricsAddress3/api/v1/query?query=sum(nomad_client_allocs_memory_rss_value%7Bjob%3D%22jobName%22%7D)%20by%20(job)",
		httpmock.NewStringResponder(200, `
		{
			"status": "success",
			"data": {
				"resultType": "vector",
				"result": [
					{
						"metric": {
							"job": "jobName"
						},
						"value": [
							1597365496,
							"notFloat"
						]
					}
				]
			}
		}`,
		),
	)
	expectedRSS = 0.0
	expectedRemainders = map[string][]string{}
	actualRemainders = map[string][]string{}
	actualRSS = getRSS("clusterAddress", "metricsAddress3", "jobID", "jobName", actualRemainders)
	assert.NotNil(t, actualRSS)
	assert.Equal(t, expectedRSS, actualRSS)
	assert.NotNil(t, actualRemainders)
	assert.Equal(t, expectedRemainders, actualRemainders)
}

// TestGetCache mirrors TestGetRSS for the page-cache metric: same fixture
// topology, same six cases, remainder key "cache".
func TestGetCache(t *testing.T) {
	httpmock.Activate()
	defer httpmock.DeactivateAndReset()

	httpmock.RegisterResponder("GET", "http://metricsAddress/api/v1/query?query=sum(nomad_client_allocs_memory_cache_value%7Bjob%3D%22jobName%22%7D)%20by%20(job)",
		httpmock.NewStringResponder(200, `
		{
			"status": "success",
			"data": {
				"resultType": "vector",
				"result": [
					{
						"metric": {
							"job": "jobName"
						},
						"value": [
							1597365496,
							"13459456"
						]
					}
				]
			}
		}`,
		),
	)
	httpmock.RegisterResponder("GET", "http://clusterAddress/v1/job/jobID/allocations",
		httpmock.NewStringResponder(200, `
		[
			{
				"ID": "alloc_id1"
			},
			{
				"ID": "alloc_id2"
			},
			{
				"ID": "alloc_id3"
			},
			{
				"ID": "alloc_id4"
			}
		]`,
		),
	)
	httpmock.RegisterResponder("GET", "http://metricsAddress/api/v1/query?query=nomad_client_allocs_memory_cache_value",
		httpmock.NewStringResponder(200, `
		{
			"status":"successTest",
			"data":{
				"resultType":"vectorTest",
				"result":[
					{
						"metric":{
							"alloc_id":"alloc_id1"
						}
					},
					{
						"metric":{
							"alloc_id":"alloc_id2"
						}
					}
				]
			}
		}`,
		),
	)
	// Happy path.
	expectedCache := 13459456 / 1.049e6
	expectedRemainders := map[string][]string{
		"alloc_id3": {"cache"},
		"alloc_id4": {"cache"},
	}
	actualRemainders := map[string][]string{}
	actualCache := getCache("clusterAddress", "metricsAddress", "jobID", "jobName", actualRemainders)
	assert.NotNil(t, actualCache)
	assert.Equal(t, expectedCache, actualCache)
	assert.NotNil(t, actualRemainders)
	assert.Equal(t, expectedRemainders, actualRemainders)

	// Nomad unreachable.
	expectedCache = 13459456 / 1.049e6
	expectedRemainders = map[string][]string{}
	actualRemainders = map[string][]string{}
	actualCache = getCache("badAddress", "metricsAddress", "jobID", "jobName", actualRemainders)
	assert.NotNil(t, actualCache)
	assert.Equal(t, expectedCache, actualCache)
	assert.NotNil(t, actualRemainders)
	assert.Equal(t, expectedRemainders, actualRemainders)

	// Metrics backend unreachable.
	expectedCache = 0.0
	expectedRemainders = map[string][]string{
		"alloc_id1": {"cache"},
		"alloc_id2": {"cache"},
		"alloc_id3": {"cache"},
		"alloc_id4": {"cache"},
	}
	actualRemainders = map[string][]string{}
	actualCache = getCache("clusterAddress", "badAddress", "jobID", "jobName", actualRemainders)
	assert.NotNil(t, actualCache)
	assert.Equal(t, expectedCache, actualCache)
	assert.NotNil(t, actualRemainders)
	assert.Equal(t, expectedRemainders, actualRemainders)

	// Both unreachable.
	expectedCache = 0.0
	expectedRemainders = map[string][]string{}
	actualRemainders = map[string][]string{}
	actualCache = getCache("badAddress", "badAddress", "jobID", "jobName", actualRemainders)
	assert.NotNil(t, actualCache)
	assert.Equal(t, expectedCache, actualCache)
	assert.NotNil(t, actualRemainders)
	assert.Equal(t, expectedRemainders, actualRemainders)

	// Malformed metrics JSON.
	httpmock.RegisterResponder("GET", "http://metricsAddress2/api/v1/query?query=sum(nomad_client_allocs_memory_cache_value%7Bjob%3D%22jobName%22%7D)%20by%20(job)",
		httpmock.NewStringResponder(200, `
		{
			invalid JSON
		}`,
		),
	)
	expectedCache = 0.0
	expectedRemainders = map[string][]string{}
	actualRemainders = map[string][]string{}
	actualCache = getCache("clusterAddress", "metricsAddress2", "jobID", "jobName", actualRemainders)
	assert.NotNil(t, actualCache)
	assert.Equal(t, expectedCache, actualCache)
	assert.NotNil(t, actualRemainders)
	assert.Equal(t, expectedRemainders, actualRemainders)

	// Unparseable metric value.
	httpmock.RegisterResponder("GET", "http://metricsAddress3/api/v1/query?query=sum(nomad_client_allocs_memory_cache_value%7Bjob%3D%22jobName%22%7D)%20by%20(job)",
		httpmock.NewStringResponder(200, `
		{
			"status": "success",
			"data": {
				"resultType": "vector",
				"result": [
					{
						"metric": {
							"job": "jobName"
						},
						"value": [
							1597365496,
							"notFloat"
						]
					}
				]
			}
		}`,
		),
	)
	expectedCache = 0.0
	expectedRemainders = map[string][]string{}
	actualRemainders = map[string][]string{}
	actualCache = getCache("clusterAddress", "metricsAddress3", "jobID", "jobName", actualRemainders)
	assert.NotNil(t, actualCache)
	assert.Equal(t, expectedCache, actualCache)
	assert.NotNil(t, actualRemainders)
	assert.Equal(t, expectedRemainders, actualRemainders)
}

// TestGetTicks mirrors TestGetRSS for CPU ticks. Unlike memory, ticks are
// not divided by 1.049e6, so the expected value is the raw sum.
func TestGetTicks(t *testing.T) {
	httpmock.Activate()
	defer httpmock.DeactivateAndReset()

	httpmock.RegisterResponder("GET", "http://metricsAddress/api/v1/query?query=sum(nomad_client_allocs_cpu_total_ticks_value%7Bjob%3D%22jobName%22%7D)%20by%20(job)",
		httpmock.NewStringResponder(200, `
		{
			"status": "success",
			"data": {
				"resultType": "vector",
				"result": [
					{
						"metric": {
							"job": "jobName"
						},
						"value": [
							1597365496,
							"13459456"
						]
					}
				]
			}
		}`,
		),
	)
	httpmock.RegisterResponder("GET", "http://clusterAddress/v1/job/jobID/allocations",
		httpmock.NewStringResponder(200, `
		[
			{
				"ID": "alloc_id1"
			},
			{
				"ID": "alloc_id2"
			},
			{
				"ID": "alloc_id3"
			},
			{
				"ID": "alloc_id4"
			}
		]`,
		),
	)
	httpmock.RegisterResponder("GET", "http://metricsAddress/api/v1/query?query=nomad_client_allocs_cpu_total_ticks_value",
		httpmock.NewStringResponder(200, `
		{
			"status":"successTest",
			"data":{
				"resultType":"vectorTest",
				"result":[
					{
						"metric":{
							"alloc_id":"alloc_id1"
						}
					},
					{
						"metric":{
							"alloc_id":"alloc_id2"
						}
					}
				]
			}
		}`,
		),
	)
	// Happy path: raw sum, no unit conversion.
	expectedTicks := 13459456.0
	expectedRemainders := map[string][]string{
		"alloc_id3": {"ticks"},
		"alloc_id4": {"ticks"},
	}
	actualRemainders := map[string][]string{}
	actualTicks := getTicks("clusterAddress", "metricsAddress", "jobID", "jobName", actualRemainders)
	assert.NotNil(t, actualTicks)
	assert.Equal(t, expectedTicks, actualTicks)
	assert.NotNil(t, actualRemainders)
	assert.Equal(t, expectedRemainders, actualRemainders)

	// Nomad unreachable.
	expectedTicks = 13459456.0
	expectedRemainders = map[string][]string{}
	actualRemainders = map[string][]string{}
	actualTicks = getTicks("badAddress", "metricsAddress", "jobID", "jobName", actualRemainders)
	assert.NotNil(t, actualTicks)
	assert.Equal(t, expectedTicks, actualTicks)
	assert.NotNil(t, actualRemainders)
	assert.Equal(t, expectedRemainders, actualRemainders)

	// Metrics backend unreachable.
	expectedTicks = 0.0
	expectedRemainders = map[string][]string{
		"alloc_id1": {"ticks"},
		"alloc_id2": {"ticks"},
		"alloc_id3": {"ticks"},
		"alloc_id4": {"ticks"},
	}
	actualRemainders = map[string][]string{}
	actualTicks = getTicks("clusterAddress", "badAddress", "jobID", "jobName", actualRemainders)
	assert.NotNil(t, actualTicks)
	assert.Equal(t, expectedTicks, actualTicks)
	assert.NotNil(t, actualRemainders)
	assert.Equal(t, expectedRemainders, actualRemainders)

	// Both unreachable.
	expectedTicks = 0.0
	expectedRemainders = map[string][]string{}
	actualRemainders = map[string][]string{}
	actualTicks = getTicks("badAddress", "badAddress", "jobID", "jobName", actualRemainders)
	assert.NotNil(t, actualTicks)
	assert.Equal(t, expectedTicks, actualTicks)
	assert.NotNil(t, actualRemainders)
	assert.Equal(t, expectedRemainders, actualRemainders)

	// Malformed metrics JSON.
	httpmock.RegisterResponder("GET", "http://metricsAddress2/api/v1/query?query=sum(nomad_client_allocs_cpu_total_ticks_value%7Bjob%3D%22jobName%22%7D)%20by%20(job)",
		httpmock.NewStringResponder(200, `
		{
			Invalid JSON
		}`,
		),
	)
	expectedTicks = 0.0
	expectedRemainders = map[string][]string{}
	actualRemainders = map[string][]string{}
	actualTicks = getTicks("clusterAddress", "metricsAddress2", "jobID", "jobName", actualRemainders)
	assert.NotNil(t, actualTicks)
	assert.Equal(t, expectedTicks, actualTicks)
	assert.NotNil(t, actualRemainders)
	assert.Equal(t, expectedRemainders, actualRemainders)

	// Unparseable metric value.
	httpmock.RegisterResponder("GET", "http://metricsAddress3/api/v1/query?query=sum(nomad_client_allocs_cpu_total_ticks_value%7Bjob%3D%22jobName%22%7D)%20by%20(job)",
		httpmock.NewStringResponder(200, `
		{
			"status": "success",
			"data": {
				"resultType": "vector",
				"result": [
					{
						"metric": {
							"job": "jobName"
						},
						"value": [
							1597365496,
							"notFloat"
						]
					}
				]
			}
		}`,
		),
	)
	expectedTicks = 0.0
	expectedRemainders = map[string][]string{}
	actualRemainders = map[string][]string{}
	actualTicks = getTicks("clusterAddress", "metricsAddress3", "jobID", "jobName", actualRemainders)
	assert.NotNil(t, actualTicks)
	assert.Equal(t, expectedTicks, actualTicks)
	assert.NotNil(t, actualRemainders)
	assert.Equal(t, expectedRemainders, actualRemainders)
}

// TestGetRemainderNomad covers summing allocation stats straight from the
// Nomad client API: full and partial metric selections, malformed JSON,
// an unreachable cluster, and mixes of good/bad allocation IDs (bad ones
// are skipped, good ones still counted). alloc_id4 has no responder at
// all, so it fails at the transport level.
func TestGetRemainderNomad(t *testing.T) {
	httpmock.Activate()
	defer httpmock.DeactivateAndReset()

	httpmock.RegisterResponder("GET", "http://clusterAddress/v1/client/allocation/alloc_id1/stats",
		httpmock.NewStringResponder(200, `
		{
			"ResourceUsage": {
				"MemoryStats": {
					"RSS": 6451200,
					"Cache": 654321,
					"Swap": 0,
					"Usage": 7569408,
					"MaxUsage": 9162752,
					"KernelUsage": 0,
					"KernelMaxUsage": 0
				},
				"CpuStats": {
					"TotalTicks": 2394.4724337708644
				}
			}
		}`,
		),
	)
	httpmock.RegisterResponder("GET", "http://clusterAddress/v1/client/allocation/alloc_id2/stats",
		httpmock.NewStringResponder(200, `
		{
			"ResourceUsage": {
				"MemoryStats": {
					"RSS": 552821,
					"Cache": 789246,
					"Swap": 0,
					"Usage": 98176514,
					"MaxUsage": 16546,
					"KernelUsage": 0,
					"KernelMaxUsage": 0
				},
				"CpuStats": {
					"TotalTicks": 1125.6842315
				}
			}
		}`,
		),
	)
	// Both allocations, all three metrics requested -> sums of both.
	remainders := map[string][]string{
		"alloc_id1": {"rss", "cache", "ticks"},
		"alloc_id2": {"rss", "cache", "ticks"},
	}
	expectedRSS := 6451200/1.049e6 + 552821/1.049e6
	expectedCache := 654321/1.049e6 + 789246/1.049e6
	expectedTicks := 2394.4724337708644 + 1125.6842315
	actualRSS, actualCache, actualTicks := getRemainderNomad("clusterAddress", remainders)
	assert.NotNil(t, actualRSS)
	assert.NotNil(t, actualCache)
	assert.NotNil(t, actualTicks)
	assert.Equal(t, expectedRSS, actualRSS)
	assert.Equal(t, expectedCache, actualCache)
	assert.Equal(t, expectedTicks, actualTicks)

	// Partial metric selection: only the named metrics of each allocation
	// are accumulated.
	remainders = map[string][]string{
		"alloc_id1": {"cache", "ticks"},
		"alloc_id2": {"rss"},
	}
	expectedRSS = 552821 / 1.049e6
	expectedCache = 654321 / 1.049e6
	expectedTicks = 2394.4724337708644
	actualRSS, actualCache, actualTicks = getRemainderNomad("clusterAddress", remainders)
	assert.NotNil(t, actualRSS)
	assert.NotNil(t, actualCache)
	assert.NotNil(t, actualTicks)
	assert.Equal(t, expectedRSS, actualRSS)
	assert.Equal(t, expectedCache, actualCache)
	assert.Equal(t, expectedTicks, actualTicks)

	// Malformed stats JSON -> allocation skipped -> zeros.
	httpmock.RegisterResponder("GET", "http://clusterAddress/v1/client/allocation/alloc_id3/stats",
		httpmock.NewStringResponder(200, `
		{
			Invalid JSON
		}`,
		),
	)
	remainders = map[string][]string{
		"alloc_id3": {"rss", "cache", "ticks"},
	}
	expectedRSS = 0.0
	expectedCache = 0.0
	expectedTicks = 0.0
	actualRSS, actualCache, actualTicks = getRemainderNomad("clusterAddress", remainders)
	assert.Empty(t, actualRSS)
	assert.Empty(t, actualCache)
	assert.Empty(t, actualTicks)
	assert.Equal(t, expectedRSS, actualRSS)
	assert.Equal(t, expectedCache, actualCache)
	assert.Equal(t, expectedTicks, actualTicks)

	// Unreachable cluster -> every allocation skipped -> zeros.
	remainders = map[string][]string{
		"alloc_id1": {"rss", "cache", "ticks"},
		"alloc_id2": {"rss", "cache", "ticks"},
	}
	expectedRSS = 0.0
	expectedCache = 0.0
	expectedTicks = 0.0
	actualRSS, actualCache, actualTicks = getRemainderNomad("badAddress", remainders)
	assert.Empty(t, actualRSS)
	assert.Empty(t, actualCache)
	assert.Empty(t, actualTicks)
	assert.Equal(t, expectedRSS, actualRSS)
	assert.Equal(t, expectedCache, actualCache)
	assert.Equal(t, expectedTicks, actualTicks)

	// Good + bad-JSON allocation: only the good one counts.
	remainders = map[string][]string{
		"alloc_id1": {"rss", "cache", "ticks"},
		"alloc_id3": {"rss", "cache", "ticks"},
	}
	expectedRSS = 6451200 / 1.049e6
	expectedCache = 654321 / 1.049e6
	expectedTicks = 2394.4724337708644
	actualRSS, actualCache, actualTicks = getRemainderNomad("clusterAddress", remainders)
	assert.NotNil(t, actualRSS)
	assert.NotNil(t, actualCache)
	assert.NotNil(t, actualTicks)
	assert.Equal(t, expectedRSS, actualRSS)
	assert.Equal(t, expectedCache, actualCache)
	assert.Equal(t, expectedTicks, actualTicks)

	// Same pair, reversed insertion order: map iteration order must not
	// matter.
	remainders = map[string][]string{
		"alloc_id3": {"rss", "cache", "ticks"},
		"alloc_id1": {"rss", "cache", "ticks"},
	}
	expectedRSS = 6451200 / 1.049e6
	expectedCache = 654321 / 1.049e6
	expectedTicks = 2394.4724337708644
	actualRSS, actualCache, actualTicks = getRemainderNomad("clusterAddress", remainders)
	assert.NotNil(t, actualRSS)
	assert.NotNil(t, actualCache)
	assert.NotNil(t, actualTicks)
	assert.Equal(t, expectedRSS, actualRSS)
	assert.Equal(t, expectedCache, actualCache)
	assert.Equal(t, expectedTicks, actualTicks)

	// Good + transport-error allocation (no responder for alloc_id4).
	remainders = map[string][]string{
		"alloc_id4": {"rss", "cache", "ticks"},
		"alloc_id1": {"rss", "cache", "ticks"},
	}
	expectedRSS = 6451200 / 1.049e6
	expectedCache = 654321 / 1.049e6
	expectedTicks = 2394.4724337708644
	actualRSS, actualCache, actualTicks = getRemainderNomad("clusterAddress", remainders)
	assert.NotNil(t, actualRSS)
	assert.NotNil(t, actualCache)
	assert.NotNil(t, actualTicks)
	assert.Equal(t, expectedRSS, actualRSS)
	assert.Equal(t, expectedCache, actualCache)
	assert.Equal(t, expectedTicks, actualTicks)
}

// TestAggUsed wires the three per-metric queries together: rss sum
// 13459456, cache sum 33459456, ticks sum 23459456.0.
func TestAggUsed(t *testing.T) {
	httpmock.Activate()
	defer httpmock.DeactivateAndReset()

	httpmock.RegisterResponder("GET", "http://metricsAddress/api/v1/query?query=sum(nomad_client_allocs_memory_rss_value%7Bjob%3D%22jobName%22%7D)%20by%20(job)",
		httpmock.NewStringResponder(200, `
		{
			"status": "success",
			"data": {
				"resultType": "vector",
				"result": [
					{
						"metric": {
							"job": "jobName"
						},
						"value": [
							1597365496,
							"13459456"
						]
					}
				]
			}
		}`,
		),
	)
	httpmock.RegisterResponder("GET", "http://metricsAddress/api/v1/query?query=sum(nomad_client_allocs_memory_cache_value%7Bjob%3D%22jobName%22%7D)%20by%20(job)",
		httpmock.NewStringResponder(200, `
		{
			"status": "success",
			"data": {
				"resultType": "vector",
				"result": [
					{
						"metric": {
							"job": "jobName"
						},
						"value": [
							1597365496,
							"33459456"
						]
					}
				]
			}
		}`,
		),
	)
	httpmock.RegisterResponder("GET", "http://metricsAddress/api/v1/query?query=sum(nomad_client_allocs_cpu_total_ticks_value%7Bjob%3D%22jobName%22%7D)%20by%20(job)",
		httpmock.NewStringResponder(200, `
		{
			"status": "success",
			"data": {
				"resultType": "vector",
				"result": [
					{
						"metric": {
							"job": "jobName"
						},
						"value": [
							1597365496,
							"23459456.0"
						]
					}
				]
			}
		}`,
		),
	)
	expectedRSS := 13459456 / 1.049e6
	expectedTicks := 23459456.0
expectedCache := 33459456 / 1.049e6 857 | actualRSS, actualTicks, actualCache := aggUsed("clusterAddress", "metricsAddress", "jobID", "jobName") 858 | assert.NotNil(t, actualRSS) 859 | assert.NotNil(t, actualTicks) 860 | assert.NotNil(t, actualCache) 861 | assert.Equal(t, expectedRSS, actualRSS) 862 | assert.Equal(t, expectedTicks, actualTicks) 863 | assert.Equal(t, expectedCache, actualCache) 864 | 865 | // nomadAllocs 866 | httpmock.RegisterResponder("GET", "http://clusterAddress/v1/job/jobID/allocations", 867 | httpmock.NewStringResponder(200, ` 868 | [ 869 | { 870 | "ID": "alloc_id1" 871 | }, 872 | { 873 | "ID": "alloc_id2" 874 | } 875 | ]`, 876 | ), 877 | ) 878 | httpmock.RegisterResponder("GET", "http://clusterAddress/v1/client/allocation/alloc_id1/stats", 879 | httpmock.NewStringResponder(200, ` 880 | { 881 | "ResourceUsage": { 882 | "MemoryStats": { 883 | "RSS": 6451200, 884 | "Cache": 654321, 885 | "Swap": 0, 886 | "Usage": 7569408, 887 | "MaxUsage": 9162752, 888 | "KernelUsage": 0, 889 | "KernelMaxUsage": 0 890 | }, 891 | "CpuStats": { 892 | "TotalTicks": 2394.4724337708644 893 | } 894 | } 895 | }`, 896 | ), 897 | ) 898 | httpmock.RegisterResponder("GET", "http://clusterAddress/v1/client/allocation/alloc_id2/stats", 899 | httpmock.NewStringResponder(200, ` 900 | { 901 | "ResourceUsage": { 902 | "MemoryStats": { 903 | "RSS": 552821, 904 | "Cache": 789246, 905 | "Swap": 0, 906 | "Usage": 98176514, 907 | "MaxUsage": 16546, 908 | "KernelUsage": 0, 909 | "KernelMaxUsage": 0 910 | }, 911 | "CpuStats": { 912 | "TotalTicks": 1125.6842315 913 | } 914 | } 915 | }`, 916 | ), 917 | ) 918 | expectedRSS = (13459456 + 6451200 + 552821) / 1.049e6 919 | expectedTicks = 23459456.0 + 2394.4724337708644 + 1125.6842315 920 | expectedCache = (33459456 + 654321 + 789246) / 1.049e6 921 | actualRSS, actualTicks, actualCache = aggUsed("clusterAddress", "metricsAddress", "jobID", "jobName") 922 | assert.NotNil(t, actualRSS) 923 | assert.NotNil(t, actualTicks) 924 | assert.NotNil(t, 
actualCache) 925 | assert.Equal(t, expectedRSS, actualRSS) 926 | assert.Equal(t, expectedTicks, actualTicks) 927 | assert.Equal(t, expectedCache, actualCache) 928 | 929 | // VMAllocs 930 | httpmock.RegisterResponder("GET", "http://metricsAddress/api/v1/query?query=nomad_client_allocs_memory_rss_value", 931 | httpmock.NewStringResponder(200, ` 932 | { 933 | "status":"successTest", 934 | "data":{ 935 | "resultType":"vectorTest", 936 | "result":[ 937 | { 938 | "metric":{ 939 | "alloc_id":"alloc_id1" 940 | } 941 | }, 942 | { 943 | "metric":{ 944 | "alloc_id":"alloc_id2" 945 | } 946 | } 947 | ] 948 | } 949 | }`, 950 | ), 951 | ) 952 | httpmock.RegisterResponder("GET", "http://metricsAddress/api/v1/query?query=nomad_client_allocs_cpu_total_ticks_value", 953 | httpmock.NewStringResponder(200, ` 954 | { 955 | "status":"successTest", 956 | "data":{ 957 | "resultType":"vectorTest", 958 | "result":[ 959 | { 960 | "metric":{ 961 | "alloc_id":"alloc_id1" 962 | } 963 | }, 964 | { 965 | "metric":{ 966 | "alloc_id":"alloc_id2" 967 | } 968 | } 969 | ] 970 | } 971 | }`, 972 | ), 973 | ) 974 | httpmock.RegisterResponder("GET", "http://metricsAddress/api/v1/query?query=nomad_client_allocs_memory_cache_value", 975 | httpmock.NewStringResponder(200, ` 976 | { 977 | "status":"successTest", 978 | "data":{ 979 | "resultType":"vectorTest", 980 | "result":[ 981 | { 982 | "metric":{ 983 | "alloc_id":"alloc_id1" 984 | } 985 | }, 986 | { 987 | "metric":{ 988 | "alloc_id":"alloc_id2" 989 | } 990 | } 991 | ] 992 | } 993 | }`, 994 | ), 995 | ) 996 | expectedRSS = 13459456 / 1.049e6 997 | expectedTicks = 23459456.0 998 | expectedCache = 33459456 / 1.049e6 999 | actualRSS, actualTicks, actualCache = aggUsed("clusterAddress", "metricsAddress", "jobID", "jobName") 1000 | assert.NotNil(t, actualRSS) 1001 | assert.NotNil(t, actualTicks) 1002 | assert.NotNil(t, actualCache) 1003 | assert.Equal(t, expectedRSS, actualRSS) 1004 | assert.Equal(t, expectedTicks, actualTicks) 1005 | assert.Equal(t, 
expectedCache, actualCache) 1006 | } 1007 | 1008 | func TestAggRequested(t *testing.T) { 1009 | httpmock.Activate() 1010 | defer httpmock.DeactivateAndReset() 1011 | 1012 | // System Job 1013 | httpmock.RegisterResponder("GET", "http://clusterAddress/v1/job/jobID", 1014 | httpmock.NewStringResponder(200, ` 1015 | { 1016 | "ID": "jobID", 1017 | "TaskGroups": [ 1018 | { 1019 | "Name": "TaskGroup1", 1020 | "Count": 1, 1021 | "Tasks": [ 1022 | { 1023 | "Resources": { 1024 | "CPU": 200, 1025 | "MemoryMB": 512, 1026 | "IOPS": 20 1027 | } 1028 | } 1029 | ], 1030 | "EphemeralDisk": { 1031 | "SizeMB": 1000 1032 | } 1033 | }, 1034 | { 1035 | "Name": "TaskGroup2", 1036 | "Count": 1, 1037 | "Tasks": [ 1038 | { 1039 | "Resources": { 1040 | "CPU": 400, 1041 | "MemoryMB": 256, 1042 | "IOPS": 40 1043 | } 1044 | } 1045 | ], 1046 | "EphemeralDisk": { 1047 | "SizeMB": 500 1048 | } 1049 | } 1050 | ] 1051 | }`, 1052 | ), 1053 | ) 1054 | httpmock.RegisterResponder("GET", "http://clusterAddress/v1/job/jobID/allocations", 1055 | httpmock.NewStringResponder(200, ` 1056 | [ 1057 | { 1058 | "TaskGroup": "TaskGroup1" 1059 | }, 1060 | { 1061 | "TaskGroup": "TaskGroup1" 1062 | }, 1063 | { 1064 | "TaskGroup": "TaskGroup2" 1065 | }, 1066 | { 1067 | "TaskGroup": "TaskGroup2" 1068 | }, 1069 | { 1070 | "TaskGroup": "TaskGroup2" 1071 | } 1072 | ]`, 1073 | ), 1074 | ) 1075 | expectedCPU := 1600.0 1076 | expectedMemory := 1792.0 1077 | expectedDisk := 3500.0 1078 | expectedIOPS := 160.0 1079 | actualCPU, actualMemory, actualDisk, actualIOPS := aggRequested("clusterAddress", "jobID", "system") 1080 | assert.NotNil(t, actualCPU) 1081 | assert.NotNil(t, actualMemory) 1082 | assert.NotNil(t, actualDisk) 1083 | assert.NotNil(t, actualIOPS) 1084 | assert.Equal(t, expectedCPU, actualCPU) 1085 | assert.Equal(t, expectedMemory, actualMemory) 1086 | assert.Equal(t, expectedDisk, actualDisk) 1087 | assert.Equal(t, expectedIOPS, actualIOPS) 1088 | 1089 | // Service Job 1090 | httpmock.RegisterResponder("GET", 
"http://clusterAddress/v1/job/jobID2", 1091 | httpmock.NewStringResponder(200, ` 1092 | { 1093 | "ID": "jobID", 1094 | "TaskGroups": [ 1095 | { 1096 | "Name": "TaskGroup1", 1097 | "Count": 3, 1098 | "Tasks": [ 1099 | { 1100 | "Resources": { 1101 | "CPU": 200, 1102 | "MemoryMB": 512, 1103 | "IOPS": 20 1104 | } 1105 | } 1106 | ], 1107 | "EphemeralDisk": { 1108 | "SizeMB": 1000 1109 | } 1110 | }, 1111 | { 1112 | "Name": "TaskGroup2", 1113 | "Count": 2, 1114 | "Tasks": [ 1115 | { 1116 | "Resources": { 1117 | "CPU": 400, 1118 | "MemoryMB": 256, 1119 | "IOPS": 40 1120 | } 1121 | } 1122 | ], 1123 | "EphemeralDisk": { 1124 | "SizeMB": 500 1125 | } 1126 | } 1127 | ] 1128 | }`, 1129 | ), 1130 | ) 1131 | expectedCPU = 1400.0 1132 | expectedMemory = 2048.0 1133 | expectedDisk = 4000.0 1134 | expectedIOPS = 140.0 1135 | actualCPU, actualMemory, actualDisk, actualIOPS = aggRequested("clusterAddress", "jobID2", "service") 1136 | assert.NotNil(t, actualCPU) 1137 | assert.NotNil(t, actualMemory) 1138 | assert.NotNil(t, actualDisk) 1139 | assert.NotNil(t, actualIOPS) 1140 | assert.Equal(t, expectedCPU, actualCPU) 1141 | assert.Equal(t, expectedMemory, actualMemory) 1142 | assert.Equal(t, expectedDisk, actualDisk) 1143 | assert.Equal(t, expectedIOPS, actualIOPS) 1144 | 1145 | expectedCPU = 0.0 1146 | expectedMemory = 0.0 1147 | expectedDisk = 0.0 1148 | expectedIOPS = 0.0 1149 | actualCPU, actualMemory, actualDisk, actualIOPS = aggRequested("clusterAddress", "jobID", "none") 1150 | assert.NotNil(t, actualCPU) 1151 | assert.NotNil(t, actualMemory) 1152 | assert.NotNil(t, actualDisk) 1153 | assert.NotNil(t, actualIOPS) 1154 | assert.Equal(t, expectedCPU, actualCPU) 1155 | assert.Equal(t, expectedMemory, actualMemory) 1156 | assert.Equal(t, expectedDisk, actualDisk) 1157 | assert.Equal(t, expectedIOPS, actualIOPS) 1158 | 1159 | expectedCPU = 0.0 1160 | expectedMemory = 0.0 1161 | expectedDisk = 0.0 1162 | expectedIOPS = 0.0 1163 | actualCPU, actualMemory, actualDisk, actualIOPS = 
aggRequested("badAddress", "jobID", "system") 1164 | assert.NotNil(t, actualCPU) 1165 | assert.NotNil(t, actualMemory) 1166 | assert.NotNil(t, actualDisk) 1167 | assert.NotNil(t, actualIOPS) 1168 | assert.Equal(t, expectedCPU, actualCPU) 1169 | assert.Equal(t, expectedMemory, actualMemory) 1170 | assert.Equal(t, expectedDisk, actualDisk) 1171 | assert.Equal(t, expectedIOPS, actualIOPS) 1172 | } 1173 | 1174 | func TestReachCluster(t *testing.T) { 1175 | httpmock.Activate() 1176 | defer httpmock.DeactivateAndReset() 1177 | 1178 | // Job List 1179 | httpmock.RegisterResponder("GET", "http://clusterAddress/v1/jobs", 1180 | httpmock.NewStringResponder(200, ` 1181 | [ 1182 | { 1183 | "ID": "jobID1", 1184 | "Name": "jobName1", 1185 | "Datacenters": [ 1186 | "DC1" 1187 | ], 1188 | "Type": "service", 1189 | "JobSummary": { 1190 | "Namespace": "default" 1191 | } 1192 | }, 1193 | { 1194 | "ID": "jobID2", 1195 | "Name": "jobName2", 1196 | "Datacenters": [ 1197 | "DC2" 1198 | ], 1199 | "Type": "system", 1200 | "JobSummary": { 1201 | "Namespace": "default" 1202 | } 1203 | }, 1204 | { 1205 | "ID": "jobID3", 1206 | "Name": "jobName3", 1207 | "Datacenters": [ 1208 | "DC3" 1209 | ], 1210 | "Type": "other", 1211 | "JobSummary": { 1212 | "Namespace": "default" 1213 | } 1214 | } 1215 | ]`, 1216 | ), 1217 | ) 1218 | 1219 | // VictoriaMetrics 1220 | // jobName1 1221 | httpmock.RegisterResponder("GET", "http://metricsAddress/api/v1/query?query=sum(nomad_client_allocs_memory_rss_value%7Bjob%3D%22jobName1%22%7D)%20by%20(job)", 1222 | httpmock.NewStringResponder(200, ` 1223 | { 1224 | "status": "success", 1225 | "data": { 1226 | "resultType": "vector", 1227 | "result": [ 1228 | { 1229 | "metric": { 1230 | "job": "jobName" 1231 | }, 1232 | "value": [ 1233 | 1597365496, 1234 | "13459456" 1235 | ] 1236 | } 1237 | ] 1238 | } 1239 | }`, 1240 | ), 1241 | ) 1242 | httpmock.RegisterResponder("GET", 
"http://metricsAddress/api/v1/query?query=sum(nomad_client_allocs_memory_cache_value%7Bjob%3D%22jobName1%22%7D)%20by%20(job)", 1243 | httpmock.NewStringResponder(200, ` 1244 | { 1245 | "status": "success", 1246 | "data": { 1247 | "resultType": "vector", 1248 | "result": [ 1249 | { 1250 | "metric": { 1251 | "job": "jobName" 1252 | }, 1253 | "value": [ 1254 | 1597365496, 1255 | "33459456" 1256 | ] 1257 | } 1258 | ] 1259 | } 1260 | }`, 1261 | ), 1262 | ) 1263 | httpmock.RegisterResponder("GET", "http://metricsAddress/api/v1/query?query=sum(nomad_client_allocs_cpu_total_ticks_value%7Bjob%3D%22jobName1%22%7D)%20by%20(job)", 1264 | httpmock.NewStringResponder(200, ` 1265 | { 1266 | "status": "success", 1267 | "data": { 1268 | "resultType": "vector", 1269 | "result": [ 1270 | { 1271 | "metric": { 1272 | "job": "jobName" 1273 | }, 1274 | "value": [ 1275 | 1597365496, 1276 | "23459456.0" 1277 | ] 1278 | } 1279 | ] 1280 | } 1281 | }`, 1282 | ), 1283 | ) 1284 | // jobName2 1285 | httpmock.RegisterResponder("GET", "http://metricsAddress/api/v1/query?query=sum(nomad_client_allocs_memory_rss_value%7Bjob%3D%22jobName2%22%7D)%20by%20(job)", 1286 | httpmock.NewStringResponder(200, ` 1287 | { 1288 | "status": "success", 1289 | "data": { 1290 | "resultType": "vector", 1291 | "result": [ 1292 | { 1293 | "metric": { 1294 | "job": "jobName" 1295 | }, 1296 | "value": [ 1297 | 1597365496, 1298 | "23459456" 1299 | ] 1300 | } 1301 | ] 1302 | } 1303 | }`, 1304 | ), 1305 | ) 1306 | httpmock.RegisterResponder("GET", "http://metricsAddress/api/v1/query?query=sum(nomad_client_allocs_memory_cache_value%7Bjob%3D%22jobName2%22%7D)%20by%20(job)", 1307 | httpmock.NewStringResponder(200, ` 1308 | { 1309 | "status": "success", 1310 | "data": { 1311 | "resultType": "vector", 1312 | "result": [ 1313 | { 1314 | "metric": { 1315 | "job": "jobName" 1316 | }, 1317 | "value": [ 1318 | 1597365496, 1319 | "54459456" 1320 | ] 1321 | } 1322 | ] 1323 | } 1324 | }`, 1325 | ), 1326 | ) 1327 | 
httpmock.RegisterResponder("GET", "http://metricsAddress/api/v1/query?query=sum(nomad_client_allocs_cpu_total_ticks_value%7Bjob%3D%22jobName2%22%7D)%20by%20(job)", 1328 | httpmock.NewStringResponder(200, ` 1329 | { 1330 | "status": "success", 1331 | "data": { 1332 | "resultType": "vector", 1333 | "result": [ 1334 | { 1335 | "metric": { 1336 | "job": "jobName" 1337 | }, 1338 | "value": [ 1339 | 1597365496, 1340 | "63459456.0" 1341 | ] 1342 | } 1343 | ] 1344 | } 1345 | }`, 1346 | ), 1347 | ) 1348 | // jobName3 1349 | httpmock.RegisterResponder("GET", "http://metricsAddress/api/v1/query?query=sum(nomad_client_allocs_memory_rss_value%7Bjob%3D%22jobName3%22%7D)%20by%20(job)", 1350 | httpmock.NewStringResponder(200, ` 1351 | { 1352 | "status": "success", 1353 | "data": { 1354 | "resultType": "vector", 1355 | "result": [ 1356 | { 1357 | "metric": { 1358 | "job": "jobName" 1359 | }, 1360 | "value": [ 1361 | 1597365496, 1362 | "12459456" 1363 | ] 1364 | } 1365 | ] 1366 | } 1367 | }`, 1368 | ), 1369 | ) 1370 | httpmock.RegisterResponder("GET", "http://metricsAddress/api/v1/query?query=sum(nomad_client_allocs_memory_cache_value%7Bjob%3D%22jobName3%22%7D)%20by%20(job)", 1371 | httpmock.NewStringResponder(200, ` 1372 | { 1373 | "status": "success", 1374 | "data": { 1375 | "resultType": "vector", 1376 | "result": [ 1377 | { 1378 | "metric": { 1379 | "job": "jobName" 1380 | }, 1381 | "value": [ 1382 | 1597365496, 1383 | "56459456" 1384 | ] 1385 | } 1386 | ] 1387 | } 1388 | }`, 1389 | ), 1390 | ) 1391 | httpmock.RegisterResponder("GET", "http://metricsAddress/api/v1/query?query=sum(nomad_client_allocs_cpu_total_ticks_value%7Bjob%3D%22jobName3%22%7D)%20by%20(job)", 1392 | httpmock.NewStringResponder(200, ` 1393 | { 1394 | "status": "success", 1395 | "data": { 1396 | "resultType": "vector", 1397 | "result": [ 1398 | { 1399 | "metric": { 1400 | "job": "jobName" 1401 | }, 1402 | "value": [ 1403 | 1597365496, 1404 | "15459456.0" 1405 | ] 1406 | } 1407 | ] 1408 | } 1409 | }`, 1410 | ), 
1411 | ) 1412 | 1413 | // Requested Resources 1414 | httpmock.RegisterResponder("GET", "http://clusterAddress/v1/job/jobID1", 1415 | httpmock.NewStringResponder(200, ` 1416 | { 1417 | "ID": "jobID1", 1418 | "TaskGroups": [ 1419 | { 1420 | "Name": "TaskGroup1", 1421 | "Count": 3, 1422 | "Tasks": [ 1423 | { 1424 | "Resources": { 1425 | "CPU": 200, 1426 | "MemoryMB": 512, 1427 | "IOPS": 20 1428 | } 1429 | } 1430 | ], 1431 | "EphemeralDisk": { 1432 | "SizeMB": 1000 1433 | } 1434 | }, 1435 | { 1436 | "Name": "TaskGroup2", 1437 | "Count": 2, 1438 | "Tasks": [ 1439 | { 1440 | "Resources": { 1441 | "CPU": 400, 1442 | "MemoryMB": 256, 1443 | "IOPS": 40 1444 | } 1445 | } 1446 | ], 1447 | "EphemeralDisk": { 1448 | "SizeMB": 500 1449 | } 1450 | } 1451 | ] 1452 | }`, 1453 | ), 1454 | ) 1455 | httpmock.RegisterResponder("GET", "http://clusterAddress/v1/job/jobID2", 1456 | httpmock.NewStringResponder(200, ` 1457 | { 1458 | "ID": "jobID2", 1459 | "TaskGroups": [ 1460 | { 1461 | "Name": "TaskGroup1", 1462 | "Count": 1, 1463 | "Tasks": [ 1464 | { 1465 | "Resources": { 1466 | "CPU": 200, 1467 | "MemoryMB": 512, 1468 | "IOPS": 20 1469 | } 1470 | } 1471 | ], 1472 | "EphemeralDisk": { 1473 | "SizeMB": 1000 1474 | } 1475 | }, 1476 | { 1477 | "Name": "TaskGroup2", 1478 | "Count": 1, 1479 | "Tasks": [ 1480 | { 1481 | "Resources": { 1482 | "CPU": 400, 1483 | "MemoryMB": 256, 1484 | "IOPS": 40 1485 | } 1486 | } 1487 | ], 1488 | "EphemeralDisk": { 1489 | "SizeMB": 500 1490 | } 1491 | } 1492 | ] 1493 | }`, 1494 | ), 1495 | ) 1496 | httpmock.RegisterResponder("GET", "http://clusterAddress/v1/job/jobID3", 1497 | httpmock.NewStringResponder(200, ` 1498 | { 1499 | "ID": "jobID2", 1500 | "TaskGroups": [ 1501 | { 1502 | "Name": "TaskGroup1", 1503 | "Count": 1, 1504 | "Tasks": [ 1505 | { 1506 | "Resources": { 1507 | "CPU": 200, 1508 | "MemoryMB": 512, 1509 | "IOPS": 20 1510 | } 1511 | } 1512 | ], 1513 | "EphemeralDisk": { 1514 | "SizeMB": 1000 1515 | } 1516 | }, 1517 | { 1518 | "Name": "TaskGroup2", 
1519 | "Count": 1, 1520 | "Tasks": [ 1521 | { 1522 | "Resources": { 1523 | "CPU": 400, 1524 | "MemoryMB": 256, 1525 | "IOPS": 40 1526 | } 1527 | } 1528 | ], 1529 | "EphemeralDisk": { 1530 | "SizeMB": 500 1531 | } 1532 | } 1533 | ] 1534 | }`, 1535 | ), 1536 | ) 1537 | httpmock.RegisterResponder("GET", "http://clusterAddress/v1/job/jobID2/allocations", 1538 | httpmock.NewStringResponder(200, ` 1539 | [ 1540 | { 1541 | "TaskGroup": "TaskGroup1" 1542 | }, 1543 | { 1544 | "TaskGroup": "TaskGroup1" 1545 | }, 1546 | { 1547 | "TaskGroup": "TaskGroup2" 1548 | }, 1549 | { 1550 | "TaskGroup": "TaskGroup2" 1551 | }, 1552 | { 1553 | "TaskGroup": "TaskGroup2" 1554 | } 1555 | ]`, 1556 | ), 1557 | ) 1558 | 1559 | wg.Add(1) 1560 | c := make(chan []JobData, 1) 1561 | reachCluster("clusterAddress", "metricsAddress", c) 1562 | wg.Wait() 1563 | close(c) 1564 | 1565 | expectedJob1 := JobData{ 1566 | "jobID1", 1567 | "jobName1", 1568 | 23459456.0, 1569 | 1400.0, 1570 | 13459456 / 1.049e6, 1571 | 33459456 / 1.049e6, 1572 | 2048.0, 1573 | 4000.0, 1574 | 140.0, 1575 | "default", 1576 | "DC1", 1577 | "", 1578 | } 1579 | expectedJob2 := JobData{ 1580 | "jobID2", 1581 | "jobName2", 1582 | 63459456.0, 1583 | 1600.0, 1584 | 23459456 / 1.049e6, 1585 | 54459456 / 1.049e6, 1586 | 1792.0, 1587 | 3500.0, 1588 | 160.0, 1589 | "default", 1590 | "DC2", 1591 | "", 1592 | } 1593 | actualJobs := <-c 1594 | assert.Equal(t, expectedJob1.JobID, actualJobs[0].JobID) 1595 | assert.Equal(t, expectedJob1.Name, actualJobs[0].Name) 1596 | assert.Equal(t, expectedJob1.UTicks, actualJobs[0].UTicks) 1597 | assert.Equal(t, expectedJob1.RCPU, actualJobs[0].RCPU) 1598 | assert.Equal(t, expectedJob1.URSS, actualJobs[0].URSS) 1599 | assert.Equal(t, expectedJob1.UCache, actualJobs[0].UCache) 1600 | assert.Equal(t, expectedJob1.RMemoryMB, actualJobs[0].RMemoryMB) 1601 | assert.Equal(t, expectedJob1.RdiskMB, actualJobs[0].RdiskMB) 1602 | assert.Equal(t, expectedJob1.RIOPS, actualJobs[0].RIOPS) 1603 | assert.Equal(t, 
expectedJob1.Namespace, actualJobs[0].Namespace) 1604 | assert.Equal(t, expectedJob1.DataCenters, actualJobs[0].DataCenters) 1605 | 1606 | assert.Equal(t, expectedJob2.JobID, actualJobs[1].JobID) 1607 | assert.Equal(t, expectedJob2.Name, actualJobs[1].Name) 1608 | assert.Equal(t, expectedJob2.UTicks, actualJobs[1].UTicks) 1609 | assert.Equal(t, expectedJob2.RCPU, actualJobs[1].RCPU) 1610 | assert.Equal(t, expectedJob2.URSS, actualJobs[1].URSS) 1611 | assert.Equal(t, expectedJob2.UCache, actualJobs[1].UCache) 1612 | assert.Equal(t, expectedJob2.RMemoryMB, actualJobs[1].RMemoryMB) 1613 | assert.Equal(t, expectedJob2.RdiskMB, actualJobs[1].RdiskMB) 1614 | assert.Equal(t, expectedJob2.RIOPS, actualJobs[1].RIOPS) 1615 | assert.Equal(t, expectedJob2.Namespace, actualJobs[1].Namespace) 1616 | assert.Equal(t, expectedJob2.DataCenters, actualJobs[1].DataCenters) 1617 | } -------------------------------------------------------------------------------- /config.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Roblox Corporation 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 
// ConfigFile mirrors the on-disk JSON configuration: one VictoriaMetrics
// server and any number of Nomad servers.
type ConfigFile struct {
	VictoriaMetrics Server
	Nomad           []Server
}

// Server is a reachable endpoint, stored as separate URL and port strings.
type Server struct {
	URL  string
	Port string
}

var (
	nomadAddresses []string // "URL:Port" for every configured Nomad server
	metricsAddress string   // "URL:Port" for the VictoriaMetrics server
)

// loadConfig reads the JSON configuration file at path and (re)populates the
// package-level nomadAddresses and metricsAddress. Both values are reset up
// front so a failed reload never leaves stale addresses behind (the original
// reset only nomadAddresses, which could leave metricsAddress pointing at a
// previously loaded server after an error).
func loadConfig(path string) error {
	nomadAddresses = []string{}
	metricsAddress = ""

	data, err := ioutil.ReadFile(path)
	if err != nil {
		return err
	}

	var config ConfigFile
	if err := json.Unmarshal(data, &config); err != nil {
		return err
	}

	metricsAddress = config.VictoriaMetrics.URL + ":" + config.VictoriaMetrics.Port
	for _, server := range config.Nomad {
		nomadAddresses = append(nomadAddresses, server.URL+":"+server.Port)
	}
	return nil
}
16 | */ 17 | 18 | package main 19 | 20 | import ( 21 | "testing" 22 | 23 | "github.com/stretchr/testify/assert" 24 | ) 25 | 26 | func TestLoadConfig(t *testing.T) { 27 | err := loadConfig("NOPATH") 28 | assert.Error(t, err) 29 | assert.Empty(t, nomadAddresses) 30 | assert.Empty(t, metricsAddress) 31 | 32 | err = loadConfig("config_test.json") 33 | assert.Empty(t, err) 34 | assert.IsType(t, []string{}, nomadAddresses) 35 | assert.Equal(t, 2, len(nomadAddresses)) 36 | assert.IsType(t, "", nomadAddresses[0]) 37 | assert.Equal(t, "NomadURL0:NomadPort0", nomadAddresses[0]) 38 | assert.IsType(t, "", nomadAddresses[1]) 39 | assert.Equal(t, "NomadURL1:NomadPort1", nomadAddresses[1]) 40 | assert.IsType(t, "", metricsAddress) 41 | assert.Equal(t, "VMURL:VMPort", metricsAddress) 42 | } -------------------------------------------------------------------------------- /config_test.json: -------------------------------------------------------------------------------- 1 | { 2 | "VictoriaMetrics": { 3 | "URL": "VMURL", 4 | "Port": "VMPort" 5 | }, 6 | "Nomad": [ 7 | { 8 | "URL": "NomadURL0", 9 | "Port": "NomadPort0" 10 | }, 11 | { 12 | "URL": "NomadURL1", 13 | "Port": "NomadPort1" 14 | } 15 | ] 16 | 17 | } -------------------------------------------------------------------------------- /db.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Roblox Corporation 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 
// JobDataDB is one row of the resources table as read back from SQL Server.
type JobDataDB struct {
	JobID       string
	Name        string
	Ticks       float64 // used CPU ticks (uTicks)
	CPU         float64 // requested CPU (rCPU)
	RSS         float64 // used RSS in MB (uRSS)
	Cache       float64 // used page cache in MB (uCache)
	MemoryMB    float64 // requested memory (rMemoryMB)
	diskMB      float64 // requested ephemeral disk (rdiskMB)
	IOPS        float64 // requested IOPS (rIOPS)
	Namespace   string
	DataCenters string
	CurrentTime string // the row's "date" column
	InsertTime  string
}

// initDB opens the SQL Server instance named by the CONNECTION_STRING
// environment variable, creates the resources table if it does not yet
// exist, and returns the handle together with a prepared INSERT statement
// for appending samples. On any error the DB handle is closed so callers
// never receive a leaked connection alongside a non-nil error.
func initDB() (*sql.DB, *sql.Stmt, error) {
	db, err := sql.Open("mssql", os.Getenv("CONNECTION_STRING"))
	if err != nil {
		return nil, nil, fmt.Errorf("Error in opening DB: %v", err)
	}

	createTable, err := db.Prepare(`if not exists (select * from sysobjects where name='resources' and xtype='U')
		CREATE TABLE resources
		(id INTEGER IDENTITY(1,1) PRIMARY KEY,
		JobID VARCHAR(255),
		name VARCHAR(255),
		uTicks REAL,
		rCPU REAL,
		uRSS REAL,
		uCache REAL,
		rMemoryMB REAL,
		rdiskMB REAL,
		rIOPS REAL,
		namespace VARCHAR(255),
		dataCenters VARCHAR(255),
		date DATETIME,
		insertTime DATETIME);`)
	if err != nil {
		db.Close()
		return nil, nil, fmt.Errorf("Error in creating DB table: %v", err)
	}
	// The statement is only needed once; close it regardless of outcome.
	defer createTable.Close()

	// The original discarded this error; surface it so a missing table is
	// not silently ignored and later inserts fail mysteriously.
	if _, err := createTable.Exec(); err != nil {
		db.Close()
		return nil, nil, fmt.Errorf("Error in creating DB table: %v", err)
	}

	insert, err := db.Prepare(`INSERT INTO resources (JobID,
		name,
		uTicks,
		rCPU,
		uRSS,
		uCache,
		rMemoryMB,
		rdiskMB,
		rIOPS,
		namespace,
		dataCenters,
		date,
		insertTime) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`)
	if err != nil {
		db.Close()
		return nil, nil, fmt.Errorf("Error in preparing DB insert: %v", err)
	}

	return db, insert, nil
}
%v", err) 101 | } 102 | 103 | var JobID, name, namespace, dataCenters, currentTime, insertTime string 104 | var uTicks, rCPU, uRSS, uCache, rMemoryMB, rdiskMB, rIOPS float64 105 | var id int 106 | for rows.Next() { 107 | rows.Scan(&id, &JobID, &name, &uTicks, &rCPU, &uRSS, &uCache, &rMemoryMB, &rdiskMB, &rIOPS, &namespace, &dataCenters, ¤tTime, &insertTime) 108 | all = append(all, JobDataDB{ 109 | JobID, 110 | name, 111 | uTicks, 112 | rCPU, 113 | uRSS, 114 | uCache, 115 | rMemoryMB, 116 | rdiskMB, 117 | rIOPS, 118 | namespace, 119 | dataCenters, 120 | currentTime, 121 | insertTime, 122 | }, 123 | ) 124 | } 125 | 126 | return all, nil 127 | } 128 | 129 | func getLatestJobDB(db *sql.DB, jobID string) ([]JobDataDB, error) { 130 | if db == nil { 131 | return nil, fmt.Errorf("Parameter db *sql.DB is nil") 132 | } 133 | 134 | all := make([]JobDataDB, 0) 135 | 136 | jobID = "'" + jobID + "'" 137 | rows, err := db.Query(`SELECT JobID, name, SUM(uTicks), SUM(rCPU), SUM(uRSS), SUM(uCache), SUM(rMemoryMB), SUM(rdiskMB), namespace, dataCenters, insertTime 138 | FROM resources 139 | WHERE insertTime IN (SELECT MAX(insertTime) FROM resources) AND JobID = ` + jobID + ` 140 | GROUP BY JobID, name, namespace, dataCenters, insertTime`) 141 | if err != nil { 142 | return nil, fmt.Errorf("Error in querying DB: %v", err) 143 | } 144 | 145 | var JobID, name, namespace, dataCenters, currentTime, insertTime string 146 | var uTicks, rCPU, uRSS, uCache, rMemoryMB, rdiskMB, rIOPS float64 147 | 148 | for rows.Next() { 149 | rows.Scan(&JobID, &name, &uTicks, &rCPU, &uRSS, &uCache, &rMemoryMB, &rdiskMB, &namespace, &dataCenters, &insertTime) 150 | all = append(all, JobDataDB{ 151 | JobID, 152 | name, 153 | uTicks, 154 | rCPU, 155 | uRSS, 156 | uCache, 157 | rMemoryMB, 158 | rdiskMB, 159 | rIOPS, 160 | namespace, 161 | dataCenters, 162 | currentTime, 163 | insertTime}) 164 | } 165 | 166 | return all, nil 167 | } 168 | 169 | func getTimeSliceDB(db *sql.DB, jobID, begin, end string) ([]JobDataDB, 
error) { 170 | if db == nil { 171 | return nil, fmt.Errorf("Parameter db *sql.DB is nil") 172 | } 173 | 174 | all := make([]JobDataDB, 0) 175 | 176 | jobID = "'" + jobID + "'" 177 | begin = "'" + begin + "'" 178 | end = "'" + end + "'" 179 | rows, err := db.Query(`SELECT JobID, name, SUM(uTicks), SUM(rCPU), SUM(uRSS), SUM(uCache), SUM(rMemoryMB), SUM(rdiskMB), namespace, dataCenters, insertTime 180 | FROM resources 181 | WHERE JobID = ` + jobID + ` AND insertTime BETWEEN ` + begin + ` AND ` + end + ` 182 | GROUP BY JobID, name, namespace, dataCenters, insertTime 183 | ORDER BY insertTime DESC`) 184 | if err != nil { 185 | return nil, fmt.Errorf("Error in querying DB: %v", err) 186 | } 187 | 188 | var JobID, name, namespace, dataCenters, currentTime, insertTime string 189 | var uTicks, rCPU, uRSS, uCache, rMemoryMB, rdiskMB, rIOPS float64 190 | 191 | for rows.Next() { 192 | rows.Scan(&JobID, &name, &uTicks, &rCPU, &uRSS, &uCache, &rMemoryMB, &rdiskMB, &namespace, &dataCenters, &insertTime) 193 | all = append(all, 194 | JobDataDB{ 195 | JobID, 196 | name, 197 | uTicks, 198 | rCPU, 199 | uRSS, 200 | uCache, 201 | rMemoryMB, 202 | rdiskMB, 203 | rIOPS, 204 | namespace, 205 | dataCenters, 206 | currentTime, 207 | insertTime, 208 | }, 209 | ) 210 | } 211 | 212 | return all, nil 213 | } 214 | -------------------------------------------------------------------------------- /db_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Roblox Corporation 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | */ 17 | 18 | package main 19 | 20 | import ( 21 | "database/sql" 22 | "os" 23 | "testing" 24 | "time" 25 | 26 | "github.com/DATA-DOG/go-sqlmock" 27 | "github.com/stretchr/testify/assert" 28 | ) 29 | 30 | func populateDB(t *testing.T, insert *sql.Stmt) { 31 | layout := "2006-01-02 15:04:05" 32 | str := "2000-01-01 00:00:00" 33 | time1, err := time.Parse(layout, str) 34 | if err != nil { 35 | t.Fatal(err) 36 | } 37 | _, err = insert.Exec("JobID1", "JobName1", 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, "Namespace1", "DC1", time1, time1) 38 | if err != nil { 39 | t.Fatal(err) 40 | } 41 | 42 | str = "2000-01-02 00:00:00" 43 | time2, err := time.Parse(layout, str) 44 | if err != nil { 45 | t.Fatal(err) 46 | } 47 | _, err = insert.Exec("JobID1", "JobName1", 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, "Namespace1", "DC1", time2, time2) 48 | if err != nil { 49 | t.Fatal(err) 50 | } 51 | _, err = insert.Exec("JobID1", "JobName1", 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, "Namespace1", "DC1", time2, time2) 52 | if err != nil { 53 | t.Fatal(err) 54 | } 55 | _, err = insert.Exec("JobID2", "JobName2", 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, "Namespace2", "DC2", time2, time2) 56 | if err != nil { 57 | t.Fatal(err) 58 | } 59 | } 60 | 61 | func TestInitDBMock(t *testing.T) { 62 | var DBPtr *sql.DB 63 | var StmtPtr *sql.Stmt 64 | 65 | os.Setenv("CONNECTION_STRING", "VALUE") 66 | 67 | db, insert, err := initDB() 68 | assert.NotNil(t, err) 69 | assert.Empty(t, db) 70 | assert.IsType(t, DBPtr, db) 71 | assert.Empty(t, insert) 72 | assert.IsType(t, StmtPtr, insert) 73 | assert.Empty(t, insert) 74 | } 75 | 76 | func TestInitDBLive(t *testing.T) { 77 | var db, DBPtr *sql.DB 78 | var insert, StmtPtr *sql.Stmt 79 | var err error 80 | 81 | os.Setenv("CONNECTION_STRING", "Server=localhost;Database=master;User Id=sa;Password=yourStrong(!)Password;") 82 | 83 | // Retry initializing DB 5 times before failing 84 
| retryLoad := 5 85 | for i := 0; i < retryLoad; i++ { 86 | db, insert, err = initDB() 87 | if err == nil { 88 | break 89 | } 90 | time.Sleep(5 * time.Second) 91 | } 92 | assert.Empty(t, err) 93 | assert.NotNil(t, db) 94 | assert.IsType(t, DBPtr, db) 95 | assert.NotNil(t, insert) 96 | assert.IsType(t, StmtPtr, insert) 97 | assert.NotNil(t, insert) 98 | } 99 | 100 | func TestGetAllRowsDBMock(t *testing.T) { 101 | db, mock, err := sqlmock.New() 102 | assert.Empty(t, err) 103 | defer db.Close() 104 | 105 | all, err := getAllRowsDB(nil) 106 | assert.NotNil(t, err) 107 | assert.Empty(t, all) 108 | 109 | all, err = getAllRowsDB(db) 110 | assert.NotNil(t, err) 111 | assert.Empty(t, all) 112 | 113 | // Test on an empty DB 114 | query := `SELECT \* FROM resources` 115 | rows := sqlmock.NewRows([]string{"id", "JobID", "name", "uTicks", "rCPU", "uRSS", "uCache", "rMemoryMB", "rdiskMB", "rIOPS", "namespace", "dataCenters", "date", "insertTime"}) 116 | mock.ExpectQuery(query).WillReturnRows(rows) 117 | all, err = getAllRowsDB(db) 118 | assert.Empty(t, err) 119 | assert.Empty(t, all) 120 | 121 | // Test after inserting rows into DB 122 | rows = sqlmock.NewRows([]string{"id", "JobID", "name", "uTicks", "rCPU", "uRSS", "uCache", "rMemoryMB", "rdiskMB", "rIOPS", "namespace", "dataCenters", "date", "insertTime"}). 123 | AddRow(1, "JobID1", "name1", 111.1, 111.1, 111.1, 111.1, 111.1, 111.1, 111.1, "namespace1", "dataCenter1", "0000-00-01", "0000-00-01"). 
124 | AddRow(2, "JobID2", "name2", 222.2, 222.2, 222.2, 222.2, 222.2, 222.2, 222.2, "namespace2", "dataCenter2", "0000-00-02", "0000-00-02") 125 | mock.ExpectQuery(query).WillReturnRows(rows) 126 | all, err = getAllRowsDB(db) 127 | assert.Empty(t, err) 128 | assert.NotEmpty(t, all) 129 | 130 | expected := []JobDataDB{ 131 | { 132 | "JobID1", 133 | "name1", 134 | 111.1, 135 | 111.1, 136 | 111.1, 137 | 111.1, 138 | 111.1, 139 | 111.1, 140 | 111.1, 141 | "namespace1", 142 | "dataCenter1", 143 | "0000-00-01", 144 | "0000-00-01", 145 | }, 146 | { 147 | "JobID2", 148 | "name2", 149 | 222.2, 150 | 222.2, 151 | 222.2, 152 | 222.2, 153 | 222.2, 154 | 222.2, 155 | 222.2, 156 | "namespace2", 157 | "dataCenter2", 158 | "0000-00-02", 159 | "0000-00-02", 160 | }, 161 | } 162 | assert.Equal(t, expected, all) 163 | } 164 | 165 | func TestGetAllRowsDBLive(t *testing.T) { 166 | var db *sql.DB 167 | var insert *sql.Stmt 168 | var err error 169 | 170 | os.Setenv("CONNECTION_STRING", "Server=localhost;Database=master;User Id=sa;Password=yourStrong(!)Password;") 171 | db, insert, err = initDB() 172 | 173 | populateDB(t, insert) 174 | 175 | all, err := getAllRowsDB(db) 176 | assert.Nil(t, err) 177 | assert.NotNil(t, all) 178 | 179 | expected := []JobDataDB{ 180 | { 181 | "JobID1", 182 | "JobName1", 183 | 1.0, 184 | 1.0, 185 | 1.0, 186 | 1.0, 187 | 1.0, 188 | 1.0, 189 | 1.0, 190 | "Namespace1", 191 | "DC1", 192 | "2000-01-01T00:00:00Z", 193 | "2000-01-01T00:00:00Z", 194 | }, 195 | { 196 | "JobID1", 197 | "JobName1", 198 | 3.0, 199 | 3.0, 200 | 3.0, 201 | 3.0, 202 | 3.0, 203 | 3.0, 204 | 3.0, 205 | "Namespace1", 206 | "DC1", 207 | "2000-01-02T00:00:00Z", 208 | "2000-01-02T00:00:00Z", 209 | }, 210 | { 211 | "JobID1", 212 | "JobName1", 213 | 2.0, 214 | 2.0, 215 | 2.0, 216 | 2.0, 217 | 2.0, 218 | 2.0, 219 | 2.0, 220 | "Namespace1", 221 | "DC1", 222 | "2000-01-02T00:00:00Z", 223 | "2000-01-02T00:00:00Z", 224 | }, 225 | { 226 | "JobID2", 227 | "JobName2", 228 | 2.0, 229 | 2.0, 230 | 2.0, 231 | 
2.0, 232 | 2.0, 233 | 2.0, 234 | 2.0, 235 | "Namespace2", 236 | "DC2", 237 | "2000-01-02T00:00:00Z", 238 | "2000-01-02T00:00:00Z", 239 | }, 240 | } 241 | assert.Equal(t, expected, all) 242 | } 243 | 244 | func TestGetLatestJobDBMock(t *testing.T) { 245 | db, mock, err := sqlmock.New() 246 | assert.Empty(t, err) 247 | defer db.Close() 248 | 249 | all, err := getLatestJobDB(nil, "") 250 | assert.NotNil(t, err) 251 | assert.Empty(t, all) 252 | 253 | all, err = getLatestJobDB(db, "") 254 | assert.NotNil(t, err) 255 | assert.Empty(t, all) 256 | 257 | // Test on an empty DB 258 | query := ` 259 | SELECT 260 | JobID, 261 | name, 262 | SUM\(uTicks\), 263 | SUM\(rCPU\), 264 | SUM\(uRSS\), 265 | SUM\(uCache\), 266 | SUM\(rMemoryMB\), 267 | SUM\(rdiskMB\), 268 | namespace, 269 | dataCenters, 270 | insertTime 271 | FROM 272 | resources 273 | WHERE 274 | insertTime IN \(SELECT MAX\(insertTime\) FROM resources\) 275 | AND JobID \= 'JobID1' 276 | GROUP BY 277 | JobID, 278 | name, 279 | namespace, 280 | dataCenters, 281 | insertTime` 282 | rows := sqlmock.NewRows([]string{"JobID", "name", "uTicks", "rCPU", "uRSS", "uCache", "rMemoryMB", "rdiskMB", "namespace", "dataCenters", "insertTime"}) 283 | mock.ExpectQuery(query).WillReturnRows(rows) 284 | all, err = getLatestJobDB(db, "JobID1") 285 | assert.Empty(t, err) 286 | assert.Empty(t, all) 287 | 288 | // Test after inserting rows into DB 289 | rows = sqlmock.NewRows([]string{"JobID", "name", "uTicks", "rCPU", "uRSS", "uCache", "rMemoryMB", "rdiskMB", "namespace", "dataCenters", "insertTime"}). 
290 | AddRow("JobID1", "name1", 111.1, 111.1, 111.1, 111.1, 111.1, 111.1, "namespace1", "dataCenter1", "0001-01-04T00:00:00Z") 291 | mock.ExpectQuery(query).WillReturnRows(rows) 292 | all, err = getLatestJobDB(db, "JobID1") 293 | assert.Empty(t, err) 294 | assert.NotEmpty(t, all) 295 | 296 | expected := []JobDataDB{ 297 | { 298 | "JobID1", 299 | "name1", 300 | 111.1, 301 | 111.1, 302 | 111.1, 303 | 111.1, 304 | 111.1, 305 | 111.1, 306 | 0, 307 | "namespace1", 308 | "dataCenter1", 309 | "", 310 | "0001-01-04T00:00:00Z", 311 | }, 312 | } 313 | assert.Equal(t, expected, all) 314 | } 315 | 316 | func TestGetLatestJobDBLive(t *testing.T) { 317 | var db *sql.DB 318 | var err error 319 | 320 | os.Setenv("CONNECTION_STRING", "Server=localhost;Database=master;User Id=sa;Password=yourStrong(!)Password;") 321 | db, _, err = initDB() 322 | 323 | all, err := getLatestJobDB(db, "JobID1") 324 | assert.Nil(t, err) 325 | assert.NotNil(t, all) 326 | expected := []JobDataDB{ 327 | { 328 | "JobID1", 329 | "JobName1", 330 | 5.0, 331 | 5.0, 332 | 5.0, 333 | 5.0, 334 | 5.0, 335 | 5.0, 336 | 0.0, 337 | "Namespace1", 338 | "DC1", 339 | "", 340 | "2000-01-02T00:00:00Z", 341 | }, 342 | } 343 | assert.Equal(t, expected, all) 344 | } 345 | 346 | func TestGetTimeSliceDBMock(t *testing.T) { 347 | db, mock, err := sqlmock.New() 348 | assert.Empty(t, err) 349 | defer db.Close() 350 | 351 | all, err := getTimeSliceDB(nil, "", "2020-07-07 17:34:53", "2020-07-18 17:42:19") 352 | assert.NotNil(t, err) 353 | assert.Empty(t, all) 354 | 355 | all, err = getTimeSliceDB(db, "", "2020-07-07 17:34:53", "2020-07-18 17:42:19") 356 | assert.NotNil(t, err) 357 | assert.Empty(t, all) 358 | 359 | // Test on an empty DB 360 | rows := sqlmock.NewRows([]string{"JobID", "name", "uTicks", "rCPU", "uRSS", "uCache", "rMemoryMB", "rdiskMB", "namespace", "dataCenters", "insertTime"}) 361 | query := ` 362 | SELECT 363 | JobID, 364 | name, 365 | SUM\(uTicks\), 366 | SUM\(rCPU\), 367 | SUM\(uRSS\), 368 | SUM\(uCache\), 369 | 
SUM\(rMemoryMB\), 370 | SUM\(rdiskMB\), 371 | namespace, 372 | dataCenters, 373 | insertTime 374 | FROM 375 | resources 376 | WHERE 377 | JobID \= 'JobID1' 378 | AND insertTime BETWEEN '2020\-07\-07 17\:34\:53' AND '2020\-07\-18 17\:42\:19' 379 | GROUP BY 380 | JobID, 381 | name, 382 | namespace, 383 | dataCenters, 384 | insertTime 385 | ORDER BY 386 | insertTime DESC` 387 | mock.ExpectQuery(query).WillReturnRows(rows) 388 | all, err = getTimeSliceDB(db, "JobID1", "2020-07-07 17:34:53", "2020-07-18 17:42:19") 389 | assert.Empty(t, err) 390 | assert.Empty(t, all) 391 | 392 | // Test after inserting rows into DB 393 | rows = sqlmock.NewRows([]string{"JobID", "name", "uTicks", "rCPU", "uRSS", "uCache", "rMemoryMB", "rdiskMB", "namespace", "dataCenters", "insertTime"}). 394 | AddRow("JobID1", "name1", 111.1, 111.1, 111.1, 111.1, 111.1, 111.1, "namespace1", "dataCenter1", "2020-07-07T17:35:00Z") 395 | mock.ExpectQuery(query).WillReturnRows(rows) 396 | all, err = getTimeSliceDB(db, "JobID1", "2020-07-07 17:34:53", "2020-07-18 17:42:19") 397 | assert.Empty(t, err) 398 | assert.NotEmpty(t, all) 399 | 400 | expected := []JobDataDB{ 401 | { 402 | "JobID1", 403 | "name1", 404 | 111.1, 405 | 111.1, 406 | 111.1, 407 | 111.1, 408 | 111.1, 409 | 111.1, 410 | 0, 411 | "namespace1", 412 | "dataCenter1", 413 | "", 414 | "2020-07-07T17:35:00Z", 415 | }, 416 | } 417 | assert.Equal(t, expected, all) 418 | } 419 | 420 | func TestGetTimeSliceDBLive(t *testing.T) { 421 | var db *sql.DB 422 | var err error 423 | 424 | os.Setenv("CONNECTION_STRING", "Server=localhost;Database=master;User Id=sa;Password=yourStrong(!)Password;") 425 | db, _, err = initDB() 426 | 427 | all, err := getTimeSliceDB(db, "JobID1", "2000-01-01 00:00:01", "2000-01-02 00:00:01") 428 | assert.Nil(t, err) 429 | assert.NotNil(t, all) 430 | 431 | expected := []JobDataDB{ 432 | { 433 | "JobID1", 434 | "JobName1", 435 | 5.0, 436 | 5.0, 437 | 5.0, 438 | 5.0, 439 | 5.0, 440 | 5.0, 441 | 0, 442 | "Namespace1", 443 | "DC1", 444 
| "", 445 | "2000-01-02T00:00:00Z", 446 | }, 447 | } 448 | assert.Equal(t, expected, all) 449 | 450 | all, err = getTimeSliceDB(db, "JobID1", "2000-01-01 00:00:00", "2000-01-01 12:00:01") 451 | assert.Nil(t, err) 452 | assert.NotNil(t, all) 453 | 454 | expected = []JobDataDB{ 455 | { 456 | "JobID1", 457 | "JobName1", 458 | 1.0, 459 | 1.0, 460 | 1.0, 461 | 1.0, 462 | 1.0, 463 | 1.0, 464 | 0, 465 | "Namespace1", 466 | "DC1", 467 | "", 468 | "2000-01-01T00:00:00Z", 469 | }, 470 | } 471 | assert.NotNil(t, all) 472 | assert.Equal(t, expected, all) 473 | 474 | all, err = getTimeSliceDB(db, "JobID1", "2000-04-04 00:00:00", "2000-05-05 12:00:01") 475 | assert.Nil(t, err) 476 | assert.NotNil(t, all) 477 | 478 | expected = []JobDataDB{} 479 | assert.NotNil(t, all) 480 | assert.Equal(t, expected, all) 481 | } -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "2.0" 2 | 3 | services: 4 | nurd: 5 | build: . 
6 | ports: 7 | - 8080:8080 8 | environment: 9 | CONNECTION_STRING: Server=mssql;Database=master;User Id=sa;Password=yourStrong(!)Password; 10 | container_name: nurd 11 | mssql: 12 | image: microsoft/mssql-server-linux 13 | ports: 14 | - 1433:1433 15 | environment: 16 | ACCEPT_EULA: Y 17 | SA_PASSWORD: yourStrong(!)Password 18 | container_name: nurd_mssql 19 | grafana: 20 | image: grafana/grafana:latest 21 | ports: 22 | - 3000:3000 23 | container_name: nurd_grafana 24 | -------------------------------------------------------------------------------- /etc/nurd/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "VictoriaMetrics": { 3 | "URL": "VMURL", 4 | "Port": "VMPort" 5 | }, 6 | "Nomad": [ 7 | { 8 | "URL": "NomadURL0", 9 | "Port": "NomadPort0" 10 | } 11 | ] 12 | } -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/Roblox/nurd 2 | 3 | go 1.14 4 | 5 | require ( 6 | github.com/DATA-DOG/go-sqlmock v1.4.1 7 | github.com/denisenkom/go-mssqldb v0.0.0-20200620013148-b91950f658ec 8 | github.com/gorilla/mux v1.7.4 9 | github.com/jarcoal/httpmock v1.0.5 10 | github.com/sirupsen/logrus v1.6.0 11 | github.com/stretchr/testify v1.6.1 12 | ) 13 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/DATA-DOG/go-sqlmock v1.4.1 h1:ThlnYciV1iM/V0OSF/dtkqWb6xo5qITT1TJBG1MRDJM= 2 | github.com/DATA-DOG/go-sqlmock v1.4.1/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM= 3 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 4 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 5 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 6 | 
github.com/denisenkom/go-mssqldb v0.0.0-20200620013148-b91950f658ec h1:NfhRXXFDPxcF5Cwo06DzeIaE7uuJtAUhsDwH3LNsjos= 7 | github.com/denisenkom/go-mssqldb v0.0.0-20200620013148-b91950f658ec/go.mod h1:xbL0rPBG9cCiLr28tMa8zpbdarY27NDyej4t/EjAShU= 8 | github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe h1:lXe2qZdvpiX5WZkZR4hgp4KJVfY3nMkvmwbVkpv1rVY= 9 | github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe/go.mod h1:8vg3r2VgvsThLBIFL93Qb5yWzgyZWhEmBwUJWevAkK0= 10 | github.com/gorilla/mux v1.7.4 h1:VuZ8uybHlWmqV03+zRzdwKL4tUnIp1MAQtp1mIFE1bc= 11 | github.com/gorilla/mux v1.7.4/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= 12 | github.com/jarcoal/httpmock v1.0.5 h1:cHtVEcTxRSX4J0je7mWPfc9BpDpqzXSJ5HbymZmyHck= 13 | github.com/jarcoal/httpmock v1.0.5/go.mod h1:ATjnClrvW/3tijVmpL/va5Z3aAyGvqU3gCT8nX0Txik= 14 | github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= 15 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 16 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 17 | github.com/sirupsen/logrus v1.6.0 h1:UBcNElsrwanuuMsnGSlYmtmgbb23qDR5dG+6X6Oo89I= 18 | github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= 19 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 20 | github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w= 21 | github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= 22 | github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= 23 | github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 24 | golang.org/x/crypto v0.0.0-20190325154230-a5d413f7728c h1:Vj5n4GlwjmQteupaxJ9+0FNOmBrHfq7vN4btdGoDZgI= 25 | golang.org/x/crypto v0.0.0-20190325154230-a5d413f7728c/go.mod 
h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 26 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 27 | golang.org/x/sys v0.0.0-20190422165155-953cdadca894 h1:Cz4ceDQGXuKRnVBDTS23GTn/pU5OE2C0WrNTOYK1Uuc= 28 | golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 29 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 30 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= 31 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 32 | -------------------------------------------------------------------------------- /grafana.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | "datasource": "-- Grafana --", 7 | "enable": true, 8 | "hide": true, 9 | "iconColor": "rgba(0, 211, 255, 1)", 10 | "name": "Annotations & Alerts", 11 | "type": "dashboard" 12 | } 13 | ] 14 | }, 15 | "editable": true, 16 | "gnetId": null, 17 | "graphTooltip": 0, 18 | "id": 1, 19 | "iteration": 1597698292924, 20 | "links": [], 21 | "panels": [ 22 | { 23 | "aliasColors": {}, 24 | "bars": false, 25 | "dashLength": 10, 26 | "dashes": false, 27 | "datasource": "Microsoft SQL Server", 28 | "description": "", 29 | "fieldConfig": { 30 | "defaults": { 31 | "custom": {}, 32 | "links": [] 33 | }, 34 | "overrides": [] 35 | }, 36 | "fill": 1, 37 | "fillGradient": 0, 38 | "gridPos": { 39 | "h": 21, 40 | "w": 24, 41 | "x": 0, 42 | "y": 0 43 | }, 44 | "hiddenSeries": false, 45 | "id": 14, 46 | "legend": { 47 | "avg": false, 48 | "current": false, 49 | "max": false, 50 | "min": false, 51 | "show": true, 52 | "total": false, 53 | "values": false 54 | }, 55 | "lines": true, 56 | "linewidth": 1, 57 | "nullPointMode": "null", 58 | "percentage": 
false, 59 | "pluginVersion": "7.1.3", 60 | "pointradius": 2, 61 | "points": false, 62 | "renderer": "flot", 63 | "seriesOverrides": [], 64 | "spaceLength": 10, 65 | "stack": false, 66 | "steppedLine": false, 67 | "targets": [ 68 | { 69 | "alias": "", 70 | "format": "time_series", 71 | "rawSql": "SELECT \n ${Total:raw} ${Metric:csv}, insertTime AS time \nFROM \n resources\nWHERE \n JobID IN (${JobID})\nGROUP BY\n ${Total:raw} insertTime\nORDER BY insertTime;", 72 | "refId": "A" 73 | } 74 | ], 75 | "thresholds": [], 76 | "timeFrom": null, 77 | "timeRegions": [], 78 | "timeShift": null, 79 | "title": "${Metric}: ${JobID}", 80 | "tooltip": { 81 | "shared": true, 82 | "sort": 0, 83 | "value_type": "individual" 84 | }, 85 | "type": "graph", 86 | "xaxis": { 87 | "buckets": null, 88 | "mode": "time", 89 | "name": null, 90 | "show": true, 91 | "values": [] 92 | }, 93 | "yaxes": [ 94 | { 95 | "format": "short", 96 | "label": null, 97 | "logBase": 1, 98 | "max": null, 99 | "min": null, 100 | "show": true 101 | }, 102 | { 103 | "format": "short", 104 | "label": null, 105 | "logBase": 1, 106 | "max": null, 107 | "min": null, 108 | "show": true 109 | } 110 | ], 111 | "yaxis": { 112 | "align": false, 113 | "alignLevel": null 114 | } 115 | } 116 | ], 117 | "refresh": false, 118 | "schemaVersion": 26, 119 | "style": "dark", 120 | "tags": [], 121 | "templating": { 122 | "list": [ 123 | { 124 | "allValue": null, 125 | "current": { 126 | "selected": true, 127 | "text": "All", 128 | "value": [ 129 | "$__all" 130 | ] 131 | }, 132 | "datasource": "Microsoft SQL Server", 133 | "definition": "IF (SELECT COUNT(DISTINCT(JobID)) from resources) = 0\nBEGIN\nSELECT 'empty'\nEND\nELSE\nBEGIN\nSELECT DISTINCT JobID FROM resources\nEND;", 134 | "hide": 0, 135 | "includeAll": true, 136 | "label": null, 137 | "multi": true, 138 | "name": "JobID", 139 | "options": [], 140 | "query": "IF (SELECT COUNT(DISTINCT(JobID)) from resources) = 0\nBEGIN\nSELECT 'empty'\nEND\nELSE\nBEGIN\nSELECT DISTINCT JobID 
FROM resources\nEND;", 141 | "refresh": 2, 142 | "regex": "", 143 | "skipUrlSync": false, 144 | "sort": 1, 145 | "tagValuesQuery": "", 146 | "tags": [], 147 | "tagsQuery": "", 148 | "type": "query", 149 | "useTags": false 150 | }, 151 | { 152 | "allValue": null, 153 | "current": { 154 | "selected": true, 155 | "text": "SUM(uTicks) AS UsedCPU", 156 | "value": [ 157 | "SUM(uTicks) AS UsedCPU" 158 | ] 159 | }, 160 | "hide": 0, 161 | "includeAll": false, 162 | "label": null, 163 | "multi": true, 164 | "name": "Metric", 165 | "options": [ 166 | { 167 | "selected": false, 168 | "text": "SUM(uRSS) AS UsedMemory", 169 | "value": "SUM(uRSS) AS UsedMemory" 170 | }, 171 | { 172 | "selected": true, 173 | "text": "SUM(uTicks) AS UsedCPU", 174 | "value": "SUM(uTicks) AS UsedCPU" 175 | }, 176 | { 177 | "selected": false, 178 | "text": "SUM(rCPU) AS RequestedCPU", 179 | "value": "SUM(rCPU) AS RequestedCPU" 180 | }, 181 | { 182 | "selected": false, 183 | "text": "SUM(rMemoryMB) AS RequestedMemory", 184 | "value": "SUM(rMemoryMB) AS RequestedMemory" 185 | } 186 | ], 187 | "query": "SUM(uRSS) AS UsedMemory, SUM(uTicks) AS UsedCPU, SUM(rCPU) AS RequestedCPU, SUM(rMemoryMB) AS RequestedMemory", 188 | "queryValue": "", 189 | "skipUrlSync": false, 190 | "type": "custom" 191 | }, 192 | { 193 | "allValue": null, 194 | "current": { 195 | "selected": true, 196 | "text": "JobID,", 197 | "value": [ 198 | "JobID," 199 | ] 200 | }, 201 | "hide": 0, 202 | "includeAll": false, 203 | "label": null, 204 | "multi": true, 205 | "name": "Total", 206 | "options": [ 207 | { 208 | "selected": true, 209 | "text": "JobID,", 210 | "value": "JobID," 211 | } 212 | ], 213 | "query": "JobID\\,", 214 | "queryValue": "", 215 | "skipUrlSync": false, 216 | "type": "custom" 217 | } 218 | ] 219 | }, 220 | "time": { 221 | "from": "now-1h", 222 | "to": "now" 223 | }, 224 | "timepicker": { 225 | "refresh_intervals": [ 226 | "10s", 227 | "30s", 228 | "1m", 229 | "5m", 230 | "15m", 231 | "30m", 232 | "1h", 233 | "2h", 234 
| "1d" 235 | ] 236 | }, 237 | "timezone": "", 238 | "title": "NURD", 239 | "uid": "apRQ8jGGk9", 240 | "version": 3 241 | } -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Roblox Corporation 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | */ 17 | 18 | package main 19 | 20 | import ( 21 | "database/sql" 22 | "encoding/json" 23 | "flag" 24 | "fmt" 25 | "github.com/gorilla/mux" 26 | "net/http" 27 | "os" 28 | "os/signal" 29 | "sync" 30 | "syscall" 31 | "time" 32 | 33 | log "github.com/sirupsen/logrus" 34 | ) 35 | 36 | type APIError struct { 37 | Error string 38 | } 39 | 40 | var ( 41 | wg sync.WaitGroup 42 | db *sql.DB 43 | insert *sql.Stmt 44 | ) 45 | 46 | func handleAPIError(w http.ResponseWriter, err string, status int) { 47 | log.Error(err) 48 | w.WriteHeader(status) 49 | 50 | errJSON := APIError{ 51 | Error: err, 52 | } 53 | if json.NewEncoder(w).Encode(errJSON) != nil { 54 | w.WriteHeader(http.StatusInternalServerError) 55 | return 56 | } 57 | } 58 | 59 | func homePage(w http.ResponseWriter, r *http.Request) { 60 | log.SetLevel(log.TraceLevel) 61 | log.SetReportCaller(true) 62 | log.Trace(r) 63 | 64 | w.WriteHeader(http.StatusOK) 65 | fmt.Fprintf(w, "Welcome to NURD.") 66 | } 67 | 68 | func returnAll(w http.ResponseWriter, r *http.Request) { 69 | log.SetLevel(log.TraceLevel) 70 | 
log.SetReportCaller(true) 71 | log.Trace(r) 72 | 73 | all, err := getAllRowsDB(db) 74 | if err != nil { 75 | handleAPIError(w, fmt.Sprintf("Error in getting all rows from DB: %v", err), http.StatusInternalServerError) 76 | return 77 | } 78 | err = json.NewEncoder(w).Encode(all) 79 | if err != nil { 80 | handleAPIError(w, fmt.Sprintf("Error in encoding JSON: %v", err), http.StatusInternalServerError) 81 | return 82 | } 83 | } 84 | 85 | func returnJob(w http.ResponseWriter, r *http.Request) { 86 | log.SetLevel(log.TraceLevel) 87 | log.SetReportCaller(true) 88 | log.Trace(r) 89 | 90 | jobID := mux.Vars(r)["id"] 91 | begin, okBegin := r.URL.Query()["begin"] 92 | end, okEnd := r.URL.Query()["end"] 93 | 94 | if !okBegin && !okEnd { 95 | all, err := getLatestJobDB(db, jobID) 96 | if err != nil { 97 | handleAPIError(w, fmt.Sprintf("Error in getting latest job from DB: %v", err), http.StatusInternalServerError) 98 | return 99 | } 100 | err = json.NewEncoder(w).Encode(all) 101 | if err != nil { 102 | handleAPIError(w, fmt.Sprintf("Error in encoding JSON: %v", err), http.StatusInternalServerError) 103 | return 104 | } 105 | } else if !okBegin && okEnd { 106 | handleAPIError(w, "Missing query param: 'begin'", http.StatusBadRequest) 107 | } else if okBegin && !okEnd { 108 | handleAPIError(w, "Missing query param: 'end'", http.StatusBadRequest) 109 | } else { 110 | all, err := getTimeSliceDB(db, jobID, begin[0], end[0]) 111 | if err != nil { 112 | handleAPIError(w, fmt.Sprintf("Error in getting latest job from DB: %v", err), http.StatusInternalServerError) 113 | return 114 | } 115 | err = json.NewEncoder(w).Encode(all) 116 | if err != nil { 117 | handleAPIError(w, fmt.Sprintf("Error in encoding JSON: %v", err), http.StatusInternalServerError) 118 | return; 119 | } 120 | } 121 | } 122 | 123 | func healthCheck(w http.ResponseWriter, r *http.Request) { 124 | log.SetLevel(log.TraceLevel) 125 | log.SetReportCaller(true) 126 | log.Trace(r) 127 | w.WriteHeader(http.StatusOK) 128 | } 
129 | 130 | func collectData(freq *string) { 131 | log.SetReportCaller(true) 132 | log.SetLevel(log.TraceLevel) 133 | 134 | duration, err := time.ParseDuration(*freq) 135 | if err != nil { 136 | log.Fatal(fmt.Sprintf("Failed to parse duration: %v", err)) 137 | } 138 | if duration > 30 * time.Minute || duration <= 0 * time.Minute { 139 | log.Warning("--aggregate-frequency should be within (0m, 30m]. Defaulting to 15m.") 140 | duration = 15 * time.Minute 141 | } 142 | 143 | err = loadConfig("/etc/nurd/config.json") 144 | if err != nil { 145 | log.Fatal(fmt.Sprintf("Error in loading /etc/nurd/config.json: %v", err)) 146 | } 147 | 148 | // Retry initializing DB 5 times before exiting 149 | retryLoad := 5 150 | for i := 0; i < retryLoad; i++ { 151 | db, insert, err = initDB() 152 | if err != nil { 153 | log.Warning(fmt.Sprintf("DB initialization failed, retrying: %v", err)) 154 | } else { 155 | log.Info("DB initialized successfully, break ...") 156 | break 157 | } 158 | 159 | if i == retryLoad-1 { 160 | log.Fatal(fmt.Sprintf("Error in initializing DB: %v", err)) 161 | } 162 | 163 | time.Sleep(5 * time.Second) 164 | } 165 | 166 | for { 167 | log.Trace("BEGIN AGGREGATION") 168 | c := make(chan []JobData, len(nomadAddresses)) 169 | 170 | for _, address := range nomadAddresses { 171 | wg.Add(1) 172 | go reachCluster(address, metricsAddress, c) 173 | } 174 | 175 | wg.Wait() 176 | close(c) 177 | 178 | insertTime := time.Now().Truncate(time.Minute).Format("2006-01-02 15:04:05") 179 | for jobDataSlice := range c { 180 | for _, v := range jobDataSlice { 181 | insert.Exec(v.JobID, 182 | v.Name, 183 | v.UTicks, 184 | v.RCPU, 185 | v.URSS, 186 | v.UCache, 187 | v.RMemoryMB, 188 | v.RdiskMB, 189 | v.RIOPS, 190 | v.Namespace, 191 | v.DataCenters, 192 | v.CurrentTime, 193 | insertTime) 194 | } 195 | } 196 | 197 | log.Trace("END AGGREGATION") 198 | time.Sleep(duration) 199 | } 200 | } 201 | 202 | func reloadConfig(sigs chan os.Signal) { 203 | log.SetReportCaller(true) 204 | 205 | for { 
206 | select { 207 | case <-sigs: 208 | log.Info("Reloading /etc/nurd/config.json") 209 | if err := loadConfig("/etc/nurd/config.json"); err != nil { 210 | log.Warning(fmt.Sprintf("Error in reloading /etc/nurd/config.json: %v", err)) 211 | } 212 | } 213 | } 214 | } 215 | 216 | func main() { 217 | freq := flag.String("aggregate-frequency", "15m", "frequency of resource aggregation") 218 | flag.Parse() 219 | go collectData(freq) 220 | 221 | sigs := make(chan os.Signal, 1) 222 | signal.Notify(sigs, syscall.SIGHUP) 223 | go reloadConfig(sigs) 224 | 225 | router := mux.NewRouter().StrictSlash(true) 226 | router.HandleFunc("/", homePage) 227 | router.HandleFunc("/v1/jobs", returnAll) 228 | router.HandleFunc("/v1/job/{id}", returnJob) 229 | router.HandleFunc("/v1/health", healthCheck) 230 | log.Fatal(http.ListenAndServe(":8080", router)) 231 | } 232 | -------------------------------------------------------------------------------- /main_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Roblox Corporation 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 
16 | */ 17 | 18 | package main 19 | 20 | import ( 21 | "encoding/json" 22 | "io/ioutil" 23 | "net/http" 24 | "net/http/httptest" 25 | "testing" 26 | 27 | log "github.com/sirupsen/logrus" 28 | "github.com/stretchr/testify/assert" 29 | ) 30 | 31 | func TestHomePage(t *testing.T) { 32 | log.SetOutput(ioutil.Discard) 33 | 34 | req, err := http.NewRequest("GET", "/", nil) 35 | if err != nil { 36 | t.Fatal(err) 37 | } 38 | rr := httptest.NewRecorder() 39 | handler := http.HandlerFunc(homePage) 40 | handler.ServeHTTP(rr, req) 41 | assert.Equal(t, http.StatusOK, rr.Code) 42 | 43 | expectedStr := "Welcome to NURD." 44 | body, err := ioutil.ReadAll(rr.Body) 45 | if err != nil { 46 | t.Fatal(err) 47 | } 48 | assert.Equal(t, expectedStr, string(body)) 49 | } 50 | 51 | func TestReturnAllNoDB(t *testing.T) { 52 | req, err := http.NewRequest("GET", "/v1/jobs", nil) 53 | if err != nil { 54 | t.Fatal(err) 55 | } 56 | rr := httptest.NewRecorder() 57 | handler := http.HandlerFunc(returnAll) 58 | handler.ServeHTTP(rr, req) 59 | assert.Equal(t, http.StatusInternalServerError, rr.Code) 60 | 61 | expectedStr := APIError{ 62 | Error: "Error in getting all rows from DB: Parameter db *sql.DB is nil", 63 | } 64 | var actualStr APIError 65 | err = json.NewDecoder(rr.Body).Decode(&actualStr) 66 | if err != nil { 67 | t.Fatal(err) 68 | } 69 | assert.Equal(t, expectedStr, actualStr) 70 | } 71 | 72 | func TestReturnJobNoParam(t *testing.T) { 73 | req, err := http.NewRequest("GET", "/v1/job/jobID", nil) 74 | if err != nil { 75 | t.Fatal(err) 76 | } 77 | rr := httptest.NewRecorder() 78 | handler := http.HandlerFunc(returnJob) 79 | handler.ServeHTTP(rr, req) 80 | assert.Equal(t, http.StatusInternalServerError, rr.Code) 81 | 82 | expectedStr := APIError{ 83 | Error: "Error in getting latest job from DB: Parameter db *sql.DB is nil", 84 | } 85 | var actualStr APIError 86 | err = json.NewDecoder(rr.Body).Decode(&actualStr) 87 | if err != nil { 88 | t.Fatal(err) 89 | } 90 | assert.Equal(t, expectedStr, 
actualStr) 91 | } 92 | 93 | func TestReturnJobNoBegin(t *testing.T) { 94 | req, err := http.NewRequest("GET", "/v1/job/jobID?end=2020-07-18%2017:42:19", nil) 95 | if err != nil { 96 | t.Fatal(err) 97 | } 98 | rr := httptest.NewRecorder() 99 | handler := http.HandlerFunc(returnJob) 100 | handler.ServeHTTP(rr, req) 101 | assert.Equal(t, http.StatusBadRequest, rr.Code) 102 | 103 | expectedStr := APIError{ 104 | Error: "Missing query param: 'begin'", 105 | } 106 | var actualStr APIError 107 | err = json.NewDecoder(rr.Body).Decode(&actualStr) 108 | if err != nil { 109 | t.Fatal(err) 110 | } 111 | assert.Equal(t, expectedStr, actualStr) 112 | } 113 | 114 | func TestReturnJobNoEnd(t *testing.T) { 115 | req, err := http.NewRequest("GET", "/v1/job/jobID?begin=2020-07-18%2017:42:19", nil) 116 | if err != nil { 117 | t.Fatal(err) 118 | } 119 | rr := httptest.NewRecorder() 120 | handler := http.HandlerFunc(returnJob) 121 | handler.ServeHTTP(rr, req) 122 | assert.Equal(t, http.StatusBadRequest, rr.Code) 123 | 124 | expectedStr := APIError{ 125 | Error: "Missing query param: 'end'", 126 | } 127 | var actualStr APIError 128 | err = json.NewDecoder(rr.Body).Decode(&actualStr) 129 | if err != nil { 130 | t.Fatal(err) 131 | } 132 | assert.Equal(t, expectedStr, actualStr) 133 | } 134 | 135 | func TestReturnJobParams(t *testing.T) { 136 | req, err := http.NewRequest("GET", "/v1/job/jobID?begin=2020-07-18%2017:42:19&end=2020-07-18%2017:42:20", nil) 137 | if err != nil { 138 | t.Fatal(err) 139 | } 140 | rr := httptest.NewRecorder() 141 | handler := http.HandlerFunc(returnJob) 142 | handler.ServeHTTP(rr, req) 143 | assert.Equal(t, http.StatusInternalServerError, rr.Code) 144 | 145 | expectedStr := APIError{ 146 | Error: "Error in getting latest job from DB: Parameter db *sql.DB is nil", 147 | } 148 | var actualStr APIError 149 | err = json.NewDecoder(rr.Body).Decode(&actualStr) 150 | if err != nil { 151 | t.Fatal(err) 152 | } 153 | assert.Equal(t, expectedStr, actualStr) 154 | } 155 | 
156 | func TestHealthCheck(t *testing.T) { 157 | req, err := http.NewRequest("GET", "/v1/health", nil) 158 | if err != nil { 159 | t.Fatal(err) 160 | } 161 | rr := httptest.NewRecorder() 162 | handler := http.HandlerFunc(healthCheck) 163 | handler.ServeHTTP(rr, req) 164 | assert.Equal(t, http.StatusOK, rr.Code) 165 | } -------------------------------------------------------------------------------- /run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eo pipefail 4 | 5 | export PATH=$PATH:/usr/local/go/bin 6 | export PATH=$PATH:/usr/local/bin 7 | export GO_VERSION=1.14.3 8 | 9 | main() { 10 | if [ ! -z "$CIRCLECI" ]; then 11 | # Remove default golang (1.7.3) and install a custom version (1.14.3) of golang. 12 | # This is required for supporting go mod, and to be able to compile nurd. 13 | sudo rm -rf /usr/local/go 14 | 15 | # Install golang 1.14.3 16 | curl -L -o go${GO_VERSION}.linux-amd64.tar.gz https://dl.google.com/go/go${GO_VERSION}.linux-amd64.tar.gz 17 | sudo tar -C /usr/local -xzf go${GO_VERSION}.linux-amd64.tar.gz 18 | sudo chmod +x /usr/local/go 19 | rm -f go${GO_VERSION}.linux-amd64.tar.gz 20 | fi 21 | 22 | # Run tests 23 | trap 'docker-compose down' exit 24 | docker-compose up -d mssql 25 | go test -cover -count=1 -v ./... 26 | } 27 | 28 | main "$@" --------------------------------------------------------------------------------