├── .github └── workflows │ └── go.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── component1.yaml ├── info.go ├── job └── jsonParser.go ├── main.go ├── mapActions.go ├── nomad.job ├── rootfs └── etc │ └── ssl │ └── certs │ └── ca-certificates.crt ├── scalad_mascot.png ├── scale-down.go ├── scale-up.go ├── scaler.go ├── slack └── slack.go ├── structs └── structs.go ├── templates.go ├── templates └── info.html └── tickers.go /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | on: [push] 3 | jobs: 4 | 5 | build: 6 | name: Build 7 | runs-on: ubuntu-latest 8 | steps: 9 | 10 | - name: Set up Go 1.12 11 | uses: actions/setup-go@v1 12 | with: 13 | go-version: 1.12 14 | id: go 15 | 16 | - name: Check out code into the Go module directory 17 | uses: actions/checkout@v1 18 | 19 | - name: Get dependencies 20 | run: | 21 | go get -v -t -d ./... 22 | if [ -f Gopkg.toml ]; then 23 | curl https://raw.githubusercontent.com/golang/dep/master/install.sh | sh 24 | dep ensure 25 | fi 26 | 27 | - name: Build 28 | run: go build -v . 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # IDEs / os related 2 | **/.DS_Store 3 | **/*~ 4 | .vscode 5 | .idea 6 | 7 | # Tests 8 | *.test 9 | *_test.go 10 | 11 | # Binaries 12 | scalad 13 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.14.2 as build-env 2 | COPY . /go/src/github.com/trivago/scalad 3 | WORKDIR /go/src/github.com/trivago/scalad 4 | RUN go get 5 | RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o /scalad 6 | 7 | FROM scratch 8 | 9 | # Copy root filesystem 10 | COPY rootfs / 11 | COPY --from=build-env /scalad / 12 | ENTRYPOINT ["/scalad"] 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | --------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ![scalad mascot](scalad_mascot.png)
2 |
3 |
4 |
5 | Env variables:
6 |
7 | * `PORT = "80"`: Port on which the application listens.
8 | * `NOMAD_HOST = "http://nomad.service.consul:4646"`: Nomad host endpoint.
9 | * `NOMAD_REGION = "global"`: Nomad region.
10 | * `VAULT_TOKEN = "ljkasdflkjafd"`: Vault token with permission to access every secret needed by the scaled jobs.
11 | * `USE_SLACK = "true"`: Flag to enable Slack as the system messaging application.
12 | * `HTTP_USER = "user"`: User needed for basic auth on the endpoints that scale up or down manually.
13 | * `HTTP_PASS = "password"`: Password needed for basic auth on the endpoints that scale up or down manually.
14 | * `METRICS_ENDPOINT = "http://prometheus.yourorg.com/api/v1/query?query="`: Endpoint from which to fetch the metrics used to trigger scale events.
15 |
16 | Inside the job file (at task group level):
17 | ````
18 | meta {
19 |   scaler              = "true"  Activates the scaler
20 |   min_query           = "sum(rate(nomad_client_allocs_cpu_total_ticks{exported_job='scaler-test'}[1m]))by(exported_job) < bool 1"  Query that gives the min threshold for scaling down
21 |   max_query           = "sum(rate(nomad_client_allocs_cpu_total_ticks{exported_job='scaler-test'}[1m]))by(exported_job) > bool 2"  Query that gives the max threshold for scaling up
22 |   query_fire_time     = "2m"   Time the query needs to be true before triggering the scaling event
23 |   scale_cooldown_down = "20s"  Cooldown time after a scale-down event
24 |   scale_cooldown_up   = "25s"  Cooldown time after a scale-up event
25 |   scale_count_down    = "1"    Number of containers removed on a scale-down event
26 |   scale_count_up      = "2"    Number of containers added on a scale-up event
27 |   scale_max           = "16"   Maximum number of containers
28 |   scale_min           = "1"    Minimum number of containers
29 | }
30 | ````
31 |
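Manual scaling endpoints are also exposed, guarded by `HTTP_USER`/`HTTP_PASS` basic auth (see `scale-up.go` and `scale-down.go`). A quick sketch, assuming scalad listens on localhost:8080 and a job named `scaler-test` in region `global`:

````
curl -u user:password http://localhost:8080/scale-up/scaler-test/global
curl -u user:password http://localhost:8080/scale-down/scaler-test/global
````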
--------------------------------------------------------------------------------
/component1.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: backstage.io/v1alpha1
2 | kind: Component
3 | metadata:
4 |   name: Scalad
5 |   description: Component for the nomad autoscaler
6 |   annotations:
7 |     backstage.io/github-actions-id: trivago/scalad
8 | spec:
9 |   type: service
10 |   endpoint: http://www.trivago.com
11 |   documentation: http://knowledge.trivago.com/
12 |   lifecycle: stable
13 |   owner: esteban.barrios@trivago.com
14 |   githubLink: https://api.github.com/repos/trivago/scalad/actions/runs
--------------------------------------------------------------------------------
/info.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 |     "net/http"
5 |     "strconv"
6 |     "time"
7 |
8 |     log "github.com/Sirupsen/logrus"
9 |     "github.com/alecthomas/template"
10 | )
11 |
12 | type lastJob struct {
13 |     JobID     string
14 |     Region    string
15 |     Direction string
16 |     Time      time.Time
17 | }
18 |
19 | // LastJobs updates the lastJobs map with the jobs executed in the last X (INFO_TIME env variable) minutes.
20 | func LastJobs(jobID string, region string, direction string, trigerTime time.Time) {
21 |     secs := trigerTime.Unix()
22 |     var m lastJob
23 |
24 |     m.JobID = jobID
25 |     m.Region = region
26 |     m.Direction = direction
27 |     m.Time = trigerTime
28 |
29 |     lastJobs[secs] = m
30 | }
31 |
32 | func clearInfoMap() {
33 |     now := time.Now().Unix()
34 |     infoTimeInt64, err := strconv.ParseInt(infoTime, 10, 64)
35 |     if err != nil {
36 |         log.Error("Error parsing infoTime to int64 with err: ", err, ". Setting infoTimeInt64 to 60 minutes")
37 |         infoTimeInt64 = 60
38 |     }
39 |     for key := range lastJobs {
40 |         if (now - (infoTimeInt64 * 60)) > key {
41 |             delete(lastJobs, key)
42 |         }
43 |     }
44 | }
45 |
46 | // StatusPage returns an HTML page displaying the scaling operations performed in the last INFO_TIME minutes.
47 | func StatusPage(w http.ResponseWriter, r *http.Request) {
48 |     message, err := Asset("templates/info.html")
49 |     if err != nil {
50 |         log.Error("Error loading asset for info.html with err: ", err)
51 |         return
52 |     }
53 |
54 |     messageTmpl, err := template.New("message").Parse(string(message))
55 |     if err != nil {
56 |         log.Error("Error rendering template for info.html with err: ", err)
57 |         return
58 |     }
59 |
60 |     info := struct {
61 |         LastJobs map[int64]lastJob
62 |         InfoTime string
63 |     }{
64 |         lastJobs,
65 |         infoTime,
66 |     }
67 |
68 |     messageTmpl.Execute(w, info)
69 |
70 | }
71 |
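The cooldown values from the meta stanza are parsed in job/jsonParser.go (next file) via `tparse.AddDuration`, which adds a human-readable duration string to a base time. A minimal sketch of that call, assuming a `scale_cooldown_up` value of "25s":

````
package main

import (
    "fmt"
    "time"

    tparse "github.com/karrick/tparse/v2"
)

func main() {
    now := time.Now()
    // "+" + meta value, exactly as jsonParser.go builds the argument.
    until, err := tparse.AddDuration(now, "+25s")
    if err != nil {
        fmt.Println("duration parse error:", err)
        return
    }
    fmt.Println("cooldown expires at:", until)
}
````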
--------------------------------------------------------------------------------
/job/jsonParser.go:
--------------------------------------------------------------------------------
1 | package job
2 |
3 | import (
4 |     "strconv"
5 |     "time"
6 |
7 |     "github.com/trivago/scalad/structs"
8 |     log "github.com/Sirupsen/logrus"
9 |     nomad "github.com/hashicorp/nomad/api"
10 |     tparse "github.com/karrick/tparse/v2"
11 |     "github.com/prometheus/client_golang/prometheus"
12 | )
13 |
14 | var (
15 |     namespace                       = "scalers"
16 |     subsystem                       = ""
17 |     stableStopedScaleLabels         = []string{}
18 |     stableStopedScaleEventStatusVec = prometheus.NewCounterVec(
19 |         prometheus.CounterOpts{
20 |             Name: prometheus.BuildFQName(namespace, subsystem, "stableStopedScaleEventStatus"),
21 |             Help: "Scale events aborted because the job was not stable",
22 |         },
23 |         stableStopedScaleLabels,
24 |     )
25 | )
26 |
27 | func init() {
28 |     prometheus.MustRegister(stableStopedScaleEventStatusVec)
29 | }
30 |
31 | // ParseJSON takes a nomad.Job from GetJob and a string and checks that this nomad job has the
32 | // scaling stanza correctly declared inside of it. It also checks before scaling down that the current job is stable.
33 | // This status check is ignored for scaling up, in case the application is not stable because it is overloaded by requests.
34 | // Returns groupsMap map[string]structs.JobStruct and nomadJob nomad.Job.
35 | func ParseJSON(orgJob nomad.Job, call string) (groupsMap map[string]structs.JobStruct, nomadJob nomad.Job) {
36 |
37 |     groupsMap = make(map[string]structs.JobStruct)
38 |     // Do not check if the job is stable on scaling up, in case the application is overloaded and cannot get stable.
39 |     // if call != "up" {
40 |     if !*orgJob.Stable {
41 |         log.Debug("Job: ", *orgJob.Name, " is not stable for scaling! Aborting scale operation until the job gets stable...")
42 |         labels := prometheus.Labels{}
43 |         if len(stableStopedScaleLabels) > 0 {
44 |             if len(stableStopedScaleLabels) == 3 {
45 |                 labels["connector"] = stableStopedScaleLabels[0]
46 |                 labels["region"] = stableStopedScaleLabels[1]
47 |                 labels["action"] = stableStopedScaleLabels[2]
48 |                 stableStopedScaleEventStatusVec.With(labels).Inc()
49 |             }
50 |         }
51 |         stableStopedScaleEventStatusVec.WithLabelValues().Inc()
52 |
53 |         return
54 |     }
55 |
56 |     var err error
57 |
58 |     // checkGroups for a meta stanza and if found put it on the map
59 |     for _, taskGroup := range orgJob.TaskGroups {
60 |         var jsonJob structs.JobStruct
61 |         jsonJob.GroupName = *taskGroup.Name
62 |
63 |         jsonJob.ScaleMin, err = strconv.Atoi(taskGroup.Meta["scale_min"])
64 |         if err != nil {
65 |             log.Debug("Unable to convert ScaleMin to int in Task: ", *taskGroup.Name, " in job: ", *orgJob.Name)
66 |             continue
67 |         }
68 |         jsonJob.ScaleMax, err = strconv.Atoi(taskGroup.Meta["scale_max"])
69 |         if err != nil {
70 |             log.Debug("Unable to convert ScaleMax to int in Task: ", *taskGroup.Name, " in job: ", *orgJob.Name)
71 |             continue
72 |         }
73 |         jsonJob.ScaleCountUp, err = strconv.Atoi(taskGroup.Meta["scale_count_up"])
74 |         if err != nil {
75 |             log.Debug("Unable to convert ScaleCountUp to int in Task: ", *taskGroup.Name, " in job: ", *orgJob.Name)
76 |             continue
77 |         }
78 |         jsonJob.ScaleCountDown, err = strconv.Atoi(taskGroup.Meta["scale_count_down"])
79 |         if err != nil {
80 |             log.Debug("Unable to convert ScaleCountDown to int in Task: ", *taskGroup.Name, " in job: ", *orgJob.Name)
81 |             continue
82 |         }
83 |         jsonJob.Count = *taskGroup.Count
84 |         jsonJob.LastRun = time.Now()
85 |         now := time.Now()
86 |         _, ok := taskGroup.Meta["scale_cooldown_up"]
87 |         if ok {
88 |             up, err := tparse.AddDuration(now, "+"+taskGroup.Meta["scale_cooldown_up"])
89 |             if err != nil {
90 |                 log.Debug("Meta ScaleCooldownUP error: ", err)
91 |                 continue
92 |             }
93 |             jsonJob.ScaleCooldownUp = up
94 |         }
95 |         _, ok = taskGroup.Meta["scale_cooldown_down"]
96 |         if ok {
97 |             down, err := tparse.AddDuration(now, "+"+taskGroup.Meta["scale_cooldown_down"])
98 |             if err != nil {
99 |                 log.Debug("Meta ScaleCooldownDown error: ", err)
100 |                 continue
101 |             }
102 |             jsonJob.ScaleCooldownDown = down
103 |         }
104 |
105 |         jsonJob.JobName = *orgJob.Name
106 |         jsonJob.Region = *orgJob.Region
107 |         if jsonJob.ScaleMin != 0 {
108 |             log.Info("Adding ", jsonJob.GroupName, " to map.")
109 |             groupsMap[jsonJob.GroupName] = jsonJob
110 |         }
111 |     }
112 |
113 |     // checkGroups tasks for a meta stanza and if found put it on the map
114 |     for i, taskGroup := range orgJob.TaskGroups {
115 |
116 |         var jsonJob structs.JobStruct
117 |         jsonJob.GroupName = *taskGroup.Name
118 |         jsonJob.Count = *taskGroup.Count
119 |
120 |         for _, tasks := range taskGroup.Tasks {
121 |             _, exists := groupsMap[*taskGroup.Name]
122 |             if exists {
123 |                 log.Debug("Group: ", *taskGroup.Name, " exists in group map")
124 |                 break
125 |             }
126 |
127 |             jsonJob.TaskName = tasks.Name
128 |             jsonJob.ScaleMin, err = strconv.Atoi(tasks.Meta["scale_min"])
129 |             if err != nil {
130 |                 log.Debug("Unable to convert ScaleMin to int in Taskgroup: ", *taskGroup.Name, " Task: ", tasks.Name, " in job: ", *orgJob.Name)
131 |                 continue
132 |             }
133 |             jsonJob.ScaleMax, err = strconv.Atoi(tasks.Meta["scale_max"])
134 |             if err != nil {
135 |                 log.Debug("Unable to convert ScaleMax to int in Taskgroup: ", *taskGroup.Name, " Task: ", tasks.Name, " in job: ", *orgJob.Name)
136 |                 continue
137 |             }
138 |             jsonJob.ScaleCountUp, err =
strconv.Atoi(tasks.Meta["scale_count_up"]) 139 | if err != nil { 140 | log.Debug("Unable to convert ScaleCountUp to int in Taskgroup: ", *taskGroup.Name, " Task: ", tasks.Name, " in job: ", *orgJob.Name) 141 | continue 142 | } 143 | jsonJob.ScaleCountDown, err = strconv.Atoi(tasks.Meta["scale_count_down"]) 144 | if err != nil { 145 | log.Debug("Unable to convert ScaleCountDown to int in Taskgroup: ", *taskGroup.Name, " Task: ", tasks.Name, " in job: ", *orgJob.Name) 146 | continue 147 | } 148 | jsonJob.LastRun = time.Now() 149 | now := time.Now() 150 | _, ok := tasks.Meta["scale_cooldown_up"] 151 | if ok { 152 | up, err := tparse.AddDuration(now, "+"+tasks.Meta["scale_cooldown_up"]) 153 | if err != nil { 154 | log.Debug("Meta ScaleCooldownUP error: ", err) 155 | continue 156 | } 157 | jsonJob.ScaleCooldownUp = up 158 | } 159 | _, ok = tasks.Meta["scale_cooldown_down"] 160 | if ok { 161 | down, err := tparse.AddDuration(now, "+"+tasks.Meta["scale_cooldown_down"]) 162 | if err != nil { 163 | log.Debug("Meta ScaleCooldownDown error: ", err) 164 | continue 165 | } 166 | jsonJob.ScaleCooldownDown = down 167 | } 168 | 169 | jsonJob.JobName = *orgJob.Name 170 | jsonJob.Region = *orgJob.Region 171 | jsonJob.Group = i 172 | 173 | groupsMap[jsonJob.GroupName] = jsonJob 174 | 175 | } 176 | } 177 | 178 | log.Debug("Current Map: ") 179 | for _, entry := range groupsMap { 180 | log.Debug("JobName: ", entry.JobName) 181 | log.Debug(" GroupName: ", entry.GroupName) 182 | log.Debug(" Count: ", entry.Count) 183 | log.Debug(" Scale Min: ", entry.ScaleMin) 184 | log.Debug(" Scale Max: ", entry.ScaleMax) 185 | 186 | } 187 | 188 | return groupsMap, orgJob 189 | } 190 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "io/ioutil" 7 | "net/http" 8 | "net/url" 9 | "os" 10 | "os/signal" 11 | "strconv" 12 | "strings" 13 | "sync" 14 | "time" 15 | 16 | log "github.com/Sirupsen/logrus" 17 | "github.com/go-chi/chi" 18 | "github.com/hashicorp/nomad/api" 19 | nomad "github.com/hashicorp/nomad/api" 20 | tparse "github.com/karrick/tparse/v2" 21 | "github.com/prometheus/client_golang/prometheus" 22 | "github.com/prometheus/client_golang/prometheus/promhttp" 23 | "github.com/trivago/scalad/slack" 24 | "github.com/trivago/scalad/structs" 25 | ) 26 | 27 | var ( 28 | port = os.Getenv("PORT") 29 | nomadHost = os.Getenv("NOMAD_HOST") 30 | region = os.Getenv("NOMAD_REGION") 31 | nomadCaCert = os.Getenv("NOMAD_CACERT") 32 | vaultToken = os.Getenv("VAULT_TOKEN") 33 | useSlack = os.Getenv("USE_SLACK") 34 | username = os.Getenv("HTTP_USER") 35 | password = os.Getenv("HTTP_PASS") 36 | metricsEndpoint = os.Getenv("METRICS_ENDPOINT") 37 | infoTime = os.Getenv("INFO_TIME") 38 | fireMapTickerEnv = os.Getenv("FIREMAP_TICKER_SECS") 39 | scalerTickerEnv = os.Getenv("SCALER_TICKER_SECS") 40 | scalerTickerEnvInt int64 41 | fireMapTickerEnvInt int64 42 | lastJobs map[int64]lastJob 43 | namespace = "scalers" 44 | subsystem = "" 45 | scalerLabels = []string{"name", "region", "direction"} 46 | apiLabels = []string{} 47 | scalerVec = prometheus.NewCounterVec( 48 | prometheus.CounterOpts{ 49 | Name: prometheus.BuildFQName(namespace, subsystem, "count"), 50 | Help: "Scaling jobs", 51 | }, 52 | scalerLabels, 53 | ) 54 | apiRequestsVec = prometheus.NewCounterVec( 55 | prometheus.CounterOpts{ 56 | Name: prometheus.BuildFQName(namespace, subsystem, "apicalls"), 57 | 
Help: "API calls received",
58 |         },
59 |         apiLabels,
60 |     )
61 |     mutex            = &sync.Mutex{}
62 |     jobMap           map[string]*nomad.Job
63 |     jobMapMutex      = &sync.Mutex{}
64 |     jobMapScale      map[string]*nomad.Job
65 |     jobMapScaleMutex = &sync.Mutex{}
66 |     jobMetaMap       map[string]*structs.Meta
67 |     jobMetaMapMutex  = &sync.Mutex{}
68 |     fireTimeMap      map[string]*structs.TrigeredAction
69 |     fireTimeMapMutex = &sync.Mutex{}
70 |
71 |     scaler Scaler
72 | )
73 |
74 | // init checks that both env variables needed to run the scaler are set.
75 | // These are: nomadHost -> address under which nomad is running
76 | //            port      -> port on which the application is going to listen.
77 | // This function also registers two vectors with prometheus:
78 | // one for api requests and another for scale operations performed
79 | // by the scaler.
80 | func init() {
81 |     if len(nomadHost) == 0 {
82 |         nomadHost = "http://nomad.service.consul:4646"
83 |     }
84 |     if len(port) == 0 {
85 |         port = ":8080"
86 |     }
87 |     if len(infoTime) == 0 {
88 |         infoTime = "60"
89 |     }
90 |     if len(fireMapTickerEnv) == 0 {
91 |         fireMapTickerEnv = "30"
92 |     }
93 |     if len(scalerTickerEnv) == 0 {
94 |         scalerTickerEnv = "60"
95 |     }
96 |     var err error
97 |     fireMapTickerEnvInt, err = strconv.ParseInt(fireMapTickerEnv, 10, 64)
98 |     if err != nil {
99 |         log.Fatal("Error converting fireMapTicker to int with err: ", err)
100 |     }
101 |
102 |     scalerTickerEnvInt, err = strconv.ParseInt(scalerTickerEnv, 10, 64)
103 |     if err != nil {
104 |         log.Fatal("Error converting scalerTicker to int with err: ", err)
105 |     }
106 |
107 |     prometheus.MustRegister(scalerVec)
108 |     prometheus.MustRegister(apiRequestsVec)
109 |
110 |     jobMap = make(map[string]*nomad.Job)
111 |     jobMapScale = make(map[string]*nomad.Job)
112 |     jobMetaMap = make(map[string]*structs.Meta)
113 |     fireTimeMap = make(map[string]*structs.TrigeredAction)
114 |     lastJobs = make(map[int64]lastJob)
115 | }
116 |
117 | // startHTTP starts the chi router and registers all the available endpoints.
118 | func startHTTP() {
119 |     r := chi.NewMux()
120 |
121 |     scaler = newScaler()
122 |
123 |     r.Post("/scale", scaler.scale)
124 |
125 |     r.Get("/", scaler.health)
126 |
127 |     r.Get("/stop-scalling/{jobName}/{timer}", scaler.stopScallingJob)
128 |
129 |     r.Get("/resume-scalling/{jobName}", scaler.resumeScallingJob)
130 |
131 |     r.Get("/scale-up/{jobName}/{region}", manualScaleUp)
132 |     r.Get("/scale-down/{jobName}/{region}", manualScaleDown)
133 |
134 |     r.Get("/info", StatusPage)
135 |
136 |     promHandler := promhttp.Handler()
137 |     r.Get("/metrics", promHandler.ServeHTTP)
138 |
139 |     // Profiling endpoints. These are disabled to preserve memory.
140 |     /*
141 |         r.Get("/debug/pprof/", pprof.Index)
142 |         r.Get("/debug/pprof/cmdline", pprof.Cmdline)
143 |         r.Get("/debug/pprof/profile", pprof.Profile)
144 |         r.Get("/debug/pprof/symbol", pprof.Symbol)
145 |
146 |         // Register pprof handlers
147 |         r.HandleFunc("/debug/pprof/", pprof.Index)
148 |         r.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline)
149 |         r.HandleFunc("/debug/pprof/profile", pprof.Profile)
150 |         r.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
151 |
152 |         r.Handle("/debug/pprof/goroutine", pprof.Handler("goroutine"))
153 |         r.Handle("/debug/pprof/heap", pprof.Handler("heap"))
154 |         r.Handle("/debug/pprof/threadcreate", pprof.Handler("threadcreate"))
155 |         r.Handle("/debug/pprof/block", pprof.Handler("block"))
156 |     */
157 |     http.ListenAndServe(port, r)
158 | }
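// For reference, the POST /scale route registered above consumes Alertmanager
// webhook payloads. A hedged sketch of the expected shape — the fields are
// inferred from how scaleAction reads structs.PostRequest (Status,
// Labels.Alertname, Labels.JobName, Labels.AllocID); the exact JSON tags live
// in structs.go and the values here are hypothetical:
//
//  {
//    "alerts": [
//      {
//        "status": "firing",
//        "labels": {
//          "alertname": "scaleup-cpu",
//          "job_name": "scaler-test",
//          "alloc_id": "0f3bca8f"
//        }
//      }
//    ]
//  }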
159 |
160 | func checkFiringMap() {
161 |     log.Debug("Checking firingMap")
162 |     fireTimeMapMutex.Lock()
163 |     for mapName, trigger := range fireTimeMap {
164 |         log.Debug(mapName, trigger)
165 |         runTime, err := tparse.AddDuration(trigger.Time, "+"+jobMetaMap[mapName].FireTime)
166 |         if err != nil {
167 |             log.Error("ERROR: JobName: ", mapName)
168 |             log.Error("Can't add trigger.Time and meta.Firetime in checkFiringMap with err: ", err)
169 |             continue
170 |         }
171 |         now := time.Now()
172 |         if now.After(runTime) {
173 |             if trigger.Direction == "up" {
174 |                 log.Debug("Scaling up: ", mapName)
175 |                 err := scaler.ScaleUp(mapName, region)
176 |                 if err != nil {
177 |                     log.Error("Error scaling up with err: ", err)
178 |                     continue
179 |                 }
180 |                 delete(fireTimeMap, mapName)
181 |
182 |             } else if trigger.Direction == "down" {
183 |                 log.Debug("Scaling down: ", mapName)
184 |                 err := scaler.ScaleDown(mapName, region)
185 |                 if err != nil {
186 |                     log.Error("Error scaling down with err: ", err)
187 |                     continue
188 |                 }
189 |                 delete(fireTimeMap, mapName)
190 |             }
191 |         }
192 |     }
193 |     fireTimeMapMutex.Unlock()
194 |
195 | }
196 |
197 | func addToFiringMap(id string, trigered time.Time, direction string) {
198 |     _, ok := fireTimeMap[id]
199 |     if !ok {
200 |         var trigeredAction structs.TrigeredAction
201 |         trigeredAction.Time = trigered
202 |         trigeredAction.Direction = direction
203 |
204 |         fireTimeMapMutex.Lock()
205 |         fireTimeMap[id] = &trigeredAction
206 |         fireTimeMapMutex.Unlock()
207 |         log.Debug("added entry to fireTimeMap -> Direction: ", fireTimeMap[id].Direction, " Triggered: ", fireTimeMap[id].Time)
208 |     }
209 |
210 | }
211 |
212 | func removeFromFiringMap(id string) {
213 |     _, ok := fireTimeMap[id]
214 |     if ok { // delete only when the entry actually exists
215 |         fireTimeMapMutex.Lock()
216 |         delete(fireTimeMap, id)
217 |         fireTimeMapMutex.Unlock()
218 |         log.Debug("removed entry from fireTimeMap for ", id)
219 |     }
220 |
221 | }
222 |
223 | func prometheusQueries(jobMetaMap map[string]*structs.Meta) {
224 |     jobMetaMapMutex.Lock()
225 |     for id, job := range jobMetaMap {
226 |         job.MaxQuery = strings.Replace(job.MaxQuery, "\\", "", -1)
227 |         job.MinQuery = strings.Replace(job.MinQuery, "\\", "", -1)
228 |
229 |         log.Debug("Job: ", id)
230 |         log.Debug("MaxQuery: ", job.MaxQuery)
231 |         maxResult, err := queryPrometheus(job.MaxQuery)
232 |         if err != nil {
233 |             log.Error("Unable to get max result from prometheus with err: ", err, " for job: ", id)
234 |             removeFromFiringMap(id)
235 |             continue
236 |         }
237 |
238 |         log.Debug("MaxResult query result: ", maxResult)
239 |         if maxResult {
240 |             addToFiringMap(id, time.Now(), "up")
241 |             continue
242 |         }
243 |
244 |         log.Debug("MinQuery: ", job.MinQuery)
245 |         minResult, err := queryPrometheus(job.MinQuery)
246 |         if err != nil {
247 |             log.Error("Unable to get min result from prometheus with err: ", err, " for job: ", id)
248 |             removeFromFiringMap(id)
249 |             continue
250 |         }
251 |
252 |         log.Debug("MinResult query result: ", minResult)
253 |         if minResult {
254 |             addToFiringMap(id, time.Now(), "down")
255 |             continue
256 |         }
257 |     }
258 |
259 |     jobMetaMapMutex.Unlock()
260 | }
261 |
262 | func queryPrometheus(promQuery string) (bool, error) {
263 |     var result structs.Prometheus
264 |
265 |     client := &http.Client{
266 |         Timeout: (time.Second * 10),
267 |     }
268 |
269 |     queryURL := fmt.Sprintf("%s%s", metricsEndpoint, url.QueryEscape(promQuery))
270 |     log.Debug("Query URL: ", queryURL)
271 |
272 |     // http.NewRequest parses the URL itself and returns an error on failure.
273 |     req, err := http.NewRequest("GET", queryURL, nil)
274 |     if err != nil {
275 |         log.Error("Error creating new request with err: ", err)
276 |         return false, err
277 |     }
278 |
279 |     resp, err := client.Do(req)
280 |     if err != nil {
281 |         log.Error("Error executing request with err: ", err)
282 |         return false, err
283 |     }
284 |     defer resp.Body.Close()
285 |     data, err := ioutil.ReadAll(resp.Body)
286 |     if err != nil {
287 |         log.Error("Unable to read resp.Body: ", err)
288 |         return false, err
289 |     }
290 |
291 |     if 400 <= resp.StatusCode {
292 |         return false, fmt.Errorf("error response: %s", string(data))
293 |     }
294 |
295 |     if err = json.Unmarshal(data, &result); err != nil {
296 |         log.Error("Unable to unmarshal with err: ", err)
297 |         return false, err
298 |     }
299 |
300 |     var resultInt int
301 |
302 |     if len(result.Data.Result) > 0 {
303 |         if len(result.Data.Result[0].Value) > 0 {
304 |             resultInt, err = strconv.Atoi(result.Data.Result[0].Value[1].(string))
305 |             if err != nil {
306 |                 log.Error("Error converting prometheus response into int with err: ", err)
307 |                 return false, err
308 |             }
309 |         }
310 |     } else {
311 |         return false, fmt.Errorf("length of prometheus response is 0")
312 |     }
313 |
314 |     if resultInt != 1 {
315 |         return false, err
316 |     }
317 |
318 |     return true, err
319 | }
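// queryPrometheus above expects the standard Prometheus instant-query API
// response shape; with the README's "> bool" / "< bool" queries the sample
// value is the string "0" or "1", which is what the strconv.Atoi call parses.
// An example response (values are hypothetical):
//
//  {
//    "status": "success",
//    "data": {
//      "resultType": "vector",
//      "result": [
//        { "metric": { "exported_job": "scaler-test" }, "value": [ 1589000000.781, "1" ] }
//      ]
//    }
//  }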
320 |
321 | func checkMeta(jobMap map[string]*api.Job) {
322 |     jobMapScaleMutex.Lock()
323 |     jobMetaMapMutex.Lock()
324 |     defer jobMapScaleMutex.Unlock()
325 |     defer jobMetaMapMutex.Unlock()
326 |     for _, job := range jobMap {
327 |         if job.Meta["scaler"] == "true" {
328 |             jobMapScale[*job.Name] = job
329 |             jobMetaMap[*job.Name] = readMeta(job.Meta)
330 |             log.Debug("Adding ", *job.Name, " to jobMapScale JOB level")
331 |         }
332 |         for _, taskGroup := range job.TaskGroups {
333 |             if taskGroup.Meta["scaler"] == "true" {
334 |                 // bug
335 |                 // todo: replace with job.Name + task group
336 |                 jobMapScale[*job.Name] = job
337 |                 jobMetaMap[*job.Name] = readMeta(taskGroup.Meta)
338 |                 log.Debug("Adding ", *job.Name, " to jobMapScale TASKGROUP level")
339 |             }
340 |             for _, task := range taskGroup.Tasks {
341 |                 if task.Meta["scaler"] == "true" {
342 |                     jobMapScale[*job.Name] = job
343 |                     jobMetaMap[*job.Name] = readMeta(task.Meta)
344 |                     log.Debug("Adding ", *job.Name, " to jobMapScale TASK level")
345 |
346 |                 }
347 |             }
348 |         }
349 |     }
350 | }
351 |
352 | func readMeta(t map[string]string) *structs.Meta {
353 |     var m structs.Meta
354 |     m.MinQuery = t["min_query"]
355 |     m.MaxQuery = t["max_query"]
356 |     m.FireTime = t["query_fire_time"]
357 |     m.ScaleMin = t["scale_min"]
358 |     m.ScaleMax = t["scale_max"]
359 |     m.ScaleCountUp = t["scale_count_up"]
360 |     m.ScaleCooldown = t["scale_count_down"]
361 |     m.ScaleCooldownUp = t["scale_cooldown_up"]
362 |     m.ScaleCooldownDown = t["scale_cooldown_down"]
363 |     return &m
364 | }
365 |
366 | func getJobs() (map[string]*nomad.Job, error) {
367 |     jobMap := make(map[string]*nomad.Job) // note: shadows the package-level jobMap
368 |
369 |     nomadClient, err := api.NewClient(&api.Config{Address: nomadHost, TLSConfig: &api.TLSConfig{CACert: nomadCaCert}})
370 |     if err != nil {
371 |         log.Error("Error creating nomad client with err: ", err)
372 |     }
373 |
374 |     options := &api.QueryOptions{AllowStale: true}
375 |
376 |     joblist, _, err := nomadClient.Jobs().List(options)
377 |     if err != nil {
378 |         log.Error("Unable to get job list from nomad with err: ", err)
379 |         return nil, err
380 |     }
381 |
382 |     jobMapMutex.Lock()
383 |     jobMapScaleMutex.Lock()
384 |
385 |     for job := range jobMap {
386 |         delete(jobMap, job)
387 |     }
388 |
389 |     for job := range jobMapScale {
390 |         delete(jobMapScale, job)
391 |     }
392 |
393 |     jobMapScaleMutex.Unlock()
394 |
395 |     for _, job := range joblist {
396 |         value, _, err := nomadClient.Jobs().Info(job.ID, options)
397 |         if err != nil {
398 |             log.Error("Error getting job Info from nomad with err: ", err, " for jobName: ", job.Name)
399 |             continue
400 |         }
401 |
402 |         if value.IsPeriodic() || *value.Type == "system" || *value.Type == "batch" {
403 |             continue
404 |         }
405 |
406 |         jobMap[job.Name] = value
407 |     }
408 |
409 |     jobMapMutex.Unlock()
410 |
411 |     return jobMap, nil
412 |
413 | }
414 |
415 | // main sets the logging formatter and logging level, starts the goroutine for the http
416 | // server, and waits for a kill signal.
417 | func main() {
418 |     customFormater := new(log.TextFormatter)
419 |     customFormater.FullTimestamp = true
420 |     customFormater.TimestampFormat = "2006-01-02 15:04:05"
421 |     customFormater.ForceColors = true
422 |     log.SetFormatter(customFormater)
423 |     //log.SetLevel(log.InfoLevel)
424 |     log.SetLevel(log.DebugLevel)
425 |     log.Info("Logging to stderr")
426 |
427 |     log.Info("Starting scalad....")
428 |     log.Info("Loaded configuration:")
429 |     log.Info("Port: ", port)
430 |     log.Info("Nomad Host: ", nomadHost)
431 |     log.Info("Nomad Region: ", region)
432 |     log.Info("Nomad CA Cert: ", nomadCaCert)
433 |     if len(vaultToken) != 0 {
434 |         log.Info("Vault Token: ", "************")
435 |     } else {
436 |         log.Info("Vault Token: ", "EMPTY!!")
437 |     }
438 |     log.Info("Use slack: ", useSlack)
439 |     log.Info("Http user: ", username)
440 |     if len(password) != 0 {
441 |         log.Info("Http pass: ", "**********")
442 |     } else {
443 |         log.Info("Http pass: ", "EMPTY!!!")
444 |     }
445 |     log.Info("Metrics Endpoint: ", metricsEndpoint)
446 |
447 |     if useSlack == "true" {
448 |         slack.StartSlackTicker()
449 |     }
450 |
451 |     go startHTTP()
452 |
453 |     go scalerTicker()
454 |
455 |     go fireMapTicker()
456 |
457 |     c := make(chan os.Signal, 1)
458 |     signal.Notify(c, os.Interrupt)
459 |
460 |     // Block until a signal is received.
461 |     s := <-c
462 |     log.Debug("Got signal: ", s)
463 |
464 | }
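// A minimal environment for running scalad locally — a sketch only; the
// values are examples, and the defaults set in init() apply when unset:
//
//  export NOMAD_HOST="http://nomad.service.consul:4646"
//  export PORT=":8080"
//  export NOMAD_REGION="global"
//  export METRICS_ENDPOINT="http://prometheus.yourorg.com/api/v1/query?query="
//  export USE_SLACK="false"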
465 |
466 | // GetJob contacts Nomad at nomadHost with a jobID and returns the job definition
467 | // from nomad that is to be scaled.
468 | func GetJob(jobID string, region string) (nomad.Job, error) {
469 |
470 |     if _, ok := jobMap[jobID]; ok {
471 |         return *jobMap[jobID], nil
472 |     }
473 |
474 |     var nomadJob nomad.Job
475 |
476 |     client, err := api.NewClient(&api.Config{Address: nomadHost, TLSConfig: &api.TLSConfig{CACert: nomadCaCert}})
477 |     if err != nil {
478 |         log.Error("Unable to create Nomad client with err: ", err)
479 |         return nomadJob, err
480 |     }
481 |
482 |     options := &api.QueryOptions{AllowStale: true}
483 |
484 |     nomadJobPointer, _, err := client.Jobs().Info(jobID, options)
485 |     if err != nil {
486 |         log.Error("Unable to get job for ", jobID, " from nomad with err: ", err)
487 |         return nomadJob, err
488 |     }
489 |
490 |     nomadJob = *nomadJobPointer
491 |     return nomadJob, nil
492 |
493 | }
494 |
495 | func executeJob(nomadJob nomad.Job) (ok bool, err error) {
496 |     *nomadJob.VaultToken = vaultToken
497 |
498 |     nomadClient, err := api.NewClient(&api.Config{Address: nomadHost, TLSConfig: &api.TLSConfig{CACert: nomadCaCert}})
499 |     if err != nil {
500 |         log.Error("Unable to create Nomad client with err: ", err)
501 |         return false, err
502 |     }
503 |
504 |     _, _, err = nomadClient.Jobs().Register(&nomadJob, nil)
505 |     if err != nil {
506 |         return false, err
507 |     }
508 |
509 |     return true, nil
510 | }
511 |
--------------------------------------------------------------------------------
/mapActions.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 |     "time"
5 | )
6 |
7 | // startJobMapWatcher starts a ticker that checks the map for expired cooldowns every 5 seconds.
8 | func (scaler *Scaler) startJobMapWatcher() {
9 |     ticker := time.NewTicker(time.Second * 5)
10 |
11 |     go func() {
12 |         for range ticker.C {
13 |             scaler.cleanMap()
14 |         }
15 |     }()
16 | }
17 |
18 | // cleanMap checks all the entries in the map for expired cooldowns and removes them from the map.
19 | func (scaler *Scaler) cleanMap() {
20 |     mutex.Lock()
21 |     for key, job := range scaler.jobMap {
22 |         now := time.Now()
23 |
24 |         if now.After(job.ScaleCooldown) {
25 |             delete(scaler.jobMap, key)
26 |
27 |         }
28 |     }
29 |     mutex.Unlock()
30 | }
31 |
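The nomad.job file below deploys the published image on a Nomad cluster. For a quick test outside Nomad, an equivalent docker run sketch (image tag taken from this repo's nomad.job; the port mapping and endpoint values are examples):

````
docker run --rm -p 8080:8080 \
  -e NOMAD_HOST="http://nomad.service.consul:4646" \
  -e PORT=":8080" \
  -e METRICS_ENDPOINT="http://prometheus.yourorg.com/api/v1/query?query=" \
  trivago/scalad:0.1
````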
--------------------------------------------------------------------------------
/nomad.job:
--------------------------------------------------------------------------------
1 | job "scalad" {
2 |   region      = "global"
3 |   datacenters = ["dc1"]
4 |   type        = "service"
5 |
6 |   group "job-autoscaler" {
7 |     count = 1
8 |
9 |     task "scaler" {
10 |       driver = "docker"
11 |
12 |       config {
13 |         image        = "trivago/scalad:0.1"
14 |         force_pull   = true
15 |         network_mode = "host"
16 |       }
17 |
18 |       service {
19 |         name = "${NOMAD_JOB_NAME}"
20 |         tags = ["nomad-autoscaler", "scalad"]
21 |         port = "http"
22 |
23 |         check {
24 |           type     = "http"
25 |           path     = "/"
26 |           interval = "30s"
27 |           timeout  = "2s"
28 |         }
29 |       }
30 |
31 |       template {
32 |         data = <
--------------------------------------------------------------------------------
/scale-down.go:
--------------------------------------------------------------------------------
63 |         if job.Count > job.ScaleMax {
64 |             log.Info("Job ", jobID, " Group: ", job.GroupName, " in: ", region, " is above the MaxCount")
65 |             job.NoGo = true
66 |         } else {
67 |             job.NoGo = false
68 |         }
69 |         // Write the locally modified copy back into the map.
70 |         structLocal := job
71 |         groupsMap[job.GroupName] = structLocal
72 |     }
73 |
74 |     for _, job := range groupsMap {
75 |         if !job.NoGo {
76 |             log.Debug(job.GroupName, " Group needs to be scaled Down.")
77 |             AnyTrue = true
78 |         }
79 |     }
80 |
81 |     if AnyTrue {
82 |         p := log.Debug
83 |         p("Scaling DOWN: ")
84 |         p("JobName: ", jobID)
85 |
86 |         for _, job := range groupsMap {
87 |             p("Group: ", job.GroupName)
88 |             if job.TaskName != "" {
89 |                 p("TaskName: ", job.TaskName)
90 |             }
91 |             p("Region: ", job.Region)
92 |             p("ScaleMin: ", job.ScaleMin)
93 |             p("ScaleMax: ", job.ScaleMax)
94 |             p("ScaleCountUp: ", job.ScaleCountUp)
95 |             p("ScaleCountDown: ", job.ScaleCountDown)
96 |             p("Count: ", job.Count)
97 |             p("ScaleCooldown: ", job.ScaleCooldown)
98 |         }
99 |         err := ScaleJobDown(groupsMap, nomadJob)
100 |         if err != nil {
101 |             log.Error("Scale down failed with err: ", err)
102 |             return err
103 |         }
104 |     }
105 |
106 |     return nil
107 | }
108 |
109 | // ScaleJobDown calculates the new number of allocations necessary for every group in the job and sends the request to nomad to
110 | // scale the job. It also updates the list of recently executed jobs after sending the request to nomad.
111 | func ScaleJobDown(groupsMap map[string]structs.JobStruct, nomadJob nomad.Job) error {
112 |     for _, job := range groupsMap {
113 |         if job.Count == job.ScaleMin {
114 |             job.EndValue = job.ScaleMin
115 |             job.NoGo = true
116 |         } else {
117 |             job.EndValue = job.Count - job.ScaleCountDown
118 |             if job.EndValue <= job.ScaleMin {
119 |                 job.EndValue = job.ScaleMin
120 |                 log.Info("Scaling down Job: ", job.JobName, " Group: ", job.GroupName, " to minimum allowed. Min: ", job.ScaleMin)
121 |                 job.NoGo = false
122 |             }
123 |             log.Info("Job: "+job.JobName+" Group: "+job.GroupName+" on: "+job.Region+" NewCount is: ", job.EndValue)
124 |         }
125 |         structLocal := groupsMap[job.GroupName]
126 |         structLocal.EndValue = job.EndValue
127 |         groupsMap[job.GroupName] = structLocal
128 |     }
129 |
130 |     for _, newJob := range nomadJob.TaskGroups {
131 |         if groupsMap[*newJob.Name].EndValue != 0 {
132 |             *newJob.Count = groupsMap[*newJob.Name].EndValue
133 |         }
134 |         log.Info("Job: ", *nomadJob.Name, " Group: ", *newJob.Name, " NewCount: ", *newJob.Count)
135 |     }
136 |
137 |     ok, err := executeJob(nomadJob)
138 |     if !ok {
139 |         log.Error("Error executing scaledown operation!")
140 |         return err
141 |     }
142 |
143 |     message := `SCALE DOWN:
144 | - Job: ` + *nomadJob.Name + `
145 | - Region: ` + *nomadJob.Region
146 |     slack.SendMessage(message)
147 |     slack.MessageBuffered(*nomadJob.Name, "down", time.Now())
148 |
149 |     scalerVec.WithLabelValues(*nomadJob.Name, *nomadJob.Region, "down").Inc()
150 |     LastJobs(*nomadJob.Name, *nomadJob.Region, "scaleDown", time.Now())
151 |     return nil
152 | }
153 |
154 | func manualScaleDown(w http.ResponseWriter, r *http.Request) {
155 |     jobName := chi.URLParam(r, "jobName")
156 |     region := chi.URLParam(r, "region")
157 |     user, pass, _ := r.BasicAuth()
158 |     if user == username && pass == password {
159 |         nomadJob, err := GetJob(jobName, region)
160 |         if err != nil {
161 |             log.Warn("Error getting job with err: ", err)
162 |             return
163 |         }
164 |         for _, taskGroup := range nomadJob.TaskGroups {
165 |             *taskGroup.Count--
166 |             if *taskGroup.Count == 0 {
167 |                 *taskGroup.Count = 1
168 |             }
169 |         }
170 |
171 |         ok, err := executeJob(nomadJob)
172 |         if !ok {
173 |             log.Error("Error executing manual scaledown operation with err: ", err)
174 |             fmt.Fprintf(w, "%s", "Error executing manual scaledown operation!")
175 |             return
176 |         }
177 |
178 |         message := `MANUAL SCALE DOWN for ` + jobName + ` in Region: ` + region + `
179 | All taskGroups counts have been decreased by one!
180 | For safety reasons scaling to 0 is not allowed; the minimum value is 1`
181 |         slack.SendMessage(message)
182 |         fmt.Fprintf(w, "%s", "Manual scale down triggered!")
183 |     } else {
184 |         fmt.Fprintf(w, "%s", "Wrong Username or password!")
185 |     }
186 | }
187 |
--------------------------------------------------------------------------------
/scale-up.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 |     "fmt"
5 |     "net/http"
6 |     "time"
7 |
8 |     log "github.com/Sirupsen/logrus"
9 |     "github.com/go-chi/chi"
10 |     nomad "github.com/hashicorp/nomad/api"
11 |     "github.com/trivago/scalad/job"
12 |     "github.com/trivago/scalad/slack"
13 |     "github.com/trivago/scalad/structs"
14 | )
15 |
16 | // ScaleUp checks that the current job is not in the cooldown map and, if it is not,
17 | // checks every group in the job file that needs to be scaled.
18 | func (scaler *Scaler) ScaleUp(jobID string, region string) (err error) {
19 |     now := time.Now()
20 |     mapID := jobID + "-" + region
21 |     mutex.Lock()
22 |     _, ok := scaler.jobMap[mapID]
23 |     mutex.Unlock()
24 |     if ok {
25 |         mutex.Lock()
26 |         diff := now.Sub(scaler.jobMap[mapID].ScaleCooldownUp)
27 |         mutex.Unlock()
28 |         log.Info("Job: ", jobID, " ScaleUp can be retriggered in: ", diff)
29 |         return fmt.Errorf("Job in cooldown")
30 |     }
31 |
32 |     var nomadJob nomad.Job
33 |     jobMapMutex.Lock()
34 |     _, ok = jobMap[jobID]
35 |     if ok {
36 |         nomadJob = *jobMap[jobID]
37 |     } else {
38 |         nomadJob, err = GetJob(jobID, region)
39 |         if err != nil {
40 |             log.Warn("Error getting job with err: ", err)
41 |             return err
42 |         }
43 |     }
44 |     jobMapMutex.Unlock()
45 |
46 |     var AnyTrue bool
47 |     groupsMap, nomadJob := job.ParseJSON(nomadJob, "up")
48 |
49 |     for _, job := range groupsMap {
50 |         if (job.ScaleMin == 0) || (job.ScaleMax == 0) || (job.ScaleCountUp == 0) || (job.ScaleCountDown == 0) || (job.Count == 0) {
51 |             log.Warn(jobID, " Group: ", job.Group, " doesn't have a scale stanza in it.")
52 |             job.NoGo = true
53 |         }
54 |
55 |         job.ScaleCooldown = job.ScaleCooldownUp
56 |         mutex.Lock()
57 |
58 |         scaler.jobMap[mapID] = job
59 |         mutex.Unlock()
60 |
61 |         if job.Count >= job.ScaleMax {
62 |             log.Info("Job: ", jobID, " Group: ", job.GroupName, " in: ", region, " is at MaxCount (", job.ScaleMax, " allocations)")
63 |             job.NoGo = true
64 |         } else if job.Count < job.ScaleMin {
65 |             log.Info("Job ", jobID, " Group: ", job.GroupName, " in: ", region, " is below the MinCount")
66 |             job.NoGo = true
67 |         } else {
68 |             job.NoGo = false
69 |         }
70 |         // Write the locally modified copy back into the map.
71 |         structLocal := job
72 |         groupsMap[job.GroupName] = structLocal
73 |     }
74 |
75 |     for _, job := range groupsMap {
76 |         if !job.NoGo {
77 |             log.Debug(job.GroupName, " Group needs to be scaled Up.")
78 |             AnyTrue = true
79 |         }
80 |     }
81 |
82 |     if AnyTrue {
83 |         p := log.Debug
84 |         p("")
85 |         p("Scaling UP: ")
86 |         p("JobName: ", jobID)
87 |
88 |         for _, job := range groupsMap {
89 |             p("Group: ", job.GroupName)
90 |             if job.TaskName != "" {
91 |                 p("TaskName: ", job.TaskName)
92 |             }
93 |             p("Region: ", job.Region)
94 |             p("ScaleMin: ", job.ScaleMin)
95 |             p("ScaleMax: ", job.ScaleMax)
96 |             p("ScaleCountUp: ", job.ScaleCountUp)
97 |             p("ScaleCountDown: ", job.ScaleCountDown)
98 |             p("Count: ", job.Count)
99 |             p("ScaleCooldown: ", job.ScaleCooldown)
100 |         }
101 |         err := ScaleJobUp(groupsMap, nomadJob)
102 |         if err != nil {
103 |             log.Error("Scale up failed with err: ", err)
104 |             return err
105 |         }
106 |     }
107 |     return nil
108 | }
109 |
110 | // ScaleJobUp calculates the new number of allocations necessary for every group in the job and sends the request to nomad to
111 | // scale the job. It also updates the list of recently executed jobs after sending the request to nomad.
112 | func ScaleJobUp(groupsMap map[string]structs.JobStruct, nomadJob nomad.Job) error {
113 |     for _, job := range groupsMap {
114 |         if job.Count >= job.ScaleMax {
115 |             job.EndValue = job.Count
116 |             job.NoGo = true
117 |         } else {
118 |             job.EndValue = job.Count + job.ScaleCountUp
119 |             if job.EndValue > job.ScaleMax {
120 |                 job.EndValue = job.ScaleMax
121 |                 log.Info("Scaling up Job: ", job.JobName, " Group: ", job.GroupName, " to maximum allowed. Max: ", job.ScaleMax)
122 |                 job.NoGo = false
123 |             }
124 |             log.Info("Job: "+job.JobName+" Group: "+job.GroupName+" on: "+job.Region+" NewCount is: ", job.EndValue)
125 |         }
126 |         structLocal := groupsMap[job.GroupName]
127 |         structLocal.EndValue = job.EndValue
128 |         groupsMap[job.GroupName] = structLocal
129 |     }
130 |
131 |     for _, newJob := range nomadJob.TaskGroups {
132 |         if groupsMap[*newJob.Name].EndValue != 0 {
133 |             *newJob.Count = groupsMap[*newJob.Name].EndValue
134 |         }
135 |         log.Info("Job: ", *nomadJob.Name, " Group: ", *newJob.Name, " NewCount: ", *newJob.Count)
136 |     }
137 |
138 |     ok, err := executeJob(nomadJob)
139 |     if !ok {
140 |         log.Error("Error executing scaleup operation!")
141 |         return err
142 |     }
143 |
144 |     message := `SCALE UP:
145 | - Job: ` + *nomadJob.Name + `
146 | - Region: ` + *nomadJob.Region
147 |     slack.SendMessage(message)
148 |     slack.MessageBuffered(*nomadJob.Name, "up", time.Now())
149 |
150 |     scalerVec.WithLabelValues(*nomadJob.Name, *nomadJob.Region, "up").Inc()
151 |     LastJobs(*nomadJob.Name, *nomadJob.Region, "scaleUp", time.Now())
152 |     return nil
153 | }
154 |
155 | func manualScaleUp(w http.ResponseWriter, r *http.Request) {
156 |     jobName := chi.URLParam(r, "jobName")
157 |     region := chi.URLParam(r, "region")
158 |     user, pass, _ := r.BasicAuth()
159 |     if user == username && pass == password {
160 |         nomadJob, err := GetJob(jobName, region)
161 |         if err != nil {
162 |             log.Warn("Error getting job with err: ", err)
163 |             return
164 |         }
165 |         for _, taskGroup := range nomadJob.TaskGroups {
166 |             *taskGroup.Count++
167 |         }
168 |
169 |         ok, err := executeJob(nomadJob)
170 |         if !ok {
171 |             log.Error("Error executing manual scaleup operation with err: ", err)
172 |             fmt.Fprintf(w, "%s", "Error executing manual scaleup operation!")
173 |             return
174 |         }
175 |
176 |         message := `MANUAL SCALE UP for ` + jobName + ` in Region: ` + region + `
177 | All taskGroups counts have been increased by one!`
178 |         slack.SendMessage(message)
179 |         fmt.Fprintf(w, "%s", "Manual scale up triggered!")
180 |     } else {
181 |         fmt.Fprintf(w, "%s", "Wrong Username or password!")
182 |     }
183 | }
184 |
--------------------------------------------------------------------------------
/scaler.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 |     "encoding/json"
5 |     "fmt"
6 |     "io/ioutil"
7 |     "net/http"
8 |     "strings"
9 |     "time"
10 |
11 |     "github.com/trivago/scalad/slack"
12 |     "github.com/trivago/scalad/structs"
13 |     log "github.com/Sirupsen/logrus"
14 |     "github.com/go-chi/chi"
15 |     tparse "github.com/karrick/tparse/v2"
16 | )
17 |
18 | // Scaler jobMap handler.
19 | type Scaler struct {
20 |     jobMap map[string]structs.JobStruct
21 | }
22 |
23 | // newScaler creates the jobMap, starts startJobMapWatcher, and
24 | // returns the map where all allocations in cooldown are stored.
25 | func newScaler() Scaler {
26 |     scaler := Scaler{jobMap: make(map[string]structs.JobStruct)}
27 |     scaler.startJobMapWatcher()
28 |     return scaler
29 | }
30 |
31 | // health is an HTTP endpoint used by Consul to check the health of the application.
32 | // If it is healthy it returns an HTTP 200 "All Good" message.
33 | func (scaler *Scaler) health(w http.ResponseWriter, r *http.Request) {
34 |     message := "All Good"
35 |     fmt.Fprintf(w, "%s", message)
36 | }
37 |
38 | func (scaler *Scaler) stopScallingJob(w http.ResponseWriter, r *http.Request) {
39 |     jobID := chi.URLParam(r, "jobName")
40 |     timer := chi.URLParam(r, "timer")
41 |     mapID := jobID + "-" + region
42 |     now := time.Now()
43 |     var job structs.JobStruct
44 |     sleep, err := tparse.AddDuration(now, timer)
45 |     if err != nil {
46 |         log.Debug("Error parsing time for pause command with err: ", err)
47 |         return
48 |     }
49 |     job.ScaleCooldown = sleep
50 |     mutex.Lock()
51 |     scaler.jobMap[mapID] = job
52 |     mutex.Unlock()
53 |     message := "Manually paused: " + mapID + " for " + timer
54 |     slack.SendMessage(message)
55 |     fmt.Fprintf(w, "%s", message)
56 | }
57 |
58 | func (scaler *Scaler) resumeScallingJob(w http.ResponseWriter, r *http.Request) {
59 |     jobID := chi.URLParam(r, "jobName")
60 |     mapID := jobID + "-" + region
61 |
62 |     jobMapMutex.Lock()
63 |     jobMapScaleMutex.Lock()
64 |
65 |     log.Debug("Refreshing job config for ", jobID)
66 |     delete(jobMap, jobID)
67 |     delete(jobMapScale, jobID)
68 |     jobMapScaleMutex.Unlock()
69 |
70 |     nomadJob, err := GetJob(jobID, region)
71 |     if err != nil {
72 |         log.Warn("Error getting job ", jobID, " with err: ", err)
73 |     } else {
74 |         jobMap[jobID] = &nomadJob
75 |     }
76 |     jobMapMutex.Unlock()
77 |
78 |     mutex.Lock()
79 |     defer mutex.Unlock()
80 |     delete(scaler.jobMap, mapID)
81 |
82 |     message := "Manually resumed: " + mapID
83 |     slack.SendMessage(message)
84 |     fmt.Fprintf(w, "%s", message)
85 | }
86 |
87 | func (scaler *Scaler) scaleAction(body []byte) (err error) {
88 |     postStruct := new(structs.PostRequest)
89 |     err = json.Unmarshal(body, postStruct)
90 |     if err != nil {
91 |         log.Error("Body: ", string(body))
92 |         log.Error("Error Unmarshalling postJson with err: ", err)
93 |         return err
94 |     }
95 |
96 |     for k := range postStruct.Alerts {
97 |         allocID := postStruct.Alerts[k].Labels.AllocID
98 |         jobID := postStruct.Alerts[k].Labels.JobName
99 |         log.Debug("Received scale for: ", jobID, " with alertname: ", postStruct.Alerts[k].Labels.Alertname)
100 |
101 |         if len(region) == 0 {
102 |             log.Error("No region defined for AllocID: ", allocID)
103 |             continue
104 |         }
105 |         status := postStruct.Alerts[k].Status
106 |         if len(region) < 1 {
107 |             log.Info("No region defined for Alert: ", jobID)
108 |             continue
109 |         }
110 |         if len(jobID) < 1 {
111 |             log.Info("No JobName defined for Alert")
112 |             continue
113 |         }
114 |         log.Debug("Jobname received: ", jobID, " Region: ", region)
115 |
116 |         if strings.HasPrefix(postStruct.Alerts[k].Labels.Alertname, "scaleup") {
117 |             if strings.HasPrefix(status, "firing") {
118 |                 err := scaler.ScaleUp(jobID, region)
119 |                 if err != nil {
120 |                     log.Debug("Job: ", jobID, " Error: ", err)
121 |                 }
122 |             }
123 |
124 |             log.Debug("Status: ", status, " for ", jobID, " Region: ", region)
125 |
126 |         } else if strings.HasPrefix(postStruct.Alerts[k].Labels.Alertname, "scaledown") {
127 |             if strings.HasPrefix(status, "firing") {
128 |                 err := scaler.ScaleDown(jobID, region)
129 |                 if err != nil {
130 |                     log.Debug("Job: ", jobID, " Error: ", err)
131 |                 }
132 |             }
133 |
134 |             log.Debug("Status: ", status, " for ", jobID, " Region: ", region)
135 |
136 |         }
137 |     }
138 |     return nil
139 | }
140 |
141 | // scale gets a POST request and analyzes the content to decide which scale direction to apply
142 | // or whether to discard the message. The POST request comes from Alertmanager.
143 | func (scaler *Scaler) scale(w http.ResponseWriter, r *http.Request) {
144 |
145 | body, err := ioutil.ReadAll(r.Body)
146 | if err != nil {
147 | w.WriteHeader(400)
148 | fmt.Fprintf(w, "Error reading request body, with err: %v", err)
149 | return
150 | }
151 |
152 | go scaler.scaleAction(body)
153 |
154 | w.WriteHeader(200)
155 | }
156 |
--------------------------------------------------------------------------------
/slack/slack.go:
--------------------------------------------------------------------------------
1 | package slack
2 |
3 | import (
4 | "fmt"
5 | "os"
6 | "strings"
7 | "time"
8 |
9 | log "github.com/Sirupsen/logrus"
10 | slack "github.com/ashwanthkumar/slack-go-webhook"
11 | )
12 |
13 | var (
14 | useSlack string
15 | webhookURL string
16 | slackChannel string
17 | slackUsername string
18 | slackEmoji string
19 | compMessagesUp []string
20 | compMessagesDown []string
21 | region = os.Getenv("NOMAD_REGION")
22 | )
23 |
24 | func init() {
25 | useSlack = os.Getenv("USE_SLACK")
26 | webhookURL = os.Getenv("SLACK_WEBHOOK")
27 | slackChannel = os.Getenv("SLACK_CHANNEL")
28 | slackUsername = os.Getenv("SLACK_USERNAME")
29 | slackUsername = slackUsername + region
30 | slackEmoji = ":scalad:"
31 | if useSlack == "true" {
32 | if len(webhookURL) == 0 {
33 | log.Fatal("ENV variable SLACK_WEBHOOK is empty!")
34 | }
35 | }
36 | log.Info("Slack Channel: ", slackChannel)
37 | log.Info("Slack Username: ", slackUsername)
38 | log.Info("Slack emoji: ", slackEmoji)
39 |
40 | }
41 |
42 | // StartSlackTicker starts a ticker that sends a summary of scale events every 30 min.
43 | func StartSlackTicker() {
44 | tickerB := time.NewTicker(time.Minute * 30)
45 |
46 | go func() {
47 | for range tickerB.C {
48 | sendBuffered()
49 | }
50 | }()
51 | }
52 |
53 | // SendMessage takes a message and sends it to a Slack channel or user and returns an err in case of failure.
54 | func SendMessage(message string) error {
55 | if useSlack != "true" {
56 | return nil
57 | }
58 |
59 | payload := slack.Payload{
60 | Text: message,
61 | Username: slackUsername,
62 | Channel: slackChannel,
63 | IconEmoji: slackEmoji,
64 | }
65 | err := slack.Send(webhookURL, "", payload)
66 | if err != nil {
67 | log.Warn("Error sending slack message with err: ", err)
68 | return fmt.Errorf("Error sending slack message with err: %v", err)
69 | }
70 | return nil
71 | }
72 |
73 | // SendMessageTo takes a message and a user (with the @) and sends the message to that user. Returns an err in case of failure.
74 | func SendMessageTo(message string, user string) error {
75 | if useSlack != "true" {
76 | return nil
77 | }
78 |
79 | payload := slack.Payload{
80 | Text: message,
81 | Username: slackUsername,
82 | Channel: user,
83 | IconEmoji: slackEmoji,
84 | }
85 | err := slack.Send(webhookURL, "", payload)
86 | if err != nil {
87 | log.Warn("Error sending slack message with err: ", err)
88 | return fmt.Errorf("Error sending slack message with err: %v", err)
89 | }
90 | return nil
91 | }
92 |
93 | // MessageBuffered creates a queue of messages and sends them to a channel when a ticker expires.
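// For example, scale-up.go queues one entry per event with:
//
//	slack.MessageBuffered(*nomadJob.Name, "up", time.Now())
//
// and the 30 minute ticker started by StartSlackTicker flushes the queue via
// sendBuffered.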
94 | func MessageBuffered(message string, direction string, t time.Time) error {
95 | if useSlack != "true" {
96 | return nil
97 | }
98 | message = message + ` ` + t.Format("2006-01-02 15:04:05")
99 | if direction == "up" {
100 | compMessagesUp = append(compMessagesUp, message)
101 | } else {
102 | compMessagesDown = append(compMessagesDown, message)
103 | }
104 | return nil
105 | }
106 |
107 | func sendBuffered() error {
108 | var message string
109 | regionUp := strings.ToUpper(region)
110 | message = `30 min summary of autoscaler in ` + regionUp + `:
111 | Upscale:
112 | `
113 | for _, next := range compMessagesUp {
114 | message = message + `
115 | ` + next
116 | }
117 | message = message + `
118 |
119 | Downscale:
120 | `
121 | for _, next := range compMessagesDown {
122 | message = message + `
123 | ` + next
124 | }
125 | message = message + `
126 |
127 | `
128 |
129 | payload := slack.Payload{
130 | Text: message,
131 | Username: slackUsername,
132 | Channel: "#scalad-30m",
133 | IconEmoji: slackEmoji,
134 | }
135 | err := slack.Send(webhookURL, "", payload)
136 | if err != nil {
137 | log.Warn("Error sending slack message with err: ", err)
138 | return fmt.Errorf("Error sending slack message with err: %v", err)
139 | }
140 | compMessagesUp = nil
141 | compMessagesDown = nil
142 | return nil
143 |
144 | }
--------------------------------------------------------------------------------
/structs/structs.go:
--------------------------------------------------------------------------------
1 | package structs
2 |
3 | import (
4 | "time"
5 |
6 | nomad "github.com/hashicorp/nomad/api"
7 | )
8 |
9 | // JobStruct is where the metadata extracted from each Nomad job is kept.
10 | type JobStruct struct {
11 | JobName string
12 | Region string
13 | ScaleMin int
14 | ScaleMax int
15 | ScaleCountUp int
16 | ScaleCountDown int
17 | ScaleCooldown time.Time
18 | ScaleCooldownUp time.Time
19 | ScaleCooldownDown time.Time
20 | LastRun time.Time
21 | Count int
22 | Group int
23 | NoGo bool
24 | EndValue int
25 | GroupName string
26 | TaskName string
27 | }
28 |
29 | // PostR is the POST payload that is sent to Nomad to trigger the scaling action.
30 | type PostR struct {
31 | Job nomad.Job
32 | }
33 |
34 | // PostRequest is the struct where the alert coming from Alertmanager is stored.
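// Of the payload below, scaleAction only reads Alerts[].Status plus the
// Alertname, AllocID and JobName labels; note that the Nomad job name
// arrives in Alertmanager's "exported_job" label.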
35 | type PostRequest struct {
36 | Receiver string `json:"receiver"`
37 | Status string `json:"status"`
38 | Alerts []struct {
39 | Status string `json:"status"`
40 | Labels struct {
41 | Alertname string `json:"alertname"`
42 | Region string `json:"region"`
43 | Client string `json:"client"`
44 | Instance string `json:"instance"`
45 | Job string `json:"job"`
46 | JobName string `json:"exported_job"`
47 | AllocID string `json:"alloc_id"`
48 | Monitor string `json:"monitor"`
49 | Rt string `json:"rt"`
50 | Severity string `json:"severity"`
51 | } `json:"labels"`
52 | Annotations struct {
53 | Description string `json:"description"`
54 | Summary string `json:"summary"`
55 | } `json:"annotations"`
56 | StartsAt time.Time `json:"startsAt"`
57 | EndsAt time.Time `json:"endsAt"`
58 | GeneratorURL string `json:"generatorURL"`
59 | } `json:"alerts"`
60 | GroupLabels struct {
61 | Alertname string `json:"alertname"`
62 | } `json:"groupLabels"`
63 | CommonLabels struct {
64 | Alertname string `json:"alertname"`
65 | Job string `json:"job"`
66 | Monitor string `json:"monitor"`
67 | Severity string `json:"severity"`
68 | } `json:"commonLabels"`
69 | CommonAnnotations struct {
70 | } `json:"commonAnnotations"`
71 | ExternalURL string `json:"externalURL"`
72 | Version string `json:"version"`
73 | Time time.Time
74 | }
75 |
76 | // AllocRequest is the structure that matches the response of an allocation query to Nomad,
77 | // used to get the JobName and JobRegion.
78 | type AllocRequest struct {
79 | Job struct {
80 | Region string `json:"Region"`
81 | Name string `json:"Name"`
82 | } `json:"Job"`
83 | }
84 |
85 | type Meta struct {
86 | MinQuery string
87 | MaxQuery string
88 | FireTime string
89 | ScaleMin string
90 | ScaleMax string
91 | ScaleCountUp string
92 | ScaleCountDown string
93 | ScaleCooldown string
94 | ScaleCooldownUp string
95 | ScaleCooldownDown string
96 | }
97 |
98 | type Prometheus struct {
99 | Status string `json:"status"`
100 | Data struct {
101 | ResultType string `json:"resultType"`
102 | Result []struct {
103 | Metric struct {
104 | ExportedJob string `json:"exported_job"`
105 | } `json:"metric"`
106 | Value []interface{} `json:"value"`
107 | } `json:"result"`
108 | } `json:"data"`
109 | }
110 |
111 | type TrigeredAction struct {
112 | Time time.Time
113 | Direction string
114 | }
115 |
--------------------------------------------------------------------------------
/templates.go:
--------------------------------------------------------------------------------
1 | // Code generated by go-bindata.
2 | // sources:
3 | // templates/info.html
4 | // DO NOT EDIT!
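// (Regeneration note: this file embeds templates/info.html via go-bindata;
// a typical invocation would be `go-bindata templates/`, though the exact
// flags the authors used are not recorded in the repository.)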
5 | 6 | package main 7 | 8 | import ( 9 | "bytes" 10 | "compress/gzip" 11 | "fmt" 12 | "io" 13 | "io/ioutil" 14 | "os" 15 | "path/filepath" 16 | "strings" 17 | "time" 18 | ) 19 | 20 | func bindataRead(data []byte, name string) ([]byte, error) { 21 | gz, err := gzip.NewReader(bytes.NewBuffer(data)) 22 | if err != nil { 23 | return nil, fmt.Errorf("Read %q: %v", name, err) 24 | } 25 | 26 | var buf bytes.Buffer 27 | _, err = io.Copy(&buf, gz) 28 | clErr := gz.Close() 29 | 30 | if err != nil { 31 | return nil, fmt.Errorf("Read %q: %v", name, err) 32 | } 33 | if clErr != nil { 34 | return nil, err 35 | } 36 | 37 | return buf.Bytes(), nil 38 | } 39 | 40 | type asset struct { 41 | bytes []byte 42 | info os.FileInfo 43 | } 44 | 45 | type bindataFileInfo struct { 46 | name string 47 | size int64 48 | mode os.FileMode 49 | modTime time.Time 50 | } 51 | 52 | func (fi bindataFileInfo) Name() string { 53 | return fi.name 54 | } 55 | func (fi bindataFileInfo) Size() int64 { 56 | return fi.size 57 | } 58 | func (fi bindataFileInfo) Mode() os.FileMode { 59 | return fi.mode 60 | } 61 | func (fi bindataFileInfo) ModTime() time.Time { 62 | return fi.modTime 63 | } 64 | func (fi bindataFileInfo) IsDir() bool { 65 | return false 66 | } 67 | func (fi bindataFileInfo) Sys() interface{} { 68 | return nil 69 | } 70 | 71 | var _templatesInfoHtml = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x7c\x92\x41\x6f\xd4\x30\x10\x85\xcf\xf8\x57\x8c\x44\x91\x40\xda\xac\x9b\x9e\x90\x6b\x2c\x55\x94\x43\x2b\xb8\xd0\xfe\x01\x6f\x3c\x89\x2d\x1c\x3b\xd8\xb3\xcb\x56\x91\xff\x3b\x4a\x9c\x42\x11\x82\x5c\x92\xcc\x7b\xfe\xf4\xe6\xc9\xd2\xd2\xe8\x15\x93\x16\xb5\xc1\xa4\x98\xcc\xf4\xe4\x51\x31\xd2\x07\x8f\x30\x33\x00\x80\x3e\x06\x6a\x7a\x3d\x3a\xff\x24\x40\x27\xa7\xfd\x0e\xb2\x0e\xb9\xc9\x98\x5c\x7f\xbd\x7a\x0e\x31\x19\x4c\x4d\x17\xbd\xd7\x53\x46\x01\xcf\x5f\x55\xfe\xe1\x0c\x59\x01\xed\xe5\xe5\x9b\x6b\x56\x18\x23\xb3\x03\xb2\x1b\xbf\x9e\x15\xd0\x4e\x67\xc8\xd1\x3b\x03\xaf\xcd\xfa\xd4\xb3\x84\x67\x6a\xb4\x77\x43\x10\xe0\xb1\xa7\x3a\x9d\xb4\x31\x2e\x0c\x02\xda\xf7\xd3\xb9\x32\x93\x08\x64\x9b\xce\x3a\x6f\xde\xe2\x09\xc3\xbb\x67\xbe\xee\xbe\x0d\x29\x1e\x83\x59\xf2\xc5\x24\x7e\xf3\x0b\x93\x7c\xdb\x58\xda\x56\x3d\x7e\x7c\x68\x1e\x3a\xed\x31\xc1\x5d\xe8\xa3\xe4\xb6\x55\x4c\xf2\x5f\xe5\xd8\x2b\xf5\x59\x67\x82\x79\xde\x2f\xfa\xa3\x1b\xb1\x14\xf8\xe2\xc2\x91\x30\x43\xc2\xef\x47\xcc\x04\x09\x3b\x87\x27\x34\x42\x72\x7b\xa5\x98\x5c\xbb\x5c\xde\x49\xb1\x57\x92\xac\xba\x8f\x87\xbb\x5b\xc9\xc9\x6e\xff\x5f\x71\x70\x31\xbc\x18\xdc\x78\x4c\x14\xf4\x88\x7f\x98\x2a\x15\x6e\xa8\x4e\x25\x5f\x80\x2b\x75\x9e\x21\xe9\x30\x20\x5c\xb8\x1d\x5c\xe4\x65\x83\x4f\x27\x0c\x04\xe2\x03\xec\x97\xc4\xf7\xf1\x90\xa1\x94\xb5\x0e\x49\x46\xcd\xf3\x0b\xd7\x7e\xcd\x53\x8a\xe4\x64\xd4\xdf\x6a\x4d\xf7\x4f\xf9\xd6\x25\xec\xe8\x7f\x8e\x5a\x53\x15\x79\x8d\x8b\xc1\x94\xc2\x96\x15\xb6\x6e\x78\xbd\x87\xac\xfc\x0c\x00\x00\xff\xff\xd5\xef\x0a\x26\x91\x02\x00\x00") 72 | 73 | func templatesInfoHtmlBytes() ([]byte, error) { 74 | return bindataRead( 75 | _templatesInfoHtml, 76 | "templates/info.html", 77 | ) 78 | } 79 | 80 | func templatesInfoHtml() (*asset, error) { 81 | bytes, err := templatesInfoHtmlBytes() 82 | if err != nil { 83 | return nil, err 84 | } 85 | 86 | info := bindataFileInfo{name: "templates/info.html", size: 657, mode: os.FileMode(420), modTime: time.Unix(1566292097, 0)} 87 | a := &asset{bytes: bytes, info: info} 88 | return a, nil 89 | } 90 | 91 | // Asset loads and returns the asset for the given name. 
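// For example, the embedded info page can be read back with:
//
//	data, err := Asset("templates/info.html")
//
// (an illustrative call; the page is presumably loaded this way from info.go,
// which is not shown here).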
92 | // It returns an error if the asset could not be found or 93 | // could not be loaded. 94 | func Asset(name string) ([]byte, error) { 95 | cannonicalName := strings.Replace(name, "\\", "/", -1) 96 | if f, ok := _bindata[cannonicalName]; ok { 97 | a, err := f() 98 | if err != nil { 99 | return nil, fmt.Errorf("Asset %s can't read by error: %v", name, err) 100 | } 101 | return a.bytes, nil 102 | } 103 | return nil, fmt.Errorf("Asset %s not found", name) 104 | } 105 | 106 | // MustAsset is like Asset but panics when Asset would return an error. 107 | // It simplifies safe initialization of global variables. 108 | func MustAsset(name string) []byte { 109 | a, err := Asset(name) 110 | if err != nil { 111 | panic("asset: Asset(" + name + "): " + err.Error()) 112 | } 113 | 114 | return a 115 | } 116 | 117 | // AssetInfo loads and returns the asset info for the given name. 118 | // It returns an error if the asset could not be found or 119 | // could not be loaded. 120 | func AssetInfo(name string) (os.FileInfo, error) { 121 | cannonicalName := strings.Replace(name, "\\", "/", -1) 122 | if f, ok := _bindata[cannonicalName]; ok { 123 | a, err := f() 124 | if err != nil { 125 | return nil, fmt.Errorf("AssetInfo %s can't read by error: %v", name, err) 126 | } 127 | return a.info, nil 128 | } 129 | return nil, fmt.Errorf("AssetInfo %s not found", name) 130 | } 131 | 132 | // AssetNames returns the names of the assets. 133 | func AssetNames() []string { 134 | names := make([]string, 0, len(_bindata)) 135 | for name := range _bindata { 136 | names = append(names, name) 137 | } 138 | return names 139 | } 140 | 141 | // _bindata is a table, holding each asset generator, mapped to its name. 142 | var _bindata = map[string]func() (*asset, error){ 143 | "templates/info.html": templatesInfoHtml, 144 | } 145 | 146 | // AssetDir returns the file names below a certain 147 | // directory embedded in the file by go-bindata. 148 | // For example if you run go-bindata on data/... and data contains the 149 | // following hierarchy: 150 | // data/ 151 | // foo.txt 152 | // img/ 153 | // a.png 154 | // b.png 155 | // then AssetDir("data") would return []string{"foo.txt", "img"} 156 | // AssetDir("data/img") would return []string{"a.png", "b.png"} 157 | // AssetDir("foo.txt") and AssetDir("notexist") would return an error 158 | // AssetDir("") will return []string{"data"}. 
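// For the tree embedded in this binary, AssetDir("templates") would return
// []string{"info.html"} (see _bintree below).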
159 | func AssetDir(name string) ([]string, error) { 160 | node := _bintree 161 | if len(name) != 0 { 162 | cannonicalName := strings.Replace(name, "\\", "/", -1) 163 | pathList := strings.Split(cannonicalName, "/") 164 | for _, p := range pathList { 165 | node = node.Children[p] 166 | if node == nil { 167 | return nil, fmt.Errorf("Asset %s not found", name) 168 | } 169 | } 170 | } 171 | if node.Func != nil { 172 | return nil, fmt.Errorf("Asset %s not found", name) 173 | } 174 | rv := make([]string, 0, len(node.Children)) 175 | for childName := range node.Children { 176 | rv = append(rv, childName) 177 | } 178 | return rv, nil 179 | } 180 | 181 | type bintree struct { 182 | Func func() (*asset, error) 183 | Children map[string]*bintree 184 | } 185 | var _bintree = &bintree{nil, map[string]*bintree{ 186 | "templates": &bintree{nil, map[string]*bintree{ 187 | "info.html": &bintree{templatesInfoHtml, map[string]*bintree{}}, 188 | }}, 189 | }} 190 | 191 | // RestoreAsset restores an asset under the given directory 192 | func RestoreAsset(dir, name string) error { 193 | data, err := Asset(name) 194 | if err != nil { 195 | return err 196 | } 197 | info, err := AssetInfo(name) 198 | if err != nil { 199 | return err 200 | } 201 | err = os.MkdirAll(_filePath(dir, filepath.Dir(name)), os.FileMode(0755)) 202 | if err != nil { 203 | return err 204 | } 205 | err = ioutil.WriteFile(_filePath(dir, name), data, info.Mode()) 206 | if err != nil { 207 | return err 208 | } 209 | err = os.Chtimes(_filePath(dir, name), info.ModTime(), info.ModTime()) 210 | if err != nil { 211 | return err 212 | } 213 | return nil 214 | } 215 | 216 | // RestoreAssets restores an asset under the given directory recursively 217 | func RestoreAssets(dir, name string) error { 218 | children, err := AssetDir(name) 219 | // File 220 | if err != nil { 221 | return RestoreAsset(dir, name) 222 | } 223 | // Dir 224 | for _, child := range children { 225 | err = RestoreAssets(dir, filepath.Join(name, child)) 226 | if err != nil { 227 | return err 228 | } 229 | } 230 | return nil 231 | } 232 | 233 | func _filePath(dir, name string) string { 234 | cannonicalName := strings.Replace(name, "\\", "/", -1) 235 | return filepath.Join(append([]string{dir}, strings.Split(cannonicalName, "/")...)...) 236 | } 237 | 238 | -------------------------------------------------------------------------------- /templates/info.html: -------------------------------------------------------------------------------- 1 | 2 |
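<!-- Data contract (inferred): .InfoTime and .LastJobs are supplied by the
     info handler, presumably in info.go; a LastJobs entry is recorded on
     every scale event. -->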
3 | <head>
<!-- (lines 4-19: the page's inline stylesheet; its markup did not survive extraction) -->
20 | <title>TCS-Scaler Info</title></head>
21 | <body>
22 | <h3>Last {{.InfoTime}} Minutes requests received:</h3>
23 | <table>
24 | <thead>
25 | <tr>
26 | <th>JobID</th>
27 | <th>Region</th>
28 | <th>Alertname</th>
29 | <th>Received At</th>
30 | </tr></thead><tbody>
31 | {{ range $i, $scaleEvent := .LastJobs }}
32 | <tr><td>{{$scaleEvent.JobID}}</td><td>{{$scaleEvent.Region}}</td><td>{{$scaleEvent.Direction}}</td><td>{{$scaleEvent.Time}}</td></tr>
33 | {{end}}
34 | </tbody>
35 | </table>
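<!-- Each row is one scale event recorded via the LastJobs(...) calls in
     scale-up.go (and presumably scale-down.go); note that the "Alertname"
     column actually displays {{$scaleEvent.Direction}}, e.g. "scaleUp". -->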
36 | </body>
37 | </html>
38 |
--------------------------------------------------------------------------------
/tickers.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "time"
5 |
6 | log "github.com/Sirupsen/logrus"
7 | )
8 |
9 | func fireMapTicker() {
10 | ticker := time.NewTicker(time.Second * time.Duration(fireMapTickerEnvInt))
11 |
12 | go func() {
13 | for range ticker.C {
14 | checkFiringMap()
15 | }
16 | }()
17 |
18 | }
19 |
20 | func scalerTicker() {
21 | ticker := time.NewTicker(time.Second * time.Duration(scalerTickerEnvInt))
22 |
23 | var err error
24 | go func() {
25 | for range ticker.C {
26 | jobMap, err = getJobs()
27 | if err != nil {
28 | log.Error("Error getting jobs from nomad from inside Ticker with err: ", err)
29 | }
30 | checkMeta(jobMap)
31 | prometheusQueries(jobMetaMap)
32 | }
33 | }()
34 |
35 | }
36 |
--------------------------------------------------------------------------------