├── .github
│   └── workflows
│       └── go.yml
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── component1.yaml
├── info.go
├── job
│   └── jsonParser.go
├── main.go
├── mapActions.go
├── nomad.job
├── rootfs
│   └── etc
│       └── ssl
│           └── certs
│               └── ca-certificates.crt
├── scalad_mascot.png
├── scale-down.go
├── scale-up.go
├── scaler.go
├── slack
│   └── slack.go
├── structs
│   └── structs.go
├── templates.go
├── templates
│   └── info.html
└── tickers.go
/.github/workflows/go.yml:
--------------------------------------------------------------------------------
1 | name: Go
2 | on: [push]
3 | jobs:
4 |
5 | build:
6 | name: Build
7 | runs-on: ubuntu-latest
8 | steps:
9 |
10 | - name: Set up Go 1.12
11 | uses: actions/setup-go@v1
12 | with:
13 | go-version: 1.12
14 | id: go
15 |
16 | - name: Check out code into the Go module directory
17 | uses: actions/checkout@v1
18 |
19 | - name: Get dependencies
20 | run: |
21 | go get -v -t -d ./...
22 | if [ -f Gopkg.toml ]; then
23 | curl https://raw.githubusercontent.com/golang/dep/master/install.sh | sh
24 | dep ensure
25 | fi
26 |
27 | - name: Build
28 | run: go build -v .
29 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # IDEs / os related
2 | **/.DS_Store
3 | **/*~
4 | .vscode
5 | .idea
6 |
7 | # Tests
8 | *.test
9 | *_test.go
10 |
11 | # Binaries
12 | scalad
13 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM golang:1.14.2 as build-env
2 | COPY . /go/src/github.com/trivago/scalad
3 | WORKDIR /go/src/github.com/trivago/scalad
4 | RUN go get
5 | RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o /scalad
6 |
7 | FROM scratch
8 |
9 | # Copy root filesystem
10 | COPY rootfs /
11 | COPY --from=build-env /scalad /
12 | ENTRYPOINT ["/scalad"]
13 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # scalad
2 |
3 | A Nomad job autoscaler: it watches Prometheus metrics and scales Nomad task groups up or down based on per-job thresholds.
4 |
5 | Environment variables:
6 |
7 | * `PORT = ":80"`: Port on which the application listens.
8 | * `NOMAD_HOST = "http://nomad.service.consul:4646"`: Nomad host endpoint.
9 | * `NOMAD_REGION = "global"`: Nomad region.
10 | * `VAULT_TOKEN = "ljkasdflkjafd"`: Vault token with permission to access every secret needed by the jobs being scaled.
11 | * `USE_SLACK = "true"`: Flag to enable Slack as the notification channel.
12 | * `HTTP_USER = "user"`: Username for basic auth on the endpoints that scale up or down manually.
13 | * `HTTP_PASS = "password"`: Password for basic auth on the endpoints that scale up or down manually.
14 | * `METRICS_ENDPOINT = "http://prometheus.yourorg.com/api/v1/query?query="`: Endpoint from which to fetch the metrics that trigger the scale events.
15 |
16 | Inside the job file (at taskGroup level):
17 | ````
18 | meta {
19 |   scaler = "true"              Activates the scaler
20 |   min_query = "sum(rate(nomad_client_allocs_cpu_total_ticks{exported_job='scaler-test'}[1m]))by(exported_job) < bool 1"  Query that gives the min threshold for scaling down
21 |   max_query = "sum(rate(nomad_client_allocs_cpu_total_ticks{exported_job='scaler-test'}[1m]))by(exported_job) > bool 2"  Query that gives the max threshold for scaling up
22 |   query_fire_time = "2m"       Time the query needs to be true before triggering the scaling event
23 |   scale_cooldown_down = "20s"  Cooldown time after a scale down event
24 |   scale_cooldown_up = "25s"    Cooldown time after a scale up event
25 |   scale_count_down = "1"       Number of containers removed on a scale down event
26 |   scale_count_up = "2"         Number of containers added on a scale up event
27 |   scale_max = "16"             Maximum number of containers
28 |   scale_min = "1"              Minimum number of containers
29 | }
30 | ````
31 |
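32 | ## HTTP endpoints
33 |
34 | The chi router in `main.go` exposes:
35 |
36 | * `POST /scale`: webhook endpoint for alertmanager.
37 | * `GET /scale-up/{jobName}/{region}` and `GET /scale-down/{jobName}/{region}`: manual scaling, protected by basic auth (`HTTP_USER`/`HTTP_PASS`).
38 | * `GET /stop-scalling/{jobName}/{timer}` and `GET /resume-scalling/{jobName}`: pause and resume automatic scaling for a job.
39 | * `GET /info`: HTML status page with the recently executed scale operations.
40 | * `GET /metrics`: Prometheus metrics.
41 | * `GET /`: health check.
42 |
43 | Note that `min_query` and `max_query` use Prometheus `bool` comparisons: they return `1`
44 | while the threshold is crossed and `0` otherwise, and scalad treats only an exact result
45 | of `1` as firing (see `queryPrometheus` in `main.go`).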
--------------------------------------------------------------------------------
/component1.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: backstage.io/v1alpha1
2 | kind: Component
3 | metadata:
4 | name: Scalad
5 | description: Component for the nomad autoscaler
6 | annotations:
7 | backstage.io/github-actions-id: trivago/scalad
8 | spec:
9 | type: service
10 | endpoint: http://www.trivago.com
11 | documentation: http://knowledge.trivago.com/
12 | lifecycle: stable
13 | owner: esteban.barrios@trivago.com
14 | githubLink: https://api.github.com/repos/trivago/scalad/actions/runs
15 |
--------------------------------------------------------------------------------
/info.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "net/http"
5 | "strconv"
6 | "time"
7 |
8 | log "github.com/Sirupsen/logrus"
9 | "github.com/alecthomas/template"
10 | )
11 |
12 | type lastJob struct {
13 | JobID string
14 | Region string
15 | Direction string
16 | Time time.Time
17 | }
18 |
19 | // LastJobs function updates the lastJobs map with the jobs executed in the last X (INFO_TIME env variable) minutes.
20 | func LastJobs(jobID string, region string, direction string, triggerTime time.Time) {
21 | secs := triggerTime.Unix()
22 | var m lastJob
23 |
24 | m.JobID = jobID
25 | m.Region = region
26 | m.Direction = direction
27 | m.Time = triggerTime
28 |
29 | lastJobs[secs] = m
30 | }
31 |
32 | func clearInfoMap() {
33 | now := time.Now().Unix()
34 | infoTimeInt64, err := strconv.ParseInt(infoTime, 10, 64)
35 | if err != nil {
36 | log.Error("Error parsing INFO_TIME as int64 with err: ", err, ". Setting infoTimeInt64 to 60 minutes")
37 | infoTimeInt64 = 60
38 | }
39 | for key := range lastJobs {
40 | if (now - (infoTimeInt64 * 60)) > key {
41 | delete(lastJobs, key)
42 | }
43 | }
44 | }
45 |
46 | // StatusPage function returns an HTML page displaying the scaling operations performed in the last INFO_TIME minutes.
47 | func StatusPage(w http.ResponseWriter, r *http.Request) {
48 | message, err := Asset("templates/info.html")
49 | if err != nil {
50 | log.Error("Error loading asset for info.html with err: ", err)
51 | return
52 | }
53 |
54 | messageTmpl, err := template.New("message").Parse(string(message))
55 | if err != nil {
56 | log.Error("Error parsing template for info.html with err: ", err)
57 | return
58 | }
59 |
60 | info := struct {
61 | LastJobs map[int64]lastJob
62 | InfoTime string
63 | }{
64 | lastJobs,
65 | infoTime,
66 | }
67 |
68 | if err := messageTmpl.Execute(w, info); err != nil {
69 | log.Error("Error executing template for info.html with err: ", err)
70 | }
71 | }
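72 |
73 | // Example: with INFO_TIME = "60", clearInfoMap drops every lastJobs entry whose
74 | // trigger timestamp is older than 60*60 seconds on each run.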
--------------------------------------------------------------------------------
/job/jsonParser.go:
--------------------------------------------------------------------------------
1 | package job
2 |
3 | import (
4 | "strconv"
5 | "time"
6 |
7 | "github.com/trivago/scalad/structs"
8 | log "github.com/Sirupsen/logrus"
9 | nomad "github.com/hashicorp/nomad/api"
10 | tparse "github.com/karrick/tparse/v2"
11 | "github.com/prometheus/client_golang/prometheus"
12 | )
13 |
14 | var (
15 | namespace = "scalers"
16 | subsystem = ""
17 | stableStopedScaleLabels = []string{}
18 | stableStopedScaleEventStatusVec = prometheus.NewCounterVec(
19 | prometheus.CounterOpts{
20 | Name: prometheus.BuildFQName(namespace, subsystem, "stableStopedScaleEventStatus"),
21 | Help: "Scaling jobs",
22 | },
23 | stableStopedScaleLabels,
24 | )
25 | )
26 |
27 | func init() {
28 | prometheus.MustRegister(stableStopedScaleEventStatusVec)
29 | }
30 |
31 | // ParseJSON takes a nomad.Job from GetJob and a call direction and checks that the
32 | // scaling stanza is correctly declared inside of it. Before scaling down it also checks that the current job is stable.
33 | // This status check is skipped for scaling up, in case the application is not stable because it is overloaded by requests.
34 | // Returns groupsMap map[string]structs.JobStruct and nomadJob nomad.Job.
35 | func ParseJSON(orgJob nomad.Job, call string) (groupsMap map[string]structs.JobStruct, nomadJob nomad.Job) {
36 |
37 | groupsMap = make(map[string]structs.JobStruct)
38 | // Do not check whether the job is stable when scaling up, in case the application cannot become stable because it is overloaded by requests.
39 | if call != "up" && !*orgJob.Stable {
40 | log.Debug("Job: ", *orgJob.Name, " is not stable for scaling! Aborting scale operation until the job becomes stable...")
41 | if len(stableStopedScaleLabels) == 3 {
42 | labels := prometheus.Labels{
43 | "connector": stableStopedScaleLabels[0],
44 | "region": stableStopedScaleLabels[1],
45 | "action": stableStopedScaleLabels[2],
46 | }
47 | stableStopedScaleEventStatusVec.With(labels).Inc()
48 | } else {
49 | stableStopedScaleEventStatusVec.WithLabelValues().Inc()
50 | }
51 |
52 | return
53 | }
54 |
55 |
56 | var err error
57 |
58 | // Check the task groups for a meta stanza and, if found, add them to the map.
59 | for _, taskGroup := range orgJob.TaskGroups {
60 | var jsonJob structs.JobStruct
61 | jsonJob.GroupName = *taskGroup.Name
62 |
63 | jsonJob.ScaleMin, err = strconv.Atoi(taskGroup.Meta["scale_min"])
64 | if err != nil {
65 | log.Debug("Unable to convert ScaleMin to int in Task: ", *taskGroup.Name, " in job: ", *orgJob.Name)
66 | continue
67 | }
68 | jsonJob.ScaleMax, err = strconv.Atoi(taskGroup.Meta["scale_max"])
69 | if err != nil {
70 | log.Debug("Unable to convert ScaleMax to int in Task: ", *taskGroup.Name, " in job: ", *orgJob.Name)
71 | continue
72 | }
73 | jsonJob.ScaleCountUp, err = strconv.Atoi(taskGroup.Meta["scale_count_up"])
74 | if err != nil {
75 | log.Debug("Unable to convert ScaleCountUp to int in Task: ", *taskGroup.Name, " in job: ", *orgJob.Name)
76 | continue
77 | }
78 | jsonJob.ScaleCountDown, err = strconv.Atoi(taskGroup.Meta["scale_count_down"])
79 | if err != nil {
80 | log.Debug("Unable to convert ScaleCountDown to int in Task: ", *taskGroup.Name, " in job: ", *orgJob.Name)
81 | continue
82 | }
83 | jsonJob.Count = *taskGroup.Count
84 | jsonJob.LastRun = time.Now()
85 | now := time.Now()
86 | _, ok := taskGroup.Meta["scale_cooldown_up"]
87 | if ok {
88 | up, err := tparse.AddDuration(now, "+"+taskGroup.Meta["scale_cooldown_up"])
89 | if err != nil {
90 | log.Debug("Meta ScaleCooldownUP error: ", err)
91 | continue
92 | }
93 | jsonJob.ScaleCooldownUp = up
94 | }
95 | _, ok = taskGroup.Meta["scale_cooldown_down"]
96 | if ok {
97 | down, err := tparse.AddDuration(now, "+"+taskGroup.Meta["scale_cooldown_down"])
98 | if err != nil {
99 | log.Debug("Meta ScaleCooldownDown error: ", err)
100 | continue
101 | }
102 | jsonJob.ScaleCooldownDown = down
103 | }
104 |
105 | jsonJob.JobName = *orgJob.Name
106 | jsonJob.Region = *orgJob.Region
107 | if jsonJob.ScaleMin != 0 {
108 | log.Info("Adding ", jsonJob.GroupName, " to map.")
109 | groupsMap[jsonJob.GroupName] = jsonJob
110 | }
111 | }
112 |
113 | // Check the tasks of each task group for a meta stanza and, if found, add them to the map.
114 | for i, taskGroup := range orgJob.TaskGroups {
115 |
116 | var jsonJob structs.JobStruct
117 | jsonJob.GroupName = *taskGroup.Name
118 | jsonJob.Count = *taskGroup.Count
119 |
120 | for _, tasks := range taskGroup.Tasks {
121 | _, exists := groupsMap[*taskGroup.Name]
122 | if exists {
123 | log.Debug("Group: ", *taskGroup.Name, " exists in group map")
124 | break
125 | }
126 |
127 | jsonJob.TaskName = tasks.Name
128 | jsonJob.ScaleMin, err = strconv.Atoi(tasks.Meta["scale_min"])
129 | if err != nil {
130 | log.Debug("Unable to convert ScaleMin to int in Taskgroup: ", *taskGroup.Name, " Task: ", tasks.Name, " in job: ", *orgJob.Name)
131 | continue
132 | }
133 | jsonJob.ScaleMax, err = strconv.Atoi(tasks.Meta["scale_max"])
134 | if err != nil {
135 | log.Debug("Unable to convert ScaleMax to int in Taskgroup: ", *taskGroup.Name, " Task: ", tasks.Name, " in job: ", *orgJob.Name)
136 | continue
137 | }
138 | jsonJob.ScaleCountUp, err = strconv.Atoi(tasks.Meta["scale_count_up"])
139 | if err != nil {
140 | log.Debug("Unable to convert ScaleCountUp to int in Taskgroup: ", *taskGroup.Name, " Task: ", tasks.Name, " in job: ", *orgJob.Name)
141 | continue
142 | }
143 | jsonJob.ScaleCountDown, err = strconv.Atoi(tasks.Meta["scale_count_down"])
144 | if err != nil {
145 | log.Debug("Unable to convert ScaleCountDown to int in Taskgroup: ", *taskGroup.Name, " Task: ", tasks.Name, " in job: ", *orgJob.Name)
146 | continue
147 | }
148 | jsonJob.LastRun = time.Now()
149 | now := time.Now()
150 | _, ok := tasks.Meta["scale_cooldown_up"]
151 | if ok {
152 | up, err := tparse.AddDuration(now, "+"+tasks.Meta["scale_cooldown_up"])
153 | if err != nil {
154 | log.Debug("Meta ScaleCooldownUP error: ", err)
155 | continue
156 | }
157 | jsonJob.ScaleCooldownUp = up
158 | }
159 | _, ok = tasks.Meta["scale_cooldown_down"]
160 | if ok {
161 | down, err := tparse.AddDuration(now, "+"+tasks.Meta["scale_cooldown_down"])
162 | if err != nil {
163 | log.Debug("Meta ScaleCooldownDown error: ", err)
164 | continue
165 | }
166 | jsonJob.ScaleCooldownDown = down
167 | }
168 |
169 | jsonJob.JobName = *orgJob.Name
170 | jsonJob.Region = *orgJob.Region
171 | jsonJob.Group = i
172 |
173 | groupsMap[jsonJob.GroupName] = jsonJob
174 |
175 | }
176 | }
177 |
178 | log.Debug("Current Map: ")
179 | for _, entry := range groupsMap {
180 | log.Debug("JobName: ", entry.JobName)
181 | log.Debug(" GroupName: ", entry.GroupName)
182 | log.Debug(" Count: ", entry.Count)
183 | log.Debug(" Scale Min: ", entry.ScaleMin)
184 | log.Debug(" Scale Max: ", entry.ScaleMax)
185 |
186 | }
187 |
188 | return groupsMap, orgJob
189 | }
190 |
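191 | // Note: cooldown meta values are parsed with tparse.AddDuration relative to
192 | // time.Now(), so scale_cooldown_up = "25s" yields ScaleCooldownUp = now + 25s;
193 | // tparse also accepts coarser units (e.g. "2d") that time.ParseDuration rejects.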
--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "encoding/json"
5 | "fmt"
6 | "io/ioutil"
7 | "net/http"
8 | "net/url"
9 | "os"
10 | "os/signal"
11 | "strconv"
12 | "strings"
13 | "sync"
14 | "time"
15 |
16 | log "github.com/Sirupsen/logrus"
17 | "github.com/go-chi/chi"
18 | "github.com/hashicorp/nomad/api"
19 | nomad "github.com/hashicorp/nomad/api"
20 | tparse "github.com/karrick/tparse/v2"
21 | "github.com/prometheus/client_golang/prometheus"
22 | "github.com/prometheus/client_golang/prometheus/promhttp"
23 | "github.com/trivago/scalad/slack"
24 | "github.com/trivago/scalad/structs"
25 | )
26 |
27 | var (
28 | port = os.Getenv("PORT")
29 | nomadHost = os.Getenv("NOMAD_HOST")
30 | region = os.Getenv("NOMAD_REGION")
31 | nomadCaCert = os.Getenv("NOMAD_CACERT")
32 | vaultToken = os.Getenv("VAULT_TOKEN")
33 | useSlack = os.Getenv("USE_SLACK")
34 | username = os.Getenv("HTTP_USER")
35 | password = os.Getenv("HTTP_PASS")
36 | metricsEndpoint = os.Getenv("METRICS_ENDPOINT")
37 | infoTime = os.Getenv("INFO_TIME")
38 | fireMapTickerEnv = os.Getenv("FIREMAP_TICKER_SECS")
39 | scalerTickerEnv = os.Getenv("SCALER_TICKER_SECS")
40 | scalerTickerEnvInt int64
41 | fireMapTickerEnvInt int64
42 | lastJobs map[int64]lastJob
43 | namespace = "scalers"
44 | subsystem = ""
45 | scalerLabels = []string{"name", "region", "direction"}
46 | apiLabels = []string{}
47 | scalerVec = prometheus.NewCounterVec(
48 | prometheus.CounterOpts{
49 | Name: prometheus.BuildFQName(namespace, subsystem, "count"),
50 | Help: "Scaling jobs",
51 | },
52 | scalerLabels,
53 | )
54 | apiRequestsVec = prometheus.NewCounterVec(
55 | prometheus.CounterOpts{
56 | Name: prometheus.BuildFQName(namespace, subsystem, "apicalls"),
57 | Help: "Scaling jobs",
58 | },
59 | apiLabels,
60 | )
61 | mutex = &sync.Mutex{}
62 | jobMap map[string]*nomad.Job
63 | jobMapMutex = &sync.Mutex{}
64 | jobMapScale map[string]*nomad.Job
65 | jobMapScaleMutex = &sync.Mutex{}
66 | jobMetaMap map[string]*structs.Meta
67 | jobMetaMapMutex = &sync.Mutex{}
68 | fireTimeMap map[string]*structs.TrigeredAction
69 | fireTimeMapMutex = &sync.Mutex{}
70 |
71 | scaler Scaler
72 | )
73 |
74 | // init function applies defaults for the env variables needed to run the scaler.
75 | // These are: nomadHost -> address under which nomad is running
76 | // port -> port on which the application is going to listen.
77 | // This function also registers two counter vectors with prometheus:
78 | // one for api requests and another for scale operations performed
79 | // by the scaler.
80 | func init() {
81 | if len(nomadHost) == 0 {
82 | nomadHost = "http://nomad.service.consul:4646"
83 | }
84 | if len(port) == 0 {
85 | port = ":8080"
86 | }
87 | if len(infoTime) == 0 {
88 | infoTime = "60"
89 | }
90 | if len(fireMapTickerEnv) == 0 {
91 | fireMapTickerEnv = "30"
92 | }
93 | if len(scalerTickerEnv) == 0 {
94 | scalerTickerEnv = "60"
95 | }
96 | var err error
97 | fireMapTickerEnvInt, err = strconv.ParseInt(fireMapTickerEnv, 10, 64)
98 | if err != nil {
99 | log.Fatal("Error converting fireMapTicker to int with err: ", err)
100 | }
101 |
102 | scalerTickerEnvInt, err = strconv.ParseInt(scalerTickerEnv, 10, 64)
103 | if err != nil {
104 | log.Fatal("Error converting scalerTicker to int with err: ", err)
105 | }
106 |
107 | prometheus.MustRegister(scalerVec)
108 | prometheus.MustRegister(apiRequestsVec)
109 |
110 | jobMap = make(map[string]*nomad.Job)
111 | jobMapScale = make(map[string]*nomad.Job)
112 | jobMetaMap = make(map[string]*structs.Meta)
113 | fireTimeMap = make(map[string]*structs.TrigeredAction)
114 | lastJobs = make(map[int64]lastJob)
115 | }
116 |
117 | // startHTTP function starts the chi router and registers all the available endpoints.
118 | func startHTTP() {
119 | r := chi.NewMux()
120 |
121 | scaler = newScaler()
122 |
123 | r.Post("/scale", scaler.scale)
124 |
125 | r.Get("/", scaler.health)
126 |
127 | r.Get("/stop-scalling/{jobName}/{timer}", scaler.stopScallingJob)
128 |
129 | r.Get("/resume-scalling/{jobName}", scaler.resumeScallingJob)
130 |
131 | r.Get("/scale-up/{jobName}/{region}", manualScaleUp)
132 | r.Get("/scale-down/{jobName}/{region}", manualScaleDown)
133 |
134 | r.Get("/info", StatusPage)
135 |
136 | promHandler := promhttp.Handler()
137 | r.Get("/metrics", promHandler.ServeHTTP)
138 |
139 | // Profiling endpoints. These are disabled to preserve memory.
140 | /*
141 | r.Get("/debug/pprof/", pprof.Index)
142 | r.Get("/debug/pprof/cmdline", pprof.Cmdline)
143 | r.Get("/debug/pprof/profile", pprof.Profile)
144 | r.Get("/debug/pprof/symbol", pprof.Symbol)
145 |
146 | // Register pprof handlers
147 | r.HandleFunc("/debug/pprof/", pprof.Index)
148 | r.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline)
149 | r.HandleFunc("/debug/pprof/profile", pprof.Profile)
150 | r.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
151 |
152 | r.Handle("/debug/pprof/goroutine", pprof.Handler("goroutine"))
153 | r.Handle("/debug/pprof/heap", pprof.Handler("heap"))
154 | r.Handle("/debug/pprof/threadcreate", pprof.Handler("threadcreate"))
155 | r.Handle("/debug/pprof/block", pprof.Handler("block"))
156 | */
157 | log.Fatal(http.ListenAndServe(port, r))
158 | }
159 |
160 | func checkFiringMap() {
161 | log.Debug("Checking firingMap")
162 | fireTimeMapMutex.Lock()
163 | for mapName, trigger := range fireTimeMap {
164 | log.Debug(mapName, trigger)
165 | runTime, err := tparse.AddDuration(trigger.Time, "+"+jobMetaMap[mapName].FireTime)
166 | if err != nil {
167 | log.Error("ERROR: JobName: ", mapName)
168 | log.Error("Can't add trigger.Time and meta.Firetime in checkFiringMap with err: ", err)
169 | continue
170 | }
171 | now := time.Now()
172 | if now.After(runTime) {
173 | if trigger.Direction == "up" {
174 | log.Debug("Scaling up: ", mapName)
175 | err := scaler.ScaleUp(mapName, region)
176 | if err != nil {
177 | log.Error("Error scaling up with err: ", err)
178 | continue
179 | }
180 | delete(fireTimeMap, mapName)
181 |
182 | } else if trigger.Direction == "down" {
183 | log.Debug("Scaling down: ", mapName)
184 | err := scaler.ScaleDown(mapName, region)
185 | if err != nil {
186 | log.Error("Error scaling down with err: ", err)
187 | continue
188 | }
189 | delete(fireTimeMap, mapName)
190 | }
191 | }
192 | }
193 | fireTimeMapMutex.Unlock()
194 |
195 | }
196 |
197 | func addToFiringMap(id string, trigered time.Time, direction string) {
198 | fireTimeMapMutex.Lock()
199 | defer fireTimeMapMutex.Unlock()
200 |
201 | if _, ok := fireTimeMap[id]; !ok {
202 | var trigeredAction structs.TrigeredAction
203 | trigeredAction.Time = trigered
204 | trigeredAction.Direction = direction
205 |
206 | fireTimeMap[id] = &trigeredAction
207 | log.Debug("added entry to fireTimeMap -> Direction: ", direction, " Trigered: ", trigered)
208 | }
209 |
210 | }
211 |
212 | func removeFromFiringMap(id string) {
213 | fireTimeMapMutex.Lock()
214 | defer fireTimeMapMutex.Unlock()
215 |
216 | if _, ok := fireTimeMap[id]; ok {
217 | delete(fireTimeMap, id)
218 | log.Debug("removed entry from fireTimeMap for ", id)
219 | }
220 |
221 | }
222 |
223 | func prometheusQueries(jobMetaMap map[string]*structs.Meta) {
224 | jobMetaMapMutex.Lock()
225 | for id, job := range jobMetaMap {
226 | job.MaxQuery = strings.Replace(job.MaxQuery, "\\", "", -1)
227 | job.MinQuery = strings.Replace(job.MinQuery, "\\", "", -1)
228 |
229 | log.Debug("Job: ", id)
230 | log.Debug("MaxQuery: ", job.MaxQuery)
231 | maxResult, err := queryPrometheus(job.MaxQuery)
232 | if err != nil {
233 | log.Error("Unable to get max result from prometheus with err: ", err, " for job: ", id)
234 | removeFromFiringMap(id)
235 | continue
236 | }
237 |
238 | log.Debug("MaxResult query result: ", maxResult)
239 | if maxResult {
240 | addToFiringMap(id, time.Now(), "up")
241 | continue
242 | }
243 |
244 | log.Debug("MinQuery: ", job.MinQuery)
245 | minResult, err := queryPrometheus(job.MinQuery)
246 | if err != nil {
247 | log.Error("Unable to get min result from prometheus with err: ", err, " for job: ", id)
248 | removeFromFiringMap(id)
249 | continue
250 | }
251 |
252 | log.Debug("MinResult query result: ", minResult)
253 | if minResult {
254 | addToFiringMap(id, time.Now(), "down")
255 | continue
256 | }
257 | }
258 |
259 | jobMetaMapMutex.Unlock()
260 | }
261 |
262 | func queryPrometheus(promQuery string) (bool, error) {
263 | var result structs.Prometheus
264 |
265 | client := &http.Client{
266 | Timeout: (time.Second * 10),
267 | }
268 |
269 | queryURL := fmt.Sprintf("%s%s", metricsEndpoint, url.QueryEscape(promQuery))
270 | log.Debug("Query URL: ", queryURL)
271 |
272 | // http.NewRequest parses the URL itself and returns an error for a malformed one.
273 | req, err := http.NewRequest("GET", queryURL, nil)
274 | if err != nil {
275 | log.Error("Error creating new request with err: ", err)
276 | return false, err
277 | }
278 |
279 | resp, err := client.Do(req)
280 | if err != nil {
281 | log.Error("Error executing request with err: ", err)
282 | return false, err
283 | }
284 | defer resp.Body.Close()
285 | data, err := ioutil.ReadAll(resp.Body)
286 | if err != nil {
287 | log.Error("Unable to read resp.Body: ", err)
288 | return false, err
289 | }
290 |
291 | if resp.StatusCode >= 400 {
292 | return false, fmt.Errorf("error response: %s", string(data))
293 | }
294 |
295 | if err = json.Unmarshal(data, &result); err != nil {
296 | log.Error("Unable to unmarshal with err: ", err)
297 | return false, err
298 | }
299 |
300 | var resultInt int
301 |
302 | if len(result.Data.Result) > 0 {
303 | if len(result.Data.Result[0].Value) > 0 {
304 | resultInt, err = strconv.Atoi(result.Data.Result[0].Value[1].(string))
305 | if err != nil {
306 | log.Error("Error converting prometheus response into Int with err: ", err)
307 | return false, err
308 | }
309 | }
310 | } else {
311 | return false, fmt.Errorf("length of prometheus response is 0")
312 | }
313 |
314 | if resultInt != 1 {
315 | return false, nil
316 | }
317 |
318 | return true, nil
319 | }
320 |
321 | func checkMeta(jobMap map[string]*api.Job) {
322 | jobMapScaleMutex.Lock()
323 | jobMetaMapMutex.Lock()
324 | defer jobMapScaleMutex.Unlock()
325 | defer jobMetaMapMutex.Unlock()
326 | for _, job := range jobMap {
327 | if job.Meta["scaler"] == "true" {
328 | jobMapScale[*job.Name] = job
329 | jobMetaMap[*job.Name] = readMeta(job.Meta)
330 | log.Debug("Adding ", *job.Name, " to jobMapScale JOB level")
331 | }
332 | for _, taskGroup := range job.TaskGroups {
333 | if taskGroup.Meta["scaler"] == "true" {
334 | // bug
335 | // todo: replace with job.Name + task group
336 | jobMapScale[*job.Name] = job
337 | jobMetaMap[*job.Name] = readMeta(taskGroup.Meta)
338 | log.Debug("Adding ", *job.Name, " to jobMapScale TASKGROUP level")
339 | }
340 | for _, task := range taskGroup.Tasks {
341 | if task.Meta["scaler"] == "true" {
342 | jobMapScale[*job.Name] = job
343 | jobMetaMap[*job.Name] = readMeta(task.Meta)
344 | log.Debug("Adding ", *job.Name, " to jobMapScale TASK level")
345 |
346 | }
347 | }
348 | }
349 | }
350 | }
351 |
352 | func readMeta(t map[string]string) *structs.Meta {
353 | var m structs.Meta
354 | m.MinQuery = t["min_query"]
355 | m.MaxQuery = t["max_query"]
356 | m.FireTime = t["query_fire_time"]
357 | m.ScaleMin = t["scale_min"]
358 | m.ScaleMax = t["scale_max"]
359 | m.ScaleCountUp = t["scale_count_up"]
360 | m.ScaleCooldown = t["scale_count_down"]
361 | m.ScaleCooldownUp = t["scale_cooldown_up"]
362 | m.ScaleCooldownDown = t["scale_cooldown_down"]
363 | return &m
364 | }
365 |
366 | func getJobs() (map[string]*nomad.Job, error) {
367 | jobMap := make(map[string]*nomad.Job)
368 |
369 | nomadClient, err := api.NewClient(&api.Config{Address: nomadHost, TLSConfig: &api.TLSConfig{CACert: nomadCaCert}})
370 | if err != nil {
371 | log.Error("Error creating nomad client with err: ", err)
372 | return nil, err
373 | }
374 | options := &api.QueryOptions{AllowStale: true}
375 |
376 | joblist, _, err := nomadClient.Jobs().List(options)
377 | if err != nil {
378 | log.Error("Unable to get job list from nomad with err: ", err)
379 | return nil, err
380 | }
381 |
382 | jobMapMutex.Lock()
383 | jobMapScaleMutex.Lock()
384 |
385 | for job := range jobMap {
386 | delete(jobMap, job)
387 | }
388 |
389 | for job := range jobMapScale {
390 | delete(jobMapScale, job)
391 | }
392 |
393 | jobMapScaleMutex.Unlock()
394 |
395 | for _, job := range joblist {
396 | value, _, err := nomadClient.Jobs().Info(job.ID, options)
397 | if err != nil {
398 | log.Error("Error getting job Info from nomad with err: ", err, " for jobName: ", job.Name)
399 | continue
400 | }
401 |
402 | if value.IsPeriodic() || *value.Type == "system" || *value.Type == "batch" {
403 | continue
404 | }
405 |
406 | jobMap[job.Name] = value
407 | }
408 |
409 | jobMapMutex.Unlock()
410 |
411 | return jobMap, nil
412 |
413 | }
414 |
415 | // main function sets the logging formatter, logging level, starts the go routine for the http
416 | // server and waits for a kill signal.
417 | func main() {
418 | customFormatter := new(log.TextFormatter)
419 | customFormatter.FullTimestamp = true
420 | customFormatter.TimestampFormat = "2006-01-02 15:04:05"
421 | customFormatter.ForceColors = true
422 | log.SetFormatter(customFormatter)
423 | //log.SetLevel(log.InfoLevel)
424 | log.SetLevel(log.DebugLevel)
425 | log.Info("Logging to stderr")
426 |
427 | log.Info("Starting scalad....")
428 | log.Info("Loaded configuration:")
429 | log.Info("Port: ", port)
430 | log.Info("Nomad Host: ", nomadHost)
431 | log.Info("Nomad Region: ", region)
432 | log.Info("Nomad CA Cert: ", nomadCaCert)
433 | if len(vaultToken) != 0 {
434 | log.Info("Vault Token: ", "************")
435 | } else {
436 | log.Info("Vault Token: ", "EMPTY!!")
437 | }
438 | log.Info("Use slack: ", useSlack)
439 | log.Info("Http user: ", username)
440 | if len(password) != 0 {
441 | log.Info("Http pass: ", "**********")
442 | } else {
443 | log.Info("Http pass: ", "EMPTY!!!")
444 | }
445 | log.Info("Metrics Endpoint: ", metricsEndpoint)
446 |
447 | if useSlack == "true" {
448 | slack.StartSlackTicker()
449 | }
450 |
451 | go startHTTP()
452 |
453 | go scalerTicker()
454 |
455 | go fireMapTicker()
456 |
457 | c := make(chan os.Signal, 1)
458 | signal.Notify(c, os.Interrupt)
459 |
460 | // Block until a signal is received.
461 | s := <-c
462 | log.Debug("Got signal:", s)
463 |
464 | }
465 |
466 | // GetJob function returns the job definition for jobID, from the local jobMap cache when present,
467 | // otherwise by querying Nomad at nomadHost. This definition is the job that is going to be scaled.
468 | func GetJob(jobID string, region string) (nomad.Job, error) {
469 |
470 | if _, ok := jobMap[jobID]; ok {
471 | return *jobMap[jobID], nil
472 | }
473 |
474 | var nomadJob nomad.Job
475 |
476 | client, err := api.NewClient(&api.Config{Address: nomadHost, TLSConfig: &api.TLSConfig{CACert: nomadCaCert}})
477 | if err != nil {
478 | log.Error("Unable to create Nomad client with err: ", err)
479 | return nomadJob, err
480 | }
481 |
482 | options := &api.QueryOptions{AllowStale: true}
483 |
484 | nomadJobPointer, _, err := client.Jobs().Info(jobID, options)
485 | if err != nil {
486 | log.Error("Unable to get job for ", jobID, " from nomad with err: ", err)
487 | return nomadJob, err
488 | }
489 |
490 | nomadJob = *nomadJobPointer
491 | return nomadJob, nil
492 |
493 | }
494 |
495 | func executeJob(nomadJob nomad.Job) (ok bool, err error) {
496 | nomadJob.VaultToken = &vaultToken
497 |
498 | nomadClient, err := api.NewClient(&api.Config{Address: nomadHost, TLSConfig: &api.TLSConfig{CACert: nomadCaCert}})
499 | if err != nil {
500 | log.Error("Unable to create Nomad client with err: ", err)
501 | return false, err
502 | }
503 |
504 | _, _, err = nomadClient.Jobs().Register(&nomadJob, nil)
505 | if err != nil {
506 | return false, err
507 | }
508 |
509 | return true, nil
510 | }
511 |
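512 | // For reference: queryPrometheus expects the standard Prometheus instant-query
513 | // response, where each result carries a [timestamp, "value"] pair; that is why
514 | // Value[1] is asserted to a string above. Roughly:
515 | //
516 | //	{"status":"success","data":{"resultType":"vector",
517 | //	 "result":[{"metric":{},"value":[1590000000.123,"1"]}]}}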
--------------------------------------------------------------------------------
/mapActions.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "time"
5 | )
6 |
7 | // startJobMapWatcher starts a ticker that checks the map for expired cooldowns every 5 seconds.
8 | func (scaler *Scaler) startJobMapWatcher() {
9 | ticker := time.NewTicker(time.Second * 5)
10 |
11 | go func() {
12 | for range ticker.C {
13 | scaler.cleanMap()
14 | }
15 | }()
16 | }
17 |
18 | // cleanMap checks all the entries in the map for expired cooldowns and removes them from the map.
19 | func (scaler *Scaler) cleanMap() {
20 | mutex.Lock()
21 | for key, job := range scaler.jobMap {
22 | now := time.Now()
23 |
24 | if now.After(job.ScaleCooldown) {
25 | delete(scaler.jobMap, key)
26 |
27 | }
28 | }
29 | mutex.Unlock()
30 | }
31 |
--------------------------------------------------------------------------------
/nomad.job:
--------------------------------------------------------------------------------
1 | job "scalad" {
2 | region = "global"
3 | datacenters = ["dc1"]
4 | type = "service"
5 |
6 | group "job-autoscaler" {
7 | count = 1
8 |
9 | task "scaler" {
10 | driver = "docker"
11 |
12 | config {
13 | image = "trivago/scalad:0.1"
14 | force_pull = true
15 | network_mode = "host"
16 | }
17 |
18 | service {
19 | name = "${NOMAD_JOB_NAME}"
20 | tags = ["nomad-autoscaler", "scalad"]
21 | port = "http"
22 |
23 | check {
24 | type = "http"
25 | path = "/"
26 | interval = "30s"
27 | timeout = "2s"
28 | }
29 | }
30 |
31 | template {
32 | data = <<EOH
--------------------------------------------------------------------------------
/scale-down.go:
--------------------------------------------------------------------------------
63 | } else if job.Count > job.ScaleMax {
64 | log.Info("Job ", jobID, " Group: ", job.GroupName, " in: ", region, " is above the MaxCount")
65 | job.NoGo = true
66 | } else {
67 | job.NoGo = false
68 | }
69 | // job is a copy of the map entry; write the updated values back.
70 | groupsMap[job.GroupName] = job
71 |
72 | }
73 |
74 | for _, job := range groupsMap {
75 | if job.NoGo == false {
76 | log.Debug(job.GroupName, " Group needs to be scaled Down.")
77 | AnyTrue = true
78 | }
79 | }
80 |
81 | if AnyTrue {
82 | p := log.Debug
83 | p("Scaling DOWN: ")
84 | p("JobName: ", jobID)
85 |
86 | for _, job := range groupsMap {
87 | p("Group: ", job.GroupName)
88 | if job.TaskName != "" {
89 | p("TaskName: ", job.TaskName)
90 | }
91 | p("Region: ", job.Region)
92 | p("ScaleMin: ", job.ScaleMin)
93 | p("ScaleMax: ", job.ScaleMax)
94 | p("ScaleCountUp: ", job.ScaleCountUp)
95 | p("ScaleCountDown: ", job.ScaleCountDown)
96 | p("Count: ", job.Count)
97 | p("ScaleCooldown: ", job.ScaleCooldown)
98 | }
99 | err := ScaleJobDown(groupsMap, nomadJob)
100 | if err != nil {
101 | log.Error("Scale down failed with err: ", err)
102 | return err
103 | }
104 | }
105 |
106 | return nil
107 | }
108 |
109 | // ScaleJobDown calculates the new number of allocations necessary for every group in the job and sends the request to nomad to
110 | // scale the job. It also updates the list of recently executed jobs after sending the request to nomad.
111 | func ScaleJobDown(groupsMap map[string]structs.JobStruct, nomadJob nomad.Job) error {
112 | for _, job := range groupsMap {
113 | if job.Count == job.ScaleMin {
114 | job.EndValue = job.ScaleMin
115 | job.NoGo = true
116 | } else {
117 | job.EndValue = job.Count - job.ScaleCountDown
118 | if job.EndValue <= job.ScaleMin {
119 | job.EndValue = job.ScaleMin
120 | log.Info("Scaling down Job: ", job.JobName, " Group:", job.GroupName, " to minimum allowed. Min: ", job.ScaleMin)
121 | job.NoGo = false
122 | }
123 | log.Info("Job: "+job.JobName+" Group: "+job.GroupName+" on: "+job.Region+" NewCount is: ", job.EndValue)
124 | }
125 | structLocal := groupsMap[job.GroupName]
126 | structLocal.EndValue = job.EndValue
127 | groupsMap[job.GroupName] = structLocal
128 | }
129 |
130 | for _, newJob := range nomadJob.TaskGroups {
131 | if groupsMap[*newJob.Name].EndValue != 0 {
132 | *newJob.Count = groupsMap[*newJob.Name].EndValue
133 | }
134 | log.Info("Job: ", *nomadJob.Name, " Group: ", *newJob.Name, " NewCount: ", *newJob.Count)
135 | }
136 |
137 | ok, err := executeJob(nomadJob)
138 | if !ok {
139 | log.Error("Error executing scaledown operation!")
140 | return err
141 | }
142 |
143 | message := `SCALE DOWN:
144 | - Job: ` + *nomadJob.Name + `
145 | - Region: ` + *nomadJob.Region
146 | slack.SendMessage(message)
147 | slack.MessageBuffered(*nomadJob.Name, "down", time.Now())
148 |
149 | scalerVec.WithLabelValues(*nomadJob.Name, *nomadJob.Region, "down").Inc()
150 | LastJobs(*nomadJob.Name, *nomadJob.Region, "scaleDown", time.Now())
151 | return nil
152 | }
153 |
154 | func manualScaleDown(w http.ResponseWriter, r *http.Request) {
155 | jobName := chi.URLParam(r, "jobName")
156 | region := chi.URLParam(r, "region")
157 | user, pass, _ := r.BasicAuth()
158 | if user == username && pass == password {
159 | nomadJob, err := GetJob(jobName, region)
160 | if err != nil {
161 | log.Warn("Error getting job with err: ", err)
162 | return
163 | }
164 | for _, taskGroup := range nomadJob.TaskGroups {
165 | *taskGroup.Count--
166 | if *taskGroup.Count < 1 {
167 | *taskGroup.Count = 1
168 | }
169 | }
170 |
171 | ok, err := executeJob(nomadJob)
172 | if !ok {
173 | log.Error("Error executing manual scaledown operation!")
174 | fmt.Fprintf(w, "%s", "Error executing manual scaledown operation!")
175 | return
176 | }
177 |
178 | message := `MANUAL SCALE DOWN for ` + jobName + ` in Region: ` + region + `
179 | All taskGroup counts have been decreased by one!
180 | For safety reasons scaling to 0 is not allowed! Min value is 1`
181 | slack.SendMessage(message)
182 | fmt.Fprintf(w, "%s", "Manual scale down triggered!")
183 | } else {
184 | fmt.Fprintf(w, "%s", "Wrong Username or password!")
185 | }
186 | }
187 |
--------------------------------------------------------------------------------
/scale-up.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "net/http"
6 | "time"
7 |
8 | log "github.com/Sirupsen/logrus"
9 | "github.com/go-chi/chi"
10 | nomad "github.com/hashicorp/nomad/api"
11 | "github.com/trivago/scalad/job"
12 | "github.com/trivago/scalad/slack"
13 | "github.com/trivago/scalad/structs"
14 | )
15 |
16 | // ScaleUp function checks that the current job is not in cooldown in the map and, if it is not,
17 | // checks every group in the job file to see whether it needs to be scaled.
18 | func (scaler *Scaler) ScaleUp(jobID string, region string) (err error) {
19 | now := time.Now()
20 | mapID := jobID + "-" + region
21 | mutex.Lock()
22 | _, ok := scaler.jobMap[mapID]
23 | mutex.Unlock()
24 | if ok {
25 | mutex.Lock()
26 | diff := now.Sub(scaler.jobMap[mapID].ScaleCooldownUp)
27 | mutex.Unlock()
28 | log.Info("Job: ", jobID, " ScaleUp can be retriggered in: ", diff)
29 | return fmt.Errorf("Job in cooldown")
30 | }
31 |
32 | var nomadJob nomad.Job
33 | jobMapMutex.Lock()
34 | _, ok = jobMap[jobID]
35 | if ok {
36 | nomadJob = *jobMap[jobID]
37 | } else {
38 | nomadJob, err = GetJob(jobID, region)
39 | if err != nil {
40 | log.Warn("Error getting job with err: ", err)
41 | return err
42 | }
43 | }
44 | jobMapMutex.Unlock()
45 |
46 | var AnyTrue bool
47 | groupsMap, nomadJob := job.ParseJSON(nomadJob, "up")
48 |
49 | for _, job := range groupsMap {
50 | if (job.ScaleMin == 0) || (job.ScaleMax == 0) || (job.ScaleCountUp == 0) || (job.ScaleCountDown == 0) || (job.Count == 0) {
51 | log.Warn(jobID, " Group: ", job.Group, " doesn't have a scale stanza in it.")
52 | job.NoGo = true
53 | }
54 |
55 | job.ScaleCooldown = job.ScaleCooldownUp
56 | mutex.Lock()
57 |
58 | scaler.jobMap[mapID] = job
59 | mutex.Unlock()
60 |
61 | if job.Count >= job.ScaleMax {
62 | log.Info("Job: ", jobID, " Group: ", job.GroupName, " in: ", region, " is at MaxCount (", job.ScaleMax, " allocations)")
63 | job.NoGo = true
64 | } else if job.Count < job.ScaleMin {
65 | log.Info("Job ", jobID, " Group: ", job.GroupName, " in: ", region, " is below the MinCount")
66 | job.NoGo = true
67 | } else {
68 | job.NoGo = false
69 | }
70 | // job is a copy of the map entry; write the updated values back.
71 | groupsMap[job.GroupName] = job
72 |
73 | }
74 |
75 | for _, job := range groupsMap {
76 | if job.NoGo == false {
77 | log.Debug(job.GroupName, " Group needs to be scaled Up.")
78 | AnyTrue = true
79 | }
80 | }
81 |
82 | if AnyTrue {
83 | p := log.Debug
84 | p("")
85 | p("Scaling UP: ")
86 | p("JobName: ", jobID)
87 |
88 | for _, job := range groupsMap {
89 | p("Group: ", job.GroupName)
90 | if job.TaskName != "" {
91 | p("TaskName: ", job.TaskName)
92 | }
93 | p("Region: ", job.Region)
94 | p("ScaleMin: ", job.ScaleMin)
95 | p("ScaleMax: ", job.ScaleMax)
96 | p("ScaleCountUp: ", job.ScaleCountUp)
97 | p("ScaleCountDown: ", job.ScaleCountDown)
98 | p("Count: ", job.Count)
99 | p("ScaleCooldown: ", job.ScaleCooldown)
100 | }
101 | err := ScaleJobUp(groupsMap, nomadJob)
102 | if err != nil {
103 | log.Error("Scale up failed with err: ", err)
104 | return err
105 | }
106 | }
107 | return nil
108 | }
109 |
110 | // ScaleJobUp calculates the new number of allocations necessary for every group in the job and sends the request to nomad to
111 | // scale the job. It also updates the list of recently executed jobs after sending the request to nomad.
112 | func ScaleJobUp(groupsMap map[string]structs.JobStruct, nomadJob nomad.Job) error {
113 | for _, job := range groupsMap {
114 | if job.Count >= job.ScaleMax {
115 | job.EndValue = job.Count
116 | job.NoGo = true
117 | } else {
118 | job.EndValue = job.Count + job.ScaleCountUp
119 | if job.EndValue > job.ScaleMax {
120 | job.EndValue = job.ScaleMax
121 | log.Info("Scaling up Job: ", job.JobName, " Group: ", job.GroupName, " to maximum allowed. Max: ", job.ScaleMax)
122 | job.NoGo = false
123 | }
124 | log.Info("Job: "+job.JobName+" Group: "+job.GroupName+" on: "+job.Region+" NewCount is: ", job.EndValue)
125 | }
126 | structLocal := groupsMap[job.GroupName]
127 | structLocal.EndValue = job.EndValue
128 | groupsMap[job.GroupName] = structLocal
129 | }
130 |
131 | for _, newJob := range nomadJob.TaskGroups {
132 | if groupsMap[*newJob.Name].EndValue != 0 {
133 | *newJob.Count = groupsMap[*newJob.Name].EndValue
134 | }
135 | log.Info("Job: ", *nomadJob.Name, " Group: ", *newJob.Name, " NewCount: ", *newJob.Count)
136 | }
137 |
138 | ok, err := executeJob(nomadJob)
139 | if !ok {
140 | log.Error("Error executing scaleup operation!")
141 | return err
142 | }
143 |
144 | message := `SCALE UP:
145 | - Job: ` + *nomadJob.Name + `
146 | - Region: ` + *nomadJob.Region
147 | slack.SendMessage(message)
148 | slack.MessageBuffered(*nomadJob.Name, "up", time.Now())
149 |
150 | scalerVec.WithLabelValues(*nomadJob.Name, *nomadJob.Region, "up").Inc()
151 | LastJobs(*nomadJob.Name, *nomadJob.Region, "scaleUp", time.Now())
152 | return nil
153 | }
154 |
155 | func manualScaleUp(w http.ResponseWriter, r *http.Request) {
156 | jobName := chi.URLParam(r, "jobName")
157 | region := chi.URLParam(r, "region")
158 | user, pass, _ := r.BasicAuth()
159 | if user == username && pass == password {
160 | nomadJob, err := GetJob(jobName, region)
161 | if err != nil {
162 | log.Warn("Error getting job with err: ", err)
163 | return
164 | }
165 | for _, taskGroup := range nomadJob.TaskGroups {
166 | *taskGroup.Count++
167 | }
168 |
169 | ok, err := executeJob(nomadJob)
170 | if !ok {
171 | log.Error("Error executing manual scaleup operation!")
172 | fmt.Fprintf(w, "%s", "Error executing manual scaleup operation!")
173 | return
174 | }
175 |
176 | message := `MANUAL SCALE UP for ` + jobName + ` in Region: ` + region + `
177 | All taskGroup counts have been increased by one!`
178 | slack.SendMessage(message)
179 | fmt.Fprintf(w, "%s", "Manual scale up triggered!")
180 | } else {
181 | fmt.Fprintf(w, "%s", "Wrong Username or password!")
182 | }
183 | }
184 |
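185 | // Worked example: with the README values scale_min=1, scale_max=16 and
186 | // scale_count_up=2, a group currently at count 15 gets EndValue = 15+2 = 17,
187 | // which ScaleJobUp clamps to scale_max, so the job is re-registered with count 16.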
--------------------------------------------------------------------------------
/scaler.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "encoding/json"
5 | "fmt"
6 | "io/ioutil"
7 | "net/http"
8 | "strings"
9 | "time"
10 |
11 | "github.com/trivago/scalad/slack"
12 | "github.com/trivago/scalad/structs"
13 | log "github.com/Sirupsen/logrus"
14 | "github.com/go-chi/chi"
15 | tparse "github.com/karrick/tparse/v2"
16 | )
17 |
18 | // Scaler jobMap handler.
19 | type Scaler struct {
20 | jobMap map[string]structs.JobStruct
21 | }
22 |
23 | // newScaler creates the jobMap, starts the startJobMapWatcher and
24 | // returns the Scaler holding the map where all jobs in cooldown are stored.
25 | func newScaler() Scaler {
26 | scaler := Scaler{jobMap: make(map[string]structs.JobStruct)}
27 | scaler.startJobMapWatcher()
28 | return scaler
29 | }
30 |
31 | // health function is an HTTP endpoint used by consul to check the health of the application.
32 | // If it is healthy it returns an HTTP 200 "All Good" message.
33 | func (scaler *Scaler) health(w http.ResponseWriter, r *http.Request) {
34 | message := "All Good"
35 | fmt.Fprintf(w, "%s", message)
36 | }
37 |
38 | func (scaler *Scaler) stopScallingJob(w http.ResponseWriter, r *http.Request) {
39 | jobID := chi.URLParam(r, "jobName")
40 | timer := chi.URLParam(r, "timer")
41 | mapID := jobID + "-" + region
42 | now := time.Now()
43 | var job structs.JobStruct
44 | sleep, err := tparse.AddDuration(now, timer)
45 | if err != nil {
46 | log.Debug("Error parsing time for pause command with err: ", err)
47 | return
48 | }
49 | job.ScaleCooldown = sleep
50 | mutex.Lock()
51 | scaler.jobMap[mapID] = job
52 | mutex.Unlock()
53 | message := "Manually paused: " + mapID + " for " + timer
54 | slack.SendMessage(message)
55 | fmt.Fprintf(w, "%s", message)
56 | }
57 |
58 | func (scaler *Scaler) resumeScallingJob(w http.ResponseWriter, r *http.Request) {
59 | jobID := chi.URLParam(r, "jobName")
60 | mapID := jobID + "-" + region
61 |
62 | jobMapMutex.Lock()
63 | jobMapScaleMutex.Lock()
64 |
65 | log.Debug("Refreshing job config for ", jobID)
66 | delete(jobMap, jobID)
67 | delete(jobMapScale, jobID)
68 | jobMapScaleMutex.Unlock()
69 |
70 | nomadJob, err := GetJob(jobID, region)
71 | if err != nil {
72 | log.Warn("Error getting job ", jobID, " with err: ", err)
73 | } else {
74 | jobMap[jobID] = &nomadJob
75 | }
76 | jobMapMutex.Unlock()
77 |
78 | mutex.Lock()
79 | defer mutex.Unlock()
80 | delete(scaler.jobMap, mapID)
81 |
82 | message := "Manually resumed: " + mapID
83 | slack.SendMessage(message)
84 | fmt.Fprintf(w, "%s", message)
85 | }
86 |
87 | func (scaler *Scaler) scaleAction(body []byte) (err error) {
88 | postStruct := new(structs.PostRequest)
89 | err = json.Unmarshal(body, postStruct)
90 | if err != nil {
91 | log.Error("Body: ", string(body))
92 | log.Error("Error Unmarshalling postJson with err: ", err)
93 | return err
94 | }
95 |
96 | for k := range postStruct.Alerts {
97 | allocID := postStruct.Alerts[k].Labels.AllocID
98 | jobID := postStruct.Alerts[k].Labels.JobName
99 | log.Debug("Received scale for: ", jobID, " with alertname: ", postStruct.Alerts[k].Labels.Alertname)
100 |
101 | if len(region) == 0 {
102 | log.Error("No region defined for AllocID: ", allocID)
103 | continue
104 | }
105 | status := postStruct.Alerts[k].Status
110 | if len(jobID) < 1 {
111 | log.Info("No JobName defined for Alert")
112 | continue
113 | }
114 | log.Debug("Jobname received: ", jobID, " Region: ", region)
115 |
116 | if strings.HasPrefix(postStruct.Alerts[k].Labels.Alertname, "scaleup") {
117 | if strings.HasPrefix(status, "firing") {
118 | err := scaler.ScaleUp(jobID, region)
119 | if err != nil {
120 | log.Debug("Job: ", jobID, " Error: ", err)
121 | }
122 | }
123 |
124 | log.Debug("Status: ", status, " for ", jobID, " Region: ", region)
125 |
126 | } else if strings.HasPrefix(postStruct.Alerts[k].Labels.Alertname, "scaledown") {
127 | if strings.HasPrefix(status, "firing") {
128 | err := scaler.ScaleDown(jobID, region)
129 | if err != nil {
130 | log.Debug("Job: ", jobID, " Error: ", err)
131 | }
132 | }
133 |
134 | log.Debug("Status: ", status, " for ", jobID, " Region: ", region)
135 |
136 | }
137 | }
138 | return nil
139 | }
140 |
141 | // scale handles POST requests from Alertmanager, reading the body and handing it
142 | // to scaleAction, which decides which scale direction to apply or discards the message.
143 | func (scaler *Scaler) scale(w http.ResponseWriter, r *http.Request) {
144 |
145 | body, err := ioutil.ReadAll(r.Body)
146 | if err != nil {
147 | w.WriteHeader(http.StatusBadRequest)
148 | fmt.Fprintf(w, "Error reading request Body, with err: %v", err)
149 | return
150 | }
151 |
152 | go scaler.scaleAction(body)
153 |
154 | w.WriteHeader(http.StatusOK)
155 | }
156 |
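
A minimal, self-contained sketch (not part of the repo) of the kind of Alertmanager payload scaleAction expects, decoded into structs.PostRequest; the alert values here are made up, and only the fields scaler.go actually reads are filled in:

package main

import (
	"encoding/json"
	"fmt"

	"github.com/trivago/scalad/structs"
)

func main() {
	// Hypothetical firing alert: the alertname must start with "scaleup"
	// or "scaledown", and the status must be "firing", for scaleAction
	// to trigger anything.
	body := []byte(`{
		"status": "firing",
		"alerts": [{
			"status": "firing",
			"labels": {
				"alertname": "scaleup_cpu",
				"exported_job": "my-nomad-job",
				"alloc_id": "0000-demo-alloc"
			}
		}]
	}`)

	postStruct := new(structs.PostRequest)
	if err := json.Unmarshal(body, postStruct); err != nil {
		fmt.Println("unmarshal failed:", err)
		return
	}
	for _, alert := range postStruct.Alerts {
		fmt.Println(alert.Labels.Alertname, alert.Labels.JobName, alert.Status)
	}
}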
--------------------------------------------------------------------------------
/slack/slack.go:
--------------------------------------------------------------------------------
1 | package slack
2 |
3 | import (
4 | "fmt"
5 | "os"
6 | "strings"
7 | "time"
8 |
9 | log "github.com/Sirupsen/logrus"
10 | slack "github.com/ashwanthkumar/slack-go-webhook"
11 | )
12 |
13 | var (
14 | useSlack string
15 | webhookURL string
16 | slackChannel string
17 | slackUsername string
18 | slackEmoji string
19 | compMessagesUp []string
20 | compMessagesDown []string
21 | region = os.Getenv("NOMAD_REGION")
22 | )
23 |
24 | func init() {
25 | useSlack = os.Getenv("USE_SLACK")
26 | webhookURL = os.Getenv("SLACK_WEBHOOK")
27 | slackChannel = os.Getenv("SLACK_CHANNEL")
28 | slackUsername = os.Getenv("SLACK_USERNAME")
29 | slackUsername = slackUsername + region
30 | slackEmoji = ":scalad:"
31 | if useSlack == "true" {
32 | if len(webhookURL) == 0 {
33 | log.Fatal("ENV variable SLACK_WEBHOOK is empty!")
34 | }
35 | }
36 | log.Info("Slack Channel: ", slackChannel)
37 | log.Info("Slack Username: ", slackUsername)
38 | log.Info("Slack emoji: ", slackEmoji)
39 |
40 | }
41 |
42 | // StartSlackTicker starts a ticker that sends a summary of buffered scale events every 30 min.
43 | func StartSlackTicker() {
44 | tickerB := time.NewTicker(time.Minute * 30)
45 |
46 | go func() {
47 | for range tickerB.C {
48 | sendBuffered()
49 | }
50 | }()
51 | }
52 |
53 | // SendMessage sends a message to the configured slack channel or user and returns an error in case of failure.
54 | func SendMessage(message string) error {
55 | if useSlack != "true" {
56 | return nil
57 | }
58 |
59 | payload := slack.Payload{
60 | Text: message,
61 | Username: slackUsername,
62 | Channel: slackChannel,
63 | IconEmoji: slackEmoji,
64 | }
65 | err := slack.Send(webhookURL, "", payload)
66 | if err != nil {
67 | log.Warn("Error sending slack message with err: ", err)
68 | return fmt.Errorf("Error sending slack message with err: %v", err)
69 | }
70 | return nil
71 | }
72 |
73 | // SendMessageTo sends a message to the given user (prefixed with @) and returns an error in case of failure.
74 | func SendMessageTo(message string, user string) error {
75 | if useSlack != "true" {
76 | return nil
77 | }
78 |
79 | payload := slack.Payload{
80 | Text: message,
81 | Username: slackUsername,
82 | Channel: user,
83 | IconEmoji: slackEmoji,
84 | }
85 | err := slack.Send(webhookURL, "", payload)
86 | if err != nil {
87 | log.Warn("Error sending slack message with err: ", err)
88 | return fmt.Errorf("Error sending slack message with err: %v", err)
89 | }
90 | return nil
91 | }
92 |
93 | // MessageBuffered queues a message so that it is sent to the summary channel when the ticker fires.
94 | func MessageBuffered(message string, direction string, t time.Time) error {
95 | if useSlack != "true" {
96 | return nil
97 | }
98 | message = message + ` ` + t.Format("2006-01-02 15:04:05")
99 | if direction == "up" {
100 | compMessagesUp = append(compMessagesUp, message)
101 | } else {
102 | compMessagesDown = append(compMessagesDown, message)
103 | }
104 | return nil
105 | }
106 |
107 | func sendBuffered() error {
108 | var message string
109 | regionUp := strings.ToUpper(region)
110 | message = `30 min summary of autoscaler in ` + regionUp + `:
111 | Upscale:
112 | `
113 | for _, next := range compMessagesUp {
114 | message = message + `
115 | ` + next
116 | }
117 | message = message + `
118 |
119 | Downscale:
120 | `
121 | for _, next := range compMessagesDown {
122 | message = message + `
123 | ` + next
124 | }
125 | message = message + `
126 |
127 | `
128 |
129 | payload := slack.Payload{
130 | Text: message,
131 | Username: slackUsername,
132 | Channel: "#scalad-30m",
133 | IconEmoji: slackEmoji,
134 | }
135 | err := slack.Send(webhookURL, "", payload)
136 | if err != nil {
137 | log.Warn("Error sending slack message with err: ", err)
138 | return fmt.Errorf("Error sending slack message with err: %v", err)
139 | }
140 | compMessagesUp = nil
141 | compMessagesDown = nil
142 | return nil
143 |
144 | }
145 |
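
A short usage sketch (not part of the repo) for this package; it assumes USE_SLACK=true and SLACK_WEBHOOK (plus optionally SLACK_CHANNEL and SLACK_USERNAME) are set in the environment, since init() reads them at import time:

package main

import (
	"time"

	"github.com/trivago/scalad/slack"
)

func main() {
	// Flushes buffered messages to the summary channel every 30 min.
	slack.StartSlackTicker()

	// Sent immediately to the configured channel.
	slack.SendMessage("scalad started")

	// Queued and sent with the next 30 min summary.
	slack.MessageBuffered("scaled my-nomad-job up", "up", time.Now())

	time.Sleep(time.Second) // give the webhook call a moment before exiting
}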
--------------------------------------------------------------------------------
/structs/structs.go:
--------------------------------------------------------------------------------
1 | package structs
2 |
3 | import (
4 | "time"
5 |
6 | nomad "github.com/hashicorp/nomad/api"
7 | )
8 |
9 | // JobStruct is where the metadata extracted from each nomad job is kept.
10 | type JobStruct struct {
11 | JobName string
12 | Region string
13 | ScaleMin int
14 | ScaleMax int
15 | ScaleCountUp int
16 | ScaleCountDown int
17 | ScaleCooldown time.Time
18 | ScaleCooldownUp time.Time
19 | ScaleCooldownDown time.Time
20 | LastRun time.Time
21 | Count int
22 | Group int
23 | NoGo bool
24 | EndValue int
25 | GroupName string
26 | TaskName string
27 | }
28 |
29 | // PostR is the POST payload that is sent to nomad to trigger the scaling action.
30 | type PostR struct {
31 | Job nomad.Job
32 | }
33 |
34 | // PostRequest is the struct where the alert coming from Alertmanager is stored.
35 | type PostRequest struct {
36 | Receiver string `json:"receiver"`
37 | Status string `json:"status"`
38 | Alerts []struct {
39 | Status string `json:"status"`
40 | Labels struct {
41 | Alertname string `json:"alertname"`
42 | Region string `json:"region"`
43 | Client string `json:"client"`
44 | Instance string `json:"instance"`
45 | Job string `json:"job"`
46 | JobName string `json:"exported_job"`
47 | AllocID string `json:"alloc_id"`
48 | Monitor string `json:"monitor"`
49 | Rt string `json:"rt"`
50 | Severity string `json:"severity"`
51 | } `json:"labels"`
52 | Annotations struct {
53 | Description string `json:"description"`
54 | Summary string `json:"summary"`
55 | } `json:"annotations"`
56 | StartsAt time.Time `json:"startsAt"`
57 | EndsAt time.Time `json:"endsAt"`
58 | GeneratorURL string `json:"generatorURL"`
59 | } `json:"alerts"`
60 | GroupLabels struct {
61 | Alertname string `json:"alertname"`
62 | } `json:"groupLabels"`
63 | CommonLabels struct {
64 | Alertname string `json:"alertname"`
65 | Job string `json:"job"`
66 | Monitor string `json:"monitor"`
67 | Severity string `json:"severity"`
68 | } `json:"commonLabels"`
69 | CommonAnnotations struct {
70 | } `json:"commonAnnotations"`
71 | ExternalURL string `json:"externalURL"`
72 | Version string `json:"version"`
73 | Time time.Time
74 | }
75 |
76 | // AllocRequest is the structure that matches the response of an allocation query
77 | // to nomad, used to extract the JobName and JobRegion.
78 | type AllocRequest struct {
79 | Job struct {
80 | Region string `json:"Region"`
81 | Name string `json:"Name"`
82 | } `json:"Job"`
83 | }
84 | // Meta holds the raw (string) scaling settings extracted from a nomad job's meta.
85 | type Meta struct {
86 | MinQuery string
87 | MaxQuery string
88 | FireTime string
89 | ScaleMin string
90 | ScaleMax string
91 | ScaleCountUp string
92 | ScaleCountDown string
93 | ScaleCooldown string
94 | ScaleCooldownUp string
95 | ScaleCooldownDown string
96 | }
97 | // Prometheus matches the JSON response of a Prometheus query API call.
98 | type Prometheus struct {
99 | Status string `json:"status"`
100 | Data struct {
101 | ResultType string `json:"resultType"`
102 | Result []struct {
103 | Metric struct {
104 | ExportedJob string `json:"exported_job"`
105 | } `json:"metric"`
106 | Value []interface{} `json:"value"`
107 | } `json:"result"`
108 | } `json:"data"`
109 | }
110 | // TrigeredAction records the time and direction of a triggered scale action.
111 | type TrigeredAction struct {
112 | Time time.Time
113 | Direction string
114 | }
115 |
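
A sketch (with a made-up response body) of how the Prometheus struct above lines up with the JSON returned by Prometheus' query API:

package main

import (
	"encoding/json"
	"fmt"

	"github.com/trivago/scalad/structs"
)

func main() {
	// Hypothetical query API response; the keys follow the json tags
	// on structs.Prometheus.
	body := []byte(`{
		"status": "success",
		"data": {
			"resultType": "vector",
			"result": [{
				"metric": {"exported_job": "my-nomad-job"},
				"value": [1566292097, "42"]
			}]
		}
	}`)

	var resp structs.Prometheus
	if err := json.Unmarshal(body, &resp); err != nil {
		fmt.Println("unmarshal failed:", err)
		return
	}
	for _, r := range resp.Data.Result {
		fmt.Println(r.Metric.ExportedJob, r.Value)
	}
}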
--------------------------------------------------------------------------------
/templates.go:
--------------------------------------------------------------------------------
1 | // Code generated by go-bindata.
2 | // sources:
3 | // templates/info.html
4 | // DO NOT EDIT!
5 |
6 | package main
7 |
8 | import (
9 | "bytes"
10 | "compress/gzip"
11 | "fmt"
12 | "io"
13 | "io/ioutil"
14 | "os"
15 | "path/filepath"
16 | "strings"
17 | "time"
18 | )
19 |
20 | func bindataRead(data []byte, name string) ([]byte, error) {
21 | gz, err := gzip.NewReader(bytes.NewBuffer(data))
22 | if err != nil {
23 | return nil, fmt.Errorf("Read %q: %v", name, err)
24 | }
25 |
26 | var buf bytes.Buffer
27 | _, err = io.Copy(&buf, gz)
28 | clErr := gz.Close()
29 |
30 | if err != nil {
31 | return nil, fmt.Errorf("Read %q: %v", name, err)
32 | }
33 | if clErr != nil {
34 | return nil, clErr
35 | }
36 |
37 | return buf.Bytes(), nil
38 | }
39 |
40 | type asset struct {
41 | bytes []byte
42 | info os.FileInfo
43 | }
44 |
45 | type bindataFileInfo struct {
46 | name string
47 | size int64
48 | mode os.FileMode
49 | modTime time.Time
50 | }
51 |
52 | func (fi bindataFileInfo) Name() string {
53 | return fi.name
54 | }
55 | func (fi bindataFileInfo) Size() int64 {
56 | return fi.size
57 | }
58 | func (fi bindataFileInfo) Mode() os.FileMode {
59 | return fi.mode
60 | }
61 | func (fi bindataFileInfo) ModTime() time.Time {
62 | return fi.modTime
63 | }
64 | func (fi bindataFileInfo) IsDir() bool {
65 | return false
66 | }
67 | func (fi bindataFileInfo) Sys() interface{} {
68 | return nil
69 | }
70 |
71 | var _templatesInfoHtml = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x7c\x92\x41\x6f\xd4\x30\x10\x85\xcf\xf8\x57\x8c\x44\x91\x40\xda\xac\x9b\x9e\x90\x6b\x2c\x55\x94\x43\x2b\xb8\xd0\xfe\x01\x6f\x3c\x89\x2d\x1c\x3b\xd8\xb3\xcb\x56\x91\xff\x3b\x4a\x9c\x42\x11\x82\x5c\x92\xcc\x7b\xfe\xf4\xe6\xc9\xd2\xd2\xe8\x15\x93\x16\xb5\xc1\xa4\x98\xcc\xf4\xe4\x51\x31\xd2\x07\x8f\x30\x33\x00\x80\x3e\x06\x6a\x7a\x3d\x3a\xff\x24\x40\x27\xa7\xfd\x0e\xb2\x0e\xb9\xc9\x98\x5c\x7f\xbd\x7a\x0e\x31\x19\x4c\x4d\x17\xbd\xd7\x53\x46\x01\xcf\x5f\x55\xfe\xe1\x0c\x59\x01\xed\xe5\xe5\x9b\x6b\x56\x18\x23\xb3\x03\xb2\x1b\xbf\x9e\x15\xd0\x4e\x67\xc8\xd1\x3b\x03\xaf\xcd\xfa\xd4\xb3\x84\x67\x6a\xb4\x77\x43\x10\xe0\xb1\xa7\x3a\x9d\xb4\x31\x2e\x0c\x02\xda\xf7\xd3\xb9\x32\x93\x08\x64\x9b\xce\x3a\x6f\xde\xe2\x09\xc3\xbb\x67\xbe\xee\xbe\x0d\x29\x1e\x83\x59\xf2\xc5\x24\x7e\xf3\x0b\x93\x7c\xdb\x58\xda\x56\x3d\x7e\x7c\x68\x1e\x3a\xed\x31\xc1\x5d\xe8\xa3\xe4\xb6\x55\x4c\xf2\x5f\xe5\xd8\x2b\xf5\x59\x67\x82\x79\xde\x2f\xfa\xa3\x1b\xb1\x14\xf8\xe2\xc2\x91\x30\x43\xc2\xef\x47\xcc\x04\x09\x3b\x87\x27\x34\x42\x72\x7b\xa5\x98\x5c\xbb\x5c\xde\x49\xb1\x57\x92\xac\xba\x8f\x87\xbb\x5b\xc9\xc9\x6e\xff\x5f\x71\x70\x31\xbc\x18\xdc\x78\x4c\x14\xf4\x88\x7f\x98\x2a\x15\x6e\xa8\x4e\x25\x5f\x80\x2b\x75\x9e\x21\xe9\x30\x20\x5c\xb8\x1d\x5c\xe4\x65\x83\x4f\x27\x0c\x04\xe2\x03\xec\x97\xc4\xf7\xf1\x90\xa1\x94\xb5\x0e\x49\x46\xcd\xf3\x0b\xd7\x7e\xcd\x53\x8a\xe4\x64\xd4\xdf\x6a\x4d\xf7\x4f\xf9\xd6\x25\xec\xe8\x7f\x8e\x5a\x53\x15\x79\x8d\x8b\xc1\x94\xc2\x96\x15\xb6\x6e\x78\xbd\x87\xac\xfc\x0c\x00\x00\xff\xff\xd5\xef\x0a\x26\x91\x02\x00\x00")
72 |
73 | func templatesInfoHtmlBytes() ([]byte, error) {
74 | return bindataRead(
75 | _templatesInfoHtml,
76 | "templates/info.html",
77 | )
78 | }
79 |
80 | func templatesInfoHtml() (*asset, error) {
81 | bytes, err := templatesInfoHtmlBytes()
82 | if err != nil {
83 | return nil, err
84 | }
85 |
86 | info := bindataFileInfo{name: "templates/info.html", size: 657, mode: os.FileMode(420), modTime: time.Unix(1566292097, 0)}
87 | a := &asset{bytes: bytes, info: info}
88 | return a, nil
89 | }
90 |
91 | // Asset loads and returns the asset for the given name.
92 | // It returns an error if the asset could not be found or
93 | // could not be loaded.
94 | func Asset(name string) ([]byte, error) {
95 | cannonicalName := strings.Replace(name, "\\", "/", -1)
96 | if f, ok := _bindata[cannonicalName]; ok {
97 | a, err := f()
98 | if err != nil {
99 | return nil, fmt.Errorf("Asset %s can't read by error: %v", name, err)
100 | }
101 | return a.bytes, nil
102 | }
103 | return nil, fmt.Errorf("Asset %s not found", name)
104 | }
105 |
106 | // MustAsset is like Asset but panics when Asset would return an error.
107 | // It simplifies safe initialization of global variables.
108 | func MustAsset(name string) []byte {
109 | a, err := Asset(name)
110 | if err != nil {
111 | panic("asset: Asset(" + name + "): " + err.Error())
112 | }
113 |
114 | return a
115 | }
116 |
117 | // AssetInfo loads and returns the asset info for the given name.
118 | // It returns an error if the asset could not be found or
119 | // could not be loaded.
120 | func AssetInfo(name string) (os.FileInfo, error) {
121 | cannonicalName := strings.Replace(name, "\\", "/", -1)
122 | if f, ok := _bindata[cannonicalName]; ok {
123 | a, err := f()
124 | if err != nil {
125 | return nil, fmt.Errorf("AssetInfo %s can't read by error: %v", name, err)
126 | }
127 | return a.info, nil
128 | }
129 | return nil, fmt.Errorf("AssetInfo %s not found", name)
130 | }
131 |
132 | // AssetNames returns the names of the assets.
133 | func AssetNames() []string {
134 | names := make([]string, 0, len(_bindata))
135 | for name := range _bindata {
136 | names = append(names, name)
137 | }
138 | return names
139 | }
140 |
141 | // _bindata is a table, holding each asset generator, mapped to its name.
142 | var _bindata = map[string]func() (*asset, error){
143 | "templates/info.html": templatesInfoHtml,
144 | }
145 |
146 | // AssetDir returns the file names below a certain
147 | // directory embedded in the file by go-bindata.
148 | // For example if you run go-bindata on data/... and data contains the
149 | // following hierarchy:
150 | // data/
151 | // foo.txt
152 | // img/
153 | // a.png
154 | // b.png
155 | // then AssetDir("data") would return []string{"foo.txt", "img"}
156 | // AssetDir("data/img") would return []string{"a.png", "b.png"}
157 | // AssetDir("foo.txt") and AssetDir("notexist") would return an error
158 | // AssetDir("") will return []string{"data"}.
159 | func AssetDir(name string) ([]string, error) {
160 | node := _bintree
161 | if len(name) != 0 {
162 | cannonicalName := strings.Replace(name, "\\", "/", -1)
163 | pathList := strings.Split(cannonicalName, "/")
164 | for _, p := range pathList {
165 | node = node.Children[p]
166 | if node == nil {
167 | return nil, fmt.Errorf("Asset %s not found", name)
168 | }
169 | }
170 | }
171 | if node.Func != nil {
172 | return nil, fmt.Errorf("Asset %s not found", name)
173 | }
174 | rv := make([]string, 0, len(node.Children))
175 | for childName := range node.Children {
176 | rv = append(rv, childName)
177 | }
178 | return rv, nil
179 | }
180 |
181 | type bintree struct {
182 | Func func() (*asset, error)
183 | Children map[string]*bintree
184 | }
185 | var _bintree = &bintree{nil, map[string]*bintree{
186 | "templates": &bintree{nil, map[string]*bintree{
187 | "info.html": &bintree{templatesInfoHtml, map[string]*bintree{}},
188 | }},
189 | }}
190 |
191 | // RestoreAsset restores an asset under the given directory
192 | func RestoreAsset(dir, name string) error {
193 | data, err := Asset(name)
194 | if err != nil {
195 | return err
196 | }
197 | info, err := AssetInfo(name)
198 | if err != nil {
199 | return err
200 | }
201 | err = os.MkdirAll(_filePath(dir, filepath.Dir(name)), os.FileMode(0755))
202 | if err != nil {
203 | return err
204 | }
205 | err = ioutil.WriteFile(_filePath(dir, name), data, info.Mode())
206 | if err != nil {
207 | return err
208 | }
209 | err = os.Chtimes(_filePath(dir, name), info.ModTime(), info.ModTime())
210 | if err != nil {
211 | return err
212 | }
213 | return nil
214 | }
215 |
216 | // RestoreAssets restores an asset under the given directory recursively
217 | func RestoreAssets(dir, name string) error {
218 | children, err := AssetDir(name)
219 | // File
220 | if err != nil {
221 | return RestoreAsset(dir, name)
222 | }
223 | // Dir
224 | for _, child := range children {
225 | err = RestoreAssets(dir, filepath.Join(name, child))
226 | if err != nil {
227 | return err
228 | }
229 | }
230 | return nil
231 | }
232 |
233 | func _filePath(dir, name string) string {
234 | cannonicalName := strings.Replace(name, "\\", "/", -1)
235 | return filepath.Join(append([]string{dir}, strings.Split(cannonicalName, "/")...)...)
236 | }
237 |
238 |
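
A hedged sketch of rendering the embedded info page; it would live in this same main package (it calls MustAsset above, and assumes "html/template" and "io" are added to the imports), and the InfoTime/LastJobs keys follow templates/info.html below. How info.go actually wires this up is not shown here:

// renderInfoSketch loads the embedded template and executes it into w.
func renderInfoSketch(w io.Writer) error {
	tmpl, err := template.New("info").Parse(string(MustAsset("templates/info.html")))
	if err != nil {
		return err
	}
	data := map[string]interface{}{
		"InfoTime": 60,  // hypothetical window in minutes
		"LastJobs": nil, // would hold the recent scale events
	}
	return tmpl.Execute(w, data)
}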
--------------------------------------------------------------------------------
/templates/info.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
20 | TCS-Scaler Info
21 |
22 | Last {{.InfoTime}} minutes of requests received:
23 |
24 | <table>
25 | <tr><th>JobID</th>
26 | <th>Region</th>
27 | <th>Alertname</th>
28 | <th>Received At</th>
29 | </tr>
30 |
31 | {{ range $i, $scaleEvent := .LastJobs }}
32 | <tr><td>{{$scaleEvent.JobID}}</td><td>{{$scaleEvent.Region}}</td><td>{{$scaleEvent.Direction}}</td><td>{{$scaleEvent.Time}}</td></tr>
33 | {{end}}
34 | </table>
35 |
36 |
37 |
38 |
--------------------------------------------------------------------------------
/tickers.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "time"
5 |
6 | log "github.com/Sirupsen/logrus"
7 | )
8 |
9 | func fireMapTicker() {
10 | ticker := time.NewTicker(time.Second * time.Duration(fireMapTickerEnvInt))
11 |
12 | go func() {
13 | for range ticker.C {
14 | checkFiringMap()
15 | }
16 | }()
17 |
18 | }
19 |
20 | func scalerTicker() {
21 | ticker := time.NewTicker(time.Second * time.Duration(scalerTickerEnvInt))
22 |
23 | go func() {
24 | var err error
25 | for range ticker.C {
26 | jobMap, err = getJobs()
27 | if err != nil {
28 | log.Error("Error getting jobs from nomad from inside Ticker with err: ", err)
29 | }
30 | checkMeta(jobMap)
31 | prometheusQueries(jobMetaMap)
32 | }
33 | }()
34 |
35 | }
36 |
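
These tickers run for the lifetime of the process and are never stopped. Purely as a sketch (not how scalad is wired), the standard pattern for a stoppable variant is a done channel plus ticker.Stop():

package main

import (
	"fmt"
	"time"
)

// startStoppableTicker shows the same loop shape with a shutdown path:
// closing done ends the goroutine and releases the ticker.
func startStoppableTicker(interval time.Duration, done <-chan struct{}) {
	ticker := time.NewTicker(interval)
	go func() {
		defer ticker.Stop()
		for {
			select {
			case <-ticker.C:
				fmt.Println("tick") // stand-in for checkFiringMap()
			case <-done:
				return
			}
		}
	}()
}

func main() {
	done := make(chan struct{})
	startStoppableTicker(time.Second, done)
	time.Sleep(3 * time.Second)
	close(done)
}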
--------------------------------------------------------------------------------