├── RELEASE_NOTES.md
├── loadenv.sh
├── notifications
│   └── notifications.go
├── LICENSE
├── logger
│   └── logger.go
├── security.md
├── README.md
└── main.go

/RELEASE_NOTES.md:
--------------------------------------------------------------------------------
### 0.0.1 - 11.27.2017
* initial
--------------------------------------------------------------------------------
/loadenv.sh:
--------------------------------------------------------------------------------
#!/bin/sh
echo "Set Nomad Server"
export nomad_server=""
echo "Set Env"
export env=""
echo "Set region"
export region=""
echo "Set consul server"
export consul_server=""
echo "Set consul datacenter"
export consul_datacenter=""
--------------------------------------------------------------------------------
/notifications/notifications.go:
--------------------------------------------------------------------------------
package notifications

import (
    "log"

    pagerduty "github.com/PagerDuty/go-pagerduty"
)

// PDAlert sends a PagerDuty event (trigger or resolve) for the given service
// using the service's integration key. The tag is prepended to the service
// name to form a stable incident key, so a later resolve closes the incident
// opened by the matching trigger.
func PDAlert(action string, serviceName string, integrationKey string, message string, tag string) error {
    event := pagerduty.Event{
        Type:        action,
        ServiceKey:  integrationKey,
        Description: message,
        IncidentKey: tag + serviceName,
    }
    resp, err := pagerduty.CreateEvent(event)
    if err != nil {
        log.Println(resp)
        return err
    }
    return nil
}
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
Copyright (c) 2018-present, Jet.com, Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
--------------------------------------------------------------------------------
/logger/logger.go:
--------------------------------------------------------------------------------
package logger

import (
    "io"
    "log"
)

// Package-level loggers, one per severity, initialized by Init.
var (
    Trace   *log.Logger
    Info    *log.Logger
    Warning *log.Logger
    Error   *log.Logger
)

// Init wires each log level to the supplied writer.
func Init(
    traceHandle io.Writer,
    infoHandle io.Writer,
    warningHandle io.Writer,
    errorHandle io.Writer) {
    Trace = log.New(traceHandle,
        "TRACE: ",
        log.Ldate|log.Ltime|log.Lshortfile)

    Info = log.New(infoHandle,
        "INFO: ",
        log.Ldate|log.Ltime|log.Lshortfile)

    Warning = log.New(warningHandle,
        "WARNING: ",
        log.Ldate|log.Ltime|log.Lshortfile)

    Error = log.New(errorHandle,
        "ERROR: ",
        log.Ldate|log.Ltime|log.Lshortfile)
}
--------------------------------------------------------------------------------
/security.md:
--------------------------------------------------------------------------------
# Security Guidelines for this Project

## How the Walmart Security team manages security for this project
Walmart takes security seriously and wants to ensure that we maintain a secure environment for our customers and that we also provide secure solutions for the open source community. To help us achieve these goals, please note the following before using this software:

- Review the software license to understand Walmart's obligations in terms of warranties and suitability for purpose
- Review our Responsible Disclosure Policy: https://corporate.walmart.com/article/responsible-disclosure-policy
- Report any security concerns or questions using the reporting form at the bottom of our Responsible Disclosure Policy page: https://corporate.walmart.com/article/responsible-disclosure-policy
- We enforce SLAs on our security team and software engineers to remediate security bugs in a timely manner
- Please monitor this repository and update your environment in a timely manner as we release patches and updates

## Responsibly Disclosing Security Bugs to Walmart
If you find a security bug in this repository, please work with Walmart's security team following responsible disclosure principles and these guidelines:

- Do not submit a normal issue or pull request in our public repository; instead, report directly through the reporting form found at the bottom of our Responsible Disclosure Policy page: https://corporate.walmart.com/article/responsible-disclosure-policy
- We will review your submission and may follow up for additional details
- If you have a patch, we will review it and approve it privately; once approved for release, you can submit it as a pull request publicly in our repos (we give credit where credit is due)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
NOTICE: SUPPORT FOR THIS PROJECT ENDED ON 18 November 2020

This project was owned and maintained by Jet.com (Walmart). It has reached its end of life and Walmart no longer supports it.

We will no longer be monitoring the issues for this project or reviewing pull requests. You are free to continue using this project, or forks of it, under the license terms at your own risk. This project is no longer subject to Jet.com/Walmart's bug bounty program or other security monitoring.

## Actions you can take

We recommend you take the following actions:

* Review any configuration files used for build automation and make appropriate updates to remove or replace this project
* Notify other members of your team and/or organization of this change
* Notify your security team to help you evaluate alternative options

## Forking and transition of ownership

For [security reasons](https://www.theregister.co.uk/2018/11/26/npm_repo_bitcoin_stealer/), Walmart does not transfer ownership of our primary repos on GitHub or other platforms to other individuals/organizations. Further, we do not transfer ownership of packages for public package management systems.

If you would like to fork this package and continue development, you should choose a new name for the project and create your own packages, build automation, etc.

Please review the licensing terms of this project, which continue to be in effect even after decommission.

ORIGINAL README BELOW

----------------------

# Nomad Service Alerter

Nomad Service Alerter is a tool written in Go whose primary goal is to provide alerting for your services running on Nomad (https://www.nomadproject.io/). It offers configurable, opt-in alerting options that you specify in your Nomad job manifest (see the configuration section below). Nomad Service Alerter mainly covers Consul health-check alerts and service restart-loop alerts.

## Alerts

Nomad Service Alerter supports the following alerts:

### Consul Health-Check Alerts

This alert monitors your service and alerts on allocations and versions that are failing their defined Consul health checks. You can set the duration threshold for which the service must remain unhealthy before an alert fires. The alert includes the details of all allocations of the service that are failing the Consul health check.

### Service Restart-Loop Alerts

This alert monitors jobs (and all of their allocations) and alerts on services that go into a never-ending restart loop. This indicates an error in the service that prevents it from reaching a successful Running state (the allocations are created but remain stuck in the pending state). This is a more accurate way to alert on Nomad jobs than monitoring for the Dead state (which may be a valid state if you set count to 0).

### Queued Instances Alerts

You can configure Nomad Service Alerter to opt in to Queued Instances Alerts, which trigger when the service has unallocated (queued) instances for at least 3 minutes.

### Orphaned Instances Alerts

You can configure Nomad Service Alerter to opt in to Orphaned Instances Alerts, which trigger when the service has more allocations running than it asked for (one or more rogue allocations are running on some machine without a parent Nomad process, hence the name). As with the Queued Instances alert, this alert is triggered only after the service has remained in the described state for at least 3 minutes.

## Build and Test

To run the tool on your local machine, you will have to:
* Install and set up your Go environment (https://golang.org/doc/install)
* Install glide (https://github.com/Masterminds/glide)
* Clone the repo (git clone https://github.com/jet/nomad-service-alerter)
* cd into the code repo (```cd nomad-service-alerter```)
* Run ```glide init```
* Run ```glide install```
* Make sure the following environment variables are set to appropriate values.
```

"nomad_server" --> your Nomad server address
"env" --> the environment in which the tool will be running
"region" --> the region in which the tool will be running
"consul_server" --> your Consul server address
"consul_datacenter" --> the datacenter of your Consul server

```
You can use the script ```loadenv.sh```, after adding appropriate values to it, to load all of the above variables.
* Run ```go build```
* Execute the binary. (Or you can skip the ```go build``` step and run ```go run main.go``` instead.)


### Configuring a Nomad service to be alerted on by Nomad Service Alerter upon becoming unhealthy

You can configure your service by adding the following key-value pairs to the **Meta** section of your Nomad job.
* consul_service_healthcheck_enabled --> true/false (enable/disable Consul health-check alerts)
* consul_service_healthcheck_threshold --> duration for which the service may remain unhealthy before an alert fires (e.g. 2m0s)
* pd_service_key --> 32-character PagerDuty service integration key (all alerts will be sent here)
* restart_loop_alerting_enabled --> true/false (enable/disable restart-loop alerts)
* orphaned_instances_alert_enabled --> true/false (enable/disable orphaned-allocations alerts)
* queued_instances_alert_enabled --> true/false (enable/disable queued-allocations alerts)

The following is an example of the key-value pairs described above as they would appear in your job's **Meta** section (at the job level):

```
consul_service_healthcheck_enabled: true
consul_service_healthcheck_threshold: 3m0s
restart_loop_alerting_enabled: true
orphaned_instances_alert_enabled: true
queued_instances_alert_enabled: true
pd_service_key: 22221234567890123456789000000000

```

## Running Nomad Service Alerter on Nomad

If you want to run Nomad Service Alerter on Nomad itself, you need the environment variables described in the 'Build and Test' section set to appropriate values in your job manifest (JSON file):

```

"nomad_server" --> your Nomad server address
"env" --> the environment in which the tool will be running
"region" --> the region in which the tool will be running
"consul_server" --> your Consul server address
"consul_datacenter" --> the datacenter of your Consul server

```
Once your job file is ready, use the standard method of submitting the job to Nomad (https://www.nomadproject.io/docs/operating-a-job/submitting-jobs.html).

## Alert Integrations

As of now, Nomad Service Alerter only supports integration with PagerDuty.
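
All alerts go out through the `notifications` package shown earlier in this repo. Below is a minimal, self-contained sketch of how its `PDAlert` helper is called to open and then close an incident; the service name and integration key are placeholder values, not real ones:

```
package main

import (
    "log"

    "github.com/jet/nomad-service-alerter/notifications"
)

func main() {
    // Placeholder values for illustration only.
    serviceName := "example-service"
    integrationKey := "22221234567890123456789000000000" // 32-character PagerDuty integration key

    // Open an incident. The "consul" tag plus the service name form the incident key,
    // so a later resolve with the same arguments closes this same incident.
    if err := notifications.PDAlert("trigger", serviceName, integrationKey,
        "service is failing its Consul health check", "consul"); err != nil {
        log.Println("trigger failed:", err)
    }

    // Resolve the incident once the service is healthy again.
    if err := notifications.PDAlert("resolve", serviceName, integrationKey,
        "resolved", "consul"); err != nil {
        log.Println("resolve failed:", err)
    }
}
```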

## Maintainers

* [@bhope](https://github.com/bhope) (Prathamesh Bhope)
--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
package main

import (
    "io"
    "io/ioutil"
    "log"
    "os"
    "strconv"
    "strings"
    "sync"
    "time"

    consul "github.com/hashicorp/consul/api"
    "github.com/hashicorp/nomad/api"

    "fmt"
    "net/http"

    "github.com/PagerDuty/go-pagerduty"
    "github.com/jet/nomad-service-alerter/logger"
    "github.com/jet/nomad-service-alerter/notifications"
)

const (
    nomadMapRefreshInterval = 30
    serviceAlertInterval    = 60
)

func main() {

    host := os.Getenv("nomad_server")
    env := os.Getenv("env")
    region := os.Getenv("region")
    alertSwitch := os.Getenv("alert_switch")
    consulHost := os.Getenv("consul_server")
    datacenter := os.Getenv("consul_datacenter")
    meta := make(map[string]map[string]string)
    var lock = sync.RWMutex{}
    logger.Init(ioutil.Discard, os.Stdout, os.Stdout, os.Stderr)
    go refreshMap(host, &lock, &meta)
    go serviceAlerts(host, env, region, &meta, alertSwitch)                  // This goroutine generates alerts for orphaned/queued allocations and restarting services
    go consulAlerts(consulHost, host, env, region, datacenter, &meta, &lock) // This goroutine generates alerts for Consul service health checks
    http.HandleFunc("/health", health)                                       // health check
    http.ListenAndServe(":8000", nil)
}

func health(w http.ResponseWriter, r *http.Request) {
    io.WriteString(w, "OK")
}

func refreshMap(host string, lock *sync.RWMutex, meta *map[string]map[string]string) {
    client, cerr := api.NewClient(&api.Config{Address: host, TLSConfig: &api.TLSConfig{}})
    if cerr != nil {
        logger.Error.Printf("Unable to create client(%v): %v", host, cerr)
    }
    optsNomad := &api.QueryOptions{AllowStale: true}
    for {
        jobList, _, err := client.Jobs().List(optsNomad)
        if err != nil {
            logger.Error.Printf("Cannot get job list from Nomad : %v \n", err.Error())
        }
        pm := make(map[string]map[string]string) // Build a fresh map so readers never see a half-updated view
        for _, job := range jobList {
            value, _, err := client.Jobs().Info(job.ID, optsNomad)
            if err != nil {
                logger.Error.Printf("Cannot get job info from Nomad : %v \n", err.Error())
                continue
            }
            if value.IsPeriodic() || *value.Type == "system" || *value.Type == "batch" {
                continue
            }
            if len(value.TaskGroups) > 0 {
                if len(value.TaskGroups[0].Tasks) > 0 {
                    if len(value.TaskGroups[0].Tasks[0].Services) > 0 {
                        pm[value.TaskGroups[0].Tasks[0].Services[0].Name] = value.Meta
                    }
                }
            }
        }
        lock.Lock()
        *meta = pm
        lock.Unlock()
        time.Sleep(time.Second * time.Duration(nomadMapRefreshInterval))
    }
}

func consulAlerts(consulHost string, host string, env string, region string, datacenter string, meta *map[string]map[string]string, lock *sync.RWMutex) {
    config := consul.DefaultConfig()
    config.Address = consulHost
    config.Datacenter = datacenter
    config.Token = ""
    consulClient, _ := consul.NewClient(config)
    alerts := make(map[string]time.Time) // Holds the services currently in critical state. Key --> service name, value --> first time it was reported as critical
    alertTriggered := make(map[string]string)
    var lastIndexConsul uint64
    for {
        // Go through the alerts map, see which jobs have crossed their threshold, and alert on them
        optsConsul := &consul.QueryOptions{AllowStale: true, WaitIndex: lastIndexConsul, WaitTime: (60 * time.Second)}
        healthChecks, qmConsul, err := consulClient.Health().State("critical", optsConsul)
        if err != nil {
            logger.Error.Println("Error querying Consul health state : ", err.Error())
            continue
        }
        lastIndexConsul = qmConsul.LastIndex
        criticalServices := make(map[string]bool) // This map helps us remove the services which have moved from critical back to passing state
        for _, check := range healthChecks {
            criticalServices[check.ServiceName] = true
            if _, ok := alerts[check.ServiceName]; ok {
                continue
            } else {
                alerts[check.ServiceName] = time.Now()
            }
        }
        // Iterate through each member of the alerts map to check which ones need to be alerted on
        lock.RLock()     // Acquire the read lock
        metaNew := *meta // Local snapshot of the map used for the rest of this iteration
        lock.RUnlock()   // Release the read lock
        for k, v := range alerts {
            logger.Info.Printf("[Consul-Check %v-%v] : Job %v is in CRITICAL state\n", os.Getenv("env"), os.Getenv("region"), k)
            if _, ok1 := metaNew[k]; !ok1 {
                logger.Info.Printf("Service not registered on Nomad. Removed from alert list : %v \n", k)
                delete(criticalServices, k)
            }
            if _, ok := criticalServices[k]; ok { // The service is still in critical state
                t1 := time.Now()
                metaKey := metaNew[k]
                consulCheck := ""
                consulThreshold := ""
                integrationKey := ""
                if _, ok := metaKey["consul_service_healthcheck_enabled"]; ok {
                    consulCheck = metaKey["consul_service_healthcheck_enabled"]
                }
                if _, ok := metaKey["consul_service_healthcheck_threshold"]; ok {
                    consulThreshold = metaKey["consul_service_healthcheck_threshold"]
                }
                if _, ok := metaKey["pd_service_key"]; ok {
                    integrationKey = metaKey["pd_service_key"]
                }
                if consulCheck == "true" {
                    threshold, _ := time.ParseDuration(consulThreshold)
                    if t1.Sub(v).Seconds() >= threshold.Seconds() {
                        opt := &consul.QueryOptions{AllowStale: true}
                        hc, _, _ := consulClient.Health().Checks(k, opt)
                        var criticalList []string // Unhealthy allocations belonging to the critical service
                        for _, service := range hc {
                            if service.Status == "passing" {
                                continue
                            }
                            s1 := service.ServiceID
                            s1 = s1[16:24] // Extract the short allocation ID of the critical allocation from the service ID
                            criticalList = append(criticalList, s1)
                        }
                        message := fmt.Sprintf("[Consul Healthcheck %v %v] Job : %v is in CRITICAL state. Allocations in Critical state : %v", os.Getenv("env"), os.Getenv("region"), k, criticalList)
                        fmt.Printf("%v \n", message)
                        err := notifications.PDAlert("trigger", k, integrationKey, message, "consul")
                        if err != nil {
                            logger.Error.Println("Error in PD : ", err.Error())
                        }
                        alertTriggered[k] = "triggered"
                    }
                }
            } else {
                if alertTriggered[k] == "triggered" { // An alert has been triggered for this service; resolve it
                    metaKey := metaNew[k]
                    integrationKey := ""
                    if _, ok := metaKey["pd_service_key"]; ok {
                        integrationKey = metaKey["pd_service_key"]
                    }
                    err := notifications.PDAlert("resolve", k, integrationKey, "resolved", "consul")
                    if err != nil {
                        logger.Error.Println("Error in PD : ", err.Error())
                    }
                    logger.Info.Printf("Alert is resolved for service : %v \n", k)
                }
                delete(criticalServices, k) // Remove the services which have moved away from CRITICAL state
                delete(alerts, k)
                delete(alertTriggered, k)
            }
        }
    }
}

func serviceAlerts(host string, env string, region string, meta *map[string]map[string]string, alertSwitch string) {

    client, cerr := api.NewClient(&api.Config{Address: host, TLSConfig: &api.TLSConfig{}})
    if cerr != nil {
        logger.Error.Printf("Unable to create client(%v): %v", host, cerr)
    }
    teamAlert := make(map[string]int)
    count := 0
    for {
        count++
        nodes := client.Nodes()
        jobs := client.Jobs()
        opts := &api.QueryOptions{AllowStale: true}
        resp, _, err := nodes.List(opts)
        serviceAlert := make(map[string][]string)
        if err != nil {
            logger.Error.Printf("Failed to grab node list: %v \n", err.Error())
        }
        for _, n := range resp {
            alerts(n, nodes, opts, serviceAlert, teamAlert)
        }
        for k, v := range serviceAlert {
            job, _, err := jobs.Info(k, opts)
            if err != nil {
                logger.Error.Println("error grabbing information of job : ", k)
                continue
            }
            if *job.Type == "system" {
                continue
            }
            integrationKey := ""
            if _, ok := job.Meta["pd_service_key"]; ok {
                integrationKey = job.Meta["pd_service_key"]
            }
            allocCount := 0
            taskGroupLen := len(job.TaskGroups)
            if taskGroupLen > 0 {
                for it := 0; it < taskGroupLen; it++ {
                    allocCount = allocCount + *job.TaskGroups[it].Count
                }
                if allocCount != len(v) {
                    if allocCount < len(v) {
                        orphanCount := len(v) - allocCount
                        logger.Info.Printf("[%v] Job=\"%v\" Error=\"orphaned allocations\" Orphaned Allocations Count=\"%v\"\n", time.Now(), k, orphanCount)
                        message := " Job : " + k + " has " + strconv.Itoa(orphanCount) + " orphaned allocations "
                        if _, ok := job.Meta["orphaned_instances_alert_enabled"]; ok {
                            err := notifications.PDAlert("trigger", k, integrationKey, message, "service")
                            if err != nil {
                                logger.Error.Println("Error in PD : ", err.Error())
                            }
                        }
                    } else {
                        queuedCount := allocCount - len(v)
                        logger.Info.Printf("[%v] Job=\"%v\" Error=\"queued instances\" Queued Instances Count=\"%v\" \n", time.Now(), k, queuedCount)
                        message := " Job : " + k + " has " + strconv.Itoa(queuedCount) + " queued instances "
                        if _, ok := job.Meta["queued_instances_alert_enabled"]; ok {
                            err := notifications.PDAlert("trigger", k, integrationKey, message, "service")
                            if err != nil {
                                logger.Error.Println("Error in PD : ", err.Error())
                            }
                        }
                    }
                }
            }
        }

        jobalertmap := make(map[string][]string)
        for k1, v1 := range teamAlert {
            result := strings.Split(k1, ",")
            logger.Info.Printf("[%v] Job=\"%v\" Error=\"pending allocations\" AllocationId=\"%v\" \n", time.Now(), result[1], result[0])
            if v1 == 3 {
                jobalertmap[result[1]] = append(jobalertmap[result[1]], " "+result[0])
            }
        }
        for k2, v2 := range jobalertmap {
            logger.Info.Printf("[%v] Job=\"%v\" Error=\"Service in Restart Loop\" Allocations=\"%v\" \n", time.Now(), k2, v2)
            restartmessage := "[Restart-Loop " + env + " " + region + "] Job = " + k2 + " has following allocations in restart loop : " + strings.Join(v2, " ")
            job, _, err := jobs.Info(k2, opts)
            if err != nil {
                continue
            }
            if job.Meta["restart_loop_alerting_enabled"] == "true" {
                pdKey := job.Meta["pd_service_key"]
                event1 := pagerduty.Event{
                    Type:        "trigger",
                    ServiceKey:  pdKey,
                    Description: restartmessage,
                    IncidentKey: *job.ID,
                }
                resp1, err := pagerduty.CreateEvent(event1)
                if err != nil {
                    log.Println(resp1)
                    logger.Error.Println("ERROR in PD:", err)
                }
            }
        }
        if count == 3 {
            count = 0
            teamAlert = make(map[string]int)
        }
        time.Sleep(time.Second * time.Duration(serviceAlertInterval))
    }
}

func alerts(n *api.NodeListStub, nodes *api.Nodes, opts *api.QueryOptions, thisGroup map[string][]string, thisAlert map[string]int) {
    nodeAlloc, _, err := nodes.Allocations(n.ID, opts)
    if err != nil {
        logger.Error.Printf("error grabbing allocation info : %v", err.Error())
    }

    for _, i := range nodeAlloc {
        status := i.ClientStatus
        t := float64(300)
        // Collect long-lived running allocations, plus pending allocations of non-system, non-periodic jobs
        // (parentheses make the original &&/|| precedence explicit).
        if status == "running" || (status == "pending" && *i.Job.Type != "system" && !strings.Contains(i.JobID, "periodic")) {
            z := epochToHumanReadable(int64(i.CreateTime / 1000000000))
            if time.Since(z).Seconds() > t {
                if len(thisGroup[i.JobID]) > 0 {
                    thisGroup[i.JobID] = append(thisGroup[i.JobID], "| "+i.ID)
                } else {
                    thisGroup[i.JobID] = append(thisGroup[i.JobID], " "+i.ID)
                }
            }
        }
        if status == "pending" && !strings.Contains(i.JobID, "periodic") {
            z := epochToHumanReadable(int64(i.CreateTime / 1000000000))
            if time.Since(z).Seconds() > t {
                thisAlert[i.ID+","+i.JobID]++
            }
        }
    }
}

// epochToHumanReadable converts a Unix timestamp in seconds to a time.Time.
func epochToHumanReadable(epoch int64) time.Time {
    return time.Unix(epoch, 0)
}
--------------------------------------------------------------------------------
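
One detail worth noting in the `alerts` function above: allocation `CreateTime` is a nanosecond Unix timestamp (which is why the code divides by 1e9 before calling `epochToHumanReadable`, which expects whole seconds). A minimal, self-contained sketch of that conversion and the five-minute age check, using a made-up timestamp:

```
package main

import (
    "fmt"
    "time"
)

// Mirrors the helper in main.go: interprets its argument as seconds since the Unix epoch.
func epochToHumanReadable(epoch int64) time.Time {
    return time.Unix(epoch, 0)
}

func main() {
    createTimeNs := int64(1511800000123456789) // made-up nanosecond timestamp for illustration
    created := epochToHumanReadable(createTimeNs / 1000000000)
    fmt.Println("allocation created at:", created.UTC())
    fmt.Println("older than 5 minutes:", time.Since(created).Seconds() > 300)
}
```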