├── .gitignore ├── .travis.yml ├── LICENSE ├── LogPulse ├── PulseConfig.toml ├── api │ └── api.go ├── config │ ├── MailGun.toml │ ├── PulseConfig.toml │ ├── config.go │ ├── config_test.go │ └── s.toml ├── email │ └── email.go ├── file │ ├── TestData │ │ ├── ReadTest.txt │ │ └── smallkern.log.2.gz │ ├── file.go │ └── file_test.go ├── kern.log.2.gz └── main.go ├── README.md ├── images └── pulse_logo.png └── pulse ├── doc.go └── pulse.go /.gitignore: -------------------------------------------------------------------------------- 1 | *MailGun.toml 2 | !LogPulse/config/MailGun.toml 3 | *SMTP.toml 4 | demoData/ 5 | LogPulse/Logpulse 6 | *.csv 7 | *.json 8 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | go: 4 | - 1.5.1 5 | - tip 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /LogPulse/PulseConfig.toml: -------------------------------------------------------------------------------- 1 | LogList = [ 2 | "demoData/kern.log.1", 3 | "demoData/kern.log.2" 4 | ] 5 | 6 | EmailList = [ 7 | "someuser@example.org", 8 | "AnneConley@example.org", 9 | "WeAreAwesome@example.org", 10 | "espinozamiguel349@gmail.com" 11 | ] 12 | 13 | OutputFile = "PulseOut.txt" 14 | SMTPConfig = "SMTP.toml" 15 | 16 | Port = 8080 17 | -------------------------------------------------------------------------------- /LogPulse/api/api.go: -------------------------------------------------------------------------------- 1 | // Package api is to start the api to be listening on different endpoints. 2 | // The API will listen on the port specified in the PulseConfig.toml. 
3 | // There are 2 endpoints: 4 | // POST /log/file this will read the file line by line passing in each line to the algorithm 5 | // POST /log/message (in development) this will take a string and pass it directly to the algorithm 6 | package api 7 | 8 | import ( 9 | "encoding/json" 10 | "fmt" 11 | "io" 12 | "log" 13 | "net/http" 14 | "os" 15 | "path/filepath" 16 | "time" 17 | 18 | "github.com/davecgh/go-spew/spew" 19 | "github.com/gophergala2016/Pulse/LogPulse/config" 20 | "github.com/gophergala2016/Pulse/LogPulse/email" 21 | "github.com/gophergala2016/Pulse/LogPulse/file" 22 | "github.com/gophergala2016/Pulse/pulse" 23 | ) 24 | 25 | // Result is used for ResponseWriter in handlers 26 | type Result struct { 27 | Status int `json:"status"` 28 | Message string `json:"message"` 29 | } 30 | 31 | var buffStrings []string 32 | var port int 33 | 34 | func init() { 35 | defer func() { 36 | if r := recover(); r != nil { 37 | fmt.Println(r) 38 | os.Exit(0) 39 | } 40 | }() 41 | val, err := config.Load() 42 | if err != nil { 43 | panic(fmt.Errorf("API: %s", err)) 44 | } 45 | port = val.Port 46 | } 47 | 48 | // Start will run the REST API. 49 | func Start() { 50 | http.HandleFunc("/", HelloWorld) 51 | http.HandleFunc("/log/message", StreamLog) 52 | http.HandleFunc("/log/file", SendFile) 53 | 54 | fmt.Printf("Listening on localhost:%d\n", port) 55 | http.ListenAndServe(fmt.Sprintf(":%d", port), nil) 56 | 57 | } 58 | 59 | // HelloWorld ... testdummy handler for ec2 instance 60 | func HelloWorld(w http.ResponseWriter, r *http.Request) { 61 | w.Header().Set("Content-Type", "application/json") 62 | result, _ := json.Marshal(Result{200, "hello world"}) 63 | io.WriteString(w, string(result)) 64 | } 65 | 66 | // StreamLog listens for post request for a string value. 67 | // This string is then passed to the algorithm for analyzing. 68 | func StreamLog(w http.ResponseWriter, r *http.Request) { 69 | 70 | // Checking to see if the request was a post. 
71 | // If not return a 400: bad request 72 | if r.Method != "POST" { 73 | w.Header().Set("Content-Type", "application/json") 74 | result, _ := json.Marshal(Result{400, "bad request"}) 75 | io.WriteString(w, string(result)) 76 | return 77 | } 78 | 79 | // Decoding the body of the response. 80 | // If we could not parse it as json then respond with a 400: bad request 81 | decoder := json.NewDecoder(r.Body) 82 | var body struct { 83 | Message string `json:"message"` 84 | } 85 | err := decoder.Decode(&body) 86 | if err != nil { 87 | w.Header().Set("Content-Type", "application/json") 88 | result, _ := json.Marshal(Result{400, "bad request"}) 89 | io.WriteString(w, string(result)) 90 | return 91 | } 92 | 93 | //TODO: post the string to the algorthm for analyzing 94 | 95 | // If we were able to decode and send string to algorithm return a 200: success 96 | w.Header().Set("Content-Type", "application/json") 97 | result, _ := json.Marshal(Result{200, "success"}) 98 | io.WriteString(w, string(result)) 99 | 100 | } 101 | 102 | // SendFile listens for a POST that has a form field named file and email in the body. 103 | // Using the file field we will download the specified file to the server. 104 | // The email field is used to email the user the results once algorithm is done. 105 | func SendFile(w http.ResponseWriter, r *http.Request) { 106 | 107 | // Checking to see if the request was a post. 108 | // If not return a 400: bad request 109 | if r.Method != "POST" { 110 | w.Header().Set("Content-Type", "application/json") 111 | result, _ := json.Marshal(Result{400, "bad request"}) 112 | io.WriteString(w, string(result)) 113 | return 114 | } 115 | compressed := false 116 | // Get the file field from the form in the response. 
117 | // If we cannot parse it a 400 bad request is returned 118 | f, header, err := r.FormFile("file") 119 | fmt.Println("Form File") 120 | if err != nil { 121 | w.Header().Set("Content-Type", "application/json") 122 | result, _ := json.Marshal(Result{400, "bad request"}) 123 | io.WriteString(w, string(result)) 124 | return 125 | } 126 | 127 | defer f.Close() 128 | 129 | var body struct { 130 | Email string `json:"email"` 131 | } 132 | // Parse the Form in the response. 133 | // Check if the email field is a valid email. 134 | // If not return an 400: bad request 135 | r.ParseForm() 136 | body.Email = r.Form["email"][0] 137 | if !email.IsValid(body.Email) { 138 | w.Header().Set("Content-Type", "application/json") 139 | result, _ := json.Marshal(Result{400, "email not valid"}) 140 | io.WriteString(w, string(result)) 141 | return 142 | } 143 | if err != nil { 144 | w.Header().Set("Content-Type", "application/json") 145 | result, _ := json.Marshal(Result{400, "bad request"}) 146 | io.WriteString(w, string(result)) 147 | return 148 | } 149 | fmt.Println("Received bodys", body.Email) 150 | extension := filepath.Ext(header.Filename) 151 | filename := header.Filename[0 : len(header.Filename)-len(extension)] 152 | 153 | stdIn := make(chan string) 154 | email.ByPassMail = true // Needs to bypass emails and store in JSON 155 | email.OutputFile = fmt.Sprintf("%s-%s.json", filename, body.Email) 156 | email.EmailList = []string{body.Email} 157 | 158 | if _, err := os.Stat(email.OutputFile); err == nil { 159 | w.Header().Set("Content-Type", "application/json") 160 | result, _ := json.Marshal(Result{406, "file is being processed"}) 161 | io.WriteString(w, string(result)) 162 | return 163 | } 164 | spew.Dump(email.OutputFile) 165 | fmt.Println("File does not exist") 166 | 167 | if extension == ".gz" { 168 | // Load compressed file on disk 169 | out, err := os.Create(fmt.Sprintf("%s.gz", filename)) 170 | if err != nil { 171 | w.Header().Set("Content-Type", "application/json") 172 
| result, _ := json.Marshal(Result{400, "bad request"}) 173 | io.WriteString(w, string(result)) 174 | return 175 | } 176 | 177 | defer out.Close() 178 | 179 | // Write the content from POST to the file 180 | _, err = io.Copy(out, f) 181 | if err != nil { 182 | w.Header().Set("Content-Type", "application/json") 183 | result, _ := json.Marshal(Result{400, "gzip copy failed"}) 184 | io.WriteString(w, string(result)) 185 | return 186 | } 187 | 188 | // Uncompress file 189 | err = file.UnGZip(fmt.Sprintf("%s.gz", filename)) 190 | if err != nil { 191 | log.Printf("api.UnGZip: %s\n", err) 192 | w.Header().Set("Content-Type", "application/json") 193 | result, _ := json.Marshal(Result{400, "gzip uncompressed failed"}) 194 | io.WriteString(w, string(result)) 195 | return 196 | } 197 | 198 | compressed = true 199 | } 200 | 201 | // Run on separat go routine so that we can give users a response on page first. 202 | go func() { 203 | // Clean up 204 | defer func() { 205 | fmt.Println("Deleting files") 206 | err = os.Remove(email.OutputFile) 207 | if err != nil { 208 | fmt.Println("Failed to delete output file, please delete") 209 | } 210 | 211 | if _, err := os.Stat(email.OutputFile); err == nil { 212 | err = os.Remove(email.OutputFile) 213 | if err != nil { 214 | fmt.Println("Failed to delete output file, please delete") 215 | } 216 | } 217 | 218 | if compressed { 219 | err := os.Remove(filename) 220 | if err != nil { 221 | fmt.Println("Failed to delete uncompressed file, please delete") 222 | } 223 | 224 | err = os.Remove(fmt.Sprintf("%s.gz", filename)) 225 | if err != nil { 226 | fmt.Println("Failed to delete uncompressed file, please delete") 227 | } 228 | } 229 | }() 230 | 231 | start := time.Now() 232 | // Start the pulse algorithm 233 | pulse.Run(stdIn, email.SaveToCache) 234 | line := make(chan string) 235 | 236 | if compressed { 237 | file.Read(filename, line) 238 | } else { 239 | file.StreamRead(f, line) 240 | } 241 | 242 | for l := range line { 243 | if l == "EOF" { 
244 | email.ByPassMail = false 245 | // Once EOF, time to send email from cache JSON storage 246 | email.SendFromCache(email.OutputFile) 247 | close(stdIn) 248 | break 249 | } 250 | stdIn <- l 251 | } 252 | 253 | elapsed := time.Since(start) 254 | log.Printf("Pulse Algorithm took %s", elapsed) 255 | }() 256 | 257 | // Return a 200 success even if algorithm is still going. 258 | w.Header().Set("Content-Type", "application/json") 259 | result, _ := json.Marshal(Result{200, "success"}) 260 | io.WriteString(w, string(result)) 261 | } 262 | -------------------------------------------------------------------------------- /LogPulse/config/MailGun.toml: -------------------------------------------------------------------------------- 1 | Sender = "postmaster@clemsonopoly.com" 2 | Domain = "clemsonopoly.com" 3 | PrivateKey = "SECRET" 4 | PublicKey = "PUBLIC" 5 | -------------------------------------------------------------------------------- /LogPulse/config/PulseConfig.toml: -------------------------------------------------------------------------------- 1 | LogList = [ 2 | "demoData/kern.log.1", 3 | "demoData/kern.log.2" 4 | ] 5 | 6 | EmailList = [ 7 | "someuser@example.org", 8 | "AnneConley@example.org", 9 | "WeAreAwesome@example.org", 10 | "espinozamiguel349@gmail.com" 11 | ] 12 | 13 | OutputFile = "PulseOut.txt" 14 | SMTPConfig = "s.toml" 15 | 16 | Port = 8080 17 | -------------------------------------------------------------------------------- /LogPulse/config/config.go: -------------------------------------------------------------------------------- 1 | // Package config reads config files and returns the proper config structure. 2 | package config 3 | 4 | import ( 5 | "fmt" 6 | "path/filepath" 7 | 8 | "github.com/BurntSushi/toml" 9 | "github.com/mitchellh/go-homedir" 10 | ) 11 | 12 | // Configuration is the main configurations for the application. 
type Configuration struct {
	// LogList is the list of log file locations that are read when no files
	// are given on the command line.
	LogList []string `toml:"LogList"`

	// EmailList is the list of recipients that get an email when the algorithm is done.
	EmailList []string `toml:"EmailList"`

	// OutputFile is the file the alerts are written to when they cannot be emailed.
	OutputFile string `toml:"OutputFile"`

	// SMTPConfig is the location of the SMTP config file with the credentials in it.
	SMTPConfig string `toml:"SMTPConfig"`

	// Port is the port on which the API listens.
	Port int `toml:"Port"`
}

// SMTPConfig is the configuration of a personal SMTP server a user would like to use.
type SMTPConfig struct {
	// Server says where the SMTP server is hosted and which port it is listening on.
	Server Server

	// User holds the credentials of the account that sends the emails.
	User User
}

// Server is the address of the SMTP server.
type Server struct {
	// Host is where the SMTP server is hosted.
	Host string `toml:"Host"`

	// Port is the port on which the SMTP server is listening.
	Port int `toml:"Port"`
}

// User has the credentials of the person who is sending the email.
type User struct {
	// UserName is the username of the person sending the email.
	UserName string `toml:"UserName"`

	// PassWord is the password of the user.
	PassWord string `toml:"PassWord"`
}
66 | PrivateKey string `toml:"PrivateKey"` 67 | 68 | // PublicKey is the public key to access MailGun's API. 69 | PublicKey string `toml:"PublicKey"` 70 | } 71 | 72 | var ( 73 | mailGunConfig = "MailGun.toml" 74 | pulseConfig = "PulseConfig.toml" 75 | smtpConfig string 76 | ) 77 | 78 | //Load returns the main configuration file. 79 | func Load() (*Configuration, error) { 80 | cfg := &Configuration{} 81 | // Search in the same directory as the binary first. 82 | if _, err := toml.DecodeFile(pulseConfig, cfg); err != nil { 83 | // If we couldn't find it ther keep looking. 84 | 85 | // Find the home directory for user. 86 | home, err := homedir.Dir() 87 | if err != nil { 88 | return nil, fmt.Errorf("config.Load: Could not find %s in the executable directory and could not find home directory", pulseConfig) 89 | } 90 | // Look in the home directory of the user for the main config. 91 | if _, err := toml.DecodeFile(filepath.Join(home, pulseConfig), cfg); err != nil { 92 | return nil, fmt.Errorf("config.Load: Could not find %s in the %s or executable directory", pulseConfig, home) 93 | } 94 | } 95 | return cfg, nil 96 | } 97 | 98 | //LoadSMTP loads the settings for the smtp server. 99 | func LoadSMTP() (*SMTPConfig, error) { 100 | //SMTP file location is in the main config. 101 | 102 | // Try and load it. If we can't return an error 103 | maincfg, err := Load() 104 | if err != nil { 105 | return nil, fmt.Errorf("config.LoadSMTP: %s", err) 106 | } 107 | 108 | // Load the SMTP config and return if we can. 109 | cfg := &SMTPConfig{} 110 | if _, err := toml.DecodeFile(maincfg.SMTPConfig, cfg); err != nil { 111 | return nil, fmt.Errorf("config.LoadSMTP: %s", err) 112 | } 113 | return cfg, nil 114 | } 115 | 116 | //LoadSecret loads the keys for Mailgun. 117 | func LoadSecret() (*SecretConfig, error) { 118 | //Only search in directory of binary since we are the only ones with access to our MailGun client. 
119 | cfg := &SecretConfig{} 120 | if _, err := toml.DecodeFile(mailGunConfig, cfg); err != nil { 121 | return nil, fmt.Errorf("config.LoadSecret: %s", err) 122 | } 123 | return cfg, nil 124 | } 125 | -------------------------------------------------------------------------------- /LogPulse/config/config_test.go: -------------------------------------------------------------------------------- 1 | package config_test 2 | 3 | import ( 4 | "testing" 5 | 6 | . "github.com/gophergala2016/Pulse/LogPulse/config" 7 | ) 8 | 9 | func TestLoad(t *testing.T) { 10 | expectedCfg := Configuration{} 11 | expectedCfg.LogList = []string{"demoData/kern.log.1", "demoData/kern.log.2"} 12 | expectedCfg.EmailList = []string{ 13 | "someuser@example.org", 14 | "AnneConley@example.org", 15 | "WeAreAwesome@example.org", 16 | "espinozamiguel349@gmail.com", 17 | } 18 | expectedCfg.OutputFile = "PulseOut.txt" 19 | expectedCfg.SMTPConfig = "s.toml" 20 | expectedCfg.Port = 8080 21 | cfg, err := Load() 22 | if err != nil { 23 | t.Errorf("Could not load config. 
%s", err) 24 | } 25 | 26 | if len(expectedCfg.LogList) != len(cfg.LogList) { 27 | t.Errorf("Loglist lengths are wrong") 28 | } 29 | for i, l := range expectedCfg.LogList { 30 | if cfg.LogList[i] != l { 31 | t.Errorf("Loglist does not match") 32 | } 33 | } 34 | 35 | if len(expectedCfg.EmailList) != len(cfg.EmailList) { 36 | t.Errorf("Emaillist lengths are wrong") 37 | } 38 | for i, l := range expectedCfg.EmailList { 39 | if cfg.EmailList[i] != l { 40 | t.Errorf("Emaillist does not match") 41 | } 42 | } 43 | 44 | if expectedCfg.OutputFile != cfg.OutputFile { 45 | t.Errorf("Output file does not match") 46 | } 47 | if expectedCfg.SMTPConfig != cfg.SMTPConfig { 48 | t.Errorf("SMTP File does not match") 49 | } 50 | if expectedCfg.Port != cfg.Port { 51 | t.Errorf("Prot numbers does not match") 52 | } 53 | } 54 | 55 | func TestLoadSMTP(t *testing.T) { 56 | expectedCfg := SMTPConfig{} 57 | expectedCfg.Server.Host = "smtp.mailgun.org" 58 | expectedCfg.Server.Port = 25 59 | expectedCfg.User.PassWord = "Password" 60 | expectedCfg.User.UserName = "postmaster@clemsonopoly.com" 61 | cfg, err := LoadSMTP() 62 | if err != nil { 63 | t.Errorf("Could not load SMTP file") 64 | } 65 | if expectedCfg.Server.Host != cfg.Server.Host { 66 | t.Errorf("Host does not match") 67 | } 68 | if expectedCfg.Server.Port != cfg.Server.Port { 69 | t.Errorf("Prot numbers does not match") 70 | } 71 | if expectedCfg.User.UserName != cfg.User.UserName { 72 | t.Errorf("Username does not match") 73 | } 74 | if expectedCfg.User.PassWord != cfg.User.PassWord { 75 | t.Errorf("Password does not match") 76 | } 77 | } 78 | 79 | func TestLoadSecret(t *testing.T) { 80 | expectedCfg := SecretConfig{} 81 | expectedCfg.Domain = "clemsonopoly.com" 82 | expectedCfg.PrivateKey = "SECRET" 83 | expectedCfg.PublicKey = "PUBLIC" 84 | expectedCfg.Sender = "postmaster@clemsonopoly.com" 85 | cfg, err := LoadSecret() 86 | if err != nil { 87 | t.Errorf("Could not load secret file") 88 | } 89 | if expectedCfg.Domain != cfg.Domain 
{ 90 | t.Errorf("Domain does not match") 91 | } 92 | if expectedCfg.PrivateKey != cfg.PrivateKey { 93 | t.Errorf("Privatekey does not match") 94 | } 95 | if expectedCfg.PublicKey != cfg.PublicKey { 96 | t.Errorf("Publickey does not match") 97 | } 98 | if expectedCfg.Sender != cfg.Sender { 99 | t.Errorf("Sender does not match") 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /LogPulse/config/s.toml: -------------------------------------------------------------------------------- 1 | [Server] 2 | Host = "smtp.mailgun.org" 3 | Port = 25 4 | 5 | [User] 6 | UserName = "postmaster@clemsonopoly.com" 7 | PassWord = "Password" 8 | -------------------------------------------------------------------------------- /LogPulse/email/email.go: -------------------------------------------------------------------------------- 1 | // Package email will try and send an email using MailGun. 2 | // If we don't have the config for MailGun use the SMTP confg. 3 | // If we don't have that either save to the output file specified in config 4 | package email 5 | 6 | import ( 7 | "encoding/json" 8 | "fmt" 9 | "net/smtp" 10 | "os" 11 | "strconv" 12 | 13 | "github.com/gophergala2016/Pulse/LogPulse/config" 14 | "github.com/gophergala2016/Pulse/LogPulse/file" 15 | "github.com/mailgun/mailgun-go" 16 | ) 17 | 18 | const ( 19 | mailGunSend = iota 20 | smtpSend = iota 21 | jsonSend = iota 22 | ) 23 | 24 | // JSONAlert holds the message and body to send through email. 25 | type JSONAlert struct { 26 | Message string `json:"message"` 27 | Body string `json:"body"` 28 | } 29 | 30 | var ( 31 | emailOption = -1 32 | 33 | // ByPassMail is Whether or not we are using the email system. 34 | ByPassMail = false 35 | mGun *config.SecretConfig 36 | smtpConfig *config.SMTPConfig 37 | 38 | // EmailList is a list of emails to send messages to. 
39 | EmailList []string 40 | 41 | // OutputFile is the output file specified in the main config 42 | OutputFile string 43 | stringBuffer []string 44 | ) 45 | 46 | // initialize email service used for notifications 47 | // 1. MailGun 48 | // 2. SMTP package 49 | // 3. Send to JSON 50 | func init() { 51 | defer func() { 52 | if r := recover(); r != nil { 53 | fmt.Println(r) 54 | os.Exit(0) 55 | } 56 | }() 57 | 58 | if ByPassMail { 59 | emailOption = jsonSend 60 | return 61 | } 62 | val, err := config.Load() 63 | if err != nil { 64 | panic(fmt.Errorf("email.init: Failed to load Main config file")) 65 | } 66 | 67 | // Get values from the main config. 68 | EmailList = val.EmailList 69 | OutputFile = val.OutputFile 70 | 71 | mGun, err = config.LoadSecret() 72 | if err != nil { 73 | // Check smtp server 74 | smtpConfig, err = config.LoadSMTP() 75 | if err != nil { 76 | // Use JSON 77 | emailOption = jsonSend 78 | return 79 | } 80 | // Use SMTP 81 | emailOption = smtpSend 82 | return 83 | } 84 | // Use MailGun 85 | emailOption = mailGunSend 86 | } 87 | 88 | // SendFromCache sends email via MailGun, smtp server, or simply a JSON file but loads body from cache file. 89 | // Filename is the location of the cache file 90 | func SendFromCache(filename string) { 91 | fmt.Println("email.SendFromCache: Sending from Cache") 92 | var body string 93 | 94 | line := make(chan string) 95 | file.Read(filename, line) 96 | for l := range line { 97 | body += l + "\n" 98 | } 99 | 100 | Send(body) 101 | } 102 | 103 | // Send sends email via MailGun, smtp server, or simply a JSON file. 104 | func Send(message string) { 105 | fmt.Println("email.Send: Sending") 106 | switch emailOption { 107 | case mailGunSend: 108 | go fireMailGun(message) 109 | case smtpSend: 110 | go fireSMTPMessage(message) 111 | case jsonSend: 112 | fireJSONOutput(message) // We want lines sent in saved in the order they were sent in. 113 | } 114 | } 115 | 116 | //SaveToCache takes a string and saves it to file. 
117 | func SaveToCache(message string) { 118 | fireJSONOutput(message) // We want lines sent in saved in the order they were sent in. 119 | } 120 | 121 | // IsValid checks to see if the email that is passed in is a valid email or not. 122 | func IsValid(email string) bool { 123 | gun := mailgun.NewMailgun(mGun.Domain, mGun.PrivateKey, mGun.PublicKey) 124 | 125 | check, _ := gun.ValidateEmail(email) 126 | return check.IsValid 127 | } 128 | 129 | // fireMailGun uses MailGun API: thanks! for your service :) 130 | func fireMailGun(body string) { 131 | gun := mailgun.NewMailgun(mGun.Domain, mGun.PrivateKey, mGun.PublicKey) 132 | 133 | for _, email := range EmailList { // Get Addresses from PulseConfig 134 | if IsValid(email) { 135 | m := mailgun.NewMessage( 136 | fmt.Sprintf("LogPulse <%s>", mGun.Sender), 137 | "Alert! Found Anomaly in Log Files via LogPulse", 138 | body, 139 | fmt.Sprintf("Recipient <%s>", email)) 140 | 141 | response, id, _ := gun.Send(m) 142 | // TODO: for testing purpose will change later, maybe just fire goroutine 143 | fmt.Printf("Response ID: %s\n", id) 144 | fmt.Printf("Message from server: %s\n", response) 145 | } 146 | 147 | } 148 | 149 | } 150 | 151 | // fireSMTPMessage uses smtp client to fire an email based on config file settings. 152 | func fireSMTPMessage(body string) { 153 | 154 | auth := smtp.PlainAuth( 155 | "", // identity left blank because it will use UserName instead 156 | smtpConfig.User.UserName, 157 | smtpConfig.User.PassWord, 158 | smtpConfig.Server.Host, 159 | ) 160 | 161 | for _, email := range EmailList { // Get Addresses from PulseConfig 162 | 163 | to := []string{email} 164 | msg := []byte("To: " + email + ":\r\n" + 165 | "Subject: Alert! 
Found Anomaly in Log Files via LogPulse\r\n" + 166 | "\r\n" + 167 | body + "\r\n") 168 | 169 | err := smtp.SendMail( 170 | fmt.Sprintf("%s:%s", smtpConfig.Server.Host, strconv.Itoa(smtpConfig.Server.Port)), 171 | auth, 172 | smtpConfig.User.UserName, 173 | to, 174 | msg, 175 | ) 176 | if err != nil { 177 | fmt.Printf("fireSMTPMessage: Failed to send to %s\n", email) 178 | } 179 | } 180 | } 181 | 182 | // fireJSONOutput when all else fails... output body to JSON 183 | // Also used by chaching system. 184 | func fireJSONOutput(body string) { 185 | 186 | output := JSONAlert{"Alert! Found Anomaly in Log Files via LogPulse", body} 187 | val, err := json.Marshal(output) 188 | if err != nil { 189 | fmt.Println("email.fireJSONOutput: Failed to create JSON Alert") 190 | return 191 | } 192 | 193 | // Create a buffer of strings so we are not constantly opening and closing the file 194 | file.Write(OutputFile, string(val)) 195 | } 196 | -------------------------------------------------------------------------------- /LogPulse/file/TestData/ReadTest.txt: -------------------------------------------------------------------------------- 1 | This is a line. 2 | This is a new line. 3 | -------------------------------------------------------------------------------- /LogPulse/file/TestData/smallkern.log.2.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gophergala2016/Pulse/f1ab898087203a2abf93af9d2e2a9f8bf63743fa/LogPulse/file/TestData/smallkern.log.2.gz -------------------------------------------------------------------------------- /LogPulse/file/file.go: -------------------------------------------------------------------------------- 1 | // Package file is used for reading and writting to files. 2 | package file 3 | 4 | import ( 5 | "bufio" 6 | "compress/gzip" 7 | "fmt" 8 | "io" 9 | "mime/multipart" 10 | "os" 11 | ) 12 | 13 | // Read will read filename line by line and each line be returned to channel. 
// Read sends the content of filename to lineOut, one line per message, from a
// background goroutine. lineOut is closed when the file is exhausted.
//
// Errors cannot be returned to the caller from the goroutine, so a file that
// cannot be opened or scanned is reported on standard error and the channel is
// closed; the caller then simply sees a shorter (possibly empty) stream.
func Read(filename string, lineOut chan<- string) {
	go func() {
		defer close(lineOut)

		inFile, err := os.Open(filename)
		if err != nil {
			fmt.Fprintf(os.Stderr, "file.Read: %s\n", err)
			return
		}
		defer inFile.Close()

		scanner := bufio.NewScanner(inFile)
		for scanner.Scan() {
			lineOut <- scanner.Text()
		}
		// Scan also stops on errors such as an over-long line.
		if err := scanner.Err(); err != nil {
			fmt.Fprintf(os.Stderr, "file.Read: %s\n", err)
		}
	}()
}

// StreamRead sends the content of reader to lineOut, one line per message,
// from a background goroutine. Every line is also echoed to standard output.
// The end of the stream is announced with the literal message "EOF", after
// which lineOut is closed so that a range loop over it terminates as well.
func StreamRead(reader multipart.File, lineOut chan<- string) {
	go func() {
		defer close(lineOut)

		scanner := bufio.NewScanner(reader)
		for scanner.Scan() {
			val := scanner.Text()
			fmt.Println(val)
			lineOut <- val
		}
		// Scan also stops on errors such as an over-long line.
		if err := scanner.Err(); err != nil {
			fmt.Fprintf(os.Stderr, "file.StreamRead: %s\n", err)
		}
		lineOut <- "EOF"
	}()
}

// Write appends line followed by a newline to filename, creating the file
// with mode 0600 when it does not exist yet. It panics when the file cannot be
// opened or written.
func Write(filename string, line string) {
	outFile, err := os.OpenFile(filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0600)
	if err != nil {
		panic(fmt.Errorf("file.Write: %s", err))
	}
	defer outFile.Close()

	if _, err = outFile.WriteString(line + "\n"); err != nil {
		panic(fmt.Errorf("file.Write: %s", err))
	}
}

// UnGZip decompresses the gzip file filename, which must end in ".gz", into a
// file of the same name without that suffix. The output file is only created
// once the input has been recognised as gzip data, and it is removed again
// when decompression fails half way.
func UnGZip(filename string) error {
	const suffix = ".gz"
	if len(filename) <= len(suffix) || filename[len(filename)-len(suffix):] != suffix {
		return fmt.Errorf("file.UnGZip: %s does not end in %s", filename, suffix)
	}

	file, err := os.Open(filename)
	if err != nil {
		return fmt.Errorf("file.UnGZip: %s", err)
	}
	defer file.Close()

	r, err := gzip.NewReader(file)
	if err != nil {
		return fmt.Errorf("file.UnGZip: %s", err)
	}
	defer r.Close()

	newfile := filename[:len(filename)-len(suffix)]
	out, err := os.Create(newfile)
	if err != nil {
		return fmt.Errorf("file.UnGZip: %s", err)
	}

	_, err = io.Copy(out, r)
	// A failing Close can mean that the last bytes never reached the disk.
	if cerr := out.Close(); err == nil {
		err = cerr
	}
	if err != nil {
		os.Remove(newfile)
		return fmt.Errorf("file.UnGZip: %s", err)
	}
	return nil
}
import ( 4 | "io/ioutil" 5 | "os" 6 | "testing" 7 | 8 | . "github.com/gophergala2016/Pulse/LogPulse/file" 9 | ) 10 | 11 | func TestRead(t *testing.T) { 12 | filename := "TestData/ReadTest.txt" 13 | var fileArray []string 14 | expectedArray := []string{"This is a line.", "This is a new line."} 15 | line := make(chan string) 16 | Read(filename, line) 17 | 18 | for l := range line { 19 | fileArray = append(fileArray, l) 20 | } 21 | 22 | if len(expectedArray) != len(fileArray) { 23 | t.Errorf("File does not match expected.") 24 | t.Logf("Expected: %d", len(expectedArray)) 25 | t.Logf("Actual: %d", len(fileArray)) 26 | } 27 | 28 | for i, j := range expectedArray { 29 | if j != fileArray[i] { 30 | t.Errorf("File does not match expected.") 31 | t.Logf("Expected: %v", expectedArray) 32 | t.Logf("Actual: %v", fileArray) 33 | } 34 | } 35 | } 36 | 37 | func TestUnGZip(t *testing.T) { 38 | filenamegz := "TestData/smallkern.log.2.gz" 39 | filename := filenamegz[:len(filenamegz)-3] 40 | UnGZip(filenamegz) 41 | if _, err := os.Stat(filename); os.IsNotExist(err) { 42 | t.Errorf("Could not find unziped file") 43 | } 44 | 45 | if _, err := os.Stat(filename); os.IsNotExist(err) { 46 | t.Log("No file to remove") 47 | } 48 | if err := os.Remove(filename); err != nil { 49 | t.Log("Could not remove the file") 50 | } 51 | } 52 | 53 | func TestWrite(t *testing.T) { 54 | filename := "TestData/TestingFile.txt" 55 | string1 := "This is a line in the file" 56 | string2 := "This is another line in the file" 57 | 58 | Write(filename, string1) 59 | if _, err := os.Stat(filename); os.IsNotExist(err) { 60 | t.Errorf("Testing file was not created!") 61 | } 62 | 63 | Write(filename, string2) 64 | file, err := os.Open(filename) 65 | if err != nil { 66 | t.Errorf("%s", err) 67 | } 68 | fileBytes, err := ioutil.ReadAll(file) 69 | if err != nil { 70 | t.Errorf("%s", err) 71 | } 72 | expectedString := string1 + "\n" + string2 + "\n" 73 | if string(fileBytes) != expectedString { 74 | t.Errorf("Did not 
append string the way we expected") 75 | t.Logf("Expected: %s", expectedString) 76 | t.Logf("Actual: %s", string(fileBytes)) 77 | } 78 | 79 | file.Close() 80 | 81 | if _, err := os.Stat(filename); os.IsNotExist(err) { 82 | t.Log("No file to remove") 83 | } 84 | if err := os.Remove(filename); err != nil { 85 | t.Log("Could not remove the file") 86 | } 87 | } 88 | 89 | //TODO: StreamRead 90 | -------------------------------------------------------------------------------- /LogPulse/kern.log.2.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gophergala2016/Pulse/f1ab898087203a2abf93af9d2e2a9f8bf63743fa/LogPulse/kern.log.2.gz -------------------------------------------------------------------------------- /LogPulse/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "os" 7 | "os/signal" 8 | 9 | "github.com/gophergala2016/Pulse/LogPulse/api" 10 | "github.com/gophergala2016/Pulse/LogPulse/config" 11 | "github.com/gophergala2016/Pulse/LogPulse/email" 12 | "github.com/gophergala2016/Pulse/LogPulse/file" 13 | "github.com/gophergala2016/Pulse/pulse" 14 | ) 15 | 16 | var ( 17 | runAPI bool 18 | outputFile string 19 | buffStrings []string 20 | logList []string 21 | ) 22 | 23 | func init() { 24 | flag.BoolVar(&runAPI, "api", false, "Turn on API mode") 25 | flag.Parse() 26 | 27 | defer func() { 28 | if r := recover(); r != nil { 29 | fmt.Println(r) 30 | os.Exit(0) 31 | } 32 | }() 33 | 34 | cfg, err := config.Load() 35 | if err != nil { 36 | panic(fmt.Errorf("main.init: Could not load the config.\n %v", err)) 37 | } 38 | 39 | logList = cfg.LogList 40 | outputFile = cfg.OutputFile 41 | } 42 | 43 | func main() { 44 | defer func() { 45 | if r := recover(); r != nil { 46 | fmt.Println(r) 47 | os.Exit(0) 48 | } 49 | }() 50 | 51 | if len(flag.Args()) == 0 && !runAPI { 52 | if len(logList) == 0 { 53 | 
panic(fmt.Errorf("main.main: Must supply a list of log files in the config")) 54 | } 55 | startPulse(logList) 56 | } else if runAPI { 57 | startAPI() 58 | } else { 59 | startPulse(flag.Args()) 60 | } 61 | } 62 | 63 | func startAPI() { 64 | api.Start() 65 | } 66 | 67 | func startPulse(filenames []string) { 68 | checkList(filenames) 69 | stdIn := make(chan string) 70 | 71 | c := make(chan os.Signal, 1) 72 | signal.Notify(c, os.Interrupt) 73 | // On keyboard interrup cleanup the program 74 | go func() { 75 | for _ = range c { 76 | fmt.Println("Exiting for Keyboard Interupt") 77 | os.Exit(0) 78 | } 79 | }() 80 | 81 | pulse.Run(stdIn, email.Send) 82 | for _, filename := range filenames { 83 | line := make(chan string) 84 | file.Read(filename, line) 85 | for l := range line { 86 | stdIn <- l 87 | } 88 | } 89 | close(stdIn) 90 | } 91 | 92 | func checkList(filenames []string) { 93 | for i, filename := range filenames { 94 | if _, err := os.Stat(filename); os.IsNotExist(err) { 95 | panic(fmt.Errorf("main.checkList: %s", err)) 96 | } 97 | if len(filename) > 3 && filename[len(filename)-3:len(filename)] == ".gz" { 98 | if err := file.UnGZip(filename); err != nil { 99 | panic(fmt.Errorf("main.checkList: %s", err)) 100 | } 101 | if _, err := os.Stat(filename[:len(filename)-3]); os.IsNotExist(err) { 102 | panic(fmt.Errorf("main.checkList: %s", err)) 103 | } 104 | filenames[i] = filename[:len(filename)-3] 105 | } 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pulse 2 | [![GoDoc](https://godoc.org/github.com/gophergala2016/Pulse?status.svg)](https://godoc.org/github.com/gophergala2016/Pulse) 3 | [![Build Status](https://travis-ci.org/gophergala2016/Pulse.svg?branch=master)](https://travis-ci.org/gophergala2016/Pulse) 4 | 5 | ![Pulse](https://raw.githubusercontent.com/gophergala2016/Pulse/master/images/pulse_logo.png) 6 | 7 | Log 
pulse learns from your log files. It uses a machine learning algorithm that Michael Dropps came up with. It is a Go package that can be consumed and used by anyone. The package itself just reads lines of strings and returns what it thinks is out of place. That way when you are trying to find that error in your logs, you don't spend hours searching and looking. We have made a simple application around it to showcase its ability. 8 | 9 | The application is simple. If you run it with no flags or arguments it will read the `PulseConfig.toml` file and read the files listed there. If you include arguments but no flags then the arguments must be file paths to logs you want to read. Example: `LogPulse someFile.log anotherFile.log waitHereIsAnother.log`. 10 | 11 | LogPulse accepts one flag, `-api`. In API mode it accepts a file in the body of a request to an endpoint and runs the algorithm on it. It will email the user when it is done with all the anomalies it could find (we are using MailGun). If you want to run locally you can supply an SMTP config file (its location is set in `PulseConfig.toml` and it must be a toml file). This is where the credentials are kept so you are able to send emails locally. You could have the SMTP config file set up and run LogPulse without the `-api` flag and it would send emails as well. If no email option is set it will save all emails (subject and body) to the output file that is specified in the `PulseConfig.toml`. 12 | 13 | # Content 14 | - [As A Package](#as-a-package) 15 | - [Video Demonstration](https://youtu.be/KddVBH__ZHw) 16 | - [Install](#install) 17 | - [Running](#running) 18 | - [Pulse Config](#pulse-config) 19 | - [SMTP Config](#smtp-config) 20 | - [Team](#team) 21 | - [Support](mailto:dixonwille@gmail.com) 22 | 23 | ## As A Package 24 | To use the algorithm just import the package as such! 25 | 26 | `import "github.com/gophergala2016/Pulse/pulse"` 27 | 28 | This package exposes the `Run(chan string, func(string))` function.
You just need to create a channel that you are going to use. The input must be passed in on that channel line by line. The `func(string)` is a function that is called whenever an unusual string comes by. If the output is being written to a file, it is highly recommended to buffer a few strings before you write. Then, when you have read all strings, dump the rest of the buffer into the file. 29 | 30 | ## Install 31 | Installing is as simple as: 32 | 33 | `go get github.com/gophergala2016/Pulse/LogPulse` 34 | 35 | ## Running 36 | `go run main.go ` 37 | 38 | ### Pulse Config 39 | The `PulseConfig.toml` needs to be located in the same directory as your executable. The file should look similar to this: 40 | ``` 41 | LogList = [ 42 | "demoData/kern.log.1", 43 | "demoData/kern.log.2" 44 | ] 45 | 46 | EmailList = [ 47 | "someuser@example.org", 48 | "AnneConley@example.org", 49 | "WeAreAwesome@example.org" 50 | ] 51 | 52 | OutputFile = "PulseOut.txt" 53 | SMTPConfig = "SMTP.toml" 54 | 55 | Port = 8080 56 | ``` 57 | `LogList` is a list of strings. This is where the log files are located that you want pulse to read. 58 | 59 | 60 | `EmailList` is also a list of strings. This is everyone that you want to email when something is unusual. 61 | 62 | `OutputFile` is just a string. It is where the emails are written if you do not set up an SMTP server (i.e. you don't have an SMTP config file). 63 | 64 | `SMTPConfig` is the location of your SMTP credentials (explained below). 65 | 66 | `Port` is the port on which the API server will listen. 67 | 68 | ### SMTP Config 69 | The `SMTP.toml` can be anywhere you want it as long as the application can read the file. It holds all the information required to send email to the SMTP server.
It should look like: 70 | ``` 71 | [Server] 72 | Host = "smtp.server.com" 73 | Port = 25 74 | 75 | [User] 76 | UserName = "user@server.com" 77 | PassWord = "LovelyPassword" 78 | ``` 79 | `[Server]` is a table with `Host` and `Port` 80 | - `Host` is the address where the server is listening to receive emails to send. 81 | - `Port` is the port on which the server is listening. 82 | 83 | `[User]` is also a table but with `UserName` and `PassWord` 84 | - `UserName` is the email address from which the email is sent. 85 | - `PassWord` is the password for the user that is sending the email. 86 | 87 | ## Team 88 | - Michael Dropps [Github](https://github.com/michaeldropps) 89 | - Miguel Espinoza [Github](https://github.com/miguelespinoza) 90 | - Will Dixon [Github](https://github.com/dixonwille) [Email](mailto:dixonwille@gmail.com) 91 | -------------------------------------------------------------------------------- /images/pulse_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gophergala2016/Pulse/f1ab898087203a2abf93af9d2e2a9f8bf63743fa/images/pulse_logo.png -------------------------------------------------------------------------------- /pulse/doc.go: -------------------------------------------------------------------------------- 1 | /*Package pulse uses a pattern identification algorithm created by Michael Dropps. 2 | The custom machine-learning algorithm identifies patterns that it finds in strings. 3 | This is better to use than Levenshtein distance because the unique features of input strings are 4 | stored and available for future lookups. It is a more advanced approach than a simple distance comparison. 5 | The Levenshtein distance is used on first-pass comparisons to help the algorithm create initial patterns 6 | using the custom matrix-based algorithm. The inputs with the most similarities are compared first until 7 | a few patterns are in memory.
A custom hashing approach is used to hash the unique aspects of each pattern 8 | into a constrained array of maps. By using this map as a lookup table, we are easily able to detect 9 | any existing pattern that is likely to match the input. If this lookup does not present a pattern, we again 10 | fall back to the Levenshtein distance to compare the input against all unmatched inputs, just as in the beginning. 11 | By following this order, new patterns are created only when they are not good matches to existing patterns. 12 | It will create patterns it finds and compare incoming strings to them. 13 | If it is a close match, the current pattern will likely be altered to account for the new information. 14 | This algorithm is always learning new patterns and revising existing patterns, according to the input. 15 | If a string doesn't match any pattern, it is put into an unmatched state. 16 | Unmatched strings that remain unmatched after a certain period of time are reported 17 | as an anomaly and are sent to the user using the function supplied by the consuming routine at startup. 18 | Using this method, the user can do anything they want with the anomalies found.
// outputFunc is the callback invoked with every line reported as an anomaly.
type outputFunc func(string)

// unmatchedLog is an input line that has not been folded into any pattern,
// together with when it was stored and whether it was already reported.
type unmatchedLog struct {
	line       string
	dateStored time.Time
	reported   bool
}

type revision struct {
	tokenPtr   *token
	tokenIndex int
	variations *[]variation
	text       string
}

// variation is one concrete text seen in a wildcard position and how often.
type variation struct {
	text       string
	numMatches int64
}

// token is one element of a pattern: either a literal word or a wildcard
// (variable == true) carrying the variations observed in that position.
type token struct {
	word       string
	variable   bool
	required   bool
	variations []variation
}

// pattern is a learned sequence of literal and wildcard tokens.
type pattern struct {
	tokens     []token
	numMatches int64
}

// vertex marks a cell (x, y) of the comparison matrix where the x-th short
// token equals the y-th long token. startsSequenceOfLength is the length of
// the run of consecutive matches recorded for that cell.
type vertex struct {
	x                      int
	y                      int
	startsSequenceOfLength int
}

// vertexDistance pairs a distance with the index of the vertex it refers to.
type vertexDistance struct {
	distance int
	index    int
}

// distArray sorts vertexDistance values by ascending distance.
type distArray []vertexDistance

// Channel to receive log data from consuming application
var input <-chan string
var report outputFunc
var patternCreationRate float64
var patternCreationRateIncreasing bool
var inputsSinceLastNewPattern int64
var lastPatternCount int
var unmatched []unmatchedLog
var patterns []pattern

const tokenMapSize int = 2048

// tokenMap is a fixed-size hash table from token bucket to the set of
// patterns containing a literal token that hashes to that bucket.
var tokenMap [tokenMapSize]map[*pattern]bool

func (s distArray) Len() int           { return len(s) }
func (s distArray) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
func (s distArray) Less(i, j int) bool { return s[i].distance < s[j].distance }

// max returns the larger of a and b.
func max(a, b int) int {
	if a < b {
		return b
	}
	return a
}

// initTokenMap sets the initial state of the token map, which is used to
// look up existing patterns. Every bucket is replaced by an empty set.
func initTokenMap() {
	for i := 0; i < tokenMapSize; i++ {
		tokenMap[i] = make(map[*pattern]bool)
	}
}

// getTokens converts a string into a slice of strings. Every rune that is
// not a letter, number or whitespace becomes its own element; contiguous runs
// of letters and numbers become one element each. Whitespace is excluded.
func getTokens(value string) []string {
	var buffer []rune
	var result []string
	for _, r := range value {
		switch {
		case unicode.IsSpace(r):
			if len(buffer) > 0 {
				result = append(result, string(buffer))
			}
			buffer = nil
		case unicode.IsLetter(r) || unicode.IsNumber(r):
			// unicode.IsNumber already covers every unicode.IsDigit rune.
			buffer = append(buffer, r)
		default:
			if len(buffer) > 0 {
				result = append(result, string(buffer))
				buffer = nil
			}
			result = append(result, string(r))
		}
	}
	// Flush a word that runs to the end of the input.
	if len(buffer) > 0 {
		result = append(result, string(buffer))
	}
	return result
}

// addUpdateVertex adds newValue to list, or, if a vertex with the same
// coordinates is already present, overwrites its startsSequenceOfLength.
// The (possibly grown) list is returned.
func addUpdateVertex(newValue vertex, list []vertex) []vertex {
	for i := range list {
		if newValue.x == list[i].x && newValue.y == list[i].y {
			list[i].startsSequenceOfLength = newValue.startsSequenceOfLength
			return list
		}
	}
	return append(list, newValue)
}

// getNextVertex locates the vertex in vertices that is closest to value
// among those with x and y not smaller than value's. Preferential treatment
// is given to the second-closest vertex when it is at most 3 further away
// and starts a longer sequence of shared tokens. The boolean result is false
// when no candidate exists.
func getNextVertex(value vertex, vertices []vertex) (bool, vertex) {
	var distances []vertexDistance
	for i := range vertices {
		v := vertices[i]
		if v.x < value.x || v.y < value.y {
			continue
		}
		distances = append(distances, vertexDistance{(v.x - value.x) + (v.y - value.y), i})
	}

	if len(distances) == 0 {
		return false, vertex{0, 0, 0}
	}

	sort.Sort(distArray(distances))

	nextVertex := vertices[distances[0].index]
	if len(distances) > 1 {
		runnerUp := distances[1]
		difference := runnerUp.distance - distances[0].distance
		if difference <= 3 && vertices[runnerUp.index].startsSequenceOfLength > nextVertex.startsSequenceOfLength {
			nextVertex = vertices[runnerUp.index]
		}
	}

	return true, nextVertex
}

// removeVertexFromList removes the first vertex with val's coordinates from
// vertices and returns the updated list. The removal shifts elements within
// the slice's existing backing array.
func removeVertexFromList(val vertex, vertices []vertex) []vertex {
	for i := range vertices {
		if vertices[i].x == val.x && vertices[i].y == val.y {
			return append(vertices[:i], vertices[i+1:]...)
		}
	}
	return vertices
}

// analyzeMatrix walks the matching vertices in the order they appear in both
// token lists and returns them as an ordered chain. The boolean result is
// true when the chain is longer than half the number of matrix columns
// (integer division). An empty matrix yields (false, nil).
func analyzeMatrix(matrix [][]int, vertices []vertex) (bool, []vertex) {
	// Nothing to compare when either token list was empty; without this
	// guard the matrix[0][0] access below panics.
	if len(matrix) == 0 || len(matrix[0]) == 0 {
		return false, nil
	}

	// start with {0, 0}
	var tokens []vertex
	if matrix[0][0] > 0 {
		tokens = append(tokens, vertices[0])
		vertices = removeVertexFromList(vertices[0], vertices)
	}
	start := vertex{0, 0, 0}
	foundNextPoint, nextPoint := getNextVertex(start, vertices)
	for foundNextPoint {
		tokens = append(tokens, nextPoint)
		vertices = removeVertexFromList(nextPoint, vertices)
		foundNextPoint, nextPoint = getNextVertex(nextPoint, vertices)
	}
	return float64(len(tokens)) > float64(len(matrix[0])/2), tokens
}

// tokenBucket hashes word to its tokenMap bucket: the sum of its rune values
// modulo tokenMapSize.
func tokenBucket(word string) int {
	sum := 0
	for _, r := range word {
		sum += int(r)
	}
	return sum % tokenMapSize
}

// updateTokenMap registers ref under the bucket of every literal
// (non-variable) token in words so those words can later locate the pattern.
func updateTokenMap(words []token, ref *pattern) {
	for i := range words {
		if words[i].variable {
			continue
		}
		bucket := tokenBucket(words[i].word)
		// Tolerate use before initTokenMap: writing to a nil map panics.
		if tokenMap[bucket] == nil {
			tokenMap[bucket] = make(map[*pattern]bool)
		}
		tokenMap[bucket][ref] = true
	}
}

// patternsFromToken returns all patterns registered in the token map under
// word's bucket.
func patternsFromToken(word string) []*pattern {
	pm := tokenMap[tokenBucket(word)]
	keys := make([]*pattern, 0, len(pm))
	for k, present := range pm {
		if present {
			keys = append(keys, k)
		}
	}
	return keys
}

// buildMatchMatrix compares every short token with every long token. Cell
// [i][j] holds 0 when the tokens differ, otherwise 1 plus the number of
// matching token pairs found walking diagonally backwards from the cell.
// Every matching cell visited is recorded (or updated) in the returned
// vertex list.
func buildMatchMatrix(shortTokens, longTokens []string) ([][]int, []vertex) {
	var vertices []vertex
	matrix := make([][]int, len(shortTokens))
	for i := range shortTokens {
		matrix[i] = make([]int, len(longTokens))
		for j := range matrix[i] {
			matches := 0
			if shortTokens[i] == longTokens[j] {
				matches++
				vertices = addUpdateVertex(vertex{i, j, matches}, vertices)
				prevRow := j - 1
				prevCol := i - 1
				// NOTE(review): the walk stops before index 0 on either
				// axis; confirm whether row/column 0 is excluded on purpose.
				for prevRow > 0 && prevCol > 0 {
					if shortTokens[prevCol] != longTokens[prevRow] {
						break
					}
					matches++
					vertices = addUpdateVertex(vertex{prevCol, prevRow, matches}, vertices)
					prevRow--
					prevCol--
				}
			}
			matrix[i][j] = matches
		}
	}
	return matrix, vertices
}

// buildTokens turns an ordered chain of matching vertices into a token
// sequence. Adjacent matches become literal tokens; a gap between matches
// becomes a "!WILDCARD!" token (carrying the skipped text from either side as
// variations) followed by the literal token. When ignoreWildcardText is true,
// skipped short-side text that is exactly "!WILDCARD!" is not recorded as a
// variation.
func buildTokens(shortTokens, longTokens []string, vertices []vertex, ignoreWildcardText bool) []token {
	var tokens []token
	lastPoint := vertex{-1, -1, 0}
	for i := range vertices {
		cur := vertices[i]
		skippedBeginning := i == 0 && cur.x != 0 && cur.y != 0
		distance := (cur.x - lastPoint.x) + (cur.y - lastPoint.y)
		if distance <= 2 && !skippedBeginning {
			lastPoint = cur
			tokens = append(tokens, token{shortTokens[lastPoint.x], false, true, nil})
			continue
		}

		xDiff := cur.x - lastPoint.x
		yDiff := cur.y - lastPoint.y
		skippedColText := ""
		skippedRowText := ""
		if xDiff > 1 {
			for _, s := range shortTokens[lastPoint.x+1 : cur.x] {
				skippedColText += s
			}
			if ignoreWildcardText && skippedColText == "!WILDCARD!" {
				skippedColText = ""
			}
		}
		if yDiff > 1 {
			for _, s := range longTokens[lastPoint.y+1 : cur.y] {
				skippedRowText += s
			}
		}

		var variableText []variation
		if skippedColText != "" {
			variableText = append(variableText, variation{skippedColText, 1})
		}
		if skippedRowText != "" {
			variableText = append(variableText, variation{skippedRowText, 1})
		}
		lastPoint = cur
		// add wildcard token to sequence
		tokens = append(tokens, token{"!WILDCARD!", true, len(variableText) > 1, variableText})
		// add static token to sequence
		tokens = append(tokens, token{shortTokens[lastPoint.x], false, true, nil})
	}
	return tokens
}

// matchPattern matches a pattern against a new input (longTokens), revising
// the pattern's tokens in place when the derived token sequence is not longer
// than the pattern. It returns true when the input is considered a match.
// The input parameter is unused.
func matchPattern(pat pattern, longTokens []string, input string) bool {
	shortTokens := make([]string, 0, len(pat.tokens))
	for i := range pat.tokens {
		shortTokens = append(shortTokens, pat.tokens[i].word)
	}

	matrix, vertices := buildMatchMatrix(shortTokens, longTokens)
	foundPattern, vertices := analyzeMatrix(matrix, vertices)
	if !foundPattern {
		return false
	}

	var newPattern pattern
	newPattern.tokens = buildTokens(shortTokens, longTokens, vertices, true)

	if len(newPattern.tokens) > len(pat.tokens) {
		// determine how close the patterns are
		diff := math.Abs(float64(len(pat.tokens)) - float64(len(newPattern.tokens)))
		maxLength := float64(max(len(pat.tokens), len(newPattern.tokens)))
		// Below the threshold a match was made between the pattern and the
		// input, but the length of tokens is too far off.
		return ((maxLength - diff) / maxLength) >= 0.90
	}

	for i := range newPattern.tokens {
		originalToken := pat.tokens[i]
		newToken := newPattern.tokens[i]
		var newText string
		if newToken.variable && len(newToken.variations) == 1 {
			newText = newToken.variations[0].text
		}

		if originalToken.variable && newToken.variable {
			accountedForNewValue := false
			for j := range originalToken.variations {
				if originalToken.variations[j].text == newText {
					originalToken.variations[j].numMatches++
					accountedForNewValue = true
				}
			}
			if !accountedForNewValue {
				originalToken.variations = append(originalToken.variations, variation{newText, 1})
			}
		} else if newToken.variable && !originalToken.variable {
			originalToken.word = "!WILDCARD!"
			originalToken.variable = true
			for j := range newToken.variations {
				originalToken.variations = append(originalToken.variations, variation{newToken.variations[j].text, 1})
			}
		}

		// pat is a copy, but its tokens slice shares the caller's backing
		// array, so this revision is visible through the caller's pattern.
		pat.tokens[i] = originalToken
		// NOTE(review): pat is passed by value, so this counter update is
		// lost on return; confirm whether a *pattern was intended.
		pat.numMatches++
	}

	return true
}

// findPattern looks for a pattern between two tokenized inputs and learns it
// (appends it to patterns, registers it in the token map and updates the
// pattern creation statistics) if the threshold in analyzeMatrix is reached.
// It returns whether a new pattern was learned.
func findPattern(shortTokens []string, longTokens []string) bool {
	matrix, vertices := buildMatchMatrix(shortTokens, longTokens)
	foundPattern, vertices := analyzeMatrix(matrix, vertices)
	if !foundPattern {
		return false
	}

	var p pattern
	p.tokens = buildTokens(shortTokens, longTokens, vertices, false)
	p.numMatches = 1
	patterns = append(patterns, p)

	// NOTE(review): the token map references p itself, not the copy stored
	// in patterns; the two share only the tokens backing array.
	updateTokenMap(p.tokens, &p)

	numPatterns := len(patterns)
	rate := 1.0 / float64(inputsSinceLastNewPattern)
	newAvgRate := ((float64(numPatterns) * patternCreationRate) + rate) / float64(numPatterns+1)
	patternCreationRateIncreasing = newAvgRate > patternCreationRate
	patternCreationRate = newAvgRate

	inputsSinceLastNewPattern = 0
	lastPatternCount = numPatterns

	return true
}

to find a string in a slice 477 | func indexOfWord(value string, words []string) int { 478 | for i := range words { 479 | if words[i] == value { 480 | return i 481 | } 482 | } 483 | return -1 484 | } 485 | 486 | func indexOfWordInVariations(value string, words []variation) int { 487 | for i := range words { 488 | if words[i].text == value { 489 | return i 490 | } 491 | } 492 | return -1 493 | } 494 | 495 | func reportAnomaly(line string) { 496 | fmt.Printf("\nPattern count: %v\n", len(patterns)) 497 | 498 | if (!patternCreationRateIncreasing || patternCreationRate <= 0.20) && (len(patterns) != 0) { 499 | fmt.Printf("\nReporting anomaly...%v\n", line) 500 | report(line) 501 | } 502 | } 503 | 504 | func analyze(line string) { 505 | index := -1 506 | maxScore := 0.0 507 | patternFound := false 508 | inputsSinceLastNewPattern++ 509 | 510 | if len(patterns) == lastPatternCount { 511 | patternCreationRate = patternCreationRate * 0.99 512 | } 513 | 514 | //search for existing pattern using token map 515 | var tokenMatches = make(map[*pattern]int) 516 | var lineTokens = getTokens(line) 517 | for i := range lineTokens { 518 | var patterns = patternsFromToken(lineTokens[i]) 519 | for j := range patterns { 520 | var p = patterns[j] 521 | tokenMatches[p] = tokenMatches[p] + 1 522 | } 523 | } 524 | 525 | var mostLikelyPattern *pattern 526 | var tokensInCommon int 527 | 528 | for k := range tokenMatches { 529 | if tokenMatches[k] > tokensInCommon { 530 | tokensInCommon = tokenMatches[k] 531 | mostLikelyPattern = k 532 | } 533 | } 534 | 535 | if float64(tokensInCommon)/float64(len(lineTokens)) >= 0.5 { 536 | patternFound = matchPattern(*mostLikelyPattern, lineTokens, line) 537 | } 538 | 539 | //if no pattern found, compare to unmatched lines, see if a new pattern can be detected 540 | if !patternFound { 541 | for i := range unmatched { 542 | var compare = unmatched[i].line 543 | var distance = ld(line, compare) 544 | var timeUnmatched = 
time.Since(unmatched[i].dateStored).Seconds() 545 | if timeUnmatched > 30.0 && !unmatched[i].reported { 546 | reportAnomaly(unmatched[i].line) 547 | unmatched[i].reported = true 548 | } 549 | var maxLength = max(len(line), len(compare)) 550 | var score = float64(maxLength-distance) / float64(maxLength) 551 | if score > maxScore { 552 | maxScore = score 553 | index = i 554 | } 555 | } 556 | 557 | if maxScore >= 0.5 { 558 | var unmatchedTokens = getTokens(unmatched[index].line) 559 | if len(lineTokens) < len(unmatchedTokens) { 560 | patternFound = findPattern(lineTokens, unmatchedTokens) 561 | } else { 562 | patternFound = findPattern(unmatchedTokens, lineTokens) 563 | } 564 | } 565 | 566 | if !patternFound { 567 | unmatched = append(unmatched, unmatchedLog{line, time.Now(), true}) 568 | reportAnomaly(line) 569 | } else { //remove unmatched line from unmatched slice 570 | unmatched = append(unmatched[:index], unmatched[index+1:]...) 571 | } 572 | } 573 | } 574 | 575 | //Levenshtein distance algorithm Copied from http://rosettacode.org/wiki/Levenshtein_distance#Go 576 | func ld(s, t string) int { 577 | d := make([][]int, len(s)+1) 578 | for i := range d { 579 | d[i] = make([]int, len(t)+1) 580 | } 581 | for i := range d { 582 | d[i][0] = i 583 | } 584 | for j := range d[0] { 585 | d[0][j] = j 586 | } 587 | for j := 1; j <= len(t); j++ { 588 | for i := 1; i <= len(s); i++ { 589 | if s[i-1] == t[j-1] { 590 | d[i][j] = d[i-1][j-1] 591 | } else { 592 | min := d[i-1][j] 593 | if d[i][j-1] < min { 594 | min = d[i][j-1] 595 | } 596 | if d[i-1][j-1] < min { 597 | min = d[i-1][j-1] 598 | } 599 | d[i][j] = min + 1 600 | } 601 | } 602 | 603 | } 604 | return d[len(s)][len(t)] 605 | } 606 | 607 | //Run starts the pulse package 608 | func Run(in <-chan string, out outputFunc) { 609 | input = in 610 | report = out 611 | initTokenMap() 612 | go func() { 613 | for value := range in { 614 | analyze(value) 615 | } 616 | }() 617 | } 618 | 
--------------------------------------------------------------------------------