├── .gitignore ├── logtailer_test.go ├── environment.go ├── profiles ├── helpers │ └── mapre.go ├── profiles.go ├── sshd │ ├── authorized_keys.go │ ├── authorized_keys_test.go │ ├── key_mapping.go │ └── sshd.go ├── dummy │ └── dummy.go └── mongodb │ ├── mongodb.go │ └── mongodb_test.go ├── stats.go ├── LICENSE ├── PATENTS ├── README.md ├── cmd └── logtailer │ └── main.go └── logtailer.go /.gitignore: -------------------------------------------------------------------------------- 1 | cmd/logtailer/logtailer 2 | -------------------------------------------------------------------------------- /logtailer_test.go: -------------------------------------------------------------------------------- 1 | package logtailer 2 | 3 | import ( 4 | "fmt" 5 | "io/ioutil" 6 | "log" 7 | "os" 8 | 9 | "github.com/ParsePlatform/logtailer/profiles/dummy" 10 | ) 11 | 12 | func ExampleNewLogtailer() { 13 | tmpFile, _ := ioutil.TempFile("", "") 14 | defer os.Remove(tmpFile.Name()) 15 | 16 | logger := log.New(os.Stderr, "logtailer", log.LstdFlags) 17 | tailer := NewLogtailer(&dummy.DummyProfile{}, tmpFile.Name(), "/tmp/", logger) 18 | stats, _ := tailer.Run(1) 19 | fmt.Println(stats) 20 | // output: 21 | // {"Records":0,"ParseErrors":0,"SendErrors":0} 22 | } 23 | -------------------------------------------------------------------------------- /environment.go: -------------------------------------------------------------------------------- 1 | package logtailer 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "os/exec" 7 | ) 8 | 9 | // logtailBinary is the name of the binary that will be invoked to fetch new log 10 | // lines. 11 | const logtailBinary = "logtail2" 12 | 13 | // PrepEnvironment ensures the specified log file and state directories exist. 
14 | func (lt *Logtailer) PrepEnvironment() error { 15 | if lt.LogFile == "-" { 16 | return nil 17 | } 18 | _, err := os.Stat(lt.LogFile) 19 | if err != nil { 20 | return err 21 | } 22 | os.MkdirAll(lt.StateDir, 0755) // 0755: directories need the execute bit to be traversable (0644 would leave the state dir unusable); creation failure surfaces via the Stat below 23 | _, err = os.Stat(lt.StateDir) 24 | if err != nil { 25 | return err 26 | } 27 | if _, err = exec.LookPath(logtailBinary); err != nil { 28 | return fmt.Errorf("could not find %s on PATH: %v", logtailBinary, err) 29 | } 30 | return nil 31 | } 32 | -------------------------------------------------------------------------------- /profiles/helpers/mapre.go: -------------------------------------------------------------------------------- 1 | package helpers 2 | 3 | import ( 4 | "fmt" 5 | "regexp" 6 | ) 7 | 8 | // MapRe attempts to populate a map from a re and string. If an occurrence is 9 | // found the first one will be used to populate a new map. 10 | func MapRe(re *regexp.Regexp, line string) (map[string]string, error) { 11 | fields := re.SubexpNames() 12 | match := re.FindStringSubmatch(line) 13 | if match == nil { 14 | return nil, fmt.Errorf("regex match fail: %s", line) 15 | } 16 | if len(match) != len(fields) { 17 | return nil, fmt.Errorf("expected len %d, got %d", len(fields), len(match)) 18 | } 19 | // construct a map of field names to values 20 | values := make(map[string]string, len(fields)) 21 | for i, field := range fields { 22 | values[field] = match[i] // submatches are already strings; the string() conversion was redundant 23 | } 24 | return values, nil 25 | } 26 | -------------------------------------------------------------------------------- /profiles/profiles.go: -------------------------------------------------------------------------------- 1 | // Package profiles describes the logtailer Profile interface and provides a 2 | // simple registry 3 | package profiles 4 | 5 | // A Profile is a log consumer. 6 | type Profile interface { 7 | // The unique identifier for the profile.
8 | Name() string 9 | // ProcessRecord processes a single input line from the input log file and returns 10 | // the parsed result 11 | ProcessRecord(record string) (result interface{}, err error) 12 | // HandleOutput is called for each result returned by ProcessRecord 13 | // This is where you should direct output (stdout, some http API, etc) 14 | HandleOutput(records <-chan interface{}, dryRun bool) (errors <-chan error) 15 | 16 | // Init is called just before parsing begins. This is where any setup specific 17 | // to your profile should go 18 | Init() error 19 | } 20 | -------------------------------------------------------------------------------- /stats.go: -------------------------------------------------------------------------------- 1 | package logtailer 2 | 3 | import ( 4 | "encoding/json" 5 | "sync" 6 | ) 7 | 8 | // Stats holds basic metrics about the logtailer run. 9 | type Stats struct { 10 | Records int 11 | ParseErrors int 12 | SendErrors int 13 | sync.Mutex 14 | } 15 | 16 | // what percentage of records must be parsed successfully to be considered healthy. 17 | var acceptableParseFailRatio = 0.1 18 | 19 | // IsHealthy returns true if the stats appear healthy. 
20 | func (s *Stats) IsHealthy() bool { 21 | if s.SendErrors > 0 { 22 | return false 23 | } 24 | 25 | if s.Records > 0 && s.ParseErrors > 0 { 26 | if float64(s.ParseErrors)/float64(s.Records) > acceptableParseFailRatio { 27 | return false 28 | } 29 | } 30 | return true 31 | } 32 | 33 | func (s *Stats) String() string { 34 | buf, _ := json.Marshal(s) 35 | return string(buf) 36 | } 37 | -------------------------------------------------------------------------------- /profiles/sshd/authorized_keys.go: -------------------------------------------------------------------------------- 1 | package sshd 2 | 3 | import ( 4 | "crypto/md5" 5 | "fmt" 6 | 7 | "golang.org/x/crypto/ssh" 8 | ) 9 | 10 | // AuthorizedKey represents an ssh public key 11 | type AuthorizedKey struct { 12 | ssh.PublicKey 13 | Comment string 14 | } 15 | 16 | // ParseAuthorizedKey attempts to parse an ssh public key 17 | func ParseAuthorizedKey(in []byte) (*AuthorizedKey, error) { 18 | key, comment, _, _, err := ssh.ParseAuthorizedKey(in) 19 | if err != nil { 20 | return nil, err 21 | } 22 | return &AuthorizedKey{key, comment}, nil 23 | } 24 | 25 | // Fingerprint implements the RFC4716 key fingerprint for ssh keys 26 | func (k *AuthorizedKey) Fingerprint() []byte { 27 | result := make([]byte, 0, 48) 28 | for i, octet := range md5.Sum(k.Marshal()) { 29 | if i != 0 { 30 | result = append(result, ':') 31 | } 32 | result = append(result, []byte(fmt.Sprintf("%02x", octet))...) 
33 | } 34 | return result 35 | } 36 | -------------------------------------------------------------------------------- /profiles/sshd/authorized_keys_test.go: -------------------------------------------------------------------------------- 1 | package sshd 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/facebookgo/ensure" 7 | ) 8 | 9 | func TestKeyParsing(t *testing.T) { 10 | k, err := ParseAuthorizedKey([]byte(`ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDAVlmAmXcn+mbc0wmWwz52AqSXde7BWkzLhWSrmY+49aZt6chkjYtDz/mTWrTHvJm4kI8SNj4UxmyS8VtofjsE8G5E6E/gVjOtd9q+9Xuv9TdLRjaQPUuXkW+MT+Y1sjShu8e6FzjN1j6IE+z5kYSfB3D96OqVxujof+Oda1ZwDpYO7CyUnna8W169KlJx6miH+uBfICiEHYcH8lt1ATIspcmWUruqc9E827hzroBOgWtInqy7rDZ9ni6S7zcoVxY5NxdvymZPQ1M7jkfy3D+UQmKjelMfC2qqTEn58p234/1RHxI/bSt1UVO3+PSwjr48KsXr1TmJxsbaVdgyDFKCnqRUETM1/q63ceLt06rEueIM3JQq7Yz3CmzlHi6UVOjLb7GFvT0inXihsIYSq5pE3DJv6Lpi/5me1yTuNzJuxXJITnxFaldFgyNzoS/2+0KXxNTh0BSsEXFogy2NLv2/PVo49wqheD2xcfA7+mk9y4qhl1bF3Menyg6ZiPZ9TV1zLEmaSmKBLoOLObG2akPgeshKnG9u4VvA8mqa2NXi7AQka8oqaJGgoFDNoWFsgjhbzKw3tcWWKDD9xjM+jPsEKnr7Dg9c3pKppetQ4YZ81JaM72ZJS1z4nrfeEv+hKuQnDvCrf7Pmh/WWCphKw4/uvNHWrmPPsCnm5JOMrduU8Q== test@fb.com`)) 11 | ensure.Nil(t, err) 12 | ensure.DeepEqual(t, string(k.Fingerprint()), "b5:ca:16:03:d4:10:41:80:3d:bc:3b:18:05:57:4f:56") 13 | } 14 | -------------------------------------------------------------------------------- /profiles/sshd/key_mapping.go: -------------------------------------------------------------------------------- 1 | package sshd 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "flag" 7 | "io/ioutil" 8 | "log" 9 | "strings" 10 | ) 11 | 12 | var authorizedKeyPath = flag.String("authorized_keys_path", "/home/ubuntu/.ssh/authorized_keys", "path to authorized keys path to provide fingerprint to user mapping") 13 | 14 | func populateKeyMapping() map[string]string { 15 | result := make(map[string]string) 16 | 17 | f, err := ioutil.ReadFile(*authorizedKeyPath) 18 | if err != nil { 19 | log.Println("logtailer.sshd error 
reading authorized_keys:", err) 20 | return result 21 | } 22 | scanner := bufio.NewScanner(bytes.NewBuffer(f)) 23 | 24 | for scanner.Scan() { 25 | keyLine := scanner.Bytes() 26 | if len(keyLine) == 0 { 27 | continue 28 | } 29 | 30 | key, err := ParseAuthorizedKey(keyLine) 31 | if err != nil { 32 | log.Println("error parsing authorized key line:", string(keyLine), "error:", err) 33 | continue 34 | } 35 | 36 | username := strings.Split(key.Comment, "@")[0] 37 | result[string(key.Fingerprint())] = username 38 | } 39 | return result 40 | } 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD License 2 | 3 | For logtailer software 4 | 5 | Copyright (c) 2016, Facebook, Inc. All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without modification, 8 | are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | * Neither the name Facebook nor the names of its contributors may be used to 18 | endorse or promote products derived from this software without specific 19 | prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 22 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 28 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /profiles/dummy/dummy.go: -------------------------------------------------------------------------------- 1 | // Package dummy implements a dummy skeleton logtailer profile for demonstration 2 | // purposes. 3 | // 4 | // This profile does not modify input lines and simply prints them to stdout. 5 | package dummy 6 | 7 | import "fmt" 8 | 9 | // DummyProfile provides a stripped down example of how to write a logtailer profile. 10 | type DummyProfile struct{} 11 | 12 | // Init does nothing in the dummy profile, but is here to satisfy the interface 13 | func (p *DummyProfile) Init() error { 14 | return nil 15 | } 16 | 17 | // Name returns the name of the profile and must be unique amongst registered. 18 | // profiles 19 | func (p *DummyProfile) Name() string { 20 | return "dummy" 21 | } 22 | 23 | // ProcessRecord is invoked for every input log line. It returns a transformed. 24 | // line or an error 25 | func (p *DummyProfile) ProcessRecord(record string) (interface{}, error) { 26 | return []byte(record), nil 27 | } 28 | 29 | // HandleOutput recieves a channel of input lines and a flag of whether or not 30 | // this is a dry run being invoked (to avoid side-effects). 31 | // 32 | // The return value is a channel of errors. parse.com/logtailer keeps track of 33 | // the number of errors and exits non-zero if they are over a threshold. 
34 | func (p *DummyProfile) HandleOutput(records <-chan interface{}, dryRun bool) <-chan error { 35 | 36 | // Set up the error channel. 37 | errChan := make(chan error) 38 | 39 | // Launch the consumption goroutine. 40 | go func() { 41 | 42 | // Close errChan when this goroutine finishes to signal being done. 43 | defer close(errChan) 44 | 45 | // Consume lines input channel until it is closed (if consuming stdin), 46 | // this is potentially never. 47 | for record := range records { 48 | line, ok := record.([]byte) 49 | if !ok { 50 | errChan <- fmt.Errorf("Unexpected output record type: %T", record) // %T prints the value's dynamic type; %t is the boolean verb 51 | continue 52 | } 53 | fmt.Println(string(line)) 54 | } 55 | }() 56 | 57 | return errChan 58 | } 59 | -------------------------------------------------------------------------------- /PATENTS: -------------------------------------------------------------------------------- 1 | Additional Grant of Patent Rights Version 2 2 | 3 | "Software" means the logtailer software distributed by Facebook, Inc. 4 | 5 | Facebook, Inc. ("Facebook") hereby grants to each recipient of the Software 6 | ("you") a perpetual, worldwide, royalty-free, non-exclusive, irrevocable 7 | (subject to the termination provision below) license under any Necessary 8 | Claims, to make, have made, use, sell, offer to sell, import, and otherwise 9 | transfer the Software. For avoidance of doubt, no license is granted under 10 | Facebook’s rights in any patent claims that are infringed by (i) modifications 11 | to the Software made by you or any third party or (ii) the Software in 12 | combination with any software or other technology.
13 | 14 | The license granted hereunder will terminate, automatically and without notice, 15 | if you (or any of your subsidiaries, corporate affiliates or agents) initiate 16 | directly or indirectly, or take a direct financial interest in, any Patent 17 | Assertion: (i) against Facebook or any of its subsidiaries or corporate 18 | affiliates, (ii) against any party if such Patent Assertion arises in whole or 19 | in part from any software, technology, product or service of Facebook or any of 20 | its subsidiaries or corporate affiliates, or (iii) against any party relating 21 | to the Software. Notwithstanding the foregoing, if Facebook or any of its 22 | subsidiaries or corporate affiliates files a lawsuit alleging patent 23 | infringement against you in the first instance, and you respond by filing a 24 | patent infringement counterclaim in that lawsuit against that party that is 25 | unrelated to the Software, the license granted hereunder will not terminate 26 | under section (i) of this paragraph due to such counterclaim. 27 | 28 | A "Necessary Claim" is a claim of a patent owned by Facebook that is 29 | necessarily infringed by the Software standing alone. 30 | 31 | A "Patent Assertion" is any lawsuit or other action alleging direct, indirect, 32 | or contributory infringement or inducement to infringe any patent, including a 33 | cross-claim or counterclaim. 34 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # logtailer 2 | 3 | ## Summary 4 | 5 | A simple log tailer written in go. Originally written by Parse to consume production log data of various formats and feed it into Facebook's analytics systems for day-to-day operations. logtailer uses a modular approach to consuming logs and directing output. To support new log types or change existing behavior, simply implement the Profile interface to suit your needs. 
The reference implementations in this release consume logs directly and output parsed lines to stdout. 6 | 7 | Reference implementations include: 8 | 9 | * a dummy profile used for demonstration. Consumes the input log file and prints to stdout 10 | * a mongodb log parser based on a Parsing Expression Grammar (PEG). At Parse we found the PEG parser to perform better, and more accurately, than any regex-based pattern we could come up with, due to the complex nature of MongoDB log lines. The PEG parser focuses on actual operations (queries, inserts, commands, etc) and ignores other noise. At Parse, we processed 4B operations/day with this tailer. The mongodb tailer converts lines into a consistent JSON format that can be processed by other analytics systems. 11 | * an sshd log parser that converts ssh login events to JSON 12 | 13 | ## Building 14 | 15 | Has been tested on go version 1.5.3, but will probably work with earlier versions. 16 | 17 | ```sh 18 | $ go install github.com/ParsePlatform/logtailer/cmd/logtailer 19 | ``` 20 | 21 | ## External Dependencies 22 | 23 | Logtailer was written to run via cron once per minute. Since logs rotate less frequently than that, it relies on the [logtail2](http://manpages.ubuntu.com/manpages/trusty/man8/logtail2.8.html) command readily available in the Ubuntu repositories. When repeatedly run with the same log file for input, logtail2 ensures that only new lines are consumed. To make this work, ensure that the logtailer run directory exists and is owned by the user that runs logtailer: 24 | 25 | ```sh 26 | mkdir -p /var/run/logtailer 27 | chown <user> /var/run/logtailer 28 | ``` 29 | 30 | Alternatively, logtailer accepts stdin as input. Simply specify *-* to the *log_file* flag when invoking logtailer. 31 | 32 | ## Testing 33 | 34 | The simplest test of the binary is to invoke the *dummy* profile with some simple input. It should be echoed back, along with some statistics that go to stderr.
35 | 36 | ```sh 37 | printf '1\n2\n3\n' | ./logtailer dummy -log_file - 38 | 1 39 | 2 40 | 3 41 | {"Records":3,"ParseErrors":0,"SendErrors":0} 42 | ``` 43 | ## Tuning 44 | 45 | By default, *logtailer* uses a single processing worker (*num_workers* defaults to 1, while *gomaxprocs* defaults to the number of CPUs). You can override this by setting the *num_workers* flag. 46 | -------------------------------------------------------------------------------- /cmd/logtailer/main.go: -------------------------------------------------------------------------------- 1 | // Command logtailer is designed to process log files for consumption. 2 | // 3 | // When provided a file for the `log_file` argument it invokes logtail2 to 4 | // perform log file checkpointing including rotation detection. 5 | // 6 | // The first argument must be the profile identifier. 7 | // 8 | // When run with `-` for the `log_file` argument it consumes stdin. 9 | // 10 | // Example invocation via cron: 11 | // 12 | // * * * * * /usr/bin/logtailer nginx -log_file=/mnt/log/nginx/access.log 13 | // 14 | // See profile_dummy.go for an example of adding your own profile.
15 | package main 16 | 17 | import ( 18 | "flag" 19 | "fmt" 20 | "log" 21 | "os" 22 | "os/signal" 23 | "runtime" 24 | "sort" 25 | "strings" 26 | "syscall" 27 | 28 | "github.com/ParsePlatform/logtailer" 29 | "github.com/ParsePlatform/logtailer/profiles" 30 | "github.com/ParsePlatform/logtailer/profiles/dummy" 31 | "github.com/ParsePlatform/logtailer/profiles/mongodb" 32 | "github.com/ParsePlatform/logtailer/profiles/sshd" 33 | ) 34 | 35 | var ( 36 | logFile = flag.String("log_file", "", "The input log file to consume.") 37 | stateDir = flag.String("state_dir", "/var/run/logtailer", "The directory that will hold log tailing state.") 38 | dryRun = flag.Bool("dry_run", false, "If True, will only print to stdout and will not update any state.") 39 | numWorkers = flag.Int("num_workers", 1, "Number of processing goroutines to run (1 for sequential).") 40 | goMaxProcs = flag.Int("gomaxprocs", runtime.NumCPU(), "Sets the number of os threads that will be utilized") 41 | ) 42 | 43 | func usage() { 44 | fmt.Fprintf(os.Stderr, "Usage of %[1]s:\n\t%[1]s profile_name [arguments]\n\n", os.Args[0]) 45 | fmt.Fprintf(os.Stderr, "Available profiles: %s\n\nArguments:\n", profileNames()) 46 | flag.PrintDefaults() 47 | } 48 | 49 | // availableProfiles defines the registered logtailer profiles. 50 | // To add a new profile you must add it to this map. 
51 | // TODO(tredman): convert mysql, nginx, and haproxy tailers 52 | var availableProfiles = map[string]profiles.Profile{ 53 | "dummy": new(dummy.DummyProfile), 54 | "mongodb": new(mongodb.MongodbProfile), 55 | "sshd": new(sshd.SshdProfile), 56 | } 57 | 58 | func main() { 59 | // TODO: pick a better logger with support for Info, Debug, etc 60 | logger := log.New(os.Stderr, "DEBUG: ", log.LstdFlags|log.Lshortfile) 61 | flag.Usage = usage 62 | 63 | if len(os.Args) == 1 { 64 | flag.Usage() 65 | logger.Fatalln("No profile specified.") 66 | } 67 | 68 | profileName := os.Args[1] 69 | p, ok := availableProfiles[profileName] 70 | if !ok { 71 | flag.Usage() 72 | logger.Fatalf("Invalid profile '%s' selected.\n", profileName) // Fatalf avoids the Fatalln+Sprintf double newline; message had a duplicated "profile" 73 | } 74 | 75 | flag.CommandLine.Parse(os.Args[2:]) 76 | 77 | runtime.GOMAXPROCS(*goMaxProcs) 78 | 79 | if *logFile == "" { 80 | flag.Usage() 81 | logger.Fatalln("No log file specified (-log_file argument).") 82 | } 83 | 84 | tailer := logtailer.NewLogtailer(p, *logFile, *stateDir, logger) 85 | tailer.DryRun = *dryRun 86 | 87 | if err := tailer.PrepEnvironment(); err != nil { 88 | logger.Fatalln("logtailer: issue with environment: ", err) 89 | } 90 | 91 | // on the first TERM/INT, stop the tailer and the signal handler 92 | ch := make(chan os.Signal, 2) 93 | signal.Notify(ch, syscall.SIGTERM, syscall.SIGINT) 94 | go func() { 95 | <-ch 96 | signal.Stop(ch) 97 | tailer.Stop() 98 | }() 99 | 100 | stats, err := tailer.Run(*numWorkers) 101 | if err != nil { 102 | logger.Fatalln("error in run: ", err) 103 | } 104 | fmt.Fprintln(os.Stderr, stats) 105 | } 106 | 107 | func profileNames() string { 108 | names := []string{} 109 | for k := range availableProfiles { 110 | names = append(names, k) 111 | } 112 | sort.Strings(names) 113 | return strings.Join(names, ", ") 114 | } 115 | -------------------------------------------------------------------------------- /logtailer.go:
-------------------------------------------------------------------------------- 1 | // Package logtailer provides an easy way to write log file munging programs 2 | package logtailer 3 | 4 | import ( 5 | "bufio" 6 | "fmt" 7 | "io" 8 | "log" 9 | "os" 10 | "os/exec" 11 | "path/filepath" 12 | "sync" 13 | 14 | "github.com/ParsePlatform/logtailer/profiles" 15 | ) 16 | 17 | // Logtailer holds the state of a logtailer program which represents the 18 | // consumption of an input source with a particular profile 19 | type Logtailer struct { 20 | Logger *log.Logger 21 | Profile profiles.Profile 22 | LogFile string 23 | StateDir string 24 | DryRun bool 25 | 26 | shutdown chan struct{} 27 | logtailCmd *exec.Cmd 28 | } 29 | 30 | // Splitter supplies a custom function for a bufio.Scanner 31 | type Splitter interface { 32 | Split(data []byte, atEOF bool) (advance int, token []byte, err error) 33 | } 34 | 35 | // NewLogtailer prepares a new Logtailer from a profile, input logfile, state 36 | // directory, and a logger. 37 | func NewLogtailer(profile profiles.Profile, logFile string, stateDir string, logger *log.Logger) *Logtailer { 38 | return &Logtailer{ 39 | Logger: logger, 40 | Profile: profile, 41 | LogFile: logFile, 42 | StateDir: stateDir, 43 | shutdown: make(chan struct{}), 44 | } 45 | } 46 | 47 | func (lt *Logtailer) getInput() (io.Reader, error) { 48 | if lt.LogFile == "-" { 49 | return os.Stdin, nil 50 | } 51 | 52 | lt.newLogtailCmd() 53 | 54 | // set up output pipeline. 55 | stdout, err := lt.logtailCmd.StdoutPipe() 56 | if err != nil { 57 | return nil, fmt.Errorf("error connecting to logtail stdout: %v", err) 58 | } 59 | lt.logtailCmd.Stderr = os.Stderr 60 | if err := lt.logtailCmd.Start(); err != nil { 61 | return nil, fmt.Errorf("error starting logtail cmd: %v", err) 62 | } 63 | 64 | return stdout, nil 65 | } 66 | 67 | // Run starts the consumption of the input source and starts `numWorkers` 68 | // separate goroutines to process lines. 
69 | // 70 | // If the log lines are ordered `numWorkers` should be 1. 71 | func (lt *Logtailer) Run(numWorkers int) (*Stats, error) { 72 | input, err := lt.getInput() 73 | stats := &Stats{} 74 | 75 | if err != nil { 76 | lt.Logger.Println("error getting logtail input:", err) 77 | return stats, err 78 | } 79 | scanner := bufio.NewScanner(input) 80 | inputRecords := make(chan string) 81 | outputRecords := make(chan interface{}) 82 | 83 | // run any initialization routines needed by the profile 84 | err = lt.Profile.Init() 85 | if err != nil { 86 | return stats, err 87 | } 88 | 89 | // start scanner goroutine 90 | go func() { 91 | defer close(inputRecords) 92 | // if the profile supplies a custom splitting function, use it 93 | if splitter, ok := lt.Profile.(Splitter); ok { 94 | scanner.Split(splitter.Split) 95 | } 96 | 97 | for scanner.Scan() { 98 | // hand every token to inputRecords to be consumed by the profile 99 | stats.Records++ 100 | select { 101 | case inputRecords <- scanner.Text(): 102 | case <-lt.shutdown: 103 | return 104 | } 105 | } 106 | }() 107 | var wg sync.WaitGroup 108 | for i := 0; i < numWorkers; i++ { 109 | wg.Add(1) 110 | go func() { 111 | defer wg.Done() 112 | for { 113 | line, ok := <-inputRecords 114 | if !ok { 115 | return 116 | } 117 | record, err := lt.Profile.ProcessRecord(line) 118 | 119 | if err != nil { 120 | lt.Logger.Println("error parsing:", err) 121 | stats.Lock() 122 | stats.ParseErrors++ 123 | stats.Unlock() 124 | } else { 125 | outputRecords <- record 126 | } 127 | } 128 | }() 129 | } 130 | 131 | errorChan := lt.Profile.HandleOutput(outputRecords, lt.DryRun) 132 | 133 | go func() { 134 | for err := range errorChan { 135 | stats.Lock() 136 | stats.SendErrors++ 137 | stats.Unlock() 138 | lt.Logger.Println("error sending:", err) 139 | } 140 | }() 141 | wg.Wait() 142 | close(outputRecords) 143 | 144 | if stats.IsHealthy() { 145 | err = nil 146 | } else { 147 | err = fmt.Errorf("stats indicate unhealthy run: %+v", stats) 148 | } 149 
| return stats, err 150 | } 151 | 152 | // Stop stops consuming new input 153 | func (lt *Logtailer) Stop() { 154 | close(lt.shutdown) 155 | } 156 | 157 | func (lt *Logtailer) stateFilePath() string { 158 | logFileName := filepath.Base(lt.LogFile) 159 | fileName := fmt.Sprintf("logtailer-%s-%s.state", lt.Profile.Name(), logFileName) 160 | return filepath.Join(lt.StateDir, fileName) 161 | } 162 | 163 | func (lt *Logtailer) newLogtailCmd() { 164 | args := []string{ 165 | "-f", lt.LogFile, 166 | "-o", lt.stateFilePath(), 167 | } 168 | if lt.DryRun { 169 | args = append(args, "-t") 170 | } 171 | lt.Logger.Println("executing ", logtailBinary, args) 172 | lt.logtailCmd = exec.Command(logtailBinary, args...) 173 | } 174 | -------------------------------------------------------------------------------- /profiles/sshd/sshd.go: -------------------------------------------------------------------------------- 1 | // Package sshd parses ssh log lines and generates JSON representing ssh events 2 | package sshd 3 | 4 | import ( 5 | "encoding/json" 6 | "errors" 7 | "fmt" 8 | "log" 9 | "os" 10 | "regexp" 11 | "strconv" 12 | "time" 13 | ) 14 | 15 | // SshdProfile is a logtailer profile that parses ssh login events from sshd logs 16 | type SshdProfile struct { 17 | // maps key fingerprints to fb users 18 | fingerprintToFbUser map[string]string 19 | 20 | // events is the in-flight ssh events that are being built up 21 | events map[string]*sshEvent 22 | 23 | // completeEvents is populated with finished events 24 | completeEvents chan *sshEvent 25 | 26 | // Logger is used to report tailer issues to stderr 27 | logger *log.Logger 28 | } 29 | 30 | // sshEvent represents a successful ssh login event 31 | type sshEvent struct { 32 | Timestamp time.Time `json:"timestamp,omitempty"` 33 | Logtime time.Time `json:"logtime,omitempty"` 34 | PeType string `json:"pe_type,omitempty"` 35 | Hostname string `json:"hostname,omitempty"` 36 | Pid int `json:"pid,omitempty"` 37 | DstIP string 
`json:"dst_ip,omitempty"` 38 | SrcIP string `json:"src_ip,omitempty"` 39 | Port int `json:"port,omitempty"` 40 | Fingerprint string `json:"fingerprint,omitempty"` 41 | DstUser string `json:"dst_user,omitempty"` 42 | FbUser string `json:"fb_user,omitempty"` 43 | ChildPid int `json:"child_pid,omitempty"` 44 | Success bool `json:"success"` 45 | FailReason string `json:"fail_reason,omitempty"` 46 | 47 | Complete bool `json:"complete,omitempty"` 48 | } 49 | 50 | func (e *sshEvent) ID() string { 51 | return e.Hostname + ":" + strconv.Itoa(e.Pid) 52 | } 53 | 54 | func (e *sshEvent) String() string { 55 | buf, _ := json.Marshal(&e) 56 | return string(buf) 57 | } 58 | 59 | // string types to look for in ssh logs 60 | var ( 61 | timeFormat = "Jan 02 15:04:05" 62 | 63 | sshLogRe = regexp.MustCompile(`^([A-z]{3} [0-9]+ [^ ]+) ([a-zA-Z0-9-]+) sshd\[(\d+)\]: (.*)`) 64 | connectLineRe = regexp.MustCompile(`^Connection from ([0-9.]+) port (\d+)$`) 65 | foundKeyRe = regexp.MustCompile(`^Found matching RSA key: ([0-9a-f:]+)`) 66 | acceptKeyRe = regexp.MustCompile(`^Accepted publickey for (\w+) from ([0-9.]+) port (\d+) ssh2`) 67 | childPidRe = regexp.MustCompile(`^User child is on pid (\d+)`) 68 | badRevMapRe = regexp.MustCompile(`^reverse mapping checking getaddrinfo .*`) 69 | failedPubKeyRe = regexp.MustCompile(`^Failed publickey for (\w+) from ([0-9.]+) port (\d+)`) 70 | connectionClosedRe = regexp.MustCompile(`^Connection closed by ([0-9.]+) \[(.*)\]`) 71 | ) 72 | 73 | // Name returns the name of the profile and must be unique amongst registered. 
74 | // profiles 75 | func (p *SshdProfile) Name() string { 76 | return "sshd" 77 | } 78 | 79 | // Init initializes the SshdProfile instance 80 | func (p *SshdProfile) Init() error { 81 | p.logger = log.New(os.Stderr, "DEBUG: ", log.LstdFlags|log.Lshortfile) 82 | p.completeEvents = make(chan *sshEvent) 83 | p.events = make(map[string]*sshEvent) 84 | p.fingerprintToFbUser = populateKeyMapping() 85 | return nil 86 | } 87 | 88 | // ProcessRecord is invoked for every input log line. It returns a transformed. 89 | // line or an error 90 | func (p *SshdProfile) ProcessRecord(line string) (interface{}, error) { 91 | ok := sshLogRe.MatchString(line) 92 | if !ok { 93 | // if it's not an sshd line, return error 94 | p.logger.Println("bad line:", line) 95 | return nil, errors.New("logtailer.sshd: unexpected log line not from sshd") 96 | } 97 | res := sshLogRe.FindStringSubmatch(line) 98 | partialEvent := sshEvent{} 99 | partialEvent.Timestamp = time.Now() 100 | partialEvent.Logtime, _ = time.Parse(timeFormat, res[1]) 101 | 102 | partialEvent.Hostname = res[2] 103 | partialEvent.Pid, _ = strconv.Atoi(res[3]) 104 | message := res[4] 105 | switch { 106 | case connectLineRe.MatchString(message): 107 | res = connectLineRe.FindStringSubmatch(message) 108 | partialEvent.PeType = "connectLine" 109 | partialEvent.SrcIP = res[1] 110 | partialEvent.Port, _ = strconv.Atoi(res[2]) 111 | case foundKeyRe.MatchString(message): 112 | res = foundKeyRe.FindStringSubmatch(message) 113 | partialEvent.PeType = "foundKey" 114 | partialEvent.Fingerprint = res[1] 115 | partialEvent.FbUser = p.fingerprintToFbUser[res[1]] 116 | case acceptKeyRe.MatchString(message): 117 | res = acceptKeyRe.FindStringSubmatch(message) 118 | partialEvent.PeType = "acceptKey" 119 | partialEvent.DstUser = res[1] 120 | partialEvent.SrcIP = res[2] 121 | partialEvent.Port, _ = strconv.Atoi(res[3]) 122 | partialEvent.Success = true 123 | case childPidRe.MatchString(message): 124 | res = childPidRe.FindStringSubmatch(message) 
125 | partialEvent.PeType = "childPid" 126 | partialEvent.ChildPid, _ = strconv.Atoi(res[1]) 127 | partialEvent.Success = true 128 | case badRevMapRe.MatchString(message): 129 | res = badRevMapRe.FindStringSubmatch(message) 130 | partialEvent.PeType = "badRevMap" 131 | partialEvent.FailReason = "reverse mapping checking getaddrinfo" 132 | partialEvent.Success = false 133 | case failedPubKeyRe.MatchString(message): 134 | res = failedPubKeyRe.FindStringSubmatch(message) 135 | partialEvent.PeType = "failedPubKey" 136 | partialEvent.Success = false 137 | case connectionClosedRe.MatchString(message): 138 | res = connectionClosedRe.FindStringSubmatch(message) 139 | partialEvent.PeType = "connectionClosed" 140 | partialEvent.SrcIP = res[1] 141 | partialEvent.FailReason = res[2] 142 | partialEvent.Success = false 143 | default: 144 | return []byte("{}"), nil 145 | } 146 | return json.Marshal(partialEvent) 147 | } 148 | 149 | func (p *SshdProfile) handlePartialEvent(partialEvent []byte) error { 150 | var event sshEvent 151 | if err := json.Unmarshal(partialEvent, &event); err != nil { 152 | return err 153 | } 154 | 155 | // check for existing event 156 | key := event.ID() 157 | fullEvent, ok := p.events[key] 158 | // if not present, insert new 159 | if !ok { 160 | fullEvent = &sshEvent{} 161 | p.events[key] = fullEvent 162 | } 163 | // if present, merge values, check if complete 164 | fullEvent.Timestamp = time.Now() 165 | fullEvent.Logtime = event.Logtime 166 | fullEvent.Hostname = event.Hostname 167 | fullEvent.Pid = event.Pid 168 | // TODO nuke this and replace it with a reflection that sets values in 169 | // fullEvent for all non-nil fields in event 170 | // 171 | // If we run with multiple workers we have a data race here and should move 172 | // Complete calculation after the switch 173 | switch event.PeType { 174 | case "connectLine": 175 | fullEvent.SrcIP = event.SrcIP 176 | fullEvent.Port = event.Port 177 | case "foundKey": 178 | fullEvent.Fingerprint = 
event.Fingerprint 179 | fullEvent.FbUser = event.FbUser 180 | case "acceptKey": 181 | fullEvent.DstUser = event.DstUser 182 | fullEvent.SrcIP = event.SrcIP 183 | fullEvent.Port = event.Port 184 | fullEvent.Success = event.Success 185 | case "childPid": 186 | fullEvent.ChildPid = event.ChildPid 187 | fullEvent.Success = event.Success 188 | fullEvent.Complete = true 189 | case "badRevMap": 190 | fullEvent.FailReason = event.FailReason 191 | fullEvent.Success = event.Success 192 | case "failedPubKey": 193 | fullEvent.Success = event.Success 194 | fullEvent.Complete = true 195 | case "connectionClosed": 196 | fullEvent.Success = event.Success 197 | fullEvent.SrcIP = event.SrcIP 198 | fullEvent.FailReason = event.FailReason 199 | fullEvent.Complete = true 200 | } 201 | // if complete 202 | if fullEvent.Complete { 203 | p.completeEvents <- fullEvent 204 | delete(p.events, key) 205 | } 206 | 207 | return nil 208 | } 209 | 210 | // HandleOutput recieves a channel of input lines and a flag of whether or not 211 | // this is a dry run being invoked (to avoid side-effects). 212 | // 213 | // The return value is a channel of errors. logtailer keeps track of 214 | // the number of errors and exits non-zero if they are over a threshold. 215 | func (p *SshdProfile) HandleOutput(records <-chan interface{}, dryRun bool) <-chan error { 216 | errChan := make(chan error) 217 | timeoutCheckTicker := time.NewTicker(5 * time.Second) 218 | 219 | // Launch the consumption goroutine. 220 | go func() { 221 | // Close errChan when this goroutine finishes to signal being done. 222 | defer close(errChan) 223 | 224 | // Consume lines input channel until it is closed (if consuming stdin), 225 | // this is potentially never. 
226 | for record := range records { 227 | line, ok := record.([]byte) 228 | if !ok { 229 | errChan <- fmt.Errorf("Unexpected output record type: %t", record) 230 | continue 231 | } 232 | if err := p.handlePartialEvent(line); err != nil { 233 | errChan <- err 234 | } 235 | } 236 | // if we're closing up shop make sure the events cleaner goroutine stops too. 237 | timeoutCheckTicker.Stop() 238 | }() 239 | 240 | // expire events that aren't updated for 60s 241 | go func() { 242 | for { 243 | <-timeoutCheckTicker.C 244 | for key, event := range p.events { 245 | expireEventAt := event.Timestamp.Add(60 * time.Second) 246 | if time.Now().After(expireEventAt) { 247 | event.Success = false 248 | event.FailReason = "timeout waiting for complete event" 249 | p.completeEvents <- event 250 | delete(p.events, key) 251 | } 252 | } 253 | } 254 | }() 255 | 256 | go func() { 257 | // write events to stdout 258 | for event := range p.completeEvents { 259 | message := event.String() 260 | if len(message) > 0 { 261 | fmt.Println(message) 262 | } 263 | } 264 | }() 265 | 266 | return errChan 267 | } 268 | -------------------------------------------------------------------------------- /profiles/mongodb/mongodb.go: -------------------------------------------------------------------------------- 1 | // Package mongodb implements a mongodb logtailer profile that can parse mongodb log lines 2 | // and output JSON 3 | 4 | package mongodb 5 | 6 | import ( 7 | "encoding/json" 8 | "flag" 9 | "fmt" 10 | "log" 11 | "os" 12 | "strconv" 13 | "strings" 14 | "time" 15 | 16 | "github.com/tmc/mongologtools/parser" 17 | 18 | "github.com/davecgh/go-spew/spew" 19 | "gopkg.in/yaml.v1" 20 | ) 21 | 22 | var ( 23 | enableAdditionalRocksDBFields = flag.Bool("logtailer.enablerocksdbfields", false, "Enable reporting of additional rocksdb fields.") 24 | 25 | outputSchema = map[string][]string{ 26 | "int": {"ntoreturn", "idhack", "ntoskip", "nscanned", "nmoved", "scan_and_order", 27 | "nupdated", "fastmodinsert", 
"fastmod", "ninserted", "ndeleted", "keyUpdates", "num_yields", 28 | "global_read_lock_micros", "global_write_lock_micros", "read_lock_micros", "write_lock_micros", "nreturned", 29 | "reslen", "duration_ms", "sample_rate", "nscanned_objects", "nmatched", "nmodified", "upsert", 30 | "write_conflicts", "user_key_comparison_count", "block_cache_hit_count", "block_read_count", 31 | "block_read_byte", "internal_key_skipped_count", "internal_delete_skipped_count", 32 | "get_from_memtable_count", "seek_on_memtable_count", "seek_child_seek_count", 33 | }, 34 | "normal": {"hostname", "database", "collection", "op", "query_signature", 35 | "command_type", "ns", "rs_mismatch", "plan_summary", "comment", 36 | "logtailer_host", "exception", "warning", "code", "severity", 37 | "component", "parser_result", "host_state", 38 | }, 39 | } 40 | 41 | // additional rocks fields 42 | rocksDBFields = []string{ 43 | "block_read_time", "block_checksum_time", "block_decompress_time", "write_wal_time", "get_snapshot_time", "get_from_memtable_time", "get_post_process_time", "get_from_output_files_time", "seek_on_memtable_time", "seek_child_seek_time", "seek_min_heap_time", "seek_internal_seek_time", "find_next_user_entry_time", "write_pre_and_post_process_time", "write_memtable_time", "db_mutex_lock_nanos", "db_condition_wait_nanos", 44 | } 45 | 46 | // maps field names to field types (int, normal, etc) populated by init(). 47 | fieldToType map[string]string 48 | 49 | // holds hostname of machine running logtailer instance 50 | logtailerHost string 51 | ) 52 | 53 | // MongodbProfile is the profile used to parse mongodb logs. 
Output is JSON 54 | type MongodbProfile struct { 55 | Logger *log.Logger 56 | } 57 | 58 | // Init performs startup steps for the MongodbProfile 59 | func (p *MongodbProfile) Init() error { 60 | p.Logger = log.New(os.Stderr, "DEBUG: ", log.LstdFlags|log.Lshortfile) 61 | 62 | if *enableAdditionalRocksDBFields { 63 | outputSchema["int"] = append(outputSchema["int"], rocksDBFields...) 64 | } 65 | 66 | fieldToType = fieldToTypeFromSchema(outputSchema) 67 | 68 | return nil 69 | } 70 | 71 | // Name returns the name of the profile and must be unique amongst registered. 72 | // profiles 73 | func (p *MongodbProfile) Name() string { 74 | return "mongodb" 75 | } 76 | 77 | // Convert mongo timestamp to unix UTC 78 | func mongoTimeToUnixUTC(in string) int64 { 79 | // Go reference time: Mon Jan 2 15:04:05 -0700 MST 2006 80 | // Mongo Reference time: Thu Jul 10 06:46:11.890 81 | 82 | t, err := time.Parse("Mon Jan 2 15:04:05", in) 83 | if err != nil { 84 | t = time.Now().UTC() 85 | } else { 86 | // Add the current year to the end of the date string 87 | // This is a silly workaround for the fact that Mongo 2.4 doesn't log the current year 88 | // This will be fixed in 2.6 89 | t = t.AddDate(time.Now().UTC().Year(), 0, 0) 90 | } 91 | 92 | return t.Unix() 93 | } 94 | 95 | // ProcessRecord is invoked for every input log line. It returns a transformed. 
96 | // line or an error 97 | func (p *MongodbProfile) ProcessRecord(line string) (interface{}, error) { 98 | values, err := parser.ParseLogLine(line) 99 | if err != nil { 100 | return nil, err 101 | } 102 | 103 | // report how well we parsed the line 104 | values["parser_result"] = "full" 105 | if _, ok := values["xextra"]; ok { 106 | values["parser_result"] = "partial" 107 | if strings.Contains(line, "......") { 108 | values["parser_result"] = "truncated" 109 | } else { 110 | // print parital parses to stdout for debugging purposes 111 | fmt.Println(line) 112 | spew.Dump(values) 113 | } 114 | } 115 | 116 | // apply transformations 117 | if err := p.applyTransformations(values); err != nil { 118 | return nil, err 119 | } 120 | 121 | var outputRecord string 122 | marshalled, err := json.Marshal(values) 123 | if err != nil { 124 | p.Logger.Printf("error serializing to json %s", err) 125 | } else { 126 | outputRecord = string(marshalled) 127 | } 128 | 129 | return outputRecord, nil 130 | } 131 | 132 | // applyTransformations takes the fields and populates more fields. 
133 | func (p *MongodbProfile) applyTransformations(match map[string]interface{}) error { 134 | match["logtailer_host"] = logtailerHost 135 | if nsParts := strings.Split(fmt.Sprint(match["ns"]), "."); len(nsParts) > 1 { 136 | match["database"] = nsParts[0] 137 | match["collection"] = nsParts[1] 138 | } 139 | 140 | // expand extra field if present: 141 | if asmap, ok := match["extra"]; ok { 142 | for k, v := range asmap.(map[string]interface{}) { 143 | match[k] = v 144 | } 145 | } 146 | 147 | // field aliases 148 | // in 3.2, nscanned becomes keysExamined, but we map to nscanned so we can compare 149 | // apples:apples 150 | if match["keysExamined"] != nil { 151 | match["nscanned"] = match["keysExamined"] 152 | } 153 | // ditto for docsExamined 154 | if match["docsExamined"] != nil { 155 | match["nscanned_objects"] = match["docsExamined"] 156 | } 157 | if match["nscannedObjects"] != nil { 158 | match["nscanned_objects"] = match["nscannedObjects"] 159 | } 160 | match["global_write_lock_micros"] = match["W"] 161 | match["global_read_lock_micros"] = match["R"] 162 | match["write_lock_micros"] = match["w"] 163 | match["read_lock_micros"] = match["r"] 164 | match["num_yields"] = match["numYields"] 165 | match["write_conflicts"] = match["writeConflicts"] 166 | match["scan_and_order"] = match["scanAndOrder"] 167 | 168 | // The collection is in the command block for commands 169 | // in particular we want count and and findandmodify 170 | if match["op"] == "command" { 171 | cmdMap, ok := match["command"].(map[string]interface{}) 172 | cmdType, ctOk := match["command_type"].(string) 173 | if ok && ctOk { 174 | // If we've found a command type we're interested in, extract the value as app ID and/or 175 | // collection name 176 | if nsString, ok := cmdMap[cmdType].(string); ok { 177 | match["collection"] = nsString 178 | 179 | // Generate a query signature based on the query within the command 180 | if signature, err := generateQuerySignature(cmdMap, "command"); err == nil { 
181 | match["query_signature"] = string(signature) 182 | } else { 183 | p.Logger.Printf("unable to generate command query signature. error: %s", err) 184 | } 185 | } else { 186 | p.Logger.Printf("unable to read the ns string from command for command type %s", cmdType) 187 | } 188 | } 189 | } else { 190 | var queryMap map[string]interface{} 191 | 192 | switch query := match["query"].(type) { 193 | case map[string]interface{}: 194 | queryMap = query 195 | case string: 196 | // if we didn't get a parsed query try to do our YAML/JSON dance 197 | queryMap, _ = loadMongoJSON(query) 198 | } 199 | 200 | if queryMap != nil { 201 | if comment, ok := queryMap["$comment"].(string); ok { 202 | // Store the comment as its own field 203 | match["comment"] = comment 204 | // Don't leave the comment in the query signature 205 | delete(queryMap, "$comment") 206 | } 207 | // Generate query signature if possible 208 | if signature, err := generateQuerySignature(queryMap, fmt.Sprint(match["op"])); err == nil { 209 | match["query_signature"] = string(signature) 210 | } else { 211 | p.Logger.Printf("unable to generate %s query signature. 
error: %s", match["op"], err) 212 | } 213 | } 214 | } 215 | 216 | if _, ok := match["planSummary"]; ok { 217 | if ps, err := json.Marshal(match["planSummary"]); err == nil { 218 | match["plan_summary"] = string(ps) 219 | } 220 | } 221 | 222 | return nil 223 | } 224 | 225 | // HandleOutput satisfies part of the profile.Profile interface, converting 226 | // lines to JSON and printing to stdout 227 | func (p *MongodbProfile) HandleOutput(records <-chan interface{}, dryRun bool) <-chan error { 228 | errChan := make(chan error) 229 | go func() { 230 | defer close(errChan) 231 | for record := range records { 232 | message, ok := record.(string) 233 | if !ok { 234 | errChan <- fmt.Errorf("Unexpected output record type: %t", record) 235 | continue 236 | } 237 | if dryRun { 238 | p.Logger.Println("skipping due to dry run") 239 | fmt.Println(message) 240 | continue 241 | } 242 | if len(message) > 0 { 243 | fmt.Println(message) 244 | } 245 | } 246 | }() 247 | return errChan 248 | } 249 | 250 | // Recurse through interface representing JSON and set values to where appropriate 251 | func scrubFields(query interface{}) { 252 | switch segment := query.(type) { 253 | case map[string]interface{}: 254 | for key := range segment { 255 | switch key { 256 | case "$or", "$and": 257 | scrubFields(segment[key]) 258 | case "$nearSphere": 259 | segment[key] = "[?,?]" 260 | case "$box": 261 | segment[key] = "[[?,?],[?,?]]" 262 | case "$nin", "$in", "$each", "$all": 263 | segment[key] = []string{"?"} 264 | case "_acl": 265 | // for legacy purposes, we still write an _acl object to some documents 266 | // just strip this out, it's not interesting 267 | segment[key] = "?" 
268 | case "_rperm", "_wperm": 269 | // in insert and FAM docs, _rperm and _wperm are an array 270 | // in query docs, _rperm and _wperm have an $in clause 271 | // this handles both 272 | switch segment[key].(type) { 273 | case []interface{}: 274 | segment[key] = []string{"?"} 275 | case map[string]interface{}: 276 | scrubFields(segment[key]) 277 | } 278 | default: 279 | switch segment[key].(type) { 280 | case map[string]interface{}: 281 | scrubFields(segment[key]) 282 | case []interface{}, []string: 283 | scrubFields(segment[key]) 284 | default: 285 | segment[key] = "?" 286 | } 287 | } 288 | } 289 | case []interface{}: 290 | for index := range segment { 291 | scrubFields(segment[index]) 292 | } 293 | } 294 | } 295 | 296 | // package yaml generates map[interface{}]interface[] but json wants map[string][interface{} 297 | // This doesn't really convert YAML to JSON, it just recursively changes the type 298 | func convertYAMLToJSON(yaml interface{}) interface{} { 299 | switch yamlMap := yaml.(type) { 300 | case map[interface{}]interface{}: 301 | newMap := make(map[string]interface{}) 302 | 303 | for key := range yamlMap { 304 | if k, ok := key.(string); ok { 305 | newMap[k] = convertYAMLToJSON(yamlMap[key]) 306 | } 307 | } 308 | 309 | return newMap 310 | case []interface{}: 311 | for index := range yamlMap { 312 | yamlMap[index] = convertYAMLToJSON(yamlMap[index]) 313 | } 314 | } 315 | 316 | return yaml 317 | } 318 | 319 | // loadMongoJSON is a special case handler for the JSON used in the slow query logs 320 | func loadMongoJSON(json string) (map[string]interface{}, error) { 321 | var jsonMap interface{} 322 | 323 | // The queries logged in the slow query logs are not proper JSON 324 | // but can be parsed correctly using the yaml library 325 | if err := yaml.Unmarshal([]byte(json), &jsonMap); err != nil { 326 | return nil, err 327 | } 328 | 329 | if convertedJSON, ok := convertYAMLToJSON(jsonMap).(map[string]interface{}); ok { 330 | return convertedJSON, nil 331 | 
} 332 | 333 | return nil, fmt.Errorf("failed type assertion when loading json: '%s'", json) 334 | } 335 | 336 | func generateQuerySignature(queryMap map[string]interface{}, op string) ([]byte, error) { 337 | switch op { 338 | case "query": 339 | // For queries, scrub the $query portion of the doc if it exists 340 | // if it doesn't exist, scrub the whole thing 341 | if queryMap["$query"] == nil { 342 | scrubFields(queryMap) 343 | } else { 344 | scrubFields(queryMap["$query"]) 345 | } 346 | return json.Marshal(queryMap) 347 | case "command": 348 | scrubFields(queryMap) 349 | if queryMap["count"] != nil { 350 | // For count only serialize the query portio 351 | return json.Marshal(queryMap["query"]) 352 | } 353 | delete(queryMap, "findAndModify") // remove expected fields 354 | delete(queryMap, "findandmodify") 355 | return json.Marshal(queryMap) 356 | default: 357 | // For update, only the query is passed in so just return the full doc 358 | scrubFields(queryMap) 359 | return json.Marshal(queryMap) 360 | } 361 | } 362 | 363 | func fieldToTypeFromSchema(scubaSchema map[string][]string) map[string]string { 364 | result := make(map[string]string) 365 | for t, fieldList := range scubaSchema { 366 | for _, f := range fieldList { 367 | result[f] = t 368 | } 369 | } 370 | return result 371 | } 372 | 373 | func safeGetInt(v interface{}) int { 374 | switch value := v.(type) { 375 | case int: 376 | return value 377 | case int64: 378 | return int(value) 379 | case uint: 380 | return int(value) 381 | case uint64: 382 | return int(value) 383 | case string: 384 | i, err := strconv.Atoi(value) 385 | if err == nil { 386 | return i 387 | } 388 | return 0 389 | default: 390 | return 0 391 | } 392 | } 393 | 394 | // initalize logtailerHost 395 | func init() { 396 | if hostname, err := os.Hostname(); err != nil { 397 | logtailerHost = "unknown" 398 | } else { 399 | logtailerHost = hostname 400 | } 401 | } 402 | 
-------------------------------------------------------------------------------- /profiles/mongodb/mongodb_test.go: -------------------------------------------------------------------------------- 1 | package mongodb 2 | 3 | import ( 4 | "encoding/json" 5 | "testing" 6 | 7 | "github.com/facebookgo/ensure" 8 | "github.com/tmc/mongologtools/parser" 9 | ) 10 | 11 | var ( 12 | sampleUpdateLine = `Mon Sep 22 21:35:52.398 [conn6933409] update appdata403._Installation query: { _id: "0h8XoY2Mwp", _wperm: { $in: [ "*", null ] } } update: { $set: { uniqueId: "c458c6335282784e", _updated_at: new Date(1411421752261) }, $unset: { _p_uniqueId: 1 } } nscanned:1 nupdated:1 keyUpdates:1 locks(micros) w:384 135ms` 13 | sampleCountLine = `Thu Jul 10 07:20:44.797 [conn22953594] command appdata37.$cmd command: count { count: "_Installation", query: { _id: { $lt: "1" }, deviceType: { $in: [ "ios", "android" ] } }, fields: null } ntoreturn:1 keyUpdates:0 numYields: 3475 locks(micros) r:202135545 reslen:48 116584ms` 14 | sampleCountWithPlanSummary = `Tue Sep 15 22:12:47.551 I COMMAND [conn78495922] command appdata425.$cmd command: count { count: "Leaderboards", query: { table: "ld", score: { $gt: 3000.0 } }, fields: {} } planSummary: COUNT_SCAN { table: 1, score: -1 } keyUpdates:0 writeConflicts:0 numYields:1 reslen:44 locks:{ Global: { acquireCount: { r: 4 } }, Database: { acquireCount: { r: 2 } }, Collection: { acquireCount: { r: 2 } } } user_key_comparison_count:2030897 block_cache_hit_count:553 block_read_count:0 block_read_byte:0 internal_key_skipped_count:221520 internal_delete_skipped_count:290609 get_from_memtable_count:0 seek_on_memtable_count:7 seek_child_seek_count:42 762ms` 15 | sampleEmptyCountLine = `Thu Jul 10 06:54:27.772 [conn23086439] command data.$cmd command: count { count: "_EventDimension", query: {}, fields: null } ntoreturn:1 keyUpdates:0 locks(micros) r:158621 reslen:58 158ms` 16 | sampleInsertLine = `Mon Sep 22 21:35:52.401 [conn6943826] insert 
appdata350._ScriptLog:INFO ninserted:1 keyUpdates:0 locks(micros) w:114787 114ms` 17 | sampleQueryLine = `Mon Sep 22 21:35:02.658 [conn44275481] query appdata50.City query: { $query: { _id: { $nin: [ "w4swoNXOpD", "rnWQFxnF3Z", "ADbvzAfnLr", "kHvMcnyQot", "9iFALGKUsf", "AjwcWKxJoz", "ka4zmuedVb", "sbcbcNKUaB", "Emd4pZRLlm", "GYmAdbqMN4", "Har7wFdF8d", "MpIS9kCmlw", "Oi9beqwSfq", "Xx2EfJbquY", "vADandEYMl", "vYGZ0IEVfC", "JfULQjqfbQ", "k7OuqBjXgU", "BDsONO7dCB", "Nyo0pgG6Mg", "U0m05hvQcf", "92PeBpIKUe", "Y8BwxnVQC5", "43ddOvfeyu", "Q6nb94A8R4", "g3J3sLdQtH", "jaA3H4KFCd", "v2THvLYMBf", "y4vzd63LJM", "CUGyEc7w9q", "iyY3q9JUva", "8N0pXkDh7Y", "KcMewVKZIN", "pSjxilb3eU", "3Izfcv0Gkh", "owWqUXeCut", "1jAjkijG3h", "JWGfIkusAu", "ONbT3YcLWj", "ZSgRAxj9lJ", "fbXcWyL3eh", "jJxNZhbl3S", "mtQ0ykhMqp", "M1QDB2muML", "68FvkneVmD", "Fbc4AUrB45", "a2G8le9grl", "CCEHG6j0ge", "cLVr03agLR", "M82VOd4h5n", "1JknKBQ6g9", "QwOykLkHGy", "QmJ5NyFgPE", "ZmRDT2aiZ1", "gKUjtjfAFW", "gi7Eh3QUtE", "p0DFrXaAve", "txbklugcrr", "WMKb8tJiDz", "Qq6mtXX3Zz", "IAUUYUOS7O", "BWUQgi8gra", "Q2TrBRjzxt", "LDe7qh4Ceu", "vzbw5ozrZU", "PAjCGCKDdd", "SsixxrnCoI", "UpCsmnHT2J", "VQV5vIqgx3", "Y0MhP6RKDs", "nllIFh43YY", "o1qlyASB4z", "XSKN7zRcqj", "2bFITT0Klb", "3sbgGVBOiZ", "4E4euSg3dS", "JeEaM5j0Hg", "H8cKQAU5pB", "4xrUm4razs", "6wMyCU3Lyp", "FoeduzRktc", "X9EhKPWk9j", "ljbvbPQZWZ", "trtmraCCsv", "uyctqsQQth", "zFQSg3Syar", "q6wb3xaIGR", "TcBz4E58sX", "DHCp9YrU5y", "XIvTVvO6vr", "FD8LbQZOGY", "A3QMCQ5u2P", "9GSsU1R9ng", "68v2MCvq0l", "Fl4FnSZXDX", "Kv3DturDqk", "azuoUpI7d6", "cglL29oOXE", "jMCJVA4Lvk", "p8HpDNsjMw", "ph8wB7Hryy", "0NKwjgyZcb", "Ck6JwhLQZ9", "KBKWs8rOmf", "tuvMH390GE", "vRIRlZFPda", "n3I3deDlHP", "4oGeY77kU6", "605lZTQHAM", "G3CaNGyyew", "4WgRFODOyE", "CF4rkGJpcs", "5OgZIpE6DM" ] }, _updated_at: { $lte: new Date(1411298935000) }, $or: [ { a: { $exists: false }, b: { $lte: 8 } }, { a: { $gte: -32400, $lte: -3600 }, b: { $lte: 8 } } ], c: false, d: { $gte: 20, $lte: 26 }, _rperm: { $in: [ "*", 
null ] } }, $orderby: { _updated_at: -1 }, $maxScan: 500000 } cursorid:5542728988973737585 ntoreturn:3 ntoskip:0 nscanned:40229 keyUpdates:0 numYields: 29 locks(micros) r:967962 nreturned:3 reslen:221 585ms` 18 | sampleRemoveLine = `Wed Dec 10 22:06:00.877 [conn18684074] remove appdata315.crud_test query: { _id: "QfnHYiOQRL", _wperm: { $in: [ null, "*" ] } } ndeleted:1 keyUpdates:0 locks(micros) w:198 0ms` 19 | alternateQueryLine = `Wed Dec 10 22:18:32.425 [conn18471299] query appdata386._Join:roles:_Role query: { relatedId: "ciuxUnr9Yr" } ntoreturn:0 ntoskip:0 nscanned:0 keyUpdates:0 locks(micros) r:88 nreturned:0 reslen:20 0ms` 20 | sampleNonOpLine = `Fri Oct 10 22:05:24.458 [repl writer worker 7] appdata50.DeviceCity Btree::insert: key too large to index, skipping appdata50.DeviceCity.$data_1 45697 { : "ABCDEFGHIJKLMNOPQRSTUV..." }` 21 | sampleNearSphereLine = `Thu Oct 30 00:25:47.448 [conn66678403] query appdata23.Hostel query: { $query: { location: { $nearSphere: [ 0.48651, 0.8586600065713 ], $maxDistance: 0.007848061528802385 }, _rperm: { $in: [ "*", null ] } }, $maxScan: 500000 } ntoreturn:100 ntoskip:0 nscanned:13 keyUpdates:0 locks(micros) r:957 nreturned:13 reslen:6581 0ms` 22 | sampleGeoBoxLine = `Thu Oct 30 00:42:39.820 [conn66670511] query appdata43.SpeedSpot2 query: { $query: { location: { $within: { $box: [ [ 0.7727840564627, 0.69502913304249 ], [ 0.7762172836747, 0.69937692810909 ] ] } }, Venue: "Hotel", _rperm: { $in: [ "*", null ] } }, $orderby: { TestDate: -1 }, $maxScan: 500000 } ntoreturn:500 ntoskip:0 nscanned:1 scanAndOrder:1 keyUpdates:0 locks(micros) r:528 nreturned:1 reslen:963 0ms` 23 | sample26QueryLine = `Wed Dec 10 23:57:46.747 [conn2] query appdata401._User query: { $maxScan: 500000.0, $query: { _id: "RTnTOIAkzx", _rperm: { $in: [ "*", null, "RTnTOIAkzx" ] } } } planSummary: IXSCAN { _id: -1 }, IXSCAN { _rperm: 1.0 } ntoreturn:0 ntoskip:0 nscanned:1 nscannedObjects:1 keyUpdates:0 numYields:0 locks(micros) r:225 nreturned:1 reslen:284 
0ms` 24 | sample26GeoQueryLine = `Wed Dec 10 23:57:46.747 [conn2] query appdata401._User query: { $maxScan: 500000.0, $query: { _id: "RTnTOIAkzx", _rperm: { $in: [ "*", null, "RTnTOIAkzx" ] } } } planSummary: GEO_NEAR_2D { lastUserLocation: "2d" } ntoreturn:0 ntoskip:0 nscanned:1 nscannedObjects:1 keyUpdates:0 numYields:0 locks(micros) r:225 nreturned:1 reslen:284 0ms` 25 | sample26UpdateLine = `Wed Dec 10 23:57:46.747 [conn395] update test.foo query: { a: 1.0 } update: { c: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa..." } nscanned:4 nscannedObjects:4 nmoved:1 nMatched:1 nModified:1 keyUpdates:0 numYields:0 locks(micros) w:174 0ms` 26 | sample26CommentLine = `Mon Jan 12 16:19:17.894 [conn1410300] query appdata9._Installation query: { $query: { channels: { $in: [ "user_hsE3uMnSLA" ] }, deviceType: { $in: [ "android", "winphone", "js" ] } }, $comment: "{app_id:19015, pushd_id:8jhKA4aaad}" } planSummary: IXSCAN { channels: 1, _created_at: 1 } ntoreturn:1854540 ntoskip:0 nscanned:1 nscannedObjects:1 keyUpdates:0 numYields:0 locks(micros) r:182 nreturned:0 reslen:20 0ms` 27 | sample26FAMLine = `Thu Feb 12 21:51:29.072 [conn23218] command appdata345.$cmd command: findandmodify { findandmodify: "_Installation", query: { _id: "qaKos25LyK", _wperm: { $in: [ null, "*" ] } }, update: { $set: {_updated_at: new Date(1423777889072), country: "United States", numUses: 4 } }, new: true } update: { $set: { _updated_at: new Date(142377788907), country: "United States", numUses: 4 } } nscanned:1 nscannedObjects:1 nMatched:1 nModified:1 keyUpdates:1 numYields:0 locks(micros) w:225 reslen:704 0ms` 28 | sample26FAMCCLine = `Thu Feb 12 21:51:29.072 [conn23218] command appdata345.$cmd command: findAndModify { findAndModify: "_Installation", query: { _id: "qaKos25LyK", _wperm: { $in: [ null, "*" ] } }, update: { $set: {_updated_at: new Date(1423777889072), country: "United States", 
numUses: 4 } }, new: true } update: { $set: { _updated_at: new Date(142377788907), country: "United States", numUses: 4 } } nscanned:1 nscannedObjects:1 nMatched:1 nModified:1 keyUpdates:1 numYields:0 locks(micros) w:225 reslen:704 0ms` 29 | sample26IDHackQueryLine = `Thu Feb 12 21:51:29.072 [conn136068] query appdata368._Installation query: { _id: "w9aETYE3FO" } planSummary: IDHACK ntoreturn:0 ntoskip:0 nscanned:1 nscannedObjects:1 idhack:1 keyUpdates:0 numYields:0 locks(micros) r:33 nreturned:1 reslen:601 0ms` 30 | sample26CmdInsertLine = `Tue Feb 17 21:35:02.677 [conn591462] command appdata104.$cmd command: insert { insert: "_JobStatus", documents: [ { _id: "4Ps2WjKpkX", createdAt: new Date(1424208902552), jobName: "abc123", description: "", source: "api", params: "{}", status: "pending", expiresAt: new Date(1426800902598) } ], writeConcern: { w: 1, wtimeout: 10.0 }, ordered: true } keyUpdates:0 numYields:0 locks(micros) w:144 reslen:80 0ms` 31 | // make sure we handle $all and $each 32 | sampleAllQueryLine = `Mon Oct 5 20:53:27.002 I QUERY [conn1098902] query appdata352.Message query: { $query: { received: { $ne: true }, b: { $ne: "20150617071237576PHM" }, _rperm: { $in: [ null, "*", "pF13eUh1pl" ] }, a: { $all: [ "1234567" ], $in: [ "1234568", "1234569", "1234570" ] } }, $maxScan: 500000, $maxTimeMS: 29000, $comment: "queryhash:d1066b6bc1549f0662fe03e0002ab6da" } planSummary: IXSCAN { a: 1, b: 1 } ntoreturn:300 ntoskip:0 nscanned:25 nscannedObjects:24 keyUpdates:0 writeConflicts:0 numYields:0 nreturned:0 reslen:20 locks:{ Global: { acquireCount: { r: 2 } }, Database: { acquireCount: { r: 1 } }, Collection: { acquireCount: { r: 1 } } } user_key_comparison_count:1959 block_cache_hit_count:28 block_read_count:0 block_read_byte:0 internal_key_skipped_count:24 internal_delete_skipped_count:0 get_from_memtable_count:24 seek_on_memtable_count:2 seek_child_seek_count:12 0ms` 33 | sampleEachQueryLine = `Mon Oct 5 20:21:54.712 I COMMAND [conn888146369] command 
appdata495.$cmd command: findAndModify { findAndModify: "_Installation", query: { _id: "pe4lID8GWy", _wperm: { $in: [ null, "*" ] } }, update: { $set: { _updated_at: new Date(1444076514710) }, $addToSet: { channels: { $each: [ "cAPA91bGzIeB7L6HFMbyjnh-USg0Y9X53Fjgqx6lLOFlUZJuV2DtGFSCPMZ2NYQV68HU", "" ] } } }, new: true } update: { $set: { _updated_at: new Date(1444076514710) }, $addToSet: { channels: { $each: [ "cAPA91bGzIeB7L6H2XW5x8XHpRFMbyjnh-USg0Y9X53Fjgqx6lLOFlUZJuV2DtGFSCPMZ2NYQV68HU", "" ] } } } nscanned:1 nscannedObjects:1 nMatched:1 nModified:1 keyUpdates:1 writeConflicts:0 numYields:0 reslen:794 locks:{ Global: { acquireCount: { r: 2, w: 2 } }, Database: { acquireCount: { w: 2 } }, Collection: { acquireCount: { w: 1 } }, oplog: { acquireCount: { w: 1 } } } user_key_comparison_count:852 block_cache_hit_count:15 block_read_count:0 block_read_byte:0 internal_key_skipped_count:2 internal_delete_skipped_count:0 get_from_memtable_count:3 seek_on_memtable_count:3 seek_child_seek_count:15 0ms` 34 | // handle write ops that set ACLs 35 | sampleACLWriteLine = `Mon Oct 5 20:21:46.464 I COMMAND [conn1017392768] command appdata709.$cmd command: findAndModify { findAndModify: "_Session", query: { _id: "abcdefghij" }, update: { $set: { _session_token: "r:123456", _acl: { abc123: { r: true, w: true } }, _wperm: [ "987zyx" ], createdWith: { action: "signup", authProvider: "password" }, restricted: false, _p_user: "_User$r3bC9HwcFq", _updated_at: new Date(1444076506463), _rperm: [ "987zyx" ], installationId: "44ee14bc-44ae-44d4-871e-dafc54ccdf0c", expiresAt: new Date(1475612506452) } }, new: true } update: { $set: { _session_token: "r:123456", _acl: { abc123: { r: true, w: true } }, _wperm: [ "987zyx" ], createdWith: { action: "signup", authProvider: "password" }, restricted: false, _p_user: "_User$r3bC9HwcFq", _updated_at: new Date(1444076506463), _rperm: [ "987zyx" ], installationId: "44ee14bc-44ae-44d4-871e-dafc54ccdf0c", expiresAt: new Date(1475612506452) } } 
nscanned:1 nscannedObjects:1 nMatched:1 nModified:1 keyUpdates:1 writeConflicts:0 numYields:0 reslen:487 locks:{ Global: { acquireCount: { r: 2, w: 2 } }, Database: { acquireCount: { w: 2 } }, Collection: { acquireCount: { w: 1 } }, oplog: { acquireCount: { w: 1 } } } user_key_comparison_count:659 block_cache_hit_count:11 block_read_count:0 block_read_byte:0 internal_key_skipped_count:0 internal_delete_skipped_count:0 get_from_memtable_count:3 seek_on_memtable_count:2 seek_child_seek_count:10 0ms`
	sample32QueryLine = `Thu Dec 17 01:01:42.311 I QUERY [conn43] query appdata352.HistoricPotential query: { $query: { a: "123456789", b: true, _rperm: { $in: [ null, "*", "abcdefghik" ] } }, $orderby: { _created_at: -1 }, $maxScan: 500000, $maxTimeMS: 29000, $comment: "queryhash:4dc1bff80c867af8d6a484c8d63edd9c" } planSummary: IXSCAN { a: 1, _created_at: -1 } ntoreturn:1000 ntoskip:0 keysExamined:200 docsExamined:200 cursorExhausted:1 keyUpdates:0 writeConflicts:0 numYields:0 nreturned:119 reslen:68247 locks:{ Global: { acquireCount: { r: 2 } }, Database: { acquireCount: { r: 1 } }, Collection: { acquireCount: { r: 1 } } } 2ms`
)

// TestPEGParser ensures every sample log line parses without error, then
// pins the exact parsed output for one representative findandmodify line.
func TestPEGParser(t *testing.T) {
	t.Parallel()

	lines := []string{sampleUpdateLine, sampleCountLine, sampleCountWithPlanSummary, sampleEmptyCountLine, sampleInsertLine, sampleQueryLine, sampleRemoveLine, alternateQueryLine, sampleNearSphereLine, sampleGeoBoxLine, sample26QueryLine, sample26GeoQueryLine, sample26UpdateLine, sample26CommentLine, sample26FAMLine, sample26FAMCCLine, sample26IDHackQueryLine, sample26CmdInsertLine, sampleAllQueryLine, sampleEachQueryLine, sampleACLWriteLine, sample32QueryLine}

	for _, line := range lines {
		if _, err := parser.ParseLogLine(line); err != nil {
			t.Error(line, err)
		}
	}

	// check explicit output for one command
	result, err := parser.ParseLogLine(sample26FAMLine)
	if err != nil {
		// Previously this error was silently discarded; fail fast so a nil
		// result cannot produce a confusing DeepEqual diff below.
		t.Fatal(err)
	}
	resultJSON, err := json.Marshal(result)
	if err != nil {
		// Fatal (was Error) — comparing output after a failed marshal is
		// meaningless.
		t.Fatal(err)
	}
	ensure.DeepEqual(t, string(resultJSON),
		`{"command":{"findandmodify":"_Installation","new":true,"query":{"_id":"qaKos25LyK","_wperm":{"$in":[null,"*"]}},"update":{"$set":{"_updated_at":{"$date":"2015-02-12T21:51:29.072Z"},"country":"United States","numUses":4}}},"command_type":"findandmodify","context":"conn23218","duration_ms":"0","keyUpdates":1,"nMatched":1,"nModified":1,"ns":"appdata345.$cmd","nscanned":1,"nscannedObjects":1,"numYields":0,"op":"command","reslen":704,"timestamp":"Thu Feb 12 21:51:29.072","update":{"$set":{"_updated_at":{"$date":"1974-07-06T21:23:08.907Z"},"country":"United States","numUses":4}},"w":225}`)
}

// TestGetQuerySignature checks that query normalization replaces concrete
// values with "?" placeholders while preserving the query's structure, for
// queries, updates, and commands alike.
func TestGetQuerySignature(t *testing.T) {
	t.Parallel()

	cases := []struct{ line, key, queryType, expected string }{
		{sampleQueryLine, "query", "query", `{"$maxScan":500000,"$orderby":{"_updated_at":-1},"$query":{"$or":[{"a":{"$exists":"?"},"b":{"$lte":"?"}},{"a":{"$gte":"?","$lte":"?"},"b":{"$lte":"?"}}],"_id":{"$nin":["?"]},"_rperm":{"$in":["?"]},"_updated_at":{"$lte":"?"},"c":"?","d":{"$gte":"?","$lte":"?"}}}`},
		{sampleUpdateLine, "query", "update", `{"_id":"?","_wperm":{"$in":["?"]}}`},
		{sample26FAMLine, "command", "command", `{"new":"?","query":{"_id":"?","_wperm":{"$in":["?"]}},"update":{"$set":{"_updated_at":"?","country":"?","numUses":"?"}}}`},
		{sample26FAMCCLine, "command", "command", `{"new":"?","query":{"_id":"?","_wperm":{"$in":["?"]}},"update":{"$set":{"_updated_at":"?","country":"?","numUses":"?"}}}`},
		{sampleCountLine, "command", "command", `{"_id":{"$lt":"?"},"deviceType":{"$in":["?"]}}`},
		{sampleEmptyCountLine, "command", "command", `{}`},
		{sampleNearSphereLine, "query", "query", `{"$maxScan":500000,"$query":{"_rperm":{"$in":["?"]},"location":{"$maxDistance":"?","$nearSphere":"[?,?]"}}}`},
		{sampleGeoBoxLine, "query", "query", `{"$maxScan":500000,"$orderby":{"TestDate":-1},"$query":{"Venue":"?","_rperm":{"$in":["?"]},"location":{"$within":{"$box":"[[?,?],[?,?]]"}}}}`},
		{alternateQueryLine, "query", "query", `{"relatedId":"?"}`},
		{sampleRemoveLine, "query", "query", `{"_id":"?","_wperm":{"$in":["?"]}}`},
		{sampleAllQueryLine, "query", "query", `{"$comment":"queryhash:d1066b6bc1549f0662fe03e0002ab6da","$maxScan":500000,"$maxTimeMS":29000,"$query":{"_rperm":{"$in":["?"]},"a":{"$all":["?"],"$in":["?"]},"b":{"$ne":"?"},"received":{"$ne":"?"}}}`},
		{sampleEachQueryLine, "command", "command", `{"new":"?","query":{"_id":"?","_wperm":{"$in":["?"]}},"update":{"$addToSet":{"channels":{"$each":["?"]}},"$set":{"_updated_at":"?"}}}`},
		{sampleACLWriteLine, "command", "command", `{"new":"?","query":{"_id":"?"},"update":{"$set":{"_acl":"?","_p_user":"?","_rperm":["?"],"_session_token":"?","_updated_at":"?","_wperm":["?"],"createdWith":{"action":"?","authProvider":"?"},"expiresAt":"?","installationId":"?","restricted":"?"}}}`},
	}

	for i, c := range cases {
		values, err := parser.ParseLogLine(c.line)
		if err != nil {
			t.Fatal(c.line, err)
		}
		queryMap := values[c.key].(map[string]interface{})
		qs, err := generateQuerySignature(queryMap, c.queryType)
		if err != nil {
			t.Fatal(err)
		}
		signature := string(qs)
		if signature != c.expected {
			t.Fatalf("case %d: Query signature did not match expected signature, got:\n'%s' but expected:\n'%s'",
				i+1, signature, c.expected)
		}
	}
}

// TestParseQueryComment verifies that a $comment survives both parsing and
// signature generation intact (comments must not be anonymized to "?").
func TestParseQueryComment(t *testing.T) {
	t.Parallel()

	// Parse the line
	values, err := parser.ParseLogLine(sample26CommentLine)
	if err != nil {
		t.Fatal(err)
	}
	// Convert the query to a map
	queryMap := values["query"].(map[string]interface{})

	_, err = generateQuerySignature(queryMap, "query")
	if err != nil {
		t.Fatal(err)
	}
	// Make sure the comment exists in the map
	if queryMap["$comment"] != "{app_id:19015, pushd_id:8jhKA4aaad}" {
		t.Fatal("Failed to parse query comment.")
	}
}

// TestParsePlanSummary checks the structured decoding of planSummary
// fragments (COUNT_SCAN and multi-index IXSCAN forms).
func TestParsePlanSummary(t *testing.T) {
	t.Parallel()

	cases := []struct {
		input    string
		expected string
	}{
		{
			input:    sampleCountWithPlanSummary,
			expected: "[{\"COUNT_SCAN\":[{\"table\":1},{\"score\":-1}]}]",
		},
		{
			input:    sample26QueryLine,
			expected: "[{\"IXSCAN\":[{\"_id\":-1}]},{\"IXSCAN\":[{\"_rperm\":1}]}]",
		},
	}

	// Parse each line and check for the expected planSummary output
	for _, c := range cases {
		values, err := parser.ParseLogLine(c.input)
		ensure.Nil(t, err)
		planSummary, err := json.Marshal(values["planSummary"])
		ensure.Nil(t, err)

		ensure.DeepEqual(t, string(planSummary), c.expected)
	}
}

// Ensure we can extract the NS of several types of commands
// This is stored in a field where the key is the command type. Example:
// insert: "_JobStatus"
// findAndModify: "_Installation"
func TestParseCmdDoc(t *testing.T) {
	t.Parallel()

	// The original repeated an identical nil-check back-to-back for the
	// findAndModify line and duplicated the whole sequence for the insert
	// line, while discarding parse errors before type-asserting. One check
	// per line, with errors surfaced, covers the same cases.
	for _, line := range []string{sample26FAMCCLine, sample26CmdInsertLine} {
		values, err := parser.ParseLogLine(line)
		if err != nil {
			t.Fatal(line, err)
		}
		queryMap := values["command"].(map[string]interface{})
		if queryMap[values["command_type"].(string)] == nil {
			t.Fatalf("Could not find key '%s' in command document.", values["command_type"])
		}
	}
}

// TestScubaRecordPreparation pins the full JSON record emitted for a 2.6
// query line, including derived fields (query_signature, plan_summary,
// lock-micros aliases) and the injected logtailer_host.
func TestScubaRecordPreparation(t *testing.T) {
	t.Parallel()

	profile := &MongodbProfile{}

	// NOTE(review): logtailerHost is package-level state mutated by parallel
	// tests; both writers assign the same value so this appears benign, but
	// confirm it is not shared with non-test code paths.
	logtailerHost = `test-host`
	record, err := profile.ProcessRecord(sample26QueryLine)
	ensure.Nil(t, err)
	ensure.DeepEqual(t, record.(string),
		`{"collection":"_User","context":"conn2","database":"appdata401","duration_ms":"0","global_read_lock_micros":null,"global_write_lock_micros":null,"keyUpdates":0,"logtailer_host":"test-host","nreturned":1,"ns":"appdata401._User","nscanned":1,"nscannedObjects":1,"nscanned_objects":1,"ntoreturn":0,"ntoskip":0,"numYields":0,"num_yields":0,"op":"query","parser_result":"full","planSummary":[{"IXSCAN":[{"_id":-1}]},{"IXSCAN":[{"_rperm":1}]}],"plan_summary":"[{\"IXSCAN\":[{\"_id\":-1}]},{\"IXSCAN\":[{\"_rperm\":1}]}]","query":{"$maxScan":500000,"$query":{"_id":"?","_rperm":{"$in":["?"]}}},"query_signature":"{\"$maxScan\":500000,\"$query\":{\"_id\":\"?\",\"_rperm\":{\"$in\":[\"?\"]}}}","r":225,"read_lock_micros":225,"reslen":284,"scan_and_order":null,"timestamp":"Wed Dec 10 23:57:46.747","write_conflicts":null,"write_lock_micros":null}`)
}

// Test32NscannedAlias verifies that 3.2-style keysExamined/docsExamined
// fields are aliased onto the legacy nscanned/nscanned_objects names in the
// prepared record.
func Test32NscannedAlias(t *testing.T) {
	t.Parallel()

	profile := &MongodbProfile{}

	logtailerHost = `test-host`
	record, err := profile.ProcessRecord(sample32QueryLine)
	ensure.Nil(t, err)
	ensure.DeepEqual(t, record.(string),
		`{"collection":"HistoricPotential","comment":"queryhash:4dc1bff80c867af8d6a484c8d63edd9c","component":"QUERY","context":"conn43","cursorExhausted":1,"database":"appdata352","docsExamined":200,"duration_ms":"2","global_read_lock_micros":null,"global_write_lock_micros":null,"keyUpdates":0,"keysExamined":200,"locks":{"Collection":{"acquireCount":{"r":1}},"Database":{"acquireCount":{"r":1}},"Global":{"acquireCount":{"r":2}}},"logtailer_host":"test-host","nreturned":119,"ns":"appdata352.HistoricPotential","nscanned":200,"nscanned_objects":200,"ntoreturn":1000,"ntoskip":0,"numYields":0,"num_yields":0,"op":"query","parser_result":"full","planSummary":[{"IXSCAN":[{"a":1},{"_created_at":-1}]}],"plan_summary":"[{\"IXSCAN\":[{\"a\":1},{\"_created_at\":-1}]}]","query":{"$maxScan":500000,"$maxTimeMS":29000,"$orderby":{"_created_at":-1},"$query":{"_rperm":{"$in":["?"]},"a":"?","b":"?"}},"query_signature":"{\"$maxScan\":500000,\"$maxTimeMS\":29000,\"$orderby\":{\"_created_at\":-1},\"$query\":{\"_rperm\":{\"$in\":[\"?\"]},\"a\":\"?\",\"b\":\"?\"}}","read_lock_micros":null,"reslen":68247,"scan_and_order":null,"severity":"I","timestamp":"Thu Dec 17 01:01:42.311","writeConflicts":0,"write_conflicts":0,"write_lock_micros":null}`)
}
--------------------------------------------------------------------------------