├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── feature_request.md │ ├── question-discussion.md │ └── security-vulnerability-report.md ├── PULL_REQUEST_TEMPLATE.md ├── release.yml └── workflows │ ├── add-to-project-v2.yml │ ├── apply-labels.yml │ ├── stale.yml │ └── validate-pr-title.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── CONTRIBUTORS ├── LICENSE ├── NOTICE ├── OSSMETADATA ├── README.md ├── SECURITY.md ├── SUPPORT.md ├── cmd ├── ingest │ ├── .gitignore │ └── main.go └── shape │ ├── .gitignore │ └── main.go ├── go.mod ├── go.sum ├── logparser ├── internal │ └── logparser │ │ └── log_line.go ├── log_line.go └── log_line_test.go ├── queryshape ├── internal │ └── queryshape │ │ └── shape.go ├── shape.go └── shape_test.go └── scripts └── mongo_stats.sh /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Code owners file. 2 | # This file controls who is tagged for review for any given pull request. 3 | 4 | # For anything not explicitly taken by someone else: 5 | * @honeycombio/telemetry-team 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Let us know if something is not working as expected 4 | title: '' 5 | labels: 'type: bug' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 17 | 18 | **Versions** 19 | 20 | - Go: 21 | - Mongodbtools: 22 | 23 | **Steps to reproduce** 24 | 25 | 1. 
26 | 27 | **Additional context** 28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: 'type: enhancement' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 15 | 16 | **Is your feature request related to a problem? Please describe.** 17 | 18 | 19 | **Describe the solution you'd like** 20 | 21 | 22 | **Describe alternatives you've considered** 23 | 24 | 25 | **Additional context** 26 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question-discussion.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Question/Discussion 3 | about: General question about how things work or a discussion 4 | title: '' 5 | labels: 'type: discussion' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 15 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/security-vulnerability-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Security vulnerability report 3 | about: Let us know if you discover a security vulnerability 4 | title: '' 5 | labels: 'type: security' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 15 | **Versions** 16 | 17 | - Go: 18 | - Mongodbtools: 19 | 20 | **Description** 21 | 22 | (Please include any relevant CVE advisory links) 23 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 12 | 13 | ## Which problem is this PR solving? 
14 | 15 | - 16 | 17 | ## Short description of the changes 18 | 19 | - 20 | 21 | -------------------------------------------------------------------------------- /.github/release.yml: -------------------------------------------------------------------------------- 1 | # .github/release.yml 2 | 3 | changelog: 4 | exclude: 5 | labels: 6 | - no-changelog 7 | categories: 8 | - title: 💥 Breaking Changes 💥 9 | labels: 10 | - "version: bump major" 11 | - breaking-change 12 | - title: 💡 Enhancements 13 | labels: 14 | - "type: enhancement" 15 | - title: 🐛 Fixes 16 | labels: 17 | - "type: bug" 18 | - title: 🛠 Maintenance 19 | labels: 20 | - "type: maintenance" 21 | - title: 🤷 Other Changes 22 | labels: 23 | - "*" -------------------------------------------------------------------------------- /.github/workflows/add-to-project-v2.yml: -------------------------------------------------------------------------------- 1 | name: Add to project 2 | on: 3 | issues: 4 | types: [opened] 5 | pull_request_target: 6 | types: [opened] 7 | jobs: 8 | add-to-project: 9 | runs-on: ubuntu-latest 10 | name: Add issues and PRs to project 11 | steps: 12 | - uses: actions/add-to-project@main 13 | with: 14 | project-url: https://github.com/orgs/honeycombio/projects/11 15 | github-token: ${{ secrets.GHPROJECTS_TOKEN }} 16 | -------------------------------------------------------------------------------- /.github/workflows/apply-labels.yml: -------------------------------------------------------------------------------- 1 | name: Apply project labels 2 | on: [issues, pull_request_target, label] 3 | jobs: 4 | apply-labels: 5 | runs-on: ubuntu-latest 6 | name: Apply common project labels 7 | steps: 8 | - uses: honeycombio/oss-management-actions/labels@v1 9 | with: 10 | github-token: ${{ secrets.GITHUB_TOKEN }} 11 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 
1 | name: 'Close stale issues and PRs' 2 | on: 3 | schedule: 4 | - cron: '30 1 * * *' 5 | 6 | jobs: 7 | stale: 8 | name: 'Close stale issues and PRs' 9 | runs-on: ubuntu-latest 10 | permissions: 11 | issues: write 12 | pull-requests: write 13 | 14 | steps: 15 | - uses: actions/stale@v4 16 | with: 17 | start-date: '2021-09-01T00:00:00Z' 18 | stale-issue-message: 'Marking this issue as stale because it has been open 14 days with no activity. Please add a comment if this is still an ongoing issue; otherwise this issue will be automatically closed in 7 days.' 19 | stale-pr-message: 'Marking this PR as stale because it has been open 30 days with no activity. Please add a comment if this PR is still relevant; otherwise this PR will be automatically closed in 7 days.' 20 | close-issue-message: 'Closing this issue due to inactivity. Please see our [Honeycomb OSS Lifecycle and Practices](https://github.com/honeycombio/home/blob/main/honeycomb-oss-lifecycle-and-practices.md).' 21 | close-pr-message: 'Closing this PR due to inactivity. Please see our [Honeycomb OSS Lifecycle and Practices](https://github.com/honeycombio/home/blob/main/honeycomb-oss-lifecycle-and-practices.md).'
22 | days-before-issue-stale: 14 23 | days-before-pr-stale: 30 24 | days-before-issue-close: 7 25 | days-before-pr-close: 7 26 | any-of-labels: 'status: info needed,status: revision needed' 27 | -------------------------------------------------------------------------------- /.github/workflows/validate-pr-title.yml: -------------------------------------------------------------------------------- 1 | name: "Validate PR Title" 2 | 3 | on: 4 | pull_request: 5 | types: 6 | - opened 7 | - edited 8 | - synchronize 9 | 10 | jobs: 11 | main: 12 | name: Validate PR title 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: amannn/action-semantic-pull-request@v5 16 | id: lint_pr_title 17 | name: "🤖 Check PR title follows conventional commit spec" 18 | env: 19 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 20 | with: 21 | # Have to specify all types because `maint` and `rel` aren't defaults 22 | types: | 23 | maint 24 | rel 25 | fix 26 | feat 27 | chore 28 | ci 29 | docs 30 | style 31 | refactor 32 | perf 33 | test 34 | ignoreLabels: | 35 | "type: dependencies" 36 | # When the previous steps fails, the workflow would stop. By adding this 37 | # condition you can continue the execution with the populated error message. 38 | - if: always() && (steps.lint_pr_title.outputs.error_message != null) 39 | name: "📝 Add PR comment about using conventional commit spec" 40 | uses: marocchino/sticky-pull-request-comment@v2 41 | with: 42 | header: pr-title-lint-error 43 | message: | 44 | Thank you for contributing to the project! 🎉 45 | 46 | We require pull request titles to follow the [Conventional Commits specification](https://www.conventionalcommits.org/en/v1.0.0/) and it looks like your proposed title needs to be adjusted. 47 | 48 | Make sure to prepend with `feat:`, `fix:`, or another option in the list below. 49 | 50 | Once you update the title, this workflow will re-run automatically and validate the updated title. 
51 | 52 | Details: 53 | 54 | ``` 55 | ${{ steps.lint_pr_title.outputs.error_message }} 56 | ``` 57 | 58 | # Delete a previous comment when the issue has been resolved 59 | - if: ${{ steps.lint_pr_title.outputs.error_message == null }} 60 | name: "❌ Delete PR comment after title has been updated" 61 | uses: marocchino/sticky-pull-request-comment@v2 62 | with: 63 | header: pr-title-lint-error 64 | delete: true 65 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | .idea 3 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | This project has adopted the Honeycomb User Community Code of Conduct to clarify expected behavior in our community. 4 | 5 | https://www.honeycomb.io/honeycomb-user-community-code-of-conduct/ -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guide 2 | 3 | Please see our [general guide for OSS lifecycle and practices.](https://github.com/honeycombio/home/blob/main/honeycomb-oss-lifecycle-and-practices.md) 4 | -------------------------------------------------------------------------------- /CONTRIBUTORS: -------------------------------------------------------------------------------- 1 | Mongodbtools contributors: 2 | 3 | Chris Toshok 4 | Wan Bachtiar 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016-Present Honeycomb, Hound Technology, Inc. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | -------------------------------------------------------------------------------- /OSSMETADATA: -------------------------------------------------------------------------------- 1 | osslifecycle=archived 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mongodbtools 2 | 3 | [![OSS Lifecycle](https://img.shields.io/osslifecycle/honeycombio/mongodbtools)](https://github.com/honeycombio/home/blob/main/honeycomb-oss-lifecycle-and-practices.md) 4 | 5 | **STATUS: this project is [archived](https://github.com/honeycombio/home/blob/main/honeycomb-oss-lifecycle-and-practices.md).** 6 | 7 | We recommend using [honeytail](https://github.com/honeycombio/honeytail) to move MongoDB logs into Honeycomb. 8 | 9 | Questions? You can chat with us in the Honeycomb Pollinators Slack. You can find a direct link to request an invite in [Spread the Love: Appreciating Our Pollinators Community](https://www.honeycomb.io/blog/spread-the-love-appreciating-our-pollinators-community/). 10 | 11 | ## Summary 12 | 13 | Two packages. One for ingesting mongodb logs (logparser) and the other for query normalization (queryshape). 14 | 15 | See [our docs](https://honeycomb.io/docs) for more about Honeycomb, and our [MongoDB-specific docs](https://honeycomb.io/docs/connect/mongodb/). 16 | 17 | ## Stats script 18 | 19 | scripts/mongo_stats.sh is a shell script that collects some statistics from the server and the mongo instance and submits them to Honeycomb. It is a template for you use in creating your own stats scripts - it might work for you as is but likely needs modification to fit your environment. It has been tested against MongoDB 3.2. 20 | 21 | Though it is technically a shell script, the majority of the logic in the script is javascript interpreted by the mongo client. 
The javascript functions parse server information and collect information about locks and other server statistics, then return it as a JSON object. That JSON object is the payload that is sent to Honeycomb. 22 | 23 | It is intended to be run from cron every minute - internally it runs 4 times, submitting statistics every 15 seconds. 24 | 25 | ## Thanks 26 | 27 | The logparser package is spiritually derived from Travis Cline's PEG 28 | parser over at https://github.com/tmc/mongologtools. While the parser 29 | code itself is obviously different (this parser is hand-coded), 30 | there are api similarities, and Travis's parser was definitely helpful 31 | when figuring out just what the heck the logs were supposed to look like. 32 | 33 | His license is replicated here: 34 | 35 | ``` 36 | Copyright (c) 2015, Travis Cline 37 | 38 | Permission to use, copy, modify, and/or distribute this software for any purpose 39 | with or without fee is hereby granted, provided that the above copyright notice 40 | and this permission notice appear in all copies. 41 | 42 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 43 | REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 44 | FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, 45 | INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS 46 | OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER 47 | TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 48 | THIS SOFTWARE. 49 | ``` 50 | 51 | Another debt of gratitude goes out to Thomas Rückstieß and his 52 | _awesome_ mongodb log spec at 53 | https://github.com/rueckstiess/mongodb-log-spec. In particular the 54 | queryshape package attempts (and fails in some cases) to match up with his spec. 55 | 56 | ## Contributions 57 | 58 | Features, bug fixes and other changes to mongodbtools are gladly accepted. 
Please 59 | open issues or a pull request with your change. Remember to add your name to the 60 | CONTRIBUTORS file! 61 | 62 | All contributions will be released under the Apache License 2.0. 63 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Reporting Security Issues 2 | 3 | If you discover a security vulnerability, please open an issue with label `type: security`. 4 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # How to Get Help 2 | 3 | This project uses GitHub issues to track bugs, feature requests, and questions about using the project. Please search for existing issues before filing a new one. 4 | -------------------------------------------------------------------------------- /cmd/ingest/.gitignore: -------------------------------------------------------------------------------- 1 | ingest 2 | -------------------------------------------------------------------------------- /cmd/ingest/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "flag" 6 | "fmt" 7 | "log" 8 | "os" 9 | "runtime/pprof" 10 | "time" 11 | 12 | "github.com/honeycombio/mongodbtools/logparser" 13 | ) 14 | 15 | var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to file") 16 | var failures = flag.Bool("fail", false, "write failed lines to stdout") 17 | var successes = flag.Bool("success", false, "write successfully parsed maps to stdout") 18 | var timings = flag.Bool("timings", false, "write intermediate timings/counts to stdout") 19 | 20 | func main() { 21 | flag.Parse() 22 | if *cpuprofile != "" { 23 | f, err := os.Create(*cpuprofile) 24 | if err != nil { 25 | log.Fatal(err) 26 | } 27 | pprof.StartCPUProfile(f) 28 | defer 
pprof.StopCPUProfile() 29 | } 30 | 31 | file, err := os.Open(flag.Args()[0]) 32 | if err != nil { 33 | log.Fatal(err) 34 | } 35 | defer file.Close() 36 | 37 | var logparserTime time.Duration 38 | var logparserSuccess int64 39 | var logparserFailure int64 40 | 41 | scanner := bufio.NewScanner(file) 42 | scanner.Buffer(make([]byte, 1024*1024), 0) 43 | for scanner.Scan() { 44 | line := scanner.Text() 45 | now := time.Now() 46 | values, err := logparser.ParseLogLine(line) 47 | 48 | logparserTime += time.Since(now) 49 | 50 | if err != nil { 51 | logparserFailure++ 52 | if *failures { 53 | fmt.Println("FAIL:", line, err.Error()) 54 | } 55 | } else { 56 | logparserSuccess++ 57 | if *successes { 58 | fmt.Println("SUCCESS:", values) 59 | } 60 | } 61 | 62 | if *timings && logparserSuccess > 0 && logparserSuccess%50000 == 0 { 63 | fmt.Printf("%dms for %d successfully parsed log lines (%d lines/sec). %d failures (%.2f%%)\n", logparserTime.Nanoseconds()/1e6, logparserSuccess, int64(float64(logparserSuccess)/logparserTime.Seconds()), logparserFailure, 100.0*float64(logparserFailure)/float64(logparserFailure+logparserSuccess)) 64 | } 65 | } 66 | 67 | if err := scanner.Err(); err != nil { 68 | log.Fatal(err) 69 | } 70 | 71 | fmt.Printf("%dms for %d successfully parsed log lines (%d lines/sec). 
%d failures (%.2f%%)\n", logparserTime.Nanoseconds()/1e6, logparserSuccess, int64(float64(logparserSuccess)/logparserTime.Seconds()), logparserFailure, 100.0*float64(logparserFailure)/float64(logparserFailure+logparserSuccess)) 72 | } 73 | -------------------------------------------------------------------------------- /cmd/shape/.gitignore: -------------------------------------------------------------------------------- 1 | shape 2 | -------------------------------------------------------------------------------- /cmd/shape/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/honeycombio/mongodbtools/logparser" 8 | "github.com/honeycombio/mongodbtools/queryshape" 9 | ) 10 | 11 | func main() { 12 | query, err := logparser.ParseQuery(os.Args[1]) 13 | if err != nil { 14 | fmt.Println(err.Error()) 15 | os.Exit(1) 16 | } 17 | 18 | fmt.Println("Query shape:", queryshape.GetQueryShape(query)) 19 | } 20 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/honeycombio/mongodbtools 2 | 3 | go 1.15 4 | 5 | require github.com/stretchr/testify v1.6.1 6 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= 2 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 4 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 5 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 6 | github.com/stretchr/testify v1.6.1 
h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= 7 | github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 8 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 9 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 10 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= 11 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 12 | -------------------------------------------------------------------------------- /logparser/internal/logparser/log_line.go: -------------------------------------------------------------------------------- 1 | package logparser 2 | 3 | import ( 4 | "bytes" 5 | "errors" 6 | "fmt" 7 | "math" 8 | "strconv" 9 | "strings" 10 | "time" 11 | "unicode" 12 | ) 13 | 14 | const ( 15 | endRune rune = 1114112 16 | ) 17 | 18 | type partialLogLineError struct { 19 | InnerError error 20 | } 21 | 22 | func (p partialLogLineError) Error() string { 23 | return fmt.Sprintf("Partial log line: %v", p.InnerError) 24 | } 25 | 26 | func IsPartialLogLine(err error) bool { 27 | _, ok := err.(partialLogLineError) 28 | return ok 29 | } 30 | 31 | func ParseLogLine(input string) (map[string]interface{}, error) { 32 | p := LogLineParser{Buffer: input} 33 | p.Init() 34 | if err := p.Parse(); err != nil { 35 | return nil, err 36 | } 37 | return p.Fields, nil 38 | } 39 | 40 | func ParseQuery(query string) (map[string]interface{}, error) { 41 | p := LogLineParser{Buffer: query} 42 | p.Init() 43 | rv, err := p.parseFieldValue("query") 44 | if err != nil { 45 | return nil, err 46 | } 47 | if m, ok := rv.(map[string]interface{}); ok { 48 | return m, nil 49 | } 50 | return nil, errors.New("query string does not parse to a doc") 51 | } 52 | 53 | type LogLineParser struct { 54 | Buffer string 55 | Fields map[string]interface{} 56 | 57 | 
runes []rune 58 | position int 59 | } 60 | 61 | func (p *LogLineParser) Init() { 62 | p.runes = append([]rune(p.Buffer), endRune) 63 | p.Fields = make(map[string]interface{}) 64 | } 65 | 66 | func (p *LogLineParser) Parse() error { 67 | var err error 68 | if err = p.parseTimestamp(); err != nil { 69 | return err 70 | } 71 | if p.eatWS().lookahead(0) == '[' { 72 | // we assume version < 3.0 73 | if err = p.parseContext(); err != nil { 74 | return err 75 | } 76 | err = p.parseMessage() 77 | } else { 78 | // we assume version > 3.0 79 | if err = p.parseSeverity(); err != nil { 80 | return err 81 | } 82 | if err = p.parseComponent(); err != nil { 83 | return err 84 | } 85 | if err = p.parseContext(); err != nil { 86 | return err 87 | } 88 | err = p.parseMessage() 89 | } 90 | 91 | if err != nil { 92 | return partialLogLineError{InnerError: err} 93 | } 94 | 95 | return nil 96 | } 97 | 98 | func (p *LogLineParser) parseTimestamp() error { 99 | var readTimestamp string 100 | var err error 101 | 102 | c := p.eatWS().lookahead(0) 103 | if unicode.IsDigit(c) { 104 | // we assume it's either iso8601-utc or iso8601-local 105 | if readTimestamp, err = p.readUntil(unicode.Space); err != nil { 106 | return err 107 | } 108 | } else { 109 | // we assume it's ctime or ctime-no-ms 110 | var dayOfWeek, month, day, time string 111 | 112 | if dayOfWeek, err = validDayOfWeek(p.readUntil(unicode.Space)); err != nil { 113 | return err 114 | } 115 | 116 | if month, err = validMonth(p.eatWS().readUntil(unicode.Space)); err != nil { 117 | return err 118 | } 119 | 120 | if day, err = p.eatWS().readUntil(unicode.Space); err != nil { 121 | return err 122 | } 123 | 124 | if time, err = p.eatWS().readUntil(unicode.Space); err != nil { 125 | return err 126 | } 127 | readTimestamp = dayOfWeek + " " + month + " " + day + " " + time 128 | } 129 | 130 | p.Fields["timestamp"] = readTimestamp 131 | return nil 132 | } 133 | 134 | func (p *LogLineParser) parseSeverity() error { 135 | var err error 136 | if 
p.Fields["severity"], err = severityToString(p.eatWS().advance()); err != nil { 137 | return err 138 | } 139 | if err = p.expect(unicode.Space); err != nil { 140 | return err 141 | } 142 | return nil 143 | } 144 | 145 | func (p *LogLineParser) parseComponent() error { 146 | var component string 147 | var err error 148 | 149 | if p.eatWS().lookahead(0) == '-' { 150 | component = "-" 151 | p.advance() // skip the '-' 152 | } else { 153 | if component, err = p.readWhile([]interface{}{unicode.Upper}); err != nil { 154 | return err 155 | } 156 | } 157 | if !p.validComponentName(component) { 158 | return errors.New("unrecognized component name") 159 | } 160 | 161 | p.Fields["component"] = component 162 | return nil 163 | } 164 | 165 | func (p *LogLineParser) parseContext() error { 166 | var err error 167 | if err = p.eatWS().expect('['); err != nil { 168 | return err 169 | } 170 | 171 | var context string 172 | if context, err = p.readUntilRune(']'); err != nil { 173 | return err 174 | } 175 | p.advance() // skip the ']' 176 | 177 | p.Fields["context"] = context 178 | return nil 179 | } 180 | 181 | func (p *LogLineParser) parseSharding() error { 182 | message, err := p.readUntilRune(':') 183 | if err != nil { 184 | return err 185 | } 186 | 187 | p.advance() // skip the ':' 188 | p.eatWS() 189 | 190 | if !strings.HasPrefix(message, "about to log metadata event into") { 191 | return errors.New("unrecognized sharding log line") 192 | } 193 | 194 | p.Fields["sharding_message"] = message 195 | lastSpace := strings.LastIndex(message, " ") 196 | p.Fields["sharding_collection"] = message[lastSpace+1:] 197 | 198 | var changelog interface{} 199 | if changelog, err = p.parseJSONMap(); err != nil { 200 | return err 201 | } 202 | p.Fields["sharding_changelog"] = changelog 203 | return nil 204 | } 205 | 206 | func (p *LogLineParser) parseMessage() error { 207 | p.eatWS() 208 | 209 | savedPosition := p.position 210 | 211 | if p.Fields["component"] == "SHARDING" { 212 | savedPosition := 
p.position 213 | err := p.parseSharding() 214 | if err == nil { 215 | return nil 216 | } 217 | p.position = savedPosition 218 | } 219 | 220 | // check if this message is an operation 221 | operation, err := p.readUntil(unicode.Space) 222 | if err == nil && p.validOperationName(operation) { 223 | // yay, an operation. 224 | p.Fields["operation"] = operation 225 | 226 | var namespace string 227 | if namespace, err = p.eatWS().readUntil(unicode.Space); err != nil { 228 | return err 229 | } 230 | p.Fields["namespace"] = namespace 231 | 232 | if err = p.parseOperationBody(); err != nil { 233 | return err 234 | } 235 | } else { 236 | p.position = savedPosition 237 | 238 | if p.Fields["message"], err = p.readUntilEOL(); err != nil { 239 | return err 240 | } 241 | } 242 | 243 | return nil 244 | } 245 | 246 | func (p *LogLineParser) parseOperationBody() error { 247 | for p.runes[p.position] != endRune { 248 | var err error 249 | var done bool 250 | 251 | if done, err = p.parseFieldAndValue(); err != nil { 252 | return err 253 | } 254 | if done { 255 | // check for a duration 256 | dur, err := p.readDuration() 257 | if err != nil { 258 | return err 259 | } 260 | p.Fields["duration_ms"] = dur 261 | break 262 | } 263 | } 264 | return nil 265 | } 266 | 267 | func (p *LogLineParser) parseFieldAndValue() (bool, error) { 268 | var fieldName string 269 | var fieldValue interface{} 270 | var err error 271 | 272 | p.eatWS() 273 | 274 | savedPosition := p.position 275 | if fieldName, err = p.readWhileNot([]interface{}{':', unicode.Space}); err != nil { 276 | p.position = savedPosition 277 | return true, nil // swallow the error to give our caller a change to backtrack 278 | } 279 | p.advance() // skip the ':'/WS 280 | p.eatWS() // end eat any remaining WS 281 | 282 | // some known fields have a more complicated structure 283 | if fieldName == "planSummary" { 284 | if fieldValue, err = p.parsePlanSummary(); err != nil { 285 | return false, err 286 | } 287 | } else if fieldName == 
"command" { 288 | // >=2.6 has: command: ? 289 | // <2.6 has: command: 290 | firstCharInVal := p.lookahead(0) 291 | if firstCharInVal != '{' { 292 | name, err := p.readJSONIdentifier() 293 | if err != nil { 294 | return false, err 295 | } 296 | p.eatWS() 297 | p.Fields["command_type"] = name 298 | } 299 | 300 | if fieldValue, err = p.parseJSONMap(); err != nil { 301 | return false, err 302 | } 303 | } else if fieldName == "locks(micros)" { 304 | // < 2.8 305 | if fieldValue, err = p.parseLocksMicro(); err != nil { 306 | return false, err 307 | } 308 | p.eatWS() 309 | } else { 310 | if fieldValue, err = p.parseFieldValue(fieldName); err != nil { 311 | return false, err 312 | } 313 | if !p.validFieldName(fieldName) { 314 | return false, nil 315 | } 316 | } 317 | p.Fields[fieldName] = fieldValue 318 | return false, nil 319 | } 320 | 321 | func (p *LogLineParser) validFieldName(fieldName string) bool { 322 | if len(fieldName) == 0 { 323 | return false 324 | } 325 | for _, c := range fieldName { 326 | switch { 327 | case unicode.IsLetter(c): 328 | continue 329 | case unicode.IsDigit(c): 330 | continue 331 | case c == '_': 332 | continue 333 | case c == '$': 334 | continue 335 | default: 336 | return false 337 | } 338 | } 339 | return true 340 | } 341 | 342 | func (p *LogLineParser) parseFieldValue(fieldName string) (interface{}, error) { 343 | var fieldValue interface{} 344 | var err error 345 | 346 | firstCharInVal := p.lookahead(0) 347 | switch { 348 | case firstCharInVal == '{': 349 | if fieldValue, err = p.parseJSONMap(); err != nil { 350 | return nil, err 351 | } 352 | case unicode.IsDigit(firstCharInVal): 353 | if fieldValue, err = p.readNumber(); err != nil { 354 | return nil, err 355 | } 356 | case unicode.IsLetter(firstCharInVal): 357 | if fieldValue, err = p.readJSONIdentifier(); err != nil { 358 | return nil, err 359 | } 360 | case firstCharInVal == '"': 361 | if fieldValue, err = p.readStringValue(firstCharInVal); err != nil { 362 | return nil, err 363 | } 
364 | default: 365 | return nil, fmt.Errorf("unexpected start character for value of field '%s'", fieldName) 366 | } 367 | return fieldValue, nil 368 | } 369 | 370 | func (p *LogLineParser) parseLocksMicro() (map[string]int64, error) { 371 | rv := make(map[string]int64) 372 | 373 | for { 374 | c := p.eatWS().lookahead(0) 375 | if c != 'r' && c != 'R' && c != 'w' && c != 'W' { 376 | return rv, nil 377 | } else if p.lookahead(1) != ':' { 378 | return rv, nil 379 | } 380 | 381 | p.advance() 382 | p.advance() 383 | 384 | // not strictly correct - the value here should be an integer, not a float 385 | var duration float64 386 | var err error 387 | if duration, err = p.readNumber(); err != nil { 388 | return nil, err 389 | } 390 | rv[string([]rune{c})] = int64(duration) 391 | } 392 | 393 | } 394 | 395 | func (p *LogLineParser) parsePlanSummary() (interface{}, error) { 396 | var rv []interface{} 397 | 398 | for { 399 | elem, err := p.parsePlanSummaryElement() 400 | if err != nil { 401 | return nil, err 402 | } 403 | if elem != nil { 404 | rv = append(rv, elem) 405 | } 406 | 407 | if p.eatWS().lookahead(0) != ',' { 408 | break 409 | } else { 410 | p.advance() // skip the ',' 411 | } 412 | } 413 | 414 | return rv, nil 415 | } 416 | 417 | func (p *LogLineParser) parsePlanSummaryElement() (interface{}, error) { 418 | rv := make(map[string]interface{}) 419 | 420 | p.eatWS() 421 | 422 | savedPosition := p.position 423 | 424 | var stage string 425 | var err error 426 | 427 | if stage, err = p.readUpcaseIdentifier(); err != nil { 428 | p.position = savedPosition 429 | return nil, nil 430 | } 431 | 432 | c := p.eatWS().lookahead(0) 433 | if c == '{' { 434 | if rv[stage], err = p.parseJSONMap(); err != nil { 435 | return nil, nil 436 | } 437 | } else { 438 | rv[stage] = true 439 | } 440 | 441 | return rv, nil 442 | } 443 | 444 | func (p *LogLineParser) readNumber() (float64, error) { 445 | startPosition := p.position 446 | endPosition := startPosition 447 | numberChecks := 
[]interface{}{unicode.Digit, '.', '+', '-', 'e', 'E'} 448 | for check(p.runes[endPosition], numberChecks) { 449 | endPosition++ 450 | } 451 | 452 | if p.runes[endPosition] == endRune { 453 | return 0, errors.New("found end of line before expected unicode range") 454 | } 455 | 456 | p.position = endPosition 457 | 458 | return strconv.ParseFloat(string(p.runes[startPosition:endPosition]), 64) 459 | } 460 | 461 | func (p *LogLineParser) readDuration() (float64, error) { 462 | startPosition := p.position 463 | endPosition := startPosition 464 | 465 | for unicode.IsDigit(p.runes[endPosition]) { 466 | endPosition++ 467 | } 468 | 469 | if p.runes[endPosition] != 'm' || p.runes[endPosition+1] != 's' { 470 | return 0, errors.New("invalid duration specifier") 471 | } 472 | 473 | rv, err := strconv.ParseFloat(string(p.runes[startPosition:endPosition]), 64) 474 | p.position = endPosition + 2 475 | return rv, err 476 | } 477 | 478 | func (p *LogLineParser) parseJSONMap() (interface{}, error) { 479 | // we assume we're on the '{' 480 | if err := p.expect('{'); err != nil { 481 | return nil, err 482 | } 483 | 484 | rv := make(map[string]interface{}) 485 | 486 | for { 487 | var key string 488 | var value interface{} 489 | var err error 490 | 491 | // we support keys both of the form: { foo: ... } and { "foo": ... 
} 492 | fc := p.eatWS().lookahead(0) 493 | if fc == '"' || fc == '\'' { 494 | if key, err = p.readStringValue(fc); err != nil { 495 | return nil, err 496 | } 497 | } else { 498 | if key, err = p.readJSONIdentifier(); err != nil { 499 | return nil, err 500 | } 501 | } 502 | 503 | if key != "" { 504 | if err = p.eatWS().expect(':'); err != nil { 505 | return nil, err 506 | } 507 | if value, err = p.eatWS().parseJSONValue(); err != nil { 508 | return nil, err 509 | } 510 | rv[key] = value 511 | } 512 | 513 | commaOrRbrace := p.eatWS().lookahead(0) 514 | if commaOrRbrace == '}' { 515 | p.position++ 516 | break 517 | } else if commaOrRbrace == ',' { 518 | p.position++ 519 | } else { 520 | return nil, errors.New("expected '}' or ',' in json") 521 | } 522 | 523 | } 524 | 525 | return rv, nil 526 | } 527 | 528 | func (p *LogLineParser) parseJSONArray() (interface{}, error) { 529 | var rv []interface{} 530 | 531 | // we assume we're on the '[' 532 | if err := p.expect('['); err != nil { 533 | return nil, err 534 | } 535 | 536 | if p.eatWS().lookahead(0) == ']' { 537 | p.advance() 538 | return rv, nil 539 | } 540 | 541 | for { 542 | var value interface{} 543 | var err error 544 | 545 | if value, err = p.eatWS().parseJSONValue(); err != nil { 546 | return nil, err 547 | } 548 | 549 | rv = append(rv, value) 550 | 551 | commaOrRbrace := p.eatWS().lookahead(0) 552 | if commaOrRbrace == ']' { 553 | p.position++ 554 | break 555 | } else if commaOrRbrace == ',' { 556 | p.position++ 557 | } else { 558 | return nil, errors.New("expected ']' or ',' in json") 559 | } 560 | } 561 | 562 | return rv, nil 563 | } 564 | 565 | func (p *LogLineParser) parseJSONValue() (interface{}, error) { 566 | var value interface{} 567 | var err error 568 | 569 | firstCharInVal := p.lookahead(0) 570 | switch { 571 | case firstCharInVal == '{': 572 | if value, err = p.parseJSONMap(); err != nil { 573 | return nil, err 574 | } 575 | case firstCharInVal == '[': 576 | if value, err = p.parseJSONArray(); err != 
nil { 577 | return nil, err 578 | } 579 | case check(firstCharInVal, []interface{}{unicode.Digit, '-', '+', '.'}): 580 | if value, err = p.readNumber(); err != nil { 581 | return nil, err 582 | } 583 | case firstCharInVal == '"': 584 | // mongo doesn't follow generally accepted rules on how to handle nested quotes 585 | // when the inner quote character matches the outer quote character (escaping the inner 586 | // quote with a \). 587 | 588 | // so we have to do something equally terrible to read these values. we look ahead until we 589 | // find a value separator or an end to a json value - , ] } 590 | // that occurs after an even number of quotes. 591 | 592 | savedPosition := p.position + 1 593 | endPosition := savedPosition 594 | 595 | quoteCount := 1 596 | quotePosition := savedPosition - 1 597 | 598 | if endPosition < len(p.runes)-1 { 599 | lastRune := '"' 600 | 601 | for { 602 | r := p.runes[endPosition] 603 | if r == '"' { 604 | quoteCount++ 605 | quotePosition = endPosition 606 | } else if (r == ',' || r == '}' || r == ']') && lastRune == '"' { 607 | if quoteCount%2 == 0 { 608 | value = string(p.runes[savedPosition:quotePosition]) 609 | p.position = quotePosition + 1 610 | break 611 | } 612 | } 613 | 614 | if !unicode.IsSpace(r) { 615 | lastRune = r 616 | } 617 | 618 | endPosition++ 619 | if endPosition == len(p.runes) { 620 | return nil, errors.New("unexpected end of line reading json value") 621 | } 622 | } 623 | } 624 | case unicode.IsLetter(firstCharInVal): 625 | if value, err = p.readJSONIdentifier(); err != nil { 626 | return nil, err 627 | } 628 | if value == "null" { 629 | value = nil 630 | } else if value == "true" { 631 | value = true 632 | } else if value == "false" { 633 | value = false 634 | } else if value == "new" { 635 | if value, err = p.eatWS().readJSONIdentifier(); err != nil { 636 | return nil, err 637 | } 638 | if value != "Date" { 639 | return nil, fmt.Errorf("unexpected constructor: %s", value) 640 | } 641 | // we expect "new 
Date(123456789)" 642 | if err = p.expect('('); err != nil { 643 | return nil, err 644 | } 645 | var dateNum float64 646 | if dateNum, err = p.readNumber(); err != nil { 647 | return nil, err 648 | } 649 | if err = p.expect(')'); err != nil { 650 | return nil, err 651 | } 652 | 653 | if math.Floor(dateNum) != dateNum { 654 | return nil, errors.New("expected int in `new Date()`") 655 | } 656 | unixSec := int64(dateNum) / 1000 657 | unixNS := int64(dateNum) % 1000 * 1000000 658 | value = time.Unix(unixSec, unixNS) 659 | } else if value == "Timestamp" { 660 | var ts string 661 | if p.lookahead(0) == '(' { 662 | p.position++ 663 | if ts, err = p.readUntilRune(')'); err != nil { 664 | return nil, err 665 | } 666 | p.position++ 667 | } else { 668 | if ts, err = p.eatWS().readWhile([]interface{}{unicode.Digit, '|'}); err != nil { 669 | return nil, err 670 | } 671 | } 672 | value = "Timestamp(" + ts + ")" 673 | } else if value == "ObjectId" { 674 | if err = p.expect('('); err != nil { 675 | return nil, err 676 | } 677 | quote := p.lookahead(0) // keep ahold of the quote so we can match it 678 | if p.lookahead(0) != '\'' && p.lookahead(0) != '"' { 679 | return nil, errors.New("expected ' or \" in ObjectId") 680 | } 681 | p.position++ 682 | 683 | hexRunes := []interface{}{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'a', 'b', 'c', 'd', 'e', 'f'} 684 | var hex string 685 | if hex, err = p.eatWS().readWhile(hexRunes); err != nil { 686 | return nil, err 687 | } 688 | if err = p.expect(quote); err != nil { 689 | return nil, err 690 | } 691 | if err = p.expect(')'); err != nil { 692 | return nil, err 693 | } 694 | value = "ObjectId(" + hex + ")" 695 | } else if value == "BinData" { 696 | // BinData looks something like this: 697 | // BinData(0, D296E984640196C4D2977BECF468865948F7704C) 698 | // It's not very interesting, but we need to handle it 699 | if err = p.expect('('); err != nil { 700 | return nil, err 701 | } 702 | 703 | var bindata 
string 704 | if bindata, err = p.readUntilRune(')'); err != nil { 705 | return nil, err 706 | } 707 | 708 | if err = p.expect(')'); err != nil { 709 | return nil, err 710 | } 711 | 712 | value = "BinData(" + bindata + ")" 713 | } else if value == "UUID" { 714 | // UUID looks something like this: 715 | // UUID("bac26ad1-3d76-4da1-94cc-d541942f6889") 716 | // It's not very interesting, but we need to handle it 717 | if err = p.expect('('); err != nil { 718 | return nil, err 719 | } 720 | 721 | var uuid string 722 | if uuid, err = p.readUntilRune(')'); err != nil { 723 | return nil, err 724 | } 725 | 726 | if err = p.expect(')'); err != nil { 727 | return nil, err 728 | } 729 | 730 | value = "UUID(" + uuid + ")" 731 | } else { 732 | return nil, fmt.Errorf("unexpected start of JSON value: %s", value) 733 | } 734 | case firstCharInVal == '/': 735 | // Case for regex query field value 736 | // for example: { field: /^numbersOrLetters.?.?$/ } 737 | var exp string 738 | if err := p.expect('/'); err != nil { 739 | return nil, err 740 | } 741 | if exp, err = p.readUntilRune('/'); err != nil { 742 | return nil, err 743 | } 744 | 745 | var moreExp string 746 | var expression bytes.Buffer 747 | expression.WriteString(exp) 748 | // The '/' can be escaped, so keep looking for unescaped end of expression 749 | for { 750 | if p.lookahead(-1) != '\\' { 751 | break 752 | } 753 | p.position++ 754 | if moreExp, err = p.readUntilRune('/'); err != nil { 755 | return nil, err 756 | } 757 | expression.WriteString("/") 758 | expression.WriteString(moreExp) 759 | } 760 | if err = p.expect('/'); err != nil { 761 | return nil, err 762 | } 763 | value = "/" + expression.String() + "/" 764 | default: 765 | return nil, fmt.Errorf("unexpected start character for JSON value of field: %c", firstCharInVal) 766 | } 767 | 768 | return value, nil 769 | } 770 | 771 | func (p *LogLineParser) readStringValue(quote rune) (string, error) { 772 | var s string 773 | var err error 774 | 775 | p.advance() // 
skip starting quote 776 | if s, err = p.readUntilRune(quote); err != nil { 777 | return "", err 778 | } 779 | p.advance() // skip ending quote 780 | 781 | return s, nil 782 | } 783 | 784 | func (p *LogLineParser) readJSONIdentifier() (string, error) { 785 | startPosition := p.position 786 | endPosition := startPosition 787 | 788 | for check(p.runes[endPosition], []interface{}{unicode.Letter, unicode.Digit, '$', '_', '.', '*'}) { 789 | endPosition++ 790 | } 791 | 792 | p.position = endPosition 793 | return string(p.runes[startPosition:endPosition]), nil 794 | } 795 | 796 | func (p *LogLineParser) readUpcaseIdentifier() (string, error) { 797 | return p.readWhile([]interface{}{unicode.Upper, unicode.Digit, '_'}) 798 | } 799 | 800 | func (p *LogLineParser) readAlphaIdentifier() (string, error) { 801 | return p.readWhile([]interface{}{unicode.Letter, unicode.Digit, '_'}) 802 | } 803 | 804 | func (p *LogLineParser) readUntil(untilRangeTable *unicode.RangeTable) (string, error) { 805 | startPosition := p.position 806 | endPosition := startPosition 807 | for p.runes[endPosition] != endRune && !unicode.Is(untilRangeTable, p.runes[endPosition]) { 808 | endPosition++ 809 | } 810 | 811 | if p.runes[endPosition] == endRune { 812 | return "", errors.New("found end of line before expected unicode range") 813 | } 814 | 815 | p.position = endPosition 816 | 817 | return string(p.runes[startPosition:endPosition]), nil 818 | } 819 | 820 | func (p *LogLineParser) readUntilRune(untilRune rune) (string, error) { 821 | startPosition := p.position 822 | endPosition := startPosition 823 | for p.runes[endPosition] != untilRune && p.runes[endPosition] != endRune { 824 | endPosition++ 825 | } 826 | 827 | if p.runes[endPosition] == endRune && untilRune != endRune { 828 | return "", fmt.Errorf("found end of line before expected rune '%c'", untilRune) 829 | } 830 | 831 | p.position = endPosition 832 | 833 | return string(p.runes[startPosition:endPosition]), nil 834 | } 835 | 836 | func (p 
*LogLineParser) readUntilEOL() (string, error) { 837 | return p.readUntilRune(endRune) 838 | } 839 | 840 | func (p *LogLineParser) readWhile(checks []interface{}) (string, error) { 841 | return p._readWhile(checks, false) 842 | } 843 | 844 | func (p *LogLineParser) readWhileNot(checks []interface{}) (string, error) { 845 | return p._readWhile(checks, true) 846 | } 847 | 848 | func (p *LogLineParser) _readWhile(checks []interface{}, checkStopVal bool) (string, error) { 849 | startPosition := p.position 850 | endPosition := startPosition 851 | 852 | for p.runes[endPosition] != endRune { 853 | if check(p.runes[endPosition], checks) == checkStopVal { 854 | break 855 | } 856 | endPosition++ 857 | } 858 | 859 | if p.runes[endPosition] == endRune { 860 | return "", errors.New("unexpected end of line") 861 | } 862 | 863 | p.position = endPosition 864 | 865 | return string(p.runes[startPosition:endPosition]), nil 866 | } 867 | 868 | func (p *LogLineParser) lookahead(amount int) rune { 869 | return p.runes[p.position+amount] 870 | } 871 | 872 | func (p *LogLineParser) matchAhead(startIdx int, s string) bool { 873 | runes := []rune(s) 874 | for i, r := range runes { 875 | if r != p.runes[startIdx+i] { 876 | return false 877 | } 878 | } 879 | return true 880 | } 881 | 882 | func (p *LogLineParser) advance() rune { 883 | r := p.runes[p.position] 884 | p.position++ 885 | return r 886 | } 887 | 888 | func (p *LogLineParser) expect(c interface{}) error { 889 | r := p.advance() 890 | matches := doCheck(r, c) 891 | if !matches { 892 | return fmt.Errorf("unexpected '%c'", r) 893 | } 894 | return nil 895 | } 896 | 897 | func (p *LogLineParser) eatWS() *LogLineParser { 898 | for unicode.Is(unicode.Space, p.runes[p.position]) { 899 | p.position++ 900 | } 901 | return p 902 | } 903 | 904 | func severityToString(sev rune) (string, error) { 905 | switch sev { 906 | case 'D': 907 | return "debug", nil 908 | case 'I': 909 | return "informational", nil 910 | case 'W': 911 | return "warning", 
nil 912 | case 'E': 913 | return "error", nil 914 | case 'F': 915 | return "fatal", nil 916 | default: 917 | return "", fmt.Errorf("unknown severity '%c'", sev) 918 | } 919 | } 920 | 921 | func check(r rune, checks []interface{}) bool { 922 | for _, c := range checks { 923 | if doCheck(r, c) { 924 | return true 925 | } 926 | } 927 | return false 928 | } 929 | 930 | func doCheck(r rune, c interface{}) bool { 931 | if rt, ok := c.(*unicode.RangeTable); ok { 932 | if unicode.Is(rt, r) { 933 | return true 934 | } 935 | } else if runeCheck, ok := c.(rune); ok { 936 | if r == runeCheck { 937 | return true 938 | } 939 | } else { 940 | panic("unhandled check in doCheck") 941 | } 942 | return false 943 | } 944 | func validDayOfWeek(dayOfWeek string, err error) (string, error) { 945 | if len(dayOfWeek) != 3 { 946 | return "", errors.New("invalid day of week") 947 | } 948 | // XXX(toshok) validate against a list? 949 | return dayOfWeek, nil 950 | } 951 | 952 | func validMonth(month string, err error) (string, error) { 953 | if len(month) != 3 { 954 | return "", errors.New("invalid month") 955 | } 956 | // XXX(toshok) validate against a list? 
package logparser

import "github.com/honeycombio/mongodbtools/logparser/internal/logparser"

// ParseLogLine attempts to parse a MongoDB log line into a structured
// representation (a map from field name to parsed value). It delegates to
// the internal parser implementation.
func ParseLogLine(input string) (map[string]interface{}, error) {
	return logparser.ParseLogLine(input)
}

// IsPartialLogLine reports whether err came from a log line whose prefix
// (timestamp/severity/component/context) was recognized but whose message
// body failed to parse completely.
func IsPartialLogLine(err error) bool {
	return logparser.IsPartialLogLine(err)
}

// ParseQuery parses just a MongoDB query document as it appears in a log
// line (e.g. `{ foo: 1 }`) into a map; it returns an error when the input
// does not parse to a document.
func ParseQuery(query string) (map[string]interface{}, error) {
	return logparser.ParseQuery(query)
}
03:20:19.670 [TTLMonitor] query local.system.indexes query: { expireAfterSeconds: { $exists: true } } ntoreturn:0 ntoskip:0 nscanned:0 keyUpdates:0 locks(micros) r:86 nreturned:0 reslen:20 0ms", 18 | output: `{"context":"TTLMonitor","duration_ms":0,"keyUpdates":0,"locks(micros)":{"r":86},"namespace":"local.system.indexes","nreturned":0,"nscanned":0,"ntoreturn":0,"ntoskip":0,"operation":"query","query":{"expireAfterSeconds":{"$exists":true}},"reslen":20,"timestamp":"Mon Feb 23 03:20:19.670"}`, 19 | }, 20 | { 21 | line: "2017-08-14T00:09:17.028-0400 I COMMAND [conn555555] query foo.bar query: { $query: { fieldA: /^123456789.?\\/test.?$/ } } planSummary: IXSCAN { fieldA: 1, fieldB: 1 } ntoreturn:0 ntoskip:0 nscanned:2 nscannedObjects:1 keyUpdates:0 writeConflicts:0 numYields:1 nreturned:1 reslen:1337 locks:{ Global: { acquireCount: { r: 4 } }, Database: { acquireCount: { r: 2 } }, Collection: { acquireCount: { r: 2 } } } 134ms", 22 | output: `{"component":"COMMAND","context":"conn555555","duration_ms":134,"keyUpdates":0,"locks":{"Collection":{"acquireCount":{"r":2}},"Database":{"acquireCount":{"r":2}},"Global":{"acquireCount":{"r":4}}},"namespace":"foo.bar","nreturned":1,"nscanned":2,"nscannedObjects":1,"ntoreturn":0,"ntoskip":0,"numYields":1,"operation":"query","planSummary":[{"IXSCAN":{"fieldA":1,"fieldB":1}}],"query":{"$query":{"fieldA":"/^123456789.?\\/test.?$/"}},"reslen":1337,"severity":"informational","timestamp":"2017-08-14T00:09:17.028-0400","writeConflicts":0}`, 23 | }, 24 | { 25 | line: "2020-10-22T01:55:27.585+0000 I COMMAND [conn111918] command cm.auditLog command: find { find: \"auditLog\", filter: { resourceLevelInfos.resourceId: { $in: [ \"5f90e685c8352e00014abb3f\" ] } }, projection: {}, $db: \"crisismanagement\", $clusterTime: { clusterTime: Timestamp(1603331719, 18), signature: { hash: BinData(0, 164ACBB1F99D1E544AAC2497483A20E6FEAF126A), keyId: 6860053861185355777 } }, lsid: { id: UUID(\"bac26ad1-3d76-4da1-94cc-d541942f6889\") } } planSummary: 
COLLSCAN keysExamined:0 docsExamined:4765871 cursorExhausted:1 numYields:37237 nreturned:2 reslen:3038 locks:{ Global: { acquireCount: { r: 37238 } }, Database: { acquireCount: { r: 37238 } }, Collection: { acquireCount: { r: 37238 } } } storage:{ data: { bytesRead: 485564359, timeReadingMicros: 366237 } } protocol:op_msg 7616ms", 26 | output: `{"command":{"$clusterTime":{"clusterTime":"Timestamp(1603331719, 18)","signature":{"hash":"BinData(0, 164ACBB1F99D1E544AAC2497483A20E6FEAF126A)","keyId":6860053861185356000}},"$db":"crisismanagement","filter":{"resourceLevelInfos.resourceId":{"$in":["5f90e685c8352e00014abb3f"]}},"find":"auditLog","lsid":{"id":"UUID(\"bac26ad1-3d76-4da1-94cc-d541942f6889\")"},"projection":{}},"command_type":"find","component":"COMMAND","context":"conn111918","cursorExhausted":1,"docsExamined":4765871,"duration_ms":7616,"keysExamined":0,"locks":{"Collection":{"acquireCount":{"r":37238}},"Database":{"acquireCount":{"r":37238}},"Global":{"acquireCount":{"r":37238}}},"namespace":"cm.auditLog","nreturned":2,"numYields":37237,"operation":"command","planSummary":[{"COLLSCAN":true}],"protocol":"op_msg","reslen":3038,"severity":"informational","storage":{"data":{"bytesRead":485564359,"timeReadingMicros":366237}},"timestamp":"2020-10-22T01:55:27.585+0000"}`, 27 | }, 28 | } 29 | for _, tc := range testCases { 30 | doc, err := logparser.ParseLogLine(tc.line) 31 | assert.NoError(t, err) 32 | buf, _ := json.Marshal(doc) 33 | assert.Equal(t, string(buf), tc.output) 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /queryshape/internal/queryshape/shape.go: -------------------------------------------------------------------------------- 1 | package queryshape 2 | 3 | import ( 4 | "sort" 5 | "strings" 6 | ) 7 | 8 | // rough rules XXX(toshok) needs editing 9 | // 1. if key is not an op: 10 | // 1a. if value is a primitive, set value = 1 11 | // 1b. 
//    if value is an aggregate, walk subtree flattening everything but ops and their values if necessary
// 2. if key is an op:
// 2a. if value is a primitive, set value = 1
// 2b. if value is a map, keep map + all keys, and process keys (starting at step 1)
// 2c. if value is a list, walk list.
// 2c1. if all values are primitive, set value = 1
// 2c2. if any values are maps/lists, keep map + all keys, and process keys (starting at step 1)

// GetQueryShape normalizes a parsed query document into a canonical "shape"
// string: concrete values collapse to 1, operator keys (leading '$') keep
// their structure, and keys serialize in sorted order so equivalent queries
// produce the same shape.
func GetQueryShape(q map[string]interface{}) string {
	// unwrap a {$query: {...}} envelope if present
	if inner, ok := q["$query"].(map[string]interface{}); ok {
		return GetQueryShape(inner)
	}

	pruned := make(map[string]interface{})
	for k, v := range q {
		// strings.HasPrefix instead of k[0] so an empty key cannot panic
		if strings.HasPrefix(k, "$") {
			pruned[k] = flattenOp(v)
		} else {
			pruned[k] = flatten(v)
		}
	}

	// flatten pruned to a string, sorting keys alphabetically ($ coming before a/A)
	return serializeShape(pruned)
}

// isAggregate reports whether v is a JSON-style aggregate (slice or map).
func isAggregate(v interface{}) bool {
	switch v.(type) {
	case []interface{}, map[string]interface{}:
		return true
	}
	return false
}

// flattenSlice prunes a query-value list: primitives are dropped, nested
// aggregates are recursively flattened. fromOp is true when the list is the
// direct value of an operator key, in which case flattened maps are kept
// even when they reduce to a primitive.
func flattenSlice(slice []interface{}, fromOp bool) interface{} {
	var rv []interface{}
	for _, v := range slice {
		if s, ok := v.([]interface{}); ok {
			sv := flattenSlice(s, false)
			if isAggregate(sv) {
				rv = append(rv, sv)
			}
		} else if m, ok := v.(map[string]interface{}); ok {
			mv := flattenMap(m, fromOp)
			if isAggregate(mv) || fromOp {
				rv = append(rv, mv)
			}
		}
	}
	// if the slice is empty, return 1 (since it's entirely primitives).
	// otherwise return the slice
	if len(rv) == 0 {
		return 1
	}
	return rv
}

// flattenMap prunes a query-value map: operator keys keep their (flattened)
// values, non-operator keys are kept only when their flattened value is
// still an aggregate (or fromOp forces retention).
func flattenMap(m map[string]interface{}, fromOp bool) interface{} {
	rv := make(map[string]interface{})
	for k, v := range m {
		// strings.HasPrefix instead of k[0] so an empty key cannot panic
		if strings.HasPrefix(k, "$") {
			rv[k] = flattenOp(v)
		} else {
			flattened := flatten(v)
			if isAggregate(flattened) || fromOp {
				rv[k] = flattened
			}
		}
	}
	// if the map is empty, return 1 (since it's entirely primitives).
	// otherwise return the map
	if len(rv) == 0 {
		return 1
	}
	return rv
}

// flatten applies the non-operator rules (step 1) to an arbitrary value.
func flatten(v interface{}) interface{} {
	if s, ok := v.([]interface{}); ok {
		return flattenSlice(s, false)
	} else if m, ok := v.(map[string]interface{}); ok {
		return flattenMap(m, false)
	} else {
		return 1
	}
}

// flattenOp applies the operator rules (step 2) to an operator key's value.
func flattenOp(v interface{}) interface{} {
	if s, ok := v.([]interface{}); ok {
		return flattenSlice(s, true)
	} else if m, ok := v.(map[string]interface{}); ok {
		return flattenMap(m, true)
	} else {
		return 1
	}
}

// serializeShape renders a pruned shape deterministically. We can't just
// json.Marshal, since we need ordered keys ('$' sorts before letters).
func serializeShape(shape interface{}) string {
	if m, ok := shape.(map[string]interface{}); ok {
		var keys []string
		var keyAndVal []string
		for k := range m {
			keys = append(keys, k)
		}

		sort.Strings(keys)
		for _, k := range keys {
			keyAndVal = append(keyAndVal, "\""+k+"\": "+serializeShape(m[k]))
		}

		return "{ " + strings.Join(keyAndVal, ", ") + " }"

	} else if s, ok := shape.([]interface{}); ok {
		var vals []string
		for _, v := range s {
			vals = append(vals, serializeShape(v))
		}
		return "[ " + strings.Join(vals, ", ") + " ]"
	} else {
		return "1"
	}
}
-------------------------------------------------------------------------------- 1 | package queryshape 2 | 3 | import "github.com/honeycombio/mongodbtools/queryshape/internal/queryshape" 4 | 5 | // GetQueryShape takes a query map (provided by the logparser) and returns the query shape serialized as a string 6 | func GetQueryShape(q map[string]interface{}) string { 7 | return queryshape.GetQueryShape(q) 8 | } 9 | -------------------------------------------------------------------------------- /queryshape/shape_test.go: -------------------------------------------------------------------------------- 1 | package queryshape_test 2 | 3 | import ( 4 | "fmt" 5 | "path/filepath" 6 | "reflect" 7 | "runtime" 8 | "strings" 9 | "testing" 10 | 11 | "github.com/honeycombio/mongodbtools/logparser" 12 | "github.com/honeycombio/mongodbtools/queryshape" 13 | ) 14 | 15 | func testQueryStringShape(t *testing.T, queryString, queryShape string) { 16 | q, err := logparser.ParseQuery(queryString) 17 | testOK(t, err) 18 | testEquals(t, queryshape.GetQueryShape(q), queryShape) 19 | } 20 | 21 | func TestSortedKeys(t *testing.T) { 22 | testQueryStringShape(t, "{ b: 1, c: 2, a: 3 }", `{ "a": 1, "b": 1, "c": 1 }`) 23 | } 24 | 25 | func TestFlattenedSlice(t *testing.T) { 26 | testQueryStringShape(t, "{ $in: [1, 2, 3] }", `{ "$in": 1 }`) 27 | } 28 | 29 | // helper function 30 | func testEquals(t testing.TB, actual, expected interface{}, msg ...string) { 31 | if !reflect.DeepEqual(actual, expected) { 32 | message := strings.Join(msg, ", ") 33 | _, file, line, _ := runtime.Caller(2) 34 | 35 | t.Errorf( 36 | "%s:%d: %s -- actual(%T): %v, expected(%T): %v", 37 | filepath.Base(file), 38 | line, 39 | message, 40 | testDeref(actual), 41 | testDeref(actual), 42 | testDeref(expected), 43 | testDeref(expected), 44 | ) 45 | } 46 | } 47 | 48 | func testDeref(v interface{}) interface{} { 49 | switch t := v.(type) { 50 | case *string: 51 | return fmt.Sprintf("*(%v)", *t) 52 | case *int64: 53 | return 
fmt.Sprintf("*(%v)", *t) 54 | case *float64: 55 | return fmt.Sprintf("*(%v)", *t) 56 | case *bool: 57 | return fmt.Sprintf("*(%v)", *t) 58 | default: 59 | return v 60 | } 61 | } 62 | 63 | func testOK(t testing.TB, err error, msg ...string) { 64 | if err != nil { 65 | message := strings.Join(msg, ", ") 66 | _, file, line, _ := runtime.Caller(2) 67 | 68 | t.Errorf("%s:%d: %s -- unexpected error: %s", 69 | filepath.Base(file), 70 | line, 71 | message, 72 | err.Error(), 73 | ) 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /scripts/mongo_stats.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -u 4 | set -e 5 | 6 | # this script is a template - you will probably need to modify it to suit your 7 | # environment. It has been tested to work against a Mongo 3.2 server running on 8 | # Ubuntu linux. 9 | 10 | # this script expects two arguments: a Honeycomb writekey and dataset name. 11 | # the third argument, if present, is the mongo server to talk to 12 | # the third argument, if present, is the Honeycomb URL to which to send events 13 | 14 | # this script will collect locks and a few other metrics from a locally running 15 | # mongo instance and submit them to honeycomb as a single event. It should be 16 | # called from cron every minute and will submit these metrics 4 times at 0, 15, 17 | # 30, and 45 seconds. 18 | # 19 | 20 | # TODO features to add 21 | # calculate lock percentage instead of just counts/sec 22 | 23 | 24 | # If you wish to kill queries, set FANGS="yes". When doing so, set these two 25 | # variables to the age (in seconds) over which queries should be killed 26 | FANGS="no" 27 | SLOW_QUERY_KILL_AGE=30 28 | NON_YIELDING_KILL_AGE=15 29 | 30 | if [ $# -lt 2 ] ; then 31 | echo "Usage: $0 [host:port]" 32 | echo "" 33 | echo "$0 collects stats from a MongoDB instance and reports them to Honeycomb." 
34 | echo " It expects a Honeycomb writekey and dataset name." 35 | echo " The optional third argument is a mongo target (default localhost:27017)." 36 | echo " https://honeycomb.io" 37 | echo "" 38 | exit 1 39 | fi 40 | writekey=$1 41 | dataset=$2 42 | # replace spaces in the datase neame with %20s so curl works 43 | dataset=${dataset// /%20} 44 | 45 | # host:port for the mongo instance; defaults to localhost:27017 46 | if [ $# -eq 3 ] ; then 47 | mongo_host=$3 48 | else 49 | mongo_host="localhost:27017" 50 | fi 51 | 52 | # honeycomb url is the fourth argument, if present 53 | if [ $# -eq 4 ] ; then 54 | url=$4 55 | else 56 | url="https://api.honeycomb.io" 57 | fi 58 | 59 | getStats(){ 60 | cuser=$1 61 | csystem=$2 62 | cidle=$3 63 | cwait=$4 64 | csteal=$5 65 | 66 | cat < nonYieldingKillAge && 125 | tojson(x.query).indexOf('nearSphere') && 126 | !(x.msg && x.msg.indexOf('Index Build') !== -1); 127 | } 128 | 129 | function isSlowQuery(x) { 130 | return x.secs_running > slowQueryKillAge && 131 | x.ns.indexOf('system') === -1 && 132 | x.ns.indexOf('oplog') === -1 && 133 | x.op !== 'getmore' && 134 | !(x.query && x.query['\$comment'] && x.query['\$comment'].match(/push_id/)) && 135 | !(x.msg && (x.msg.match(/bg index build/) || x.msg.indexOf('Index Build') !== -1 || x.msg.match(/compact extent/))) && 136 | !(x.desc && x.desc.match(/repl writer worker/)); 137 | } 138 | 139 | function killOps(ops) { 140 | ops.forEach(function(x) { db.killOp(x.opid); }); 141 | } 142 | 143 | function getHoneycombDB() { 144 | var status = rs.status(); 145 | var mongo; 146 | if (!status.ok) { 147 | mongo = db.getMongo(); 148 | } else { 149 | mongo = new Mongo(status.set + "/" + status.members.map(function(m) { return m.name; }).join(",")); 150 | } 151 | return mongo.getDB("honeycomb"); 152 | } 153 | 154 | function calcLockChange() { 155 | var serverStatus = db.serverStatus(); 156 | if (serverStatus.version.startsWith("2")) { 157 | // Crude version check: serverStatus().locks.*.acquireCount 
doesn't 158 | // exist in mongodb before version 3.0 159 | return; 160 | } 161 | var honeydb = getHoneycombDB(); 162 | var myname = db.serverStatus().repl ? db.serverStatus().repl.me : db.getMongo().host; 163 | data.hostname = myname; 164 | 165 | var now = new Date(); 166 | var newLocksData = {}; 167 | 168 | addGlobalLocks(newLocksData); 169 | addDatabaseLocks(newLocksData); 170 | 171 | // fetch the old lock data 172 | var oldData = honeydb.locks.find({host:myname}).toArray(); 173 | if (oldData.length > 0) { 174 | var oldLocksDoc = oldData[0]; 175 | var oldLocksData = oldLocksDoc.locksData; 176 | 177 | // compute new_locks/sec for the values that are in both 178 | var timeDelta = (now - oldLocksDoc.time) / 1000; 179 | for (var k in newLocksData) { 180 | var oldVal = oldLocksData[k] || 0; 181 | var lockDiff = (newLocksData[k] - oldLocksData[k]) / timeDelta; 182 | // skip negative locks from server restart 183 | data[k] = lockDiff > 0 ? lockDiff : 0; 184 | } 185 | 186 | // remove old lock data 187 | honeydb.locks.remove({ _id: oldLocksDoc._id }); 188 | } 189 | 190 | // store the current locks along with a timestamp 191 | honeydb.locks.insert({ 192 | time: now, 193 | host: myname, 194 | locksData: newLocksData 195 | }); 196 | } 197 | 198 | // Capture as much as possible from https://docs.mongodb.com/v3.2/reference/command/serverStatus/#repl 199 | function addReplSetAttrs() { 200 | var repl = db.serverStatus().repl; 201 | if (!repl) { 202 | return; 203 | } 204 | 205 | if (repl.setName) { 206 | data.replica_set_name = repl.setName; 207 | } 208 | if (repl.setVersion) { 209 | data.replica_set_version = repl.setVersion; 210 | } 211 | if (repl.primary) { 212 | data.replica_set_primary = repl.primary; 213 | } 214 | if (repl.electionId) { 215 | data.replica_set_election_id = repl.electionId.valueOf(); 216 | } 217 | } 218 | 219 | db.getMongo().setSlaveOk(); 220 | 221 | data.ismaster = db.isMaster().ismaster; 222 | data.version = db.serverStatus().version; 223 | 224 | 
addInProgMetrics() 225 | calcLockChange() 226 | addReplSetAttrs() 227 | 228 | data.cpu_user = $cuser 229 | data.cpu_system = $csystem 230 | data.cpu_idle = $cidle 231 | data.cpu_wait = $cwait 232 | data.cpu_steal = $csteal 233 | 234 | print(JSON.stringify(data)); 235 | } 236 | mongoCron($SLOW_QUERY_KILL_AGE,$NON_YIELDING_KILL_AGE) 237 | EOJS 238 | } 239 | 240 | # run everything 4 times. collect cpu util for 15s, then get mongo stats, repeat 241 | for i in {0..3} ; do 242 | # grab 15sec worth of CPU utilization data 243 | cpu=($(vmstat 15 2 | tail -n 1 | awk '{print $13,$14,$15,$16,$17}')) 244 | cpu_user=${cpu[0]} 245 | cpu_system=${cpu[1]} 246 | cpu_idle=${cpu[2]} 247 | cpu_wait=${cpu[3]} 248 | cpu_steal=${cpu[4]} 249 | # grab the mongo data, hand it CPU util to stuff into the same event 250 | payload=$(getStats $cpu_user $cpu_system $cpu_idle $cpu_wait $cpu_steal | tail -n 1) 251 | # send the event to Honeycomb 252 | curl -q -X POST -H "X-Honeycomb-Team: $writekey" "${url}/1/events/${dataset}" -d "$payload" 253 | done 254 | --------------------------------------------------------------------------------