├── .clang-format ├── .github └── workflows │ └── test.yaml ├── .gitignore ├── LICENSE ├── README.md ├── bininfo └── bininfo.go ├── e2e_test.go ├── ebpf ├── bpf_x86_bpfel.go ├── c │ ├── gmon.c │ ├── goroutine.h │ └── maps.h ├── config.go ├── event_handler.go ├── gmon.go ├── reporter.go └── reporter_test.go ├── fixture ├── go.mod └── main.go ├── gmon.sh ├── go.mod ├── go.sum └── main.go /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | AccessModifierOffset: -4 4 | AlignAfterOpenBracket: DontAlign 5 | AlignConsecutiveAssignments: false 6 | AlignConsecutiveDeclarations: false 7 | AlignEscapedNewlines: Left 8 | AlignOperands: true 9 | AlignTrailingComments: false 10 | AllowAllParametersOfDeclarationOnNextLine: false 11 | AllowShortBlocksOnASingleLine: false 12 | AllowShortCaseLabelsOnASingleLine: false 13 | AllowShortEnumsOnASingleLine: false 14 | AllowShortFunctionsOnASingleLine: None 15 | AllowShortIfStatementsOnASingleLine: true 16 | AllowShortLoopsOnASingleLine: false 17 | AlwaysBreakAfterDefinitionReturnType: None 18 | AlwaysBreakAfterReturnType: None 19 | AlwaysBreakBeforeMultilineStrings: false 20 | AlwaysBreakTemplateDeclarations: false 21 | BinPackArguments: true 22 | BinPackParameters: true 23 | BraceWrapping: 24 | AfterCaseLabel: false 25 | AfterClass: false 26 | AfterControlStatement: Never 27 | AfterEnum: false 28 | AfterFunction: false 29 | AfterNamespace: true 30 | AfterObjCDeclaration: false 31 | AfterStruct: false 32 | AfterUnion: false 33 | AfterExternBlock: false 34 | BeforeCatch: false 35 | BeforeElse: false 36 | IndentBraces: false 37 | SplitEmptyFunction: true 38 | SplitEmptyRecord: true 39 | SplitEmptyNamespace: true 40 | BreakBeforeBinaryOperators: None 41 | BreakBeforeBraces: Custom 42 | BreakBeforeInheritanceComma: false 43 | BreakBeforeTernaryOperators: false 44 | BreakConstructorInitializersBeforeComma: false 45 | BreakConstructorInitializers: BeforeComma 46 | 
BreakAfterJavaFieldAnnotations: false 47 | BreakStringLiterals: false 48 | ColumnLimit: 0 49 | CommentPragmas: '^ IWYU pragma:' 50 | CompactNamespaces: false 51 | ConstructorInitializerAllOnOneLineOrOnePerLine: false 52 | ConstructorInitializerIndentWidth: 4 53 | ContinuationIndentWidth: 4 54 | Cpp11BracedListStyle: false 55 | DerivePointerAlignment: false 56 | DisableFormat: false 57 | ExperimentalAutoDetectBinPacking: false 58 | FixNamespaceComments: false 59 | IncludeBlocks: Preserve 60 | IncludeCategories: 61 | - Regex: '.*' 62 | Priority: 1 63 | IncludeIsMainRegex: '(Test)?$' 64 | IndentCaseLabels: false 65 | IndentPPDirectives: None 66 | IndentWidth: 4 67 | IndentWrappedFunctionNames: false 68 | JavaScriptQuotes: Leave 69 | JavaScriptWrapImports: true 70 | KeepEmptyLinesAtTheStartOfBlocks: false 71 | MacroBlockBegin: '' 72 | MacroBlockEnd: '' 73 | MaxEmptyLinesToKeep: 1 74 | NamespaceIndentation: None 75 | ObjCBinPackProtocolList: Auto 76 | ObjCBlockIndentWidth: 4 77 | ObjCSpaceAfterProperty: true 78 | ObjCSpaceBeforeProtocolList: true 79 | PenaltyBreakAssignment: 10 80 | PenaltyBreakBeforeFirstCallParameter: 30 81 | PenaltyBreakComment: 10 82 | PenaltyBreakFirstLessLess: 0 83 | PenaltyBreakString: 10 84 | PenaltyExcessCharacter: 100 85 | PenaltyReturnTypeOnItsOwnLine: 60 86 | PointerAlignment: Right 87 | ReflowComments: false 88 | SortIncludes: false 89 | SortUsingDeclarations: false 90 | SpaceAfterCStyleCast: false 91 | SpaceAfterTemplateKeyword: true 92 | SpaceBeforeAssignmentOperators: true 93 | SpaceBeforeCtorInitializerColon: true 94 | SpaceBeforeInheritanceColon: true 95 | SpaceBeforeParens: ControlStatements 96 | SpaceBeforeRangeBasedForLoopColon: true 97 | SpaceInEmptyParentheses: false 98 | SpacesBeforeTrailingComments: 1 99 | SpacesInAngles: false 100 | SpacesInContainerLiterals: false 101 | SpacesInCStyleCastParentheses: false 102 | SpacesInParentheses: false 103 | SpacesInSquareBrackets: false 104 | Standard: Cpp03 105 | TabWidth: 4 106 | UseTab: 
Never 107 | -------------------------------------------------------------------------------- /.github/workflows/test.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | pull_request: 3 | push: 4 | branches: 5 | - main 6 | name: gmon test 7 | jobs: 8 | test: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v4 12 | with: 13 | fetch-depth: 0 14 | - name: Build & Format 15 | run: ./gmon.sh format 16 | - name: Check changes 17 | run: | 18 | if ! git diff --quiet || ! git diff --staged --quiet; then 19 | echo "Changes detected" 20 | uname -a 21 | git diff 22 | git diff --staged 23 | exit 1 24 | fi 25 | - run: ./gmon.sh test 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # If you prefer the allow list template instead of the deny list, see community template: 2 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore 3 | # 4 | # Binaries for programs and plugins 5 | *.exe 6 | *.exe~ 7 | *.dll 8 | *.so 9 | *.dylib 10 | 11 | # Test binary, built with `go test -c` 12 | *.test 13 | 14 | # Output of the go coverage tool, specifically when used with LiteIDE 15 | *.out 16 | 17 | # Dependency directories (remove the comment below to include it) 18 | vendor/ 19 | 20 | # Go workspace file 21 | go.work 22 | 23 | bin 24 | 25 | .vscode/launch.json 26 | 27 | fixture/fixture 28 | ebpf/c/vmlinux.h 29 | ebpf/bpf_x86_bpfel.o 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [2024] [Keisuke Umegaki] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Goroutine MONitor (gmon) 2 | 3 | 4 | 5 | `gmon` is a tool designed to monitor the creation and destruction of goroutines in a Go program, drawing inspiration from the presentation [Real World Debugging with eBPF](https://www.usenix.org/conference/srecon23apac/presentation/liang). 
6 | 7 | # Prerequisites 8 | 9 | - amd64 (x86_64) 10 | - Linux Kernel 5.8+, since `gmon` uses the [BPF ring buffer](https://nakryiko.com/posts/bpf-ringbuf/) 11 | - Target Go binary must be compiled with Go 1.23+, since `gmon` uses a fixed offset to get the goroutine ID 12 | 13 | # Usage 14 | 15 | ``` 16 | Usage of gmon: 17 | -level string 18 | log level could be one of ["DEBUG" "INFO" "WARN" "ERROR"] (default "INFO") 19 | -metrics int 20 | Port to be used for metrics server, /metrics endpoint (default 5500) 21 | -path string 22 | Path to executable file to be monitored (required) 23 | -pid int 24 | Useful when tracing programs that have many running instances 25 | -pprof int 26 | Port to be used for pprof server. If 0, pprof server is not started 27 | -trace string 28 | Path to Go runtime/trace output 29 | ``` 30 | 31 | ## Demo 32 | 33 | https://github.com/keisku/gmon/assets/41987730/838fa12d-d622-4ad6-a9f0-6aab88acec55 34 | 35 | ## Stdout 36 | 37 | `gmon` logs the creation of goroutines to stdout with stack traces. 
38 | 39 | ```bash 40 | sudo gmon -path /path/to/executable 41 | time=2024-03-20T05:10:57.752Z level=INFO msg="goroutine is created" goroutine_id=22 stack.0=runtime.newproc stack.1=runtime.systemstack stack.2=runtime.newproc stack.3=net/http.(*connReader).startBackgroundRead stack.4=net/http.(*conn).serve stack.5=net/http.(*Server).Serve.gowrap3 stack.6=runtime.goexit 42 | time=2024-03-20T05:10:57.752Z level=INFO msg="goroutine is created" goroutine_id=21 stack.0=runtime.newproc stack.1=runtime.systemstack stack.2=runtime.newproc stack.3=net/http.(*Server).Serve stack.4=net/http.(*Server).ListenAndServe stack.5=main.main.gowrap1 stack.6=runtime.goexit 43 | time=2024-03-20T05:10:57.752Z level=INFO msg="goroutine is created" goroutine_id=23 stack.0=runtime.newproc stack.1=runtime.systemstack stack.2=runtime.newproc stack.3=net/http.(*Server).Serve stack.4=net/http.(*Server).ListenAndServe stack.5=main.main.gowrap1 stack.6=runtime.goexit 44 | time=2024-03-20T05:10:57.752Z level=INFO msg="goroutine is created" goroutine_id=34 stack.0=runtime.newproc stack.1=runtime.systemstack stack.2=runtime.newproc stack.3=net/http.(*Server).Serve stack.4=net/http.(*Server).ListenAndServe stack.5=main.main.gowrap1 stack.6=runtime.goexit 45 | time=2024-03-20T05:10:57.752Z level=INFO msg="goroutine is created" goroutine_id=24 stack.0=runtime.newproc stack.1=runtime.systemstack stack.2=runtime.newproc stack.3=net/http.(*connReader).startBackgroundRead stack.4=net/http.(*conn).serve stack.5=net/http.(*Server).Serve.gowrap3 stack.6=runtime.goexit 46 | time=2024-03-20T05:10:57.752Z level=INFO msg="goroutine is created" goroutine_id=35 stack.0=runtime.newproc stack.1=runtime.systemstack stack.2=runtime.newproc stack.3=net/http.(*connReader).startBackgroundRead stack.4=net/http.(*conn).serve stack.5=net/http.(*Server).Serve.gowrap3 stack.6=runtime.goexit 47 | ``` 48 | 49 | ## OpenMetrics 50 | 51 | `gmon` exposes the following metrics in the 
[OpenMetrics](https://www.cncf.io/projects/openmetrics/) format at the `GET /metrics` endpoint. 52 | 53 | - `gmon_goroutine_creation` 54 | - `gmon_goroutine_exit` 55 | - `gmon_goroutine_uptime` 56 | 57 | ```bash 58 | curl -s http://localhost:5500/metrics 59 | 60 | # HELP gmon_goroutine_creation The number of goroutines that have been creaated 61 | # TYPE gmon_goroutine_creation counter 62 | gmon_goroutine_creation{stack_0="runtime.goexit",stack_1="main.main.gowrap1",stack_2="net/http.(*Server).ListenAndServe",stack_3="net/http.(*Server).Serve",stack_4="runtime.newproc"} 1 63 | gmon_goroutine_creation{stack_0="runtime.goexit",stack_1="net/http.(*Server).Serve.gowrap3",stack_2="net/http.(*conn).serve",stack_3="net/http.(*connReader).startBackgroundRead",stack_4="runtime.newproc"} 3 64 | # HELP gmon_goroutine_exit The number of goroutines that have been exited 65 | # TYPE gmon_goroutine_exit counter 66 | gmon_goroutine_exit{stack_0="runtime.goexit",stack_1="net/http.(*Server).Serve.gowrap3",stack_2="net/http.(*conn).serve",stack_3="net/http.(*connReader).startBackgroundRead",stack_4="runtime.newproc"} 3 67 | # HELP gmon_goroutine_uptime Uptime of goroutines in seconds 68 | # TYPE gmon_goroutine_uptime histogram 69 | gmon_goroutine_uptime_bucket{stack_0="runtime.goexit",stack_1="main.main.gowrap1",stack_2="net/http.(*Server).ListenAndServe",stack_3="net/http.(*Server).Serve",stack_4="runtime.newproc",le="1"} 2 70 | gmon_goroutine_uptime_bucket{stack_0="runtime.goexit",stack_1="main.main.gowrap1",stack_2="net/http.(*Server).ListenAndServe",stack_3="net/http.(*Server).Serve",stack_4="runtime.newproc",le="3"} 2 71 | gmon_goroutine_uptime_bucket{stack_0="runtime.goexit",stack_1="main.main.gowrap1",stack_2="net/http.(*Server).ListenAndServe",stack_3="net/http.(*Server).Serve",stack_4="runtime.newproc",le="5"} 2 72 | 
gmon_goroutine_uptime_bucket{stack_0="runtime.goexit",stack_1="main.main.gowrap1",stack_2="net/http.(*Server).ListenAndServe",stack_3="net/http.(*Server).Serve",stack_4="runtime.newproc",le="10"} 2 73 | gmon_goroutine_uptime_bucket{stack_0="runtime.goexit",stack_1="main.main.gowrap1",stack_2="net/http.(*Server).ListenAndServe",stack_3="net/http.(*Server).Serve",stack_4="runtime.newproc",le="30"} 2 74 | gmon_goroutine_uptime_bucket{stack_0="runtime.goexit",stack_1="main.main.gowrap1",stack_2="net/http.(*Server).ListenAndServe",stack_3="net/http.(*Server).Serve",stack_4="runtime.newproc",le="60"} 2 75 | gmon_goroutine_uptime_bucket{stack_0="runtime.goexit",stack_1="main.main.gowrap1",stack_2="net/http.(*Server).ListenAndServe",stack_3="net/http.(*Server).Serve",stack_4="runtime.newproc",le="120"} 2 76 | gmon_goroutine_uptime_bucket{stack_0="runtime.goexit",stack_1="main.main.gowrap1",stack_2="net/http.(*Server).ListenAndServe",stack_3="net/http.(*Server).Serve",stack_4="runtime.newproc",le="180"} 2 77 | gmon_goroutine_uptime_bucket{stack_0="runtime.goexit",stack_1="main.main.gowrap1",stack_2="net/http.(*Server).ListenAndServe",stack_3="net/http.(*Server).Serve",stack_4="runtime.newproc",le="+Inf"} 2 78 | gmon_goroutine_uptime_sum{stack_0="runtime.goexit",stack_1="main.main.gowrap1",stack_2="net/http.(*Server).ListenAndServe",stack_3="net/http.(*Server).Serve",stack_4="runtime.newproc"} 0.9001332019999999 79 | gmon_goroutine_uptime_count{stack_0="runtime.goexit",stack_1="main.main.gowrap1",stack_2="net/http.(*Server).ListenAndServe",stack_3="net/http.(*Server).Serve",stack_4="runtime.newproc"} 2 80 | ...skip... 81 | ``` 82 | 83 | # Development 84 | 85 | Follow [the Docker installation guide](https://docs.docker.com/engine/install/#supported-platforms) to build and run tests. 
86 | 87 | ```bash 88 | # Build and output the binary to ./bin 89 | ./gmon.sh build 90 | # Build and install the binary to /usr/bin 91 | ./gmon.sh install 92 | # Run tests 93 | ./gmon.sh test 94 | ``` 95 | -------------------------------------------------------------------------------- /bininfo/bininfo.go: -------------------------------------------------------------------------------- 1 | package bininfo 2 | 3 | import ( 4 | "debug/elf" 5 | "errors" 6 | "fmt" 7 | "log/slog" 8 | "runtime" 9 | "sort" 10 | 11 | "github.com/go-delve/delve/pkg/proc" 12 | ) 13 | 14 | // Translator translates information about an executable. 15 | type Translator interface { 16 | // Address returns the address of the given symbol in the executable, or 0 if the symbol is not found. 17 | Address(symbol string) uint64 18 | // PCToFunc returns the function that contains the given program counter. 19 | PCToFunc(pc uint64) *proc.Function 20 | } 21 | 22 | // NewTranslator creates a new Translator for the given executable. 23 | func NewTranslator(path string) (Translator, error) { 24 | bi, err := newBinInfo(path) 25 | if err == nil { 26 | slog.Debug("loaded binary info") 27 | return bi, nil 28 | } else { 29 | slog.Debug("failed to load binary info", slog.Any("error", err)) 30 | } 31 | s, err := newSymbolTable(path) 32 | if err != nil { 33 | return nil, fmt.Errorf("failed to load symbol table: %w", err) 34 | } 35 | slog.Debug("loaded symbol table") 36 | return s, nil 37 | } 38 | 39 | type binaryInfo struct { 40 | *proc.BinaryInfo 41 | } 42 | 43 | func newBinInfo(path string) (*binaryInfo, error) { 44 | bininfo := proc.NewBinaryInfo(runtime.GOOS, runtime.GOARCH) 45 | if err := bininfo.LoadBinaryInfo(path, 0, nil); err != nil { 46 | return nil, fmt.Errorf("failed to load binary info: %w", err) 47 | } 48 | var bi binaryInfo 49 | bi.BinaryInfo = bininfo 50 | return &bi, nil 51 | } 52 | 53 | func (bi *binaryInfo) Address(symbol string) uint64 { 54 | funcs, err := bi.FindFunction(symbol) 55 | if err != nil { 56 | return 0 57 | } 58 | for _, f := range 
funcs { 59 | if f.Name == symbol { 60 | return f.Entry 61 | } 62 | } 63 | return 0 64 | } 65 | 66 | type symbolTable struct { 67 | addresses map[string]uint64 68 | functions []*proc.Function 69 | } 70 | 71 | // Copy from https://github.com/cilium/ebpf/blob/v0.12.3/link/uprobe.go#L116-L160 72 | func newSymbolTable(path string) (*symbolTable, error) { 73 | f, err := elf.Open(path) 74 | if err != nil { 75 | return nil, err 76 | } 77 | syms, err := f.Symbols() 78 | if err != nil && !errors.Is(err, elf.ErrNoSymbols) { 79 | return nil, err 80 | } 81 | dynsyms, err := f.DynamicSymbols() 82 | if err != nil && !errors.Is(err, elf.ErrNoSymbols) { 83 | return nil, err 84 | } 85 | syms = append(syms, dynsyms...) 86 | if len(syms) == 0 { 87 | return nil, elf.ErrNoSymbols 88 | } 89 | addresses := make(map[string]uint64) 90 | functions := make([]*proc.Function, 0, len(syms)) 91 | for _, s := range syms { 92 | if elf.ST_TYPE(s.Info) != elf.STT_FUNC { 93 | continue 94 | } 95 | address := s.Value 96 | for _, prog := range f.Progs { 97 | if prog.Type != elf.PT_LOAD || (prog.Flags&elf.PF_X) == 0 { 98 | continue 99 | } 100 | if prog.Vaddr <= s.Value && s.Value < (prog.Vaddr+prog.Memsz) { 101 | address = s.Value - prog.Vaddr + prog.Off 102 | break 103 | } 104 | } 105 | addresses[s.Name] = address 106 | index := sort.Search(len(functions), func(i int) bool { return functions[i].Entry >= address }) 107 | functions = append(functions, &proc.Function{}) 108 | copy(functions[index+1:], functions[index:]) 109 | functions[index] = &proc.Function{ 110 | Name: s.Name, 111 | Entry: address, 112 | } 113 | } 114 | for i := 0; i < len(functions)-1; i++ { 115 | functions[i].End = functions[i+1].Entry 116 | } 117 | return &symbolTable{ 118 | addresses: addresses, 119 | functions: functions, 120 | }, nil 121 | } 122 | 123 | func (s *symbolTable) Address(symbol string) uint64 { 124 | if address, ok := s.addresses[symbol]; ok { 125 | return address 126 | } 127 | return 0 128 | } 129 | 130 | func (s 
*symbolTable) PCToFunc(pc uint64) *proc.Function { 131 | low := 0 132 | high := len(s.functions) - 1 133 | 134 | for low <= high { 135 | mid := low + (high-low)/2 136 | f := s.functions[mid] 137 | 138 | if pc < f.Entry { 139 | high = mid - 1 140 | } else if pc > f.End { 141 | low = mid + 1 142 | } else { 143 | return f 144 | } 145 | } 146 | return nil 147 | } 148 | -------------------------------------------------------------------------------- /e2e_test.go: -------------------------------------------------------------------------------- 1 | package main_test 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "errors" 7 | "fmt" 8 | "io" 9 | "net/http" 10 | "os" 11 | "os/exec" 12 | "strconv" 13 | "strings" 14 | "testing" 15 | "time" 16 | 17 | dto "github.com/prometheus/client_model/go" 18 | "github.com/prometheus/common/expfmt" 19 | "github.com/stretchr/testify/require" 20 | ) 21 | 22 | func Test_e2e(t *testing.T) { 23 | fixture, err := runProcess(os.Stdout, os.Stderr, "/usr/bin/fixture") 24 | if err != nil { 25 | t.Fatalf("failed to run fixture: %v", err) 26 | } 27 | 28 | // Wait for fixture to be ready. 29 | time.Sleep(time.Second) 30 | 31 | var gmonLogs bytes.Buffer 32 | gmon, err := runProcess( 33 | &gmonLogs, &gmonLogs, 34 | "/usr/bin/gmon", 35 | "-path", 36 | "/usr/bin/fixture", 37 | "-metrics", 38 | "5500", 39 | "-level", 40 | "DEBUG", 41 | ) 42 | if err != nil { 43 | t.Fatalf("failed to run gmon: %v", err) 44 | } 45 | // Wait for gmon to be ready. 
46 | time.Sleep(time.Second) 47 | 48 | t.Cleanup(func() { 49 | procs := []*os.Process{fixture, gmon} 50 | for i := range procs { 51 | if procs[i] != nil { 52 | if err := procs[i].Kill(); err != nil { 53 | t.Logf("failed to kill process: %v", err) 54 | } 55 | } 56 | } 57 | }) 58 | 59 | requestFixtureCount := 3 60 | for range requestFixtureCount { 61 | resp, err := http.Get("http://localhost:8080/get/200") 62 | require.NoError(t, err, "GET /get/200 of fixture server failed") 63 | require.Equal(t, http.StatusOK, resp.StatusCode, "expect 200 from GET /get/200 of fixture server") 64 | } 65 | 66 | // Wait gmon detects goroutine events and writes logs. 67 | time.Sleep(time.Second) 68 | 69 | evaluateGmonOutput(t, &gmonLogs, requestFixtureCount) 70 | evaluateGmonMetrics(t) 71 | } 72 | 73 | func runProcess(stdout, stderr io.Writer, name string, arg ...string) (*os.Process, error) { 74 | cmd := exec.Command(name, arg...) 75 | cmd.Stdout = stdout 76 | cmd.Stderr = stderr 77 | if err := cmd.Start(); err != nil { 78 | return nil, fmt.Errorf("failed to run %q: %w", cmd, err) 79 | } 80 | return cmd.Process, nil 81 | } 82 | 83 | func evaluateGmonOutput(t *testing.T, gmonLogs io.Reader, expectValidLineCount int) { 84 | // A valid line should have: 85 | // - msg="goroutine is created" 86 | // - goroutine_id=%d, where %d is an integer 87 | // - stack.%d=funcName, where %d is an integer and funcName is a function name 88 | // - When a HTTP server receives a request, it should cause a goroutine and have a stack trace. 
89 | validLineCount := 0 90 | 91 | scanner := bufio.NewScanner(gmonLogs) 92 | for scanner.Scan() { 93 | // Valid line example: 94 | // time=2024-03-20T05:10:57.752Z level=INFO msg="goroutine is created" goroutine_id=22 stack.0=runtime.newproc stack.1=runtime.systemstack stack.2=runtime.newproc stack.3=net/http.(*connReader).startBackgroundRead stack.4=net/http.(*conn).serve stack.5=net/http.(*Server).Serve.gowrap3 stack.6=runtime.goexit 95 | text := scanner.Text() 96 | 97 | if !strings.Contains(text, "msg=\"goroutine is created\"") { 98 | continue 99 | } 100 | 101 | goroutineIdBegin := strings.Index(text, "goroutine_id=") 102 | require.Greaterf(t, goroutineIdBegin, 8, "goroutine_id is not found in %q", text) 103 | goroutineIdEnd := strings.Index(text[goroutineIdBegin:], " ") 104 | goroutineId, err := strconv.Atoi(strings.Split(text[goroutineIdBegin:goroutineIdBegin+goroutineIdEnd], "=")[1]) 105 | require.NoErrorf(t, err, "goroutine_id is not an integer in %q", text) 106 | require.Greaterf(t, goroutineId, 0, "goroutine_id is not a positive integer in %q", text) 107 | 108 | hasValidStack := false 109 | for stackIdx := 0; ; stackIdx++ { 110 | stackBegin := strings.Index(text, fmt.Sprintf("stack.%d=", stackIdx)) 111 | if stackBegin < 0 { 112 | break 113 | } 114 | stackEnd := strings.Index(text[stackBegin:], " ") 115 | if stackEnd < 0 { 116 | break 117 | } 118 | stackValue := strings.Split(text[stackBegin:stackBegin+stackEnd], "=")[1] 119 | require.NotEmptyf(t, stackValue, "stack.%d should have a function name in %q", stackIdx, text) 120 | hasValidStack = true 121 | } 122 | require.Truef(t, hasValidStack, "no stack trace is found in %q", text) 123 | validLineCount++ 124 | } 125 | 126 | require.Greaterf(t, validLineCount, expectValidLineCount, "valid line count is less than %d", expectValidLineCount) 127 | } 128 | 129 | func evaluateGmonMetrics(t *testing.T) { 130 | resp, err := http.Get("http://localhost:5500/metrics") 131 | require.NoError(t, err, "GET /metrics of 
gmon") 132 | require.Equal(t, http.StatusOK, resp.StatusCode, "expect 200 from GET /metrics of gmon") 133 | b, err := io.ReadAll(resp.Body) 134 | require.NoError(t, err, "read response body from GET /metrics of gmon") 135 | defer func() { 136 | if t.Failed() { 137 | t.Logf("------ response body ------\n%s\n------ response body ------", b) 138 | } 139 | }() 140 | 141 | expectedMetrics := []string{ 142 | "gmon_goroutine_creation", 143 | "gmon_goroutine_exit", 144 | "gmon_goroutine_uptime", 145 | } 146 | // Due to the high cardinality concern, we add up to 5 stack labels to metrics. 147 | expectedLabels := map[string]struct{}{ 148 | "stack_0": {}, 149 | "stack_1": {}, 150 | "stack_2": {}, 151 | "stack_3": {}, 152 | "stack_4": {}, 153 | } 154 | actualMetrics := make(map[string][]*dto.Metric) 155 | 156 | dec := expfmt.NewDecoder(bytes.NewReader(b), expfmt.NewFormat(expfmt.TypeTextPlain)) 157 | for { 158 | var mf dto.MetricFamily 159 | err := dec.Decode(&mf) 160 | if errors.Is(err, io.EOF) { 161 | break 162 | } 163 | require.NoError(t, err, "decode response body of GET /metrics") 164 | actualMetrics[mf.GetName()] = mf.GetMetric() 165 | } 166 | for _, expected := range expectedMetrics { 167 | ms, ok := actualMetrics[expected] 168 | require.Truef(t, ok, "metric %q is not found", expected) 169 | for _, m := range ms { 170 | for _, l := range m.GetLabel() { 171 | _, ok := expectedLabels[l.GetName()] 172 | require.Truef(t, ok, "%q doesn't exist in %q", l.GetName(), m) 173 | require.NotEmptyf(t, l.GetValue(), "%q should not have an empty", l.GetName()) 174 | require.NotEqualf(t, "none", l.GetValue(), "%q should not have \"none\"", l.GetName()) 175 | } 176 | } 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /ebpf/bpf_x86_bpfel.go: -------------------------------------------------------------------------------- 1 | // Code generated by bpf2go; DO NOT EDIT. 
2 | //go:build 386 || amd64 3 | 4 | package ebpf 5 | 6 | import ( 7 | "bytes" 8 | _ "embed" 9 | "fmt" 10 | "io" 11 | 12 | "github.com/cilium/ebpf" 13 | ) 14 | 15 | type bpfEvent struct { 16 | GoroutineId int64 17 | StackId int32 18 | Exit bool 19 | _ [3]byte 20 | } 21 | 22 | type bpfStackTraceT [20]uint64 23 | 24 | // loadBpf returns the embedded CollectionSpec for bpf. 25 | func loadBpf() (*ebpf.CollectionSpec, error) { 26 | reader := bytes.NewReader(_BpfBytes) 27 | spec, err := ebpf.LoadCollectionSpecFromReader(reader) 28 | if err != nil { 29 | return nil, fmt.Errorf("can't load bpf: %w", err) 30 | } 31 | 32 | return spec, err 33 | } 34 | 35 | // loadBpfObjects loads bpf and converts it into a struct. 36 | // 37 | // The following types are suitable as obj argument: 38 | // 39 | // *bpfObjects 40 | // *bpfPrograms 41 | // *bpfMaps 42 | // 43 | // See ebpf.CollectionSpec.LoadAndAssign documentation for details. 44 | func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error { 45 | spec, err := loadBpf() 46 | if err != nil { 47 | return err 48 | } 49 | 50 | return spec.LoadAndAssign(obj, opts) 51 | } 52 | 53 | // bpfSpecs contains maps and programs before they are loaded into the kernel. 54 | // 55 | // It can be passed ebpf.CollectionSpec.Assign. 56 | type bpfSpecs struct { 57 | bpfProgramSpecs 58 | bpfMapSpecs 59 | } 60 | 61 | // bpfSpecs contains programs before they are loaded into the kernel. 62 | // 63 | // It can be passed ebpf.CollectionSpec.Assign. 64 | type bpfProgramSpecs struct { 65 | RuntimeGoexit1 *ebpf.ProgramSpec `ebpf:"runtime_goexit1"` 66 | RuntimeNewproc1 *ebpf.ProgramSpec `ebpf:"runtime_newproc1"` 67 | } 68 | 69 | // bpfMapSpecs contains maps before they are loaded into the kernel. 70 | // 71 | // It can be passed ebpf.CollectionSpec.Assign. 
72 | type bpfMapSpecs struct { 73 | Events *ebpf.MapSpec `ebpf:"events"` 74 | StackAddresses *ebpf.MapSpec `ebpf:"stack_addresses"` 75 | } 76 | 77 | // bpfObjects contains all objects after they have been loaded into the kernel. 78 | // 79 | // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. 80 | type bpfObjects struct { 81 | bpfPrograms 82 | bpfMaps 83 | } 84 | 85 | func (o *bpfObjects) Close() error { 86 | return _BpfClose( 87 | &o.bpfPrograms, 88 | &o.bpfMaps, 89 | ) 90 | } 91 | 92 | // bpfMaps contains all maps after they have been loaded into the kernel. 93 | // 94 | // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. 95 | type bpfMaps struct { 96 | Events *ebpf.Map `ebpf:"events"` 97 | StackAddresses *ebpf.Map `ebpf:"stack_addresses"` 98 | } 99 | 100 | func (m *bpfMaps) Close() error { 101 | return _BpfClose( 102 | m.Events, 103 | m.StackAddresses, 104 | ) 105 | } 106 | 107 | // bpfPrograms contains all programs after they have been loaded into the kernel. 108 | // 109 | // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign. 110 | type bpfPrograms struct { 111 | RuntimeGoexit1 *ebpf.Program `ebpf:"runtime_goexit1"` 112 | RuntimeNewproc1 *ebpf.Program `ebpf:"runtime_newproc1"` 113 | } 114 | 115 | func (p *bpfPrograms) Close() error { 116 | return _BpfClose( 117 | p.RuntimeGoexit1, 118 | p.RuntimeNewproc1, 119 | ) 120 | } 121 | 122 | func _BpfClose(closers ...io.Closer) error { 123 | for _, closer := range closers { 124 | if err := closer.Close(); err != nil { 125 | return err 126 | } 127 | } 128 | return nil 129 | } 130 | 131 | // Do not access this directly. 
132 | // 133 | //go:embed bpf_x86_bpfel.o 134 | var _BpfBytes []byte 135 | -------------------------------------------------------------------------------- /ebpf/c/gmon.c: -------------------------------------------------------------------------------- 1 | #include "vmlinux.h" 2 | #include "maps.h" 3 | #include "goroutine.h" 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | // read_stack_id reads the stack id from stack trace map. 10 | // 1 on failure 11 | static __always_inline int read_stack_id(struct pt_regs *ctx, int *stack_id) { 12 | int id = bpf_get_stackid(ctx, &stack_addresses, BPF_F_USER_STACK); 13 | if (id < 0) { 14 | return 1; 15 | } 16 | *stack_id = id; 17 | return 0; 18 | } 19 | 20 | SEC("uretprobe/runtime.newproc1") 21 | int runtime_newproc1(struct pt_regs *ctx) { 22 | void *newg_p = (void *)PT_REGS_RC_CORE(ctx); 23 | if (newg_p == NULL) { 24 | bpf_printk("%s:%d | failed to extract new goroutine pointer from retval\n", __FILE__, __LINE__); 25 | return 0; 26 | } 27 | // `pahole -C runtime.g /path/to/gobinary 2>/dev/null` shows the offsets of the goid. 
// runtime_goexit1 is attached as a uprobe on runtime.goexit1 and emits an
// "exit" event for the goroutine that is running on the current task.
//
// The event carries the goroutine id, the id of the user stack captured at the
// probe, and exit=true. The program always returns 0; when any step fails it
// logs through bpf_printk and emits no event.
SEC("uprobe/runtime.goexit1")
int runtime_goexit1(struct pt_regs *ctx) {
    struct task_struct *task = (struct task_struct *)bpf_get_current_task();
    // Resolve the goroutine id starting from the current task (see read_goroutine_id).
    int64_t go_id = 0;
    if (read_goroutine_id(task, &go_id)) {
        bpf_printk("%s:%d | failed to read goroutine id\n", __FILE__, __LINE__);
        return 0;
    }

    // Capture the user stack so userspace can tell where the goroutine exited.
    int stack_id = 0;
    if (read_stack_id(ctx, &stack_id)) {
        bpf_printk("%s:%d | failed to read stackid\n", __FILE__, __LINE__);
        return 0;
    }

    // Reserve a ring buffer record; the reservation fails when no space is available.
    struct event *ev;
    ev = bpf_ringbuf_reserve(&events, sizeof(*ev), 0);
    if (!ev) {
        bpf_printk("%s:%d | failed to reserve ringbuf\n", __FILE__, __LINE__);
        return 0;
    }
    ev->goroutine_id = go_id;
    ev->stack_id = stack_id;
    ev->exit = true;
    bpf_ringbuf_submit(ev, 0);

    return 0;
}
// g_t declares the leading fields of the Go runtime's goroutine descriptor, up
// to and including goid, so the goroutine id can be copied out of the traced
// process's memory in a single read.
// https://github.com/golang/go/blob/release-branch.go1.23/src/runtime/runtime2.go#L458
//
// The field order and sizes must stay in sync with the runtime version linked
// above. Assuming an 8-byte uintptr_t, goid sits at byte offset 160, which is
// the same constant gmon.c uses when it reads the id directly from a new g.
struct g_t {
    struct stack_t stack_instance;
    uintptr_t stackguard0;
    uintptr_t stackguard1;
    uintptr_t _panic;
    uintptr_t _defer;
    uintptr_t m;
    struct gobuf_t sched;
    uintptr_t syscallsp;
    uintptr_t syscallpc;
    uintptr_t syscallbp;
    uintptr_t stktopsp;
    uintptr_t param;
    uint32_t atomicstatus;
    uint32_t stackLock;
    // Goroutine id; the only field read_goroutine_id actually consumes.
    int64_t goid;
};
// Config describes the process gmon attaches its eBPF programs to.
type Config struct {
	// binPath is the path of the executable to probe.
	binPath string
	// pid is the process id passed to the uprobe options.
	pid int
}

// NewConfig builds a Config from the executable path and the process id.
// The returned error is currently always nil.
func NewConfig(binPath string, pid int) (Config, error) {
	var cfg Config
	cfg.binPath = binPath
	cfg.pid = pid
	return cfg, nil
}

// String renders the configuration on a single line for logging.
func (c Config) String() string {
	const layout = "binPath: %s, pid: %d"
	return fmt.Sprintf(layout, c.binPath, c.pid)
}
stackIdCache.Add(event.StackId, stack) 67 | } 68 | } 69 | 70 | func (h *eventHandler) readRecord(ctx context.Context, event *bpfEvent) error { 71 | _, task := trace.NewTask(ctx, "event_handler.read_ring_buffer") 72 | defer task.End() 73 | record, err := h.reader.Read() 74 | if err != nil { 75 | return err 76 | } 77 | if err := binary.Read(bytes.NewBuffer(record.RawSample), binary.LittleEndian, event); err != nil { 78 | return fmt.Errorf("decode ring buffer record: %w", err) 79 | } 80 | return nil 81 | } 82 | 83 | // lookupStack is a copy of the function in tracee. 84 | // https://github.com/aquasecurity/tracee/blob/f61866b4e2277d2a7dddc6cd77a67cd5a5da3b14/pkg/ebpf/events_pipeline.go#L642-L681 85 | const maxStackDepth = 20 86 | 87 | var stackFrameSize = (strconv.IntSize / 8) 88 | 89 | func (h *eventHandler) lookupStack(ctx context.Context, stackId int32) ([]*proc.Function, error) { 90 | _, task := trace.NewTask(ctx, "event_handler.lookup_stack") 91 | defer task.End() 92 | stackBytes, err := h.objs.StackAddresses.LookupBytes(stackId) 93 | if err != nil { 94 | return nil, fmt.Errorf("failed to lookup stack addresses: %w", err) 95 | } 96 | if stackBytes == nil { 97 | return nil, fmt.Errorf("bytes not found by stack_id=%d", stackId) 98 | } 99 | stack := make([]*proc.Function, maxStackDepth) 100 | stackCounter := 0 101 | for i := 0; i < len(stackBytes); i += stackFrameSize { 102 | stackBytes[stackCounter] = 0 103 | stackAddr := binary.LittleEndian.Uint64(stackBytes[i : i+stackFrameSize]) 104 | if stackAddr == 0 { 105 | break 106 | } 107 | f := h.biTranslator.PCToFunc(stackAddr) 108 | if f == nil { 109 | // I don't know why, but a function address sometime should be last 3 bytes. 
110 | // At leaset, I observerd this behavior in the following binaries: 111 | // - /usr/bin/dockerd 112 | // - /usr/bin/containerd 113 | f = h.biTranslator.PCToFunc(stackAddr & 0xffffff) 114 | if f == nil { 115 | f = &proc.Function{Name: fmt.Sprintf("%#x", stackAddr), Entry: stackAddr} 116 | } 117 | } 118 | stack[stackCounter] = f 119 | stackCounter++ 120 | } 121 | return stack[0:stackCounter], nil 122 | } 123 | 124 | func (h *eventHandler) sendGoroutine(g goroutine) { 125 | maxRetries := 3 126 | retryInterval := 10 * time.Millisecond 127 | for attempts := 0; attempts < maxRetries; attempts++ { 128 | select { 129 | case h.goroutineQueue <- g: 130 | if attempts > 0 { 131 | slog.Info( 132 | "goroutine is sent successfully after retries", 133 | slog.Int("retry", attempts+1), 134 | slog.String("goroutine_id", fmt.Sprintf("%d", g.Id)), 135 | slog.Bool("exit", g.Exit), 136 | stackLogAttr(g.Stack), 137 | ) 138 | } 139 | return // Successfully sent 140 | default: 141 | if attempts < maxRetries-1 { 142 | time.Sleep(retryInterval) // Wait before retrying 143 | } else { 144 | slog.Warn( 145 | "goroutine queue is full, retrying", 146 | slog.String("goroutine_id", fmt.Sprintf("%d", g.Id)), 147 | slog.Bool("exit", g.Exit), 148 | stackLogAttr(g.Stack), 149 | ) 150 | } 151 | } 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /ebpf/gmon.go: -------------------------------------------------------------------------------- 1 | package ebpf 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "log/slog" 8 | 9 | "github.com/cilium/ebpf" 10 | "github.com/cilium/ebpf/link" 11 | "github.com/cilium/ebpf/ringbuf" 12 | "github.com/keisku/gmon/bininfo" 13 | ) 14 | 15 | // $BPF_CLANG and $BPF_CFLAGS are set by the Makefile. 
16 | //go:generate go run github.com/cilium/ebpf/cmd/bpf2go -type event -cc $BPF_CLANG -target amd64 -cflags $BPF_CFLAGS bpf ./c/gmon.c -- -I./c 17 | 18 | func Run(ctx context.Context, config Config) (func(), error) { 19 | slog.Debug("eBPF programs start with config", slog.String("config", config.String())) 20 | objs := bpfObjects{} 21 | if err := loadBpfObjects(&objs, nil); err != nil { 22 | return func() {}, err 23 | } 24 | biTranslator, err := bininfo.NewTranslator(config.binPath) 25 | if err != nil { 26 | return func() {}, err 27 | } 28 | ex, err := link.OpenExecutable(config.binPath) 29 | if err != nil { 30 | return func() {}, err 31 | } 32 | var links [2]link.Link 33 | links[0], err = linkUprobe( 34 | ex, 35 | objs.RuntimeNewproc1, 36 | "runtime.newproc1", 37 | true, 38 | config.pid, 39 | biTranslator.Address, 40 | ) 41 | if err != nil { 42 | return func() {}, err 43 | } 44 | links[1], err = linkUprobe( 45 | ex, 46 | objs.RuntimeGoexit1, 47 | "runtime.goexit1", 48 | false, 49 | config.pid, 50 | biTranslator.Address, 51 | ) 52 | if err != nil { 53 | return func() {}, err 54 | } 55 | ringbufReader, err := ringbuf.NewReader(objs.Events) 56 | if err != nil { 57 | return func() {}, err 58 | } 59 | goroutineQueue := make(chan goroutine, 100) 60 | eventhandler := &eventHandler{ 61 | goroutineQueue: goroutineQueue, 62 | objs: &objs, 63 | biTranslator: biTranslator, 64 | reader: ringbufReader, 65 | } 66 | reporter := &reporter{ 67 | goroutineQueue: goroutineQueue, 68 | } 69 | go reporter.run(ctx) 70 | go eventhandler.run(ctx) 71 | return func() { 72 | ringbufReader.Close() 73 | for i := range links { 74 | if err := links[i].Close(); err != nil { 75 | slog.Warn("Failed to close link", slog.Any("error", err)) 76 | } 77 | } 78 | if err := objs.Close(); err != nil { 79 | slog.Warn("Failed to close bpf objects", slog.Any("error", err)) 80 | } 81 | }, nil 82 | } 83 | 84 | func linkUprobe( 85 | exe *link.Executable, 86 | program *ebpf.Program, 87 | symbol string, 88 | ret 
bool, 89 | pid int, 90 | lookupAddress func(string) uint64, 91 | ) (link.Link, error) { 92 | var l link.Link 93 | var err error 94 | if ret { 95 | l, err = exe.Uretprobe(symbol, program, &link.UprobeOptions{PID: pid}) 96 | } else { 97 | l, err = exe.Uprobe(symbol, program, &link.UprobeOptions{PID: pid}) 98 | } 99 | if err == nil { 100 | return l, nil 101 | } 102 | if errors.Is(err, link.ErrNoSymbol) { 103 | slog.Debug("no symbol table", slog.String("symbol", symbol)) 104 | } else { 105 | return nil, fmt.Errorf("failed to attach uprobe for %s: %w", symbol, err) 106 | } 107 | address := lookupAddress(symbol) 108 | if address == 0 { 109 | return nil, fmt.Errorf("no address found for %s", symbol) 110 | } 111 | if ret { 112 | l, err = exe.Uretprobe(symbol, program, &link.UprobeOptions{PID: pid, Address: address}) 113 | } else { 114 | l, err = exe.Uprobe(symbol, program, &link.UprobeOptions{PID: pid, Address: address}) 115 | } 116 | if err != nil { 117 | return nil, fmt.Errorf("failed to attach uprobe for %s: %w", symbol, err) 118 | } 119 | slog.Debug("attach uprobe with address", slog.String("symbol", symbol), slog.String("address", fmt.Sprintf("%#x", address))) 120 | return l, nil 121 | } 122 | -------------------------------------------------------------------------------- /ebpf/reporter.go: -------------------------------------------------------------------------------- 1 | package ebpf 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log/slog" 7 | "runtime/trace" 8 | "sync" 9 | "time" 10 | 11 | "github.com/go-delve/delve/pkg/proc" 12 | "github.com/prometheus/client_golang/prometheus" 13 | "github.com/prometheus/client_golang/prometheus/promauto" 14 | ) 15 | 16 | var ( 17 | namespace = "gmon" 18 | stackLabelKeys = []string{"stack_0", "stack_1", "stack_2", "stack_3", "stack_4"} // 0 is the top 19 | goroutineExit = promauto.NewCounterVec( 20 | prometheus.CounterOpts{ 21 | Namespace: namespace, 22 | Name: "goroutine_exit", 23 | Help: "The number of goroutines that have 
been exited", 24 | }, 25 | stackLabelKeys, 26 | ) 27 | goroutineCreation = promauto.NewCounterVec( 28 | prometheus.CounterOpts{ 29 | Namespace: namespace, 30 | Name: "goroutine_creation", 31 | Help: "The number of goroutines that have been creaated", 32 | }, 33 | stackLabelKeys, 34 | ) 35 | goroutineUptime = promauto.NewHistogramVec( 36 | prometheus.HistogramOpts{ 37 | Namespace: namespace, 38 | Name: "goroutine_uptime", 39 | Help: "Uptime of goroutines in seconds", 40 | Buckets: []float64{1, 3, 5, 10, 30, 60, 120, 180}, 41 | }, 42 | stackLabelKeys, 43 | ) 44 | ) 45 | 46 | type goroutine struct { 47 | Id int64 48 | ObservedAt time.Time 49 | Stack []*proc.Function 50 | Exit bool 51 | } 52 | 53 | type reporter struct { 54 | goroutineQueue <-chan goroutine 55 | goroutineMap sync.Map 56 | } 57 | 58 | var reportInterval = 500 * time.Millisecond 59 | 60 | func (r *reporter) run(ctx context.Context) { 61 | go r.reportUptime(ctx) 62 | go r.subscribe(ctx) 63 | <-ctx.Done() 64 | } 65 | 66 | func (r *reporter) reportUptime(ctx context.Context) { 67 | ticker := time.NewTicker(reportInterval) 68 | for { 69 | select { 70 | case <-ctx.Done(): 71 | ticker.Stop() 72 | return 73 | case <-ticker.C: 74 | ctx, task := trace.NewTask(ctx, "reporter.report_goroutine_uptime") 75 | trace.WithRegion(ctx, "reporter.report_goroutine_uptime.iterate_goroutine_map", func() { 76 | r.goroutineMap.Range(func(_, value any) bool { 77 | g := value.(goroutine) 78 | goroutineUptime.With(stackLabels(g.Stack)).Observe(time.Since(g.ObservedAt).Seconds()) 79 | return true 80 | }) 81 | }) 82 | task.End() 83 | } 84 | } 85 | } 86 | 87 | func (r *reporter) subscribe(ctx context.Context) { 88 | for g := range r.goroutineQueue { 89 | ctx, task := trace.NewTask(ctx, "reporter.store_goroutine") 90 | r.storeGoroutine(ctx, g) 91 | task.End() 92 | } 93 | } 94 | 95 | func (r *reporter) storeGoroutine(ctx context.Context, g goroutine) { 96 | v, loaded := r.goroutineMap.Load(g.Id) 97 | if loaded { 98 | _, task := 
trace.NewTask(ctx, "reporter.store_goroutine_exit") 99 | oldg, ok := v.(goroutine) 100 | if !ok { 101 | slog.Error("goroutineMap has unexpected value", slog.Any("value", v)) 102 | return 103 | } 104 | goroutineExit.With(stackLabels(oldg.Stack)).Inc() 105 | goroutineUptime.With(stackLabels(oldg.Stack)).Observe(time.Since(oldg.ObservedAt).Seconds()) 106 | r.goroutineMap.Delete(oldg.Id) 107 | task.End() 108 | return 109 | } 110 | if g.Exit { 111 | // Avoid storing goroutines that lack a corresponding newproc1 pair. 112 | return 113 | } 114 | _, task := trace.NewTask(ctx, "reporter.store_goroutine_creation") 115 | slog.Info("goroutine is created", slog.Int64("goroutine_id", g.Id), stackLogAttr(g.Stack)) 116 | goroutineCreation.With(stackLabels(g.Stack)).Inc() 117 | r.goroutineMap.Store(g.Id, g) 118 | task.End() 119 | } 120 | 121 | // LogAttr returns a slog.Attr that can be used to log the stack. 122 | func stackLogAttr(stack []*proc.Function) slog.Attr { 123 | attrs := make([]any, len(stack)) 124 | for i, f := range stack { 125 | if f == nil { 126 | panic("stack must not have nil function") 127 | } 128 | attrs[i] = slog.String(fmt.Sprintf("%d", i), f.Name) 129 | } 130 | return slog.Group("stack", attrs...) 131 | } 132 | 133 | // stackLabels generates a set of Prometheus labels for the top functions in the stack. 134 | // If the stack has fewer than expected functions, it fills the remaining labels with "none". 135 | func stackLabels(stack []*proc.Function) prometheus.Labels { 136 | labels := prometheus.Labels{} 137 | 138 | // Ensure to only process the top 5 elements, or the stack length if shorter. 139 | topN := len(stack) 140 | if topN > len(stackLabelKeys) { 141 | topN = len(stackLabelKeys) 142 | } 143 | 144 | for i := 0; i < len(stackLabelKeys); i++ { 145 | labelKey := fmt.Sprintf("stack_%d", i) 146 | if i < topN { 147 | // Stack is reversed, so we start from the end of the slice. 
148 | labels[labelKey] = stack[len(stack)-1-i].Name 149 | } else { 150 | labels[labelKey] = "none" 151 | } 152 | } 153 | 154 | return labels 155 | } 156 | -------------------------------------------------------------------------------- /ebpf/reporter_test.go: -------------------------------------------------------------------------------- 1 | package ebpf 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/go-delve/delve/pkg/proc" 7 | "github.com/prometheus/client_golang/prometheus" 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func Test_stackLabels(t *testing.T) { 12 | type args struct { 13 | stack []*proc.Function 14 | } 15 | tests := []struct { 16 | name string 17 | args args 18 | want prometheus.Labels 19 | }{ 20 | { 21 | name: "stack length is 5", 22 | args: args{ 23 | stack: []*proc.Function{ 24 | {Name: "func1"}, 25 | {Name: "func2"}, 26 | {Name: "func3"}, 27 | {Name: "func4"}, 28 | {Name: "func5"}, 29 | }, 30 | }, 31 | want: prometheus.Labels{ 32 | "stack_0": "func5", 33 | "stack_1": "func4", 34 | "stack_2": "func3", 35 | "stack_3": "func2", 36 | "stack_4": "func1", 37 | }, 38 | }, 39 | { 40 | name: "stack length is 3", 41 | args: args{ 42 | stack: []*proc.Function{ 43 | {Name: "func1"}, 44 | {Name: "func2"}, 45 | {Name: "func3"}, 46 | }, 47 | }, 48 | want: prometheus.Labels{ 49 | "stack_0": "func3", 50 | "stack_1": "func2", 51 | "stack_2": "func1", 52 | "stack_3": "none", 53 | "stack_4": "none", 54 | }, 55 | }, 56 | { 57 | name: "stack length is 10", 58 | args: args{ 59 | stack: []*proc.Function{ 60 | {Name: "func1"}, 61 | {Name: "func2"}, 62 | {Name: "func3"}, 63 | {Name: "func4"}, 64 | {Name: "func5"}, 65 | {Name: "func6"}, 66 | {Name: "func7"}, 67 | {Name: "func8"}, 68 | {Name: "func9"}, 69 | {Name: "func10"}, 70 | }, 71 | }, 72 | want: prometheus.Labels{ 73 | "stack_0": "func10", 74 | "stack_1": "func9", 75 | "stack_2": "func8", 76 | "stack_3": "func7", 77 | "stack_4": "func6", 78 | }, 79 | }, 80 | } 81 | for _, tt := range tests { 82 | 
// main runs the fixture HTTP server used by the end-to-end test. It serves
// GET /get/200 on :8080 and runs until it is interrupted.
//
// If the server cannot be started (for example because the port is already in
// use), the error is written to standard error and the process exits with
// status 1 instead of hanging without serving anything.
func main() {
	// os.Kill (SIGKILL) can never be caught by a program, so only os.Interrupt
	// is registered.
	ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
	defer cancel()

	http.HandleFunc("/get/200", func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusOK)
	})

	// ListenAndServe always returns a non-nil error; surface it rather than
	// dropping it inside the goroutine.
	serveErr := make(chan error, 1)
	go func() {
		serveErr <- http.ListenAndServe(":8080", nil)
	}()

	select {
	case <-ctx.Done():
	case err := <-serveErr:
		// os.Exit skips deferred calls, so release the signal handler explicitly.
		cancel()
		os.Stderr.WriteString("fixture: " + err.Error() + "\n")
		os.Exit(1)
	}
}
-f2) 14 | if [ "$major_version" -gt 5 ] || ([ "$major_version" -eq 5 ] && [ "$minor_version" -ge 8 ]); then 15 | echo "Your kernel version is $kernel_version" 16 | else 17 | echo "Your kernel version should be >= 5.8, got $kernel_version" 18 | exit 1 19 | fi 20 | 21 | if [ "$1" = "build" ] || [ "$1" = "install" ] || [ "$1" = "test" ] || [ "$1" = "format" ]; then 22 | echo "Running $1 on $arch" 23 | else 24 | echo "Unsupported command: $1" 25 | exit 1 26 | fi 27 | 28 | image_buildenv=gmon-buildenv-$arch 29 | dockerfile_buildenv=$(mktemp) 30 | cat > "$dockerfile_buildenv" < ./ebpf/c/vmlinux.h && \ 63 | go generate -x ./... && \ 64 | GOFLAGS="-buildvcs=auto" CGO_ENABLED=0 go build \ 65 | -ldflags "-s -w -X main.Version=0.0.0-dev" \ 66 | -o /usr/src/bin/gmon' 67 | if [ "$1" = "build" ]; then 68 | exit 0 69 | fi 70 | 71 | if [ "$1" = "install" ]; then 72 | sudo rm -f /usr/bin/gmon || true 73 | sudo install ./bin/gmon /usr/bin/ 74 | exit 0 75 | fi 76 | 77 | if [ "$1" = "format" ]; then 78 | docker run --platform linux/$arch -i \ 79 | -v $(pwd):/usr/src \ 80 | --rm $image_buildenv bash -c '\ 81 | go mod tidy && \ 82 | go vet ./... && \ 83 | find . -type f \( -name '*.[ch]' -and -not -name 'vmlinux.h' \) -exec clang-format -i {} \;' 84 | exit 0 85 | fi 86 | 87 | if [ "$1" = "test" ]; then 88 | image_e2e=gmon-e2e-$arch 89 | dockerfile_e2e=$(mktemp) 90 | cat > "$dockerfile_e2e" <