├── .clang-format
├── .github
└── workflows
│ └── test.yaml
├── .gitignore
├── LICENSE
├── README.md
├── bininfo
└── bininfo.go
├── e2e_test.go
├── ebpf
├── bpf_x86_bpfel.go
├── c
│ ├── gmon.c
│ ├── goroutine.h
│ └── maps.h
├── config.go
├── event_handler.go
├── gmon.go
├── reporter.go
└── reporter_test.go
├── fixture
├── go.mod
└── main.go
├── gmon.sh
├── go.mod
├── go.sum
└── main.go
/.clang-format:
--------------------------------------------------------------------------------
1 | ---
2 | Language: Cpp
3 | AccessModifierOffset: -4
4 | AlignAfterOpenBracket: DontAlign
5 | AlignConsecutiveAssignments: false
6 | AlignConsecutiveDeclarations: false
7 | AlignEscapedNewlines: Left
8 | AlignOperands: true
9 | AlignTrailingComments: false
10 | AllowAllParametersOfDeclarationOnNextLine: false
11 | AllowShortBlocksOnASingleLine: false
12 | AllowShortCaseLabelsOnASingleLine: false
13 | AllowShortEnumsOnASingleLine: false
14 | AllowShortFunctionsOnASingleLine: None
15 | AllowShortIfStatementsOnASingleLine: true
16 | AllowShortLoopsOnASingleLine: false
17 | AlwaysBreakAfterDefinitionReturnType: None
18 | AlwaysBreakAfterReturnType: None
19 | AlwaysBreakBeforeMultilineStrings: false
20 | AlwaysBreakTemplateDeclarations: false
21 | BinPackArguments: true
22 | BinPackParameters: true
23 | BraceWrapping:
24 | AfterCaseLabel: false
25 | AfterClass: false
26 | AfterControlStatement: Never
27 | AfterEnum: false
28 | AfterFunction: false
29 | AfterNamespace: true
30 | AfterObjCDeclaration: false
31 | AfterStruct: false
32 | AfterUnion: false
33 | AfterExternBlock: false
34 | BeforeCatch: false
35 | BeforeElse: false
36 | IndentBraces: false
37 | SplitEmptyFunction: true
38 | SplitEmptyRecord: true
39 | SplitEmptyNamespace: true
40 | BreakBeforeBinaryOperators: None
41 | BreakBeforeBraces: Custom
42 | BreakBeforeInheritanceComma: false
43 | BreakBeforeTernaryOperators: false
44 | BreakConstructorInitializersBeforeComma: false
45 | BreakConstructorInitializers: BeforeComma
46 | BreakAfterJavaFieldAnnotations: false
47 | BreakStringLiterals: false
48 | ColumnLimit: 0
49 | CommentPragmas: '^ IWYU pragma:'
50 | CompactNamespaces: false
51 | ConstructorInitializerAllOnOneLineOrOnePerLine: false
52 | ConstructorInitializerIndentWidth: 4
53 | ContinuationIndentWidth: 4
54 | Cpp11BracedListStyle: false
55 | DerivePointerAlignment: false
56 | DisableFormat: false
57 | ExperimentalAutoDetectBinPacking: false
58 | FixNamespaceComments: false
59 | IncludeBlocks: Preserve
60 | IncludeCategories:
61 | - Regex: '.*'
62 | Priority: 1
63 | IncludeIsMainRegex: '(Test)?$'
64 | IndentCaseLabels: false
65 | IndentPPDirectives: None
66 | IndentWidth: 4
67 | IndentWrappedFunctionNames: false
68 | JavaScriptQuotes: Leave
69 | JavaScriptWrapImports: true
70 | KeepEmptyLinesAtTheStartOfBlocks: false
71 | MacroBlockBegin: ''
72 | MacroBlockEnd: ''
73 | MaxEmptyLinesToKeep: 1
74 | NamespaceIndentation: None
75 | ObjCBinPackProtocolList: Auto
76 | ObjCBlockIndentWidth: 4
77 | ObjCSpaceAfterProperty: true
78 | ObjCSpaceBeforeProtocolList: true
79 | PenaltyBreakAssignment: 10
80 | PenaltyBreakBeforeFirstCallParameter: 30
81 | PenaltyBreakComment: 10
82 | PenaltyBreakFirstLessLess: 0
83 | PenaltyBreakString: 10
84 | PenaltyExcessCharacter: 100
85 | PenaltyReturnTypeOnItsOwnLine: 60
86 | PointerAlignment: Right
87 | ReflowComments: false
88 | SortIncludes: false
89 | SortUsingDeclarations: false
90 | SpaceAfterCStyleCast: false
91 | SpaceAfterTemplateKeyword: true
92 | SpaceBeforeAssignmentOperators: true
93 | SpaceBeforeCtorInitializerColon: true
94 | SpaceBeforeInheritanceColon: true
95 | SpaceBeforeParens: ControlStatements
96 | SpaceBeforeRangeBasedForLoopColon: true
97 | SpaceInEmptyParentheses: false
98 | SpacesBeforeTrailingComments: 1
99 | SpacesInAngles: false
100 | SpacesInContainerLiterals: false
101 | SpacesInCStyleCastParentheses: false
102 | SpacesInParentheses: false
103 | SpacesInSquareBrackets: false
104 | Standard: Cpp03
105 | TabWidth: 4
106 | UseTab: Never
107 |
--------------------------------------------------------------------------------
/.github/workflows/test.yaml:
--------------------------------------------------------------------------------
1 | on:
2 | pull_request:
3 | push:
4 | branches:
5 | - main
6 | name: gmon test
7 | jobs:
8 | test:
9 | runs-on: ubuntu-latest
10 | steps:
11 | - uses: actions/checkout@v4
12 | with:
13 | fetch-depth: 0
14 | - name: Build & Format
15 | run: ./gmon.sh format
16 | - name: Check changes
17 | run: |
18 | if ! git diff --quiet || ! git diff --staged --quiet; then
19 | echo "Changes detected"
20 | uname -a
21 | git diff
22 | git diff --staged
23 | exit 1
24 | fi
25 | - run: ./gmon.sh test
26 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # If you prefer the allow list template instead of the deny list, see community template:
2 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore
3 | #
4 | # Binaries for programs and plugins
5 | *.exe
6 | *.exe~
7 | *.dll
8 | *.so
9 | *.dylib
10 |
11 | # Test binary, built with `go test -c`
12 | *.test
13 |
14 | # Output of the go coverage tool, specifically when used with LiteIDE
15 | *.out
16 |
17 | # Dependency directories (remove the comment below to include it)
18 | vendor/
19 |
20 | # Go workspace file
21 | go.work
22 |
23 | bin
24 |
25 | .vscode/launch.json
26 |
27 | fixture/fixture
28 | ebpf/c/vmlinux.h
29 | ebpf/bpf_x86_bpfel.o
30 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [2024] [Keisuke Umegaki]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Goroutine MONitor (gmon)
2 |
3 |
4 |
5 | `gmon` is a tool designed to monitor the creation and destruction of goroutines in a Go program, drawing inspiration from the presentation [Real World Debugging with eBPF](https://www.usenix.org/conference/srecon23apac/presentation/liang).
6 |
7 | # Prerequisites
8 |
9 | - amd64 (x86_64)
10 | - Linux Kernel 5.8+, since `gmon` uses the [BPF ring buffer](https://nakryiko.com/posts/bpf-ringbuf/)
11 | - The target Go binary must be compiled with Go 1.23+, since `gmon` uses a fixed offset to get the goroutine ID
12 |
13 | # Usage
14 |
15 | ```
16 | Usage of gmon:
17 | -level string
18 | log level could be one of ["DEBUG" "INFO" "WARN" "ERROR"] (default "INFO")
19 | -metrics int
20 | Port to be used for metrics server, /metrics endpoint (default 5500)
21 | -path string
22 | Path to executable file to be monitored (required)
23 | -pid int
24 | Useful when tracing programs that have many running instances
25 | -pprof int
26 | Port to be used for pprof server. If 0, pprof server is not started
27 | -trace string
28 | Path to Go runtime/trace output
29 | ```
30 |
31 | ## Demo
32 |
33 | https://github.com/keisku/gmon/assets/41987730/838fa12d-d622-4ad6-a9f0-6aab88acec55
34 |
35 | ## Stdout
36 |
37 | `gmon` logs the creation of goroutines to stdout with stack traces.
38 |
39 | ```bash
40 | sudo gmon -path /path/to/executable
41 | time=2024-03-20T05:10:57.752Z level=INFO msg="goroutine is created" goroutine_id=22 stack.0=runtime.newproc stack.1=runtime.systemstack stack.2=runtime.newproc stack.3=net/http.(*connReader).startBackgroundRead stack.4=net/http.(*conn).serve stack.5=net/http.(*Server).Serve.gowrap3 stack.6=runtime.goexit
42 | time=2024-03-20T05:10:57.752Z level=INFO msg="goroutine is created" goroutine_id=21 stack.0=runtime.newproc stack.1=runtime.systemstack stack.2=runtime.newproc stack.3=net/http.(*Server).Serve stack.4=net/http.(*Server).ListenAndServe stack.5=main.main.gowrap1 stack.6=runtime.goexit
43 | time=2024-03-20T05:10:57.752Z level=INFO msg="goroutine is created" goroutine_id=23 stack.0=runtime.newproc stack.1=runtime.systemstack stack.2=runtime.newproc stack.3=net/http.(*Server).Serve stack.4=net/http.(*Server).ListenAndServe stack.5=main.main.gowrap1 stack.6=runtime.goexit
44 | time=2024-03-20T05:10:57.752Z level=INFO msg="goroutine is created" goroutine_id=34 stack.0=runtime.newproc stack.1=runtime.systemstack stack.2=runtime.newproc stack.3=net/http.(*Server).Serve stack.4=net/http.(*Server).ListenAndServe stack.5=main.main.gowrap1 stack.6=runtime.goexit
45 | time=2024-03-20T05:10:57.752Z level=INFO msg="goroutine is created" goroutine_id=24 stack.0=runtime.newproc stack.1=runtime.systemstack stack.2=runtime.newproc stack.3=net/http.(*connReader).startBackgroundRead stack.4=net/http.(*conn).serve stack.5=net/http.(*Server).Serve.gowrap3 stack.6=runtime.goexit
46 | time=2024-03-20T05:10:57.752Z level=INFO msg="goroutine is created" goroutine_id=35 stack.0=runtime.newproc stack.1=runtime.systemstack stack.2=runtime.newproc stack.3=net/http.(*connReader).startBackgroundRead stack.4=net/http.(*conn).serve stack.5=net/http.(*Server).Serve.gowrap3 stack.6=runtime.goexit
47 | ```
48 |
49 | ## OpenMetrics
50 |
51 | `gmon` exposes the following metrics in the [OpenMetrics](https://www.cncf.io/projects/openmetrics/) format at the `GET /metrics` endpoint.
52 |
53 | - `gmon_goroutine_creation`
54 | - `gmon_goroutine_exit`
55 | - `gmon_goroutine_uptime`
56 |
57 | ```bash
58 | curl -s http://localhost:5500/metrics
59 |
60 | # HELP gmon_goroutine_creation The number of goroutines that have been creaated
61 | # TYPE gmon_goroutine_creation counter
62 | gmon_goroutine_creation{stack_0="runtime.goexit",stack_1="main.main.gowrap1",stack_2="net/http.(*Server).ListenAndServe",stack_3="net/http.(*Server).Serve",stack_4="runtime.newproc"} 1
63 | gmon_goroutine_creation{stack_0="runtime.goexit",stack_1="net/http.(*Server).Serve.gowrap3",stack_2="net/http.(*conn).serve",stack_3="net/http.(*connReader).startBackgroundRead",stack_4="runtime.newproc"} 3
64 | # HELP gmon_goroutine_exit The number of goroutines that have been exited
65 | # TYPE gmon_goroutine_exit counter
66 | gmon_goroutine_exit{stack_0="runtime.goexit",stack_1="net/http.(*Server).Serve.gowrap3",stack_2="net/http.(*conn).serve",stack_3="net/http.(*connReader).startBackgroundRead",stack_4="runtime.newproc"} 3
67 | # HELP gmon_goroutine_uptime Uptime of goroutines in seconds
68 | # TYPE gmon_goroutine_uptime histogram
69 | gmon_goroutine_uptime_bucket{stack_0="runtime.goexit",stack_1="main.main.gowrap1",stack_2="net/http.(*Server).ListenAndServe",stack_3="net/http.(*Server).Serve",stack_4="runtime.newproc",le="1"} 2
70 | gmon_goroutine_uptime_bucket{stack_0="runtime.goexit",stack_1="main.main.gowrap1",stack_2="net/http.(*Server).ListenAndServe",stack_3="net/http.(*Server).Serve",stack_4="runtime.newproc",le="3"} 2
71 | gmon_goroutine_uptime_bucket{stack_0="runtime.goexit",stack_1="main.main.gowrap1",stack_2="net/http.(*Server).ListenAndServe",stack_3="net/http.(*Server).Serve",stack_4="runtime.newproc",le="5"} 2
72 | gmon_goroutine_uptime_bucket{stack_0="runtime.goexit",stack_1="main.main.gowrap1",stack_2="net/http.(*Server).ListenAndServe",stack_3="net/http.(*Server).Serve",stack_4="runtime.newproc",le="10"} 2
73 | gmon_goroutine_uptime_bucket{stack_0="runtime.goexit",stack_1="main.main.gowrap1",stack_2="net/http.(*Server).ListenAndServe",stack_3="net/http.(*Server).Serve",stack_4="runtime.newproc",le="30"} 2
74 | gmon_goroutine_uptime_bucket{stack_0="runtime.goexit",stack_1="main.main.gowrap1",stack_2="net/http.(*Server).ListenAndServe",stack_3="net/http.(*Server).Serve",stack_4="runtime.newproc",le="60"} 2
75 | gmon_goroutine_uptime_bucket{stack_0="runtime.goexit",stack_1="main.main.gowrap1",stack_2="net/http.(*Server).ListenAndServe",stack_3="net/http.(*Server).Serve",stack_4="runtime.newproc",le="120"} 2
76 | gmon_goroutine_uptime_bucket{stack_0="runtime.goexit",stack_1="main.main.gowrap1",stack_2="net/http.(*Server).ListenAndServe",stack_3="net/http.(*Server).Serve",stack_4="runtime.newproc",le="180"} 2
77 | gmon_goroutine_uptime_bucket{stack_0="runtime.goexit",stack_1="main.main.gowrap1",stack_2="net/http.(*Server).ListenAndServe",stack_3="net/http.(*Server).Serve",stack_4="runtime.newproc",le="+Inf"} 2
78 | gmon_goroutine_uptime_sum{stack_0="runtime.goexit",stack_1="main.main.gowrap1",stack_2="net/http.(*Server).ListenAndServe",stack_3="net/http.(*Server).Serve",stack_4="runtime.newproc"} 0.9001332019999999
79 | gmon_goroutine_uptime_count{stack_0="runtime.goexit",stack_1="main.main.gowrap1",stack_2="net/http.(*Server).ListenAndServe",stack_3="net/http.(*Server).Serve",stack_4="runtime.newproc"} 2
80 | ...skip...
81 | ```
82 |
83 | # Development
84 |
85 | Docker is required to build and run the tests; install it by following [the Docker installation guide](https://docs.docker.com/engine/install/#supported-platforms).
86 |
87 | ```bash
88 | # Build and output the binary to ./bin
89 | ./gmon.sh build
90 | # Build and install the binary to /usr/bin
91 | ./gmon.sh install
92 | # Run tests
93 | ./gmon.sh test
94 | ```
95 |
--------------------------------------------------------------------------------
/bininfo/bininfo.go:
--------------------------------------------------------------------------------
1 | package bininfo
2 |
3 | import (
4 | "debug/elf"
5 | "errors"
6 | "fmt"
7 | "log/slog"
8 | "runtime"
9 | "sort"
10 |
11 | "github.com/go-delve/delve/pkg/proc"
12 | )
13 |
14 | // Translator resolves symbol names and program counters for an executable.
15 | type Translator interface {
16 | // Address returns the address of the given symbol in the executable (the implementations in this file return 0 when it is unknown).
17 | Address(symbol string) uint64
18 | // PCToFunc returns the function that contains the given program counter (the symbol-table implementation returns nil when none matches).
19 | PCToFunc(pc uint64) *proc.Function
20 | }
21 |
22 | // NewTranslator creates a new Translator for the given executable.
23 | func NewTranslator(path string) (Translator, error) {
24 | bi, err := newBinInfo(path)
25 | if err == nil {
26 | slog.Debug("loaded binary info")
27 | return bi, nil
28 | } else {
29 | slog.Debug("failed to load binary info", slog.Any("error", err))
30 | }
31 | s, err := newSymbolTable(path)
32 | if err != nil {
33 | return nil, fmt.Errorf("failed to load symbol table: %w", err)
34 | }
35 | slog.Debug("loaded symbol table")
36 | return s, nil
37 | }
38 |
39 | type binaryInfo struct {
40 | *proc.BinaryInfo
41 | }
42 |
43 | func newBinInfo(path string) (*binaryInfo, error) {
44 | bininfo := proc.NewBinaryInfo(runtime.GOOS, runtime.GOARCH)
45 | if err := bininfo.LoadBinaryInfo(path, 0, nil); err != nil {
46 | return nil, fmt.Errorf("failed to load binary info: %w", err)
47 | }
48 | var bi binaryInfo
49 | bi.BinaryInfo = bininfo
50 | return &bi, nil
51 | }
52 |
53 | func (bi *binaryInfo) Address(symbol string) uint64 {
54 | funcs, err := bi.FindFunction(symbol)
55 | if err != nil {
56 | return 0
57 | }
58 | for _, f := range funcs {
59 | if f.Name == symbol {
60 | return f.Entry
61 | }
62 | }
63 | return 0
64 | }
65 |
66 | type symbolTable struct {
67 | addresses map[string]uint64
68 | functions []*proc.Function
69 | }
70 |
71 | // Copy from https://github.com/cilium/ebpf/blob/v0.12.3/link/uprobe.go#L116-L160
72 | func newSymbolTable(path string) (*symbolTable, error) {
73 | f, err := elf.Open(path)
74 | if err != nil {
75 | return nil, err
76 | }
77 | syms, err := f.Symbols()
78 | if err != nil && !errors.Is(err, elf.ErrNoSymbols) {
79 | return nil, err
80 | }
81 | dynsyms, err := f.DynamicSymbols()
82 | if err != nil && !errors.Is(err, elf.ErrNoSymbols) {
83 | return nil, err
84 | }
85 | syms = append(syms, dynsyms...)
86 | if len(syms) == 0 {
87 | return nil, elf.ErrNoSymbols
88 | }
89 | addresses := make(map[string]uint64)
90 | functions := make([]*proc.Function, 0, len(syms))
91 | for _, s := range syms {
92 | if elf.ST_TYPE(s.Info) != elf.STT_FUNC {
93 | continue
94 | }
95 | address := s.Value
96 | for _, prog := range f.Progs {
97 | if prog.Type != elf.PT_LOAD || (prog.Flags&elf.PF_X) == 0 {
98 | continue
99 | }
100 | if prog.Vaddr <= s.Value && s.Value < (prog.Vaddr+prog.Memsz) {
101 | address = s.Value - prog.Vaddr + prog.Off
102 | break
103 | }
104 | }
105 | addresses[s.Name] = address
106 | index := sort.Search(len(functions), func(i int) bool { return functions[i].Entry >= address })
107 | functions = append(functions, &proc.Function{})
108 | copy(functions[index+1:], functions[index:])
109 | functions[index] = &proc.Function{
110 | Name: s.Name,
111 | Entry: address,
112 | }
113 | }
114 | for i := 0; i < len(functions)-1; i++ {
115 | functions[i].End = functions[i+1].Entry
116 | }
117 | return &symbolTable{
118 | addresses: addresses,
119 | functions: functions,
120 | }, nil
121 | }
122 |
123 | func (s *symbolTable) Address(symbol string) uint64 {
124 | if address, ok := s.addresses[symbol]; ok {
125 | return address
126 | }
127 | return 0
128 | }
129 |
130 | func (s *symbolTable) PCToFunc(pc uint64) *proc.Function {
131 | low := 0
132 | high := len(s.functions) - 1
133 |
134 | for low <= high {
135 | mid := low + (high-low)/2
136 | f := s.functions[mid]
137 |
138 | if pc < f.Entry {
139 | high = mid - 1
140 | } else if pc > f.End {
141 | low = mid + 1
142 | } else {
143 | return f
144 | }
145 | }
146 | return nil
147 | }
148 |
--------------------------------------------------------------------------------
/e2e_test.go:
--------------------------------------------------------------------------------
1 | package main_test
2 |
3 | import (
4 | "bufio"
5 | "bytes"
6 | "errors"
7 | "fmt"
8 | "io"
9 | "net/http"
10 | "os"
11 | "os/exec"
12 | "strconv"
13 | "strings"
14 | "testing"
15 | "time"
16 |
17 | dto "github.com/prometheus/client_model/go"
18 | "github.com/prometheus/common/expfmt"
19 | "github.com/stretchr/testify/require"
20 | )
21 |
22 | func Test_e2e(t *testing.T) {
23 | fixture, err := runProcess(os.Stdout, os.Stderr, "/usr/bin/fixture")
24 | if err != nil {
25 | t.Fatalf("failed to run fixture: %v", err)
26 | }
27 |
28 | // Wait for fixture to be ready.
29 | time.Sleep(time.Second)
30 |
31 | var gmonLogs bytes.Buffer
32 | gmon, err := runProcess(
33 | &gmonLogs, &gmonLogs,
34 | "/usr/bin/gmon",
35 | "-path",
36 | "/usr/bin/fixture",
37 | "-metrics",
38 | "5500",
39 | "-level",
40 | "DEBUG",
41 | )
42 | if err != nil {
43 | t.Fatalf("failed to run gmon: %v", err)
44 | }
45 | // Wait for gmon to be ready.
46 | time.Sleep(time.Second)
47 |
48 | t.Cleanup(func() {
49 | procs := []*os.Process{fixture, gmon}
50 | for i := range procs {
51 | if procs[i] != nil {
52 | if err := procs[i].Kill(); err != nil {
53 | t.Logf("failed to kill process: %v", err)
54 | }
55 | }
56 | }
57 | })
58 |
59 | requestFixtureCount := 3
60 | for range requestFixtureCount {
61 | resp, err := http.Get("http://localhost:8080/get/200")
62 | require.NoError(t, err, "GET /get/200 of fixture server failed")
63 | require.Equal(t, http.StatusOK, resp.StatusCode, "expect 200 from GET /get/200 of fixture server")
64 | }
65 |
66 | // Wait gmon detects goroutine events and writes logs.
67 | time.Sleep(time.Second)
68 |
69 | evaluateGmonOutput(t, &gmonLogs, requestFixtureCount)
70 | evaluateGmonMetrics(t)
71 | }
72 |
// runProcess starts the named program with the given arguments, wiring its
// standard output and standard error to stdout and stderr.
//
// It does not wait for the program to finish. On success it returns the
// started process; when the program cannot be started it returns a nil
// process and a wrapped error.
func runProcess(stdout, stderr io.Writer, name string, arg ...string) (*os.Process, error) {
	c := exec.Command(name, arg...)
	c.Stdout, c.Stderr = stdout, stderr
	err := c.Start()
	if err != nil {
		return nil, fmt.Errorf("failed to run %q: %w", c, err)
	}
	return c.Process, nil
}
82 |
83 | func evaluateGmonOutput(t *testing.T, gmonLogs io.Reader, expectValidLineCount int) {
84 | // A valid line should have:
85 | // - msg="goroutine is created"
86 | // - goroutine_id=%d, where %d is an integer
87 | // - stack.%d=funcName, where %d is an integer and funcName is a function name
88 | // - When a HTTP server receives a request, it should cause a goroutine and have a stack trace.
89 | validLineCount := 0
90 |
91 | scanner := bufio.NewScanner(gmonLogs)
92 | for scanner.Scan() {
93 | // Valid line example:
94 | // time=2024-03-20T05:10:57.752Z level=INFO msg="goroutine is created" goroutine_id=22 stack.0=runtime.newproc stack.1=runtime.systemstack stack.2=runtime.newproc stack.3=net/http.(*connReader).startBackgroundRead stack.4=net/http.(*conn).serve stack.5=net/http.(*Server).Serve.gowrap3 stack.6=runtime.goexit
95 | text := scanner.Text()
96 |
97 | if !strings.Contains(text, "msg=\"goroutine is created\"") {
98 | continue
99 | }
100 |
101 | goroutineIdBegin := strings.Index(text, "goroutine_id=")
102 | require.Greaterf(t, goroutineIdBegin, 8, "goroutine_id is not found in %q", text)
103 | goroutineIdEnd := strings.Index(text[goroutineIdBegin:], " ")
104 | goroutineId, err := strconv.Atoi(strings.Split(text[goroutineIdBegin:goroutineIdBegin+goroutineIdEnd], "=")[1])
105 | require.NoErrorf(t, err, "goroutine_id is not an integer in %q", text)
106 | require.Greaterf(t, goroutineId, 0, "goroutine_id is not a positive integer in %q", text)
107 |
108 | hasValidStack := false
109 | for stackIdx := 0; ; stackIdx++ {
110 | stackBegin := strings.Index(text, fmt.Sprintf("stack.%d=", stackIdx))
111 | if stackBegin < 0 {
112 | break
113 | }
114 | stackEnd := strings.Index(text[stackBegin:], " ")
115 | if stackEnd < 0 {
116 | break
117 | }
118 | stackValue := strings.Split(text[stackBegin:stackBegin+stackEnd], "=")[1]
119 | require.NotEmptyf(t, stackValue, "stack.%d should have a function name in %q", stackIdx, text)
120 | hasValidStack = true
121 | }
122 | require.Truef(t, hasValidStack, "no stack trace is found in %q", text)
123 | validLineCount++
124 | }
125 |
126 | require.Greaterf(t, validLineCount, expectValidLineCount, "valid line count is less than %d", expectValidLineCount)
127 | }
128 |
129 | func evaluateGmonMetrics(t *testing.T) {
130 | resp, err := http.Get("http://localhost:5500/metrics")
131 | require.NoError(t, err, "GET /metrics of gmon")
132 | require.Equal(t, http.StatusOK, resp.StatusCode, "expect 200 from GET /metrics of gmon")
133 | b, err := io.ReadAll(resp.Body)
134 | require.NoError(t, err, "read response body from GET /metrics of gmon")
135 | defer func() {
136 | if t.Failed() {
137 | t.Logf("------ response body ------\n%s\n------ response body ------", b)
138 | }
139 | }()
140 |
141 | expectedMetrics := []string{
142 | "gmon_goroutine_creation",
143 | "gmon_goroutine_exit",
144 | "gmon_goroutine_uptime",
145 | }
146 | // Due to the high cardinality concern, we add up to 5 stack labels to metrics.
147 | expectedLabels := map[string]struct{}{
148 | "stack_0": {},
149 | "stack_1": {},
150 | "stack_2": {},
151 | "stack_3": {},
152 | "stack_4": {},
153 | }
154 | actualMetrics := make(map[string][]*dto.Metric)
155 |
156 | dec := expfmt.NewDecoder(bytes.NewReader(b), expfmt.NewFormat(expfmt.TypeTextPlain))
157 | for {
158 | var mf dto.MetricFamily
159 | err := dec.Decode(&mf)
160 | if errors.Is(err, io.EOF) {
161 | break
162 | }
163 | require.NoError(t, err, "decode response body of GET /metrics")
164 | actualMetrics[mf.GetName()] = mf.GetMetric()
165 | }
166 | for _, expected := range expectedMetrics {
167 | ms, ok := actualMetrics[expected]
168 | require.Truef(t, ok, "metric %q is not found", expected)
169 | for _, m := range ms {
170 | for _, l := range m.GetLabel() {
171 | _, ok := expectedLabels[l.GetName()]
172 | require.Truef(t, ok, "%q doesn't exist in %q", l.GetName(), m)
173 | require.NotEmptyf(t, l.GetValue(), "%q should not have an empty", l.GetName())
174 | require.NotEqualf(t, "none", l.GetValue(), "%q should not have \"none\"", l.GetName())
175 | }
176 | }
177 | }
178 | }
179 |
--------------------------------------------------------------------------------
/ebpf/bpf_x86_bpfel.go:
--------------------------------------------------------------------------------
1 | // Code generated by bpf2go; DO NOT EDIT.
2 | //go:build 386 || amd64
3 |
4 | package ebpf
5 |
6 | import (
7 | "bytes"
8 | _ "embed"
9 | "fmt"
10 | "io"
11 |
12 | "github.com/cilium/ebpf"
13 | )
14 |
15 | type bpfEvent struct {
16 | GoroutineId int64
17 | StackId int32
18 | Exit bool
19 | _ [3]byte
20 | }
21 |
22 | type bpfStackTraceT [20]uint64
23 |
24 | // loadBpf returns the embedded CollectionSpec for bpf.
25 | func loadBpf() (*ebpf.CollectionSpec, error) {
26 | reader := bytes.NewReader(_BpfBytes)
27 | spec, err := ebpf.LoadCollectionSpecFromReader(reader)
28 | if err != nil {
29 | return nil, fmt.Errorf("can't load bpf: %w", err)
30 | }
31 |
32 | return spec, err
33 | }
34 |
35 | // loadBpfObjects loads bpf and converts it into a struct.
36 | //
37 | // The following types are suitable as obj argument:
38 | //
39 | // *bpfObjects
40 | // *bpfPrograms
41 | // *bpfMaps
42 | //
43 | // See ebpf.CollectionSpec.LoadAndAssign documentation for details.
44 | func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error {
45 | spec, err := loadBpf()
46 | if err != nil {
47 | return err
48 | }
49 |
50 | return spec.LoadAndAssign(obj, opts)
51 | }
52 |
53 | // bpfSpecs contains maps and programs before they are loaded into the kernel.
54 | //
55 | // It can be passed ebpf.CollectionSpec.Assign.
56 | type bpfSpecs struct {
57 | bpfProgramSpecs
58 | bpfMapSpecs
59 | }
60 |
61 | // bpfSpecs contains programs before they are loaded into the kernel.
62 | //
63 | // It can be passed ebpf.CollectionSpec.Assign.
64 | type bpfProgramSpecs struct {
65 | RuntimeGoexit1 *ebpf.ProgramSpec `ebpf:"runtime_goexit1"`
66 | RuntimeNewproc1 *ebpf.ProgramSpec `ebpf:"runtime_newproc1"`
67 | }
68 |
69 | // bpfMapSpecs contains maps before they are loaded into the kernel.
70 | //
71 | // It can be passed ebpf.CollectionSpec.Assign.
72 | type bpfMapSpecs struct {
73 | Events *ebpf.MapSpec `ebpf:"events"`
74 | StackAddresses *ebpf.MapSpec `ebpf:"stack_addresses"`
75 | }
76 |
77 | // bpfObjects contains all objects after they have been loaded into the kernel.
78 | //
79 | // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign.
80 | type bpfObjects struct {
81 | bpfPrograms
82 | bpfMaps
83 | }
84 |
85 | func (o *bpfObjects) Close() error {
86 | return _BpfClose(
87 | &o.bpfPrograms,
88 | &o.bpfMaps,
89 | )
90 | }
91 |
92 | // bpfMaps contains all maps after they have been loaded into the kernel.
93 | //
94 | // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign.
95 | type bpfMaps struct {
96 | Events *ebpf.Map `ebpf:"events"`
97 | StackAddresses *ebpf.Map `ebpf:"stack_addresses"`
98 | }
99 |
100 | func (m *bpfMaps) Close() error {
101 | return _BpfClose(
102 | m.Events,
103 | m.StackAddresses,
104 | )
105 | }
106 |
107 | // bpfPrograms contains all programs after they have been loaded into the kernel.
108 | //
109 | // It can be passed to loadBpfObjects or ebpf.CollectionSpec.LoadAndAssign.
110 | type bpfPrograms struct {
111 | RuntimeGoexit1 *ebpf.Program `ebpf:"runtime_goexit1"`
112 | RuntimeNewproc1 *ebpf.Program `ebpf:"runtime_newproc1"`
113 | }
114 |
115 | func (p *bpfPrograms) Close() error {
116 | return _BpfClose(
117 | p.RuntimeGoexit1,
118 | p.RuntimeNewproc1,
119 | )
120 | }
121 |
122 | func _BpfClose(closers ...io.Closer) error {
123 | for _, closer := range closers {
124 | if err := closer.Close(); err != nil {
125 | return err
126 | }
127 | }
128 | return nil
129 | }
130 |
131 | // Do not access this directly.
132 | //
133 | //go:embed bpf_x86_bpfel.o
134 | var _BpfBytes []byte
135 |
--------------------------------------------------------------------------------
/ebpf/c/gmon.c:
--------------------------------------------------------------------------------
1 | #include "vmlinux.h"
2 | #include "maps.h"
3 | #include "goroutine.h"
4 |
5 | #include
6 | #include
7 | #include
8 |
9 | // read_stack_id reads the stack id from stack trace map.
10 | // 1 on failure
11 | static __always_inline int read_stack_id(struct pt_regs *ctx, int *stack_id) {
12 | int id = bpf_get_stackid(ctx, &stack_addresses, BPF_F_USER_STACK);
13 | if (id < 0) {
14 | return 1;
15 | }
16 | *stack_id = id;
17 | return 0;
18 | }
19 |
20 | SEC("uretprobe/runtime.newproc1")
21 | int runtime_newproc1(struct pt_regs *ctx) {
22 | void *newg_p = (void *)PT_REGS_RC_CORE(ctx);
23 | if (newg_p == NULL) {
24 | bpf_printk("%s:%d | failed to extract new goroutine pointer from retval\n", __FILE__, __LINE__);
25 | return 0;
26 | }
27 | // `pahole -C runtime.g /path/to/gobinary 2>/dev/null` shows the offsets of the goid.
28 | int64_t goid = 0;
29 | if (bpf_core_read_user(&goid, sizeof(int64_t), newg_p + 160)) {
30 | bpf_printk("%s:%d | failed to read goroutine id from newg with the offset\n", __FILE__, __LINE__);
31 | return 0;
32 | }
33 | if (goid == 0) {
34 | bpf_printk("%s:%d | goroutine id is zero\n", __FILE__, __LINE__);
35 | return 0;
36 | }
37 | int stack_id = 0;
38 | if (read_stack_id(ctx, &stack_id)) {
39 | bpf_printk("%s:%d | failed to read stackid\n", __FILE__, __LINE__);
40 | return 0;
41 | }
42 |
43 | struct event *ev;
44 | ev = bpf_ringbuf_reserve(&events, sizeof(*ev), 0);
45 | if (!ev) {
46 | bpf_printk("%s:%d | failed to reserve ringbuf\n", __FILE__, __LINE__);
47 | return 0;
48 | }
49 | ev->goroutine_id = goid;
50 | ev->stack_id = stack_id;
51 | ev->exit = false;
52 | bpf_ringbuf_submit(ev, 0);
53 |
54 | return 0;
55 | }
56 |
57 | SEC("uprobe/runtime.goexit1")
58 | int runtime_goexit1(struct pt_regs *ctx) {
59 | struct task_struct *task = (struct task_struct *)bpf_get_current_task();
60 | int64_t go_id = 0;
61 | if (read_goroutine_id(task, &go_id)) {
62 | bpf_printk("%s:%d | failed to read goroutine id\n", __FILE__, __LINE__);
63 | return 0;
64 | }
65 |
66 | int stack_id = 0;
67 | if (read_stack_id(ctx, &stack_id)) {
68 | bpf_printk("%s:%d | failed to read stackid\n", __FILE__, __LINE__);
69 | return 0;
70 | }
71 |
72 | struct event *ev;
73 | ev = bpf_ringbuf_reserve(&events, sizeof(*ev), 0);
74 | if (!ev) {
75 | bpf_printk("%s:%d | failed to reserve ringbuf\n", __FILE__, __LINE__);
76 | return 0;
77 | }
78 | ev->goroutine_id = go_id;
79 | ev->stack_id = stack_id;
80 | ev->exit = true;
81 | bpf_ringbuf_submit(ev, 0);
82 |
83 | return 0;
84 | }
85 |
86 | char LICENSE[] SEC("license") = "GPL";
87 |
--------------------------------------------------------------------------------
/ebpf/c/goroutine.h:
--------------------------------------------------------------------------------
1 | #include "vmlinux.h"
2 |
3 | #include
4 | #include
5 |
// stack_t is a pair of pointer-sized bounds (lo, hi). It is the first member
// of g_t. NOTE(review): presumably mirrors the Go runtime's stack type --
// confirm against the runtime source linked on g_t.
6 | struct stack_t {
7 | uintptr_t lo;
8 | uintptr_t hi;
9 | };
10 |
// gobuf_t is seven pointer-sized fields embedded in g_t as `sched`. None of
// its fields are read individually in this header; it is present so that the
// members following it in g_t land at the right offsets.
11 | struct gobuf_t {
12 | uintptr_t sp;
13 | uintptr_t pc;
14 | uintptr_t g;
15 | uintptr_t ctxt;
16 | uintptr_t ret;
17 | uintptr_t lr;
18 | uintptr_t bp;
19 | };
20 |
// g_t describes the leading part of a goroutine descriptor, up to and
// including goid, following the runtime source linked below. Only goid is
// read (by read_goroutine_id). Field order and sizes matter: they determine
// where goid is read from in the target process's memory.
21 | // https://github.com/golang/go/blob/release-branch.go1.23/src/runtime/runtime2.go#L458
22 | struct g_t {
23 | struct stack_t stack_instance;
24 | uintptr_t stackguard0;
25 | uintptr_t stackguard1;
26 | uintptr_t _panic;
27 | uintptr_t _defer;
28 | uintptr_t m;
29 | struct gobuf_t sched;
30 | uintptr_t syscallsp;
31 | uintptr_t syscallpc;
32 | uintptr_t syscallbp;
33 | uintptr_t stktopsp;
34 | uintptr_t param;
35 | uint32_t atomicstatus;
36 | uint32_t stackLock;
37 | int64_t goid;
38 | };
39 |
40 | // read_goroutine_id reads the goroutine id from the task_struct.
41 | // 1 on failure.
42 | static __always_inline int read_goroutine_id(struct task_struct *task, int64_t *goroutine_id) {
43 | void *base;
44 | BPF_CORE_READ_INTO(&base, &(task->thread), fsbase);
45 | if (base == NULL) {
46 | return 1;
47 | }
48 |
49 | // https://www.usenix.org/conference/srecon23apac/presentation/liang
50 | uintptr_t g_addr = 0;
51 | if (bpf_core_read_user(&g_addr, sizeof(uintptr_t), base - 8)) {
52 | return 1;
53 | }
54 |
55 | struct g_t g;
56 | if (bpf_core_read_user(&g, sizeof(struct g_t), (void *)g_addr)) {
57 | return 1;
58 | }
59 | *goroutine_id = g.goid;
60 |
61 | // TODO: Why is this happening? We may be able to ignore this.
62 | // The Go runtime manages goroutines, and developers generally don't need to interact with
63 | // goroutine IDs directly. In fact, the language specification does not provide a built-in way
64 | // to obtain a goroutine's ID, as the designers of Go intended to keep goroutines abstracted
65 | // away from such details.
66 | if (*goroutine_id == 0) {
67 | return 1;
68 | }
69 |
70 | return 0;
71 | }
72 |
--------------------------------------------------------------------------------
/ebpf/c/maps.h:
--------------------------------------------------------------------------------
1 | #ifndef __MAPS_H__
2 | #define __MAPS_H__
3 |
4 | #include "vmlinux.h"
5 |
6 | #include
7 |
8 | #define MAX_STACK_ADDRESSES 1024 // max amount of diff stack trace addrs to buffer
9 | #define MAX_STACK_DEPTH 20 // max depth of each stack trace to track
10 |
11 | #define BPF_MAP(_name, _type, _key_type, _value_type, _max_entries) \
12 | struct { \
13 | __uint(type, _type); \
14 | __uint(max_entries, _max_entries); \
15 | __type(key, _key_type); \
16 | __type(value, _value_type); \
17 | } _name SEC(".maps");
18 |
19 | // stack traces: the value is 1 big byte array of the stack addresses
20 | typedef __u64 stack_trace_t[MAX_STACK_DEPTH];
21 | #define BPF_STACK_TRACE(_name, _max_entries) \
22 | BPF_MAP(_name, BPF_MAP_TYPE_STACK_TRACE, u32, stack_trace_t, _max_entries)
23 |
24 | BPF_STACK_TRACE(stack_addresses, MAX_STACK_ADDRESSES); // store stack traces
25 |
26 | struct {
27 | __uint(type, BPF_MAP_TYPE_RINGBUF);
28 | __uint(max_entries, 1 << 24);
29 | } events SEC(".maps");
30 |
// struct event is the record the probes in gmon.c reserve and submit on the
// `events` ring buffer.
//   goroutine_id: id of the goroutine the event is about.
//   stack_id:     id produced by bpf_get_stackid for the stack_addresses map.
//   exit:         false when emitted by the runtime.newproc1 probe, true when
//                 emitted by the runtime.goexit1 probe.
31 | struct event {
32 | int64_t goroutine_id;
33 | int stack_id;
34 | bool exit;
35 | };
36 |
37 | struct event *unused __attribute__((unused));
38 |
39 | #endif /* __MAPS_H__ */
40 |
--------------------------------------------------------------------------------
/ebpf/config.go:
--------------------------------------------------------------------------------
1 | package ebpf
2 |
3 | import (
4 | "fmt"
5 | )
6 |
// Config holds the settings used to attach gmon's eBPF programs to a target.
type Config struct {
	// binPath is the path of the executable the probes are attached to.
	binPath string
	// pid is the process ID passed to the uprobe attach options.
	pid int
}

// NewConfig returns a Config for the executable at binPath and the process
// identified by pid. It performs no validation, so the returned error is
// currently always nil.
func NewConfig(
	binPath string,
	pid int,
) (Config, error) {
	return Config{
		binPath: binPath,
		pid:     pid,
	}, nil
}

// String returns a human-readable, single-line description of c.
func (c Config) String() string {
	return fmt.Sprintf("binPath: %s, pid: %d",
		c.binPath,
		c.pid,
	)
}
28 |
--------------------------------------------------------------------------------
/ebpf/event_handler.go:
--------------------------------------------------------------------------------
1 | package ebpf
2 |
3 | import (
4 | "bytes"
5 | "context"
6 | "encoding/binary"
7 | "errors"
8 | "fmt"
9 | "log/slog"
10 | "runtime/trace"
11 | "strconv"
12 | "time"
13 |
14 | "github.com/cilium/ebpf/ringbuf"
15 | "github.com/go-delve/delve/pkg/proc"
16 | "github.com/hashicorp/golang-lru/v2/expirable"
17 | "github.com/keisku/gmon/bininfo"
18 | )
19 |
// eventHandler reads goroutine events from the BPF ring buffer, resolves each
// event's stack id to a list of functions, and forwards the result on
// goroutineQueue.
//
// goroutineQueue is the send-only channel resolved goroutine events go to.
// objs holds the loaded BPF objects; its StackAddresses map is used to look up
// and delete stack traces.
// biTranslator maps program counters to functions of the target binary.
// reader is the ring buffer reader the events are read from.
20 | type eventHandler struct {
21 | goroutineQueue chan<- goroutine
22 | objs *bpfObjects
23 | biTranslator bininfo.Translator
24 | reader *ringbuf.Reader
25 | }
26 |
27 | func (h *eventHandler) run(ctx context.Context) {
28 | var event bpfEvent
29 | // To delete stack_addresses that is not used recently.
30 | stackIdCache := expirable.NewLRU[int32, []*proc.Function](
31 | 32, // cache size
32 | func(key int32, _ []*proc.Function) {
33 | slog.Debug("delete stack_addresses", slog.Int("stack_id", int(key)))
34 | if err := h.objs.StackAddresses.Delete(key); err != nil {
35 | slog.Debug("Failed to delete stack_addresses", slog.Any("error", err))
36 | }
37 | },
38 | time.Minute, // TTL of each cache entry
39 | )
40 | for {
41 | if err := h.readRecord(ctx, &event); err != nil {
42 | if errors.Is(err, ringbuf.ErrClosed) {
43 | slog.Debug("ring buffer is closed")
44 | return
45 | }
46 | slog.Warn("Failed to read bpf ring buffer", slog.Any("error", err))
47 | continue
48 | }
49 | var stack []*proc.Function
50 | var ok bool
51 | var err error
52 | stack, ok = stackIdCache.Get(event.StackId)
53 | if !ok {
54 | stack, err = h.lookupStack(ctx, event.StackId)
55 | if err != nil {
56 | slog.Warn(err.Error())
57 | continue
58 | }
59 | }
60 | h.sendGoroutine(goroutine{
61 | Id: event.GoroutineId,
62 | ObservedAt: time.Now(),
63 | Stack: stack,
64 | Exit: event.Exit,
65 | })
66 | _ = stackIdCache.Add(event.StackId, stack)
67 | }
68 | }
69 |
70 | func (h *eventHandler) readRecord(ctx context.Context, event *bpfEvent) error {
71 | _, task := trace.NewTask(ctx, "event_handler.read_ring_buffer")
72 | defer task.End()
73 | record, err := h.reader.Read()
74 | if err != nil {
75 | return err
76 | }
77 | if err := binary.Read(bytes.NewBuffer(record.RawSample), binary.LittleEndian, event); err != nil {
78 | return fmt.Errorf("decode ring buffer record: %w", err)
79 | }
80 | return nil
81 | }
82 |
83 | // lookupStack is a copy of the function in tracee.
84 | // https://github.com/aquasecurity/tracee/blob/f61866b4e2277d2a7dddc6cd77a67cd5a5da3b14/pkg/ebpf/events_pipeline.go#L642-L681
85 | const maxStackDepth = 20
86 |
87 | var stackFrameSize = (strconv.IntSize / 8)
88 |
89 | func (h *eventHandler) lookupStack(ctx context.Context, stackId int32) ([]*proc.Function, error) {
90 | _, task := trace.NewTask(ctx, "event_handler.lookup_stack")
91 | defer task.End()
92 | stackBytes, err := h.objs.StackAddresses.LookupBytes(stackId)
93 | if err != nil {
94 | return nil, fmt.Errorf("failed to lookup stack addresses: %w", err)
95 | }
96 | if stackBytes == nil {
97 | return nil, fmt.Errorf("bytes not found by stack_id=%d", stackId)
98 | }
99 | stack := make([]*proc.Function, maxStackDepth)
100 | stackCounter := 0
101 | for i := 0; i < len(stackBytes); i += stackFrameSize {
102 | stackBytes[stackCounter] = 0
103 | stackAddr := binary.LittleEndian.Uint64(stackBytes[i : i+stackFrameSize])
104 | if stackAddr == 0 {
105 | break
106 | }
107 | f := h.biTranslator.PCToFunc(stackAddr)
108 | if f == nil {
109 | // I don't know why, but a function address sometime should be last 3 bytes.
110 | // At leaset, I observerd this behavior in the following binaries:
111 | // - /usr/bin/dockerd
112 | // - /usr/bin/containerd
113 | f = h.biTranslator.PCToFunc(stackAddr & 0xffffff)
114 | if f == nil {
115 | f = &proc.Function{Name: fmt.Sprintf("%#x", stackAddr), Entry: stackAddr}
116 | }
117 | }
118 | stack[stackCounter] = f
119 | stackCounter++
120 | }
121 | return stack[0:stackCounter], nil
122 | }
123 |
124 | func (h *eventHandler) sendGoroutine(g goroutine) {
125 | maxRetries := 3
126 | retryInterval := 10 * time.Millisecond
127 | for attempts := 0; attempts < maxRetries; attempts++ {
128 | select {
129 | case h.goroutineQueue <- g:
130 | if attempts > 0 {
131 | slog.Info(
132 | "goroutine is sent successfully after retries",
133 | slog.Int("retry", attempts+1),
134 | slog.String("goroutine_id", fmt.Sprintf("%d", g.Id)),
135 | slog.Bool("exit", g.Exit),
136 | stackLogAttr(g.Stack),
137 | )
138 | }
139 | return // Successfully sent
140 | default:
141 | if attempts < maxRetries-1 {
142 | time.Sleep(retryInterval) // Wait before retrying
143 | } else {
144 | slog.Warn(
145 | "goroutine queue is full, retrying",
146 | slog.String("goroutine_id", fmt.Sprintf("%d", g.Id)),
147 | slog.Bool("exit", g.Exit),
148 | stackLogAttr(g.Stack),
149 | )
150 | }
151 | }
152 | }
153 | }
154 |
--------------------------------------------------------------------------------
/ebpf/gmon.go:
--------------------------------------------------------------------------------
1 | package ebpf
2 |
3 | import (
4 | "context"
5 | "errors"
6 | "fmt"
7 | "log/slog"
8 |
9 | "github.com/cilium/ebpf"
10 | "github.com/cilium/ebpf/link"
11 | "github.com/cilium/ebpf/ringbuf"
12 | "github.com/keisku/gmon/bininfo"
13 | )
14 |
15 | // $BPF_CLANG and $BPF_CFLAGS are set by the Makefile.
16 | //go:generate go run github.com/cilium/ebpf/cmd/bpf2go -type event -cc $BPF_CLANG -target amd64 -cflags $BPF_CFLAGS bpf ./c/gmon.c -- -I./c
17 |
18 | func Run(ctx context.Context, config Config) (func(), error) {
19 | slog.Debug("eBPF programs start with config", slog.String("config", config.String()))
20 | objs := bpfObjects{}
21 | if err := loadBpfObjects(&objs, nil); err != nil {
22 | return func() {}, err
23 | }
24 | biTranslator, err := bininfo.NewTranslator(config.binPath)
25 | if err != nil {
26 | return func() {}, err
27 | }
28 | ex, err := link.OpenExecutable(config.binPath)
29 | if err != nil {
30 | return func() {}, err
31 | }
32 | var links [2]link.Link
33 | links[0], err = linkUprobe(
34 | ex,
35 | objs.RuntimeNewproc1,
36 | "runtime.newproc1",
37 | true,
38 | config.pid,
39 | biTranslator.Address,
40 | )
41 | if err != nil {
42 | return func() {}, err
43 | }
44 | links[1], err = linkUprobe(
45 | ex,
46 | objs.RuntimeGoexit1,
47 | "runtime.goexit1",
48 | false,
49 | config.pid,
50 | biTranslator.Address,
51 | )
52 | if err != nil {
53 | return func() {}, err
54 | }
55 | ringbufReader, err := ringbuf.NewReader(objs.Events)
56 | if err != nil {
57 | return func() {}, err
58 | }
59 | goroutineQueue := make(chan goroutine, 100)
60 | eventhandler := &eventHandler{
61 | goroutineQueue: goroutineQueue,
62 | objs: &objs,
63 | biTranslator: biTranslator,
64 | reader: ringbufReader,
65 | }
66 | reporter := &reporter{
67 | goroutineQueue: goroutineQueue,
68 | }
69 | go reporter.run(ctx)
70 | go eventhandler.run(ctx)
71 | return func() {
72 | ringbufReader.Close()
73 | for i := range links {
74 | if err := links[i].Close(); err != nil {
75 | slog.Warn("Failed to close link", slog.Any("error", err))
76 | }
77 | }
78 | if err := objs.Close(); err != nil {
79 | slog.Warn("Failed to close bpf objects", slog.Any("error", err))
80 | }
81 | }, nil
82 | }
83 |
84 | func linkUprobe(
85 | exe *link.Executable,
86 | program *ebpf.Program,
87 | symbol string,
88 | ret bool,
89 | pid int,
90 | lookupAddress func(string) uint64,
91 | ) (link.Link, error) {
92 | var l link.Link
93 | var err error
94 | if ret {
95 | l, err = exe.Uretprobe(symbol, program, &link.UprobeOptions{PID: pid})
96 | } else {
97 | l, err = exe.Uprobe(symbol, program, &link.UprobeOptions{PID: pid})
98 | }
99 | if err == nil {
100 | return l, nil
101 | }
102 | if errors.Is(err, link.ErrNoSymbol) {
103 | slog.Debug("no symbol table", slog.String("symbol", symbol))
104 | } else {
105 | return nil, fmt.Errorf("failed to attach uprobe for %s: %w", symbol, err)
106 | }
107 | address := lookupAddress(symbol)
108 | if address == 0 {
109 | return nil, fmt.Errorf("no address found for %s", symbol)
110 | }
111 | if ret {
112 | l, err = exe.Uretprobe(symbol, program, &link.UprobeOptions{PID: pid, Address: address})
113 | } else {
114 | l, err = exe.Uprobe(symbol, program, &link.UprobeOptions{PID: pid, Address: address})
115 | }
116 | if err != nil {
117 | return nil, fmt.Errorf("failed to attach uprobe for %s: %w", symbol, err)
118 | }
119 | slog.Debug("attach uprobe with address", slog.String("symbol", symbol), slog.String("address", fmt.Sprintf("%#x", address)))
120 | return l, nil
121 | }
122 |
--------------------------------------------------------------------------------
/ebpf/reporter.go:
--------------------------------------------------------------------------------
1 | package ebpf
2 |
3 | import (
4 | "context"
5 | "fmt"
6 | "log/slog"
7 | "runtime/trace"
8 | "sync"
9 | "time"
10 |
11 | "github.com/go-delve/delve/pkg/proc"
12 | "github.com/prometheus/client_golang/prometheus"
13 | "github.com/prometheus/client_golang/prometheus/promauto"
14 | )
15 |
16 | var (
17 | namespace = "gmon"
18 | stackLabelKeys = []string{"stack_0", "stack_1", "stack_2", "stack_3", "stack_4"} // 0 is the top
19 | goroutineExit = promauto.NewCounterVec(
20 | prometheus.CounterOpts{
21 | Namespace: namespace,
22 | Name: "goroutine_exit",
23 | Help: "The number of goroutines that have been exited",
24 | },
25 | stackLabelKeys,
26 | )
27 | goroutineCreation = promauto.NewCounterVec(
28 | prometheus.CounterOpts{
29 | Namespace: namespace,
30 | Name: "goroutine_creation",
31 | Help: "The number of goroutines that have been creaated",
32 | },
33 | stackLabelKeys,
34 | )
35 | goroutineUptime = promauto.NewHistogramVec(
36 | prometheus.HistogramOpts{
37 | Namespace: namespace,
38 | Name: "goroutine_uptime",
39 | Help: "Uptime of goroutines in seconds",
40 | Buckets: []float64{1, 3, 5, 10, 30, 60, 120, 180},
41 | },
42 | stackLabelKeys,
43 | )
44 | )
45 |
// goroutine is one observed goroutine event passed from the event handler to
// the reporter.
//
// Id is the goroutine id reported by the BPF program.
// ObservedAt is the time the event was read in user space.
// Stack is the resolved stack captured with the event.
// Exit reports whether the event is an exit (true) or a creation (false).
46 | type goroutine struct {
47 | Id int64
48 | ObservedAt time.Time
49 | Stack []*proc.Function
50 | Exit bool
51 | }
52 |
// reporter consumes goroutine events and turns them into logs and metrics.
//
// goroutineQueue is the receive-only channel the events arrive on.
// goroutineMap holds the goroutines that were seen created and not yet seen
// exiting; it is keyed by goroutine id and stores goroutine values.
53 | type reporter struct {
54 | goroutineQueue <-chan goroutine
55 | goroutineMap sync.Map
56 | }
57 |
58 | var reportInterval = 500 * time.Millisecond
59 |
60 | func (r *reporter) run(ctx context.Context) {
61 | go r.reportUptime(ctx)
62 | go r.subscribe(ctx)
63 | <-ctx.Done()
64 | }
65 |
66 | func (r *reporter) reportUptime(ctx context.Context) {
67 | ticker := time.NewTicker(reportInterval)
68 | for {
69 | select {
70 | case <-ctx.Done():
71 | ticker.Stop()
72 | return
73 | case <-ticker.C:
74 | ctx, task := trace.NewTask(ctx, "reporter.report_goroutine_uptime")
75 | trace.WithRegion(ctx, "reporter.report_goroutine_uptime.iterate_goroutine_map", func() {
76 | r.goroutineMap.Range(func(_, value any) bool {
77 | g := value.(goroutine)
78 | goroutineUptime.With(stackLabels(g.Stack)).Observe(time.Since(g.ObservedAt).Seconds())
79 | return true
80 | })
81 | })
82 | task.End()
83 | }
84 | }
85 | }
86 |
87 | func (r *reporter) subscribe(ctx context.Context) {
88 | for g := range r.goroutineQueue {
89 | ctx, task := trace.NewTask(ctx, "reporter.store_goroutine")
90 | r.storeGoroutine(ctx, g)
91 | task.End()
92 | }
93 | }
94 |
95 | func (r *reporter) storeGoroutine(ctx context.Context, g goroutine) {
96 | v, loaded := r.goroutineMap.Load(g.Id)
97 | if loaded {
98 | _, task := trace.NewTask(ctx, "reporter.store_goroutine_exit")
99 | oldg, ok := v.(goroutine)
100 | if !ok {
101 | slog.Error("goroutineMap has unexpected value", slog.Any("value", v))
102 | return
103 | }
104 | goroutineExit.With(stackLabels(oldg.Stack)).Inc()
105 | goroutineUptime.With(stackLabels(oldg.Stack)).Observe(time.Since(oldg.ObservedAt).Seconds())
106 | r.goroutineMap.Delete(oldg.Id)
107 | task.End()
108 | return
109 | }
110 | if g.Exit {
111 | // Avoid storing goroutines that lack a corresponding newproc1 pair.
112 | return
113 | }
114 | _, task := trace.NewTask(ctx, "reporter.store_goroutine_creation")
115 | slog.Info("goroutine is created", slog.Int64("goroutine_id", g.Id), stackLogAttr(g.Stack))
116 | goroutineCreation.With(stackLabels(g.Stack)).Inc()
117 | r.goroutineMap.Store(g.Id, g)
118 | task.End()
119 | }
120 |
121 | // LogAttr returns a slog.Attr that can be used to log the stack.
122 | func stackLogAttr(stack []*proc.Function) slog.Attr {
123 | attrs := make([]any, len(stack))
124 | for i, f := range stack {
125 | if f == nil {
126 | panic("stack must not have nil function")
127 | }
128 | attrs[i] = slog.String(fmt.Sprintf("%d", i), f.Name)
129 | }
130 | return slog.Group("stack", attrs...)
131 | }
132 |
133 | // stackLabels generates a set of Prometheus labels for the top functions in the stack.
134 | // If the stack has fewer than expected functions, it fills the remaining labels with "none".
135 | func stackLabels(stack []*proc.Function) prometheus.Labels {
136 | labels := prometheus.Labels{}
137 |
138 | // Ensure to only process the top 5 elements, or the stack length if shorter.
139 | topN := len(stack)
140 | if topN > len(stackLabelKeys) {
141 | topN = len(stackLabelKeys)
142 | }
143 |
144 | for i := 0; i < len(stackLabelKeys); i++ {
145 | labelKey := fmt.Sprintf("stack_%d", i)
146 | if i < topN {
147 | // Stack is reversed, so we start from the end of the slice.
148 | labels[labelKey] = stack[len(stack)-1-i].Name
149 | } else {
150 | labels[labelKey] = "none"
151 | }
152 | }
153 |
154 | return labels
155 | }
156 |
--------------------------------------------------------------------------------
/ebpf/reporter_test.go:
--------------------------------------------------------------------------------
1 | package ebpf
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/go-delve/delve/pkg/proc"
7 | "github.com/prometheus/client_golang/prometheus"
8 | "github.com/stretchr/testify/assert"
9 | )
10 |
11 | func Test_stackLabels(t *testing.T) {
12 | type args struct {
13 | stack []*proc.Function
14 | }
15 | tests := []struct {
16 | name string
17 | args args
18 | want prometheus.Labels
19 | }{
20 | {
21 | name: "stack length is 5",
22 | args: args{
23 | stack: []*proc.Function{
24 | {Name: "func1"},
25 | {Name: "func2"},
26 | {Name: "func3"},
27 | {Name: "func4"},
28 | {Name: "func5"},
29 | },
30 | },
31 | want: prometheus.Labels{
32 | "stack_0": "func5",
33 | "stack_1": "func4",
34 | "stack_2": "func3",
35 | "stack_3": "func2",
36 | "stack_4": "func1",
37 | },
38 | },
39 | {
40 | name: "stack length is 3",
41 | args: args{
42 | stack: []*proc.Function{
43 | {Name: "func1"},
44 | {Name: "func2"},
45 | {Name: "func3"},
46 | },
47 | },
48 | want: prometheus.Labels{
49 | "stack_0": "func3",
50 | "stack_1": "func2",
51 | "stack_2": "func1",
52 | "stack_3": "none",
53 | "stack_4": "none",
54 | },
55 | },
56 | {
57 | name: "stack length is 10",
58 | args: args{
59 | stack: []*proc.Function{
60 | {Name: "func1"},
61 | {Name: "func2"},
62 | {Name: "func3"},
63 | {Name: "func4"},
64 | {Name: "func5"},
65 | {Name: "func6"},
66 | {Name: "func7"},
67 | {Name: "func8"},
68 | {Name: "func9"},
69 | {Name: "func10"},
70 | },
71 | },
72 | want: prometheus.Labels{
73 | "stack_0": "func10",
74 | "stack_1": "func9",
75 | "stack_2": "func8",
76 | "stack_3": "func7",
77 | "stack_4": "func6",
78 | },
79 | },
80 | }
81 | for _, tt := range tests {
82 | t.Run(tt.name, func(t *testing.T) {
83 | assert.Equal(t, tt.want, stackLabels(tt.args.stack))
84 | })
85 | }
86 | }
87 |
--------------------------------------------------------------------------------
/fixture/go.mod:
--------------------------------------------------------------------------------
1 | module fixture
2 |
3 | go 1.23.1
4 |
--------------------------------------------------------------------------------
/fixture/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "context"
5 | "net/http"
6 | "os"
7 | "os/signal"
8 | )
9 |
// main runs the fixture HTTP server on :8080, which answers GET /get/200 with
// status 200. It exits when the process receives an interrupt signal or when
// the server stops (for example because the port is already in use).
func main() {
	// Only os.Interrupt is registered: the kill signal cannot be caught, so
	// listing os.Kill here would have no effect.
	ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
	defer cancel()
	http.HandleFunc("/get/200", func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusOK)
	})
	go func() {
		// Report the failure and unblock main instead of waiting forever
		// for a server that is not running.
		if err := http.ListenAndServe(":8080", nil); err != nil {
			os.Stderr.WriteString("fixture server stopped: " + err.Error() + "\n")
		}
		cancel()
	}()
	<-ctx.Done()
}
19 |
--------------------------------------------------------------------------------
/gmon.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e -x
4 |
5 | arch=$(uname -m)
6 | if [ "$arch" != "x86_64" ]; then
7 | echo "Unsupported architecture: $arch"
8 | exit 1
9 | fi
10 |
11 | kernel_version=$(uname -r)
12 | major_version=$(echo $kernel_version | cut -d. -f1)
13 | minor_version=$(echo $kernel_version | cut -d. -f2)
14 | if [ "$major_version" -gt 5 ] || ([ "$major_version" -eq 5 ] && [ "$minor_version" -ge 8 ]); then
15 | echo "Your kernel version is $kernel_version"
16 | else
17 | echo "Your kernel version should be >= 5.8, got $kernel_version"
18 | exit 1
19 | fi
20 |
21 | if [ "$1" = "build" ] || [ "$1" = "install" ] || [ "$1" = "test" ] || [ "$1" = "format" ]; then
22 | echo "Running $1 on $arch"
23 | else
24 | echo "Unsupported command: $1"
25 | exit 1
26 | fi
27 |
28 | image_buildenv=gmon-buildenv-$arch
29 | dockerfile_buildenv=$(mktemp)
30 | cat > "$dockerfile_buildenv" < ./ebpf/c/vmlinux.h && \
63 | go generate -x ./... && \
64 | GOFLAGS="-buildvcs=auto" CGO_ENABLED=0 go build \
65 | -ldflags "-s -w -X main.Version=0.0.0-dev" \
66 | -o /usr/src/bin/gmon'
67 | if [ "$1" = "build" ]; then
68 | exit 0
69 | fi
70 |
71 | if [ "$1" = "install" ]; then
72 | sudo rm -f /usr/bin/gmon || true
73 | sudo install ./bin/gmon /usr/bin/
74 | exit 0
75 | fi
76 |
77 | if [ "$1" = "format" ]; then
78 | docker run --platform linux/$arch -i \
79 | -v $(pwd):/usr/src \
80 | --rm $image_buildenv bash -c '\
81 | go mod tidy && \
82 | go vet ./... && \
83 | find . -type f \( -name '*.[ch]' -and -not -name 'vmlinux.h' \) -exec clang-format -i {} \;'
84 | exit 0
85 | fi
86 |
87 | if [ "$1" = "test" ]; then
88 | image_e2e=gmon-e2e-$arch
89 | dockerfile_e2e=$(mktemp)
90 | cat > "$dockerfile_e2e" <