├── .github
└── workflows
│ └── ci.yml
├── .gitignore
├── CONTRIBUTING.md
├── LICENSE
├── Makefile
├── consts.go
├── dingtalk.png
├── doc
├── example.md
└── zh.md
├── example
├── 1gbslice
│ └── 1gbslice.go
├── alloc
│ └── alloc.go
├── channelblock
│ └── channelblock.go
├── cpu_explode
│ └── cpu_explode.go
├── deadlock
│ └── deadlock.go
├── deadloop
│ └── deadloop.go
├── gcheap
│ ├── .gitignore
│ ├── README.md
│ ├── gcheap.go
│ ├── go.mod
│ ├── go.sum
│ ├── memory-spike.png
│ └── rand.sh
├── pyroscope_rideshare
│ ├── README.md
│ ├── admin.png
│ ├── bike
│ │ └── bike.go
│ ├── car
│ │ └── car.go
│ ├── go.mod
│ ├── go.sum
│ ├── main.go
│ ├── requests.py
│ ├── scooter
│ │ └── scooter.go
│ ├── start_client.sh
│ └── utility
│ │ └── utility.go
├── run_in_docker
│ └── run_in_docker.go
├── slowlyleak
│ └── slowlyleak.go
└── thread_trigger
│ └── thread_trigger.go
├── go.mod
├── go.sum
├── holmes.go
├── holmes_test.go
├── log.go
├── options.go
├── readme.md
├── report.go
├── reporters
├── http_reporter
│ ├── http_reporter.go
│ ├── http_reporter_test.go
│ └── reporter_filename_test
├── pyroscope_reporter
│ ├── client_config.go
│ ├── flameql
│ │ ├── error.go
│ │ ├── flameql.go
│ │ ├── key.go
│ │ ├── parse.go
│ │ └── sortedmap.go
│ ├── pyroscope_client.go
│ └── pyroscope_client_test.go
└── reporter_test.go
├── ring.go
├── ring_test.go
├── tool
└── build-example.sh
└── util.go
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: actions
2 | on:
3 | push:
4 | branches:
5 | - master
6 | pull_request:
7 | jobs:
8 | golangci-lint:
9 | name: runner / golangci-lint
10 | runs-on: ubuntu-latest
11 | steps:
12 | - name: Check out code into the Go module directory
13 | uses: actions/checkout@v2
14 | - name: golangci-lint
15 | uses: reviewdog/action-golangci-lint@v1
16 | with:
17 | golangci_lint_flags: --timeout=10m --tests=false --skip-dirs=example
18 |
19 | test:
20 | name: Test
21 | runs-on: ubuntu-latest
22 | steps:
23 | - name: Set up Go
24 | uses: actions/setup-go@v1
25 | with:
26 | go-version: 1.14.13
27 |
28 | - name: Check out code
29 | uses: actions/checkout@v1
30 |
31 | - name: holmes test
32 | run: make test
33 |
34 | - name: example
35 | run: make example
36 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 |
3 | *.log
4 |
5 | ./test_case_gen
6 |
7 | vendor
8 |
9 | reporters/*.bin
10 |
11 | example/1gbslice/1gbslice
12 | example/alloc/alloc
13 | example/channelblock/channelblock
14 | example/cpu_explode/cpu_explode
15 | example/deadlock/deadlock
16 | example/deadloop/deadloop
17 | example/gcheap/m
18 | example/run_in_docker/run_in_docker
19 | example/slowlyleak/slowlyleak
20 | example/thread_trigger/thread_trigger
21 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Holmes contributor guide
2 |
3 | Holmes is released under the Apache 2.0 license, and follows a very standard GitHub development process, using the GitHub tracker for issues and merging pull requests into master. If you would like to contribute something, or simply want to hack on the code, this document should help you get started.
4 |
5 | Before we accept a non-trivial patch or pull request we will need you to sign the Contributor License Agreement. Signing the contributor’s agreement does not grant anyone commit rights to the main repository, but it does mean that we can accept your contributions, and you will get an author credit if we do. Active contributors might be asked to join the core team and given the ability to merge pull requests.
6 |
7 | ## Code Conventions
8 |
9 | None of these is essential for a pull request, but they will all help.
10 |
11 | 1. Code format
12 | - With the CLI, run `goimports -w yourfile.go` and `golint yourfile.go` to format and check the style
13 | - With an IDE such as GoLand, select 'Group stdlib imports', 'Move all stdlib imports in a single group', 'Move all imports in a single declaration' in the Go->Imports page
14 | - Code format is checked when the CI tests run, so please make sure the project builds before you push your branch.
15 | 2. Make sure all new `.go` files have a simple doc class comment
16 | with at least an `author` tag identifying you, and preferably at least a
17 | paragraph on what the class is for.
18 | 3. Add the ASF license header comment to all new `.go` files (copy from existing files in the project)
19 | 4. Add yourself as an `author` to the `.go` files that you modify substantially (more than cosmetic changes).
20 | 5. Add some docs.
21 | 6. A few unit tests would help a lot as well — someone has to do it.
22 | 7. When writing a commit message please follow [these conventions](https://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html), if you are fixing an existing issue please add Fixes gh-XXXX at the end of the commit message (where XXXX is the issue number).
23 | 8. Please ensure that code coverage will not decrease.
24 | 9. Contribute PRs following the Gitflow Workflow, and follow the pull request rules.
25 |
26 | ## Version naming convention
27 |
28 | Holmes's version consists of three numbers in the format x.x.x: the first is for compatibility, the second is for new features and enhancements, and the last is for bug fixes.
29 |
30 | ## PR review policy for maintainers
31 |
32 | The following strategies are recommended for project maintainers to review code:
33 |
34 | 1. Check the issue with this PR
35 | 2. Check the solution's reasonability
36 | 3. Check UT's and Benchmark's result
37 | 4. Pay attention to code that changes the code structure, the usage of global variables, the handling of corner cases, and concurrency
38 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | modules=$(shell go list ./... | grep -v example)
2 | test:
3 | GO111MODULE=on go test -gcflags "-N -l" $(modules)
4 |
5 | lint:
6 | golangci-lint run --timeout=10m --exclude-use-default=false --tests=false --skip-dirs=example
7 |
8 | .PHONY: example
9 | example:
10 | bash tool/build-example.sh
11 |
--------------------------------------------------------------------------------
/consts.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package holmes
19 |
20 | import (
21 | "os"
22 | "time"
23 | )
24 |
25 | const ( // default trigger thresholds, cooldowns, and dump/logger settings
26 | defaultThreadTriggerMin = 10 // 10 threads
27 | defaultThreadTriggerAbs = 70 // 70 threads
28 | defaultThreadTriggerDiff = 25 // 25%
29 |
30 | defaultCPUTriggerMin = 10 // 10%
31 | defaultCPUTriggerAbs = 70 // 70%
32 | defaultCPUTriggerDiff = 25 // 25%
33 | defaultCPUSamplingTime = 5 * time.Second // collect 5s cpu profile
34 |
35 | defaultGoroutineTriggerMin = 3000 // 3000 goroutines
36 | defaultGoroutineTriggerAbs = 200000 // 200k goroutines
37 | defaultGoroutineTriggerDiff = 20 // 20% diff
38 |
39 | defaultMemTriggerMin = 10 // 10%
40 | defaultMemTriggerAbs = 80 // 80%
41 | defaultMemTriggerDiff = 25 // 25%
42 |
43 | defaultGCHeapTriggerMin = 10 // 10%
44 | defaultGCHeapTriggerAbs = 40 // 40%
45 | defaultGCHeapTriggerDiff = 20 // 20%
46 |
47 | defaultCooldown = time.Minute // 1 minute
48 | defaultThreadCoolDown = time.Hour // 1 hour
49 | defaultGoroutineCoolDown = time.Minute * 10 // 10 minutes
50 |
51 | defaultInterval = 5 * time.Second // 5 seconds
52 | defaultDumpProfileType = binaryDump
53 | defaultDumpPath = "/tmp"
54 | defaultLoggerName = "holmes.log"
55 | defaultLoggerFlags = os.O_RDWR | os.O_CREATE | os.O_APPEND
56 | defaultLoggerPerm = 0644
57 | defaultShardLoggerSize = 5242880 // 5 * 1024 * 1024; presumably bytes (5 MiB) - confirm against usage
58 | )
59 |
60 | type dumpProfileType int // dumpProfileType is the kind of profile dump; its values are binaryDump and textDump
61 |
62 | const (
63 | binaryDump dumpProfileType = 0 // also the value of defaultDumpProfileType
64 | textDump dumpProfileType = 1
65 | )
66 |
67 | type configureType int // configureType is the check type used as the key of type2name and check2name
68 |
69 | const (
70 | mem configureType = iota // 0; the following constants count up from here
71 | cpu
72 | thread
73 | goroutine
74 | gcHeap
75 | )
76 |
77 | // type2name maps each check type to its profile name, aligned with pprof's naming.
78 | var type2name = map[configureType]string{
79 | mem: "heap",
80 | cpu: "cpu",
81 | thread: "threadcreate",
82 | goroutine: "goroutine",
83 | gcHeap: "heap", // same profile name as mem
84 | }
85 |
86 | // check2name maps each check type to the name of the check.
87 | var check2name = map[configureType]string{
88 | mem: "mem",
89 | cpu: "cpu",
90 | thread: "thread",
91 | goroutine: "goroutine",
92 | gcHeap: "GCHeap",
93 | }
94 |
95 | const ( // paths of the cgroup files for the memory limit and the CPU CFS quota/period
96 | cgroupMemLimitPath = "/sys/fs/cgroup/memory/memory.limit_in_bytes"
97 | cgroupCpuQuotaPath = "/sys/fs/cgroup/cpu/cpu.cfs_quota_us"
98 | cgroupCpuPeriodPath = "/sys/fs/cgroup/cpu/cpu.cfs_period_us"
99 | )
100 |
101 | const minCollectCyclesBeforeDumpStart = 10 // NOTE(review): presumably the number of collect cycles required before dumping may start - confirm against usage
102 |
103 | const (
104 | // TrimResultTopN is the number of top entries that trimResult reserves.
105 | TrimResultTopN = 10
106 |
107 | // TrimResultMaxBytes is the number of leading bytes that trimResultFront reserves.
108 | TrimResultMaxBytes = 512000
109 |
110 | // NotSupportTypeMaxConfig means this profile type does
111 | // not support controlling profile dumps by the max parameter.
112 | NotSupportTypeMaxConfig = 0
113 |
114 | // UniformLogFormat is the format of uniform logging.
115 | UniformLogFormat = "[Holmes] %v %v, config_min : %v, config_diff : %v, config_abs : %v, config_max : %v, previous : %v, current: %v"
116 | )
117 |
--------------------------------------------------------------------------------
/dingtalk.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mosn/holmes/efb1a7768843e83b645f6e683f7b6c5d826651ab/dingtalk.png
--------------------------------------------------------------------------------
/doc/example.md:
--------------------------------------------------------------------------------
1 | * [cases show](#cases-show)
2 | * [RSS peak caused by make a 1GB slice](#rss-peak-caused-by-make-a-1gb-slice)
3 | * [goroutine explosion caused by deadlock](#goroutine-explosion-caused-by-deadlock)
4 | * [goroutine explosion caused by channel block](#goroutine-explosion-caused-by-channel-block)
5 | * [process slowly leaks goroutines](#process-slowly-leaks-goroutines)
6 | * [large memory allocation caused by business logic](#large-memory-allocation-caused-by-business-logic)
7 | * [deadloop caused cpu outage](#deadloop-caused-cpu-outage)
8 | * [large thread allocation caused by cgo block](#large-thread-allocation-caused-by-cgo-block)
9 |
10 |
11 | ## cases show
12 | All example code is [here](../example).
13 |
14 | ### RSS peak caused by make a 1GB slice
15 |
16 | See this [example](../example/1gbslice/1gbslice.go)
17 |
18 | After warming up, just curl http://localhost:10003/make1gb a few times, then you'll probably see:
19 |
20 | ```
21 | heap profile: 0: 0 [1: 1073741824] @ heap/1048576
22 | 0: 0 [1: 1073741824] @ 0x42ba3ef 0x4252254 0x4254095 0x4254fd3 0x425128c 0x40650a1
23 | # 0x42ba3ee main.make1gbslice+0x3e /Users/xargin/go/src/github.com/mosn/holmes/example/1gbslice.go:24
24 | # 0x4252253 net/http.HandlerFunc.ServeHTTP+0x43 /Users/xargin/sdk/go1.14.2/src/net/http/server.go:2012
25 | # 0x4254094 net/http.(*ServeMux).ServeHTTP+0x1a4 /Users/xargin/sdk/go1.14.2/src/net/http/server.go:2387
26 | # 0x4254fd2 net/http.serverHandler.ServeHTTP+0xa2 /Users/xargin/sdk/go1.14.2/src/net/http/server.go:2807
27 | # 0x425128b net/http.(*conn).serve+0x86b /Users/xargin/sdk/go1.14.2/src/net/http/server.go:1895
28 | ```
29 |
30 | 1: 1073741824 means 1 object and 1GB memory consumption.
31 |
32 | ### goroutine explosion caused by deadlock
33 |
34 | See this [example](../example/deadlock/deadlock.go)
35 |
36 | curl localhost:10003/lockorder1
37 |
38 | curl localhost:10003/lockorder2
39 |
40 | After warming up, wrk -c 100 http://localhost:10003/req, then you'll see the goroutine count
41 | peak caused by the deadlock:
42 |
43 | ```
44 | 100 @ 0x40380b0 0x4048c80 0x4048c6b 0x40489e7 0x406f72c 0x42badfc 0x42badfd 0x4252b94 0x42549d5 0x4255913 0x4251bcc 0x40659e1
45 | # 0x40489e6 sync.runtime_SemacquireMutex+0x46 /Users/xargin/sdk/go1.14.2/src/runtime/sema.go:71
46 | # 0x406f72b sync.(*Mutex).lockSlow+0xfb /Users/xargin/sdk/go1.14.2/src/sync/mutex.go:138
47 | # 0x42badfb sync.(*Mutex).Lock+0x8b /Users/xargin/sdk/go1.14.2/src/sync/mutex.go:81
48 | # 0x42badfc main.req+0x8c /Users/xargin/go/src/github.com/mosn/holmes/example/deadlock.go:30
49 | # 0x4252b93 net/http.HandlerFunc.ServeHTTP+0x43 /Users/xargin/sdk/go1.14.2/src/net/http/server.go:2012
50 | # 0x42549d4 net/http.(*ServeMux).ServeHTTP+0x1a4 /Users/xargin/sdk/go1.14.2/src/net/http/server.go:2387
51 | # 0x4255912 net/http.serverHandler.ServeHTTP+0xa2 /Users/xargin/sdk/go1.14.2/src/net/http/server.go:2807
52 | # 0x4251bcb net/http.(*conn).serve+0x86b /Users/xargin/sdk/go1.14.2/src/net/http/server.go:1895
53 | 1 @ 0x40380b0 0x4048c80 0x4048c6b 0x40489e7 0x406f72c 0x42bb041 0x42bb042 0x4252b94 0x42549d5 0x4255913 0x4251bcc 0x40659e1
54 |
55 | # 0x40489e6 sync.runtime_SemacquireMutex+0x46 /Users/xargin/sdk/go1.14.2/src/runtime/sema.go:71
56 | # 0x406f72b sync.(*Mutex).lockSlow+0xfb /Users/xargin/sdk/go1.14.2/src/sync/mutex.go:138
57 | # 0x42bb040 sync.(*Mutex).Lock+0xf0 /Users/xargin/sdk/go1.14.2/src/sync/mutex.go:81
58 | # 0x42bb041 main.lockorder2+0xf1 /Users/xargin/go/src/github.com/mosn/holmes/example/deadlock.go:50
59 | # 0x4252b93 net/http.HandlerFunc.ServeHTTP+0x43 /Users/xargin/sdk/go1.14.2/src/net/http/server.go:2012
60 | # 0x42549d4 net/http.(*ServeMux).ServeHTTP+0x1a4 /Users/xargin/sdk/go1.14.2/src/net/http/server.go:2387
61 | # 0x4255912 net/http.serverHandler.ServeHTTP+0xa2 /Users/xargin/sdk/go1.14.2/src/net/http/server.go:2807
62 | # 0x4251bcb net/http.(*conn).serve+0x86b /Users/xargin/sdk/go1.14.2/src/net/http/server.go:1895
63 |
64 | 1 @ 0x40380b0 0x4048c80 0x4048c6b 0x40489e7 0x406f72c 0x42baf11 0x42baf12 0x4252b94 0x42549d5 0x4255913 0x4251bcc 0x40659e1
65 | # 0x40489e6 sync.runtime_SemacquireMutex+0x46 /Users/xargin/sdk/go1.14.2/src/runtime/sema.go:71
66 | # 0x406f72b sync.(*Mutex).lockSlow+0xfb /Users/xargin/sdk/go1.14.2/src/sync/mutex.go:138
67 | # 0x42baf10 sync.(*Mutex).Lock+0xf0 /Users/xargin/sdk/go1.14.2/src/sync/mutex.go:81
68 | # 0x42baf11 main.lockorder1+0xf1 /Users/xargin/go/src/github.com/mosn/holmes/example/deadlock.go:40
69 | # 0x4252b93 net/http.HandlerFunc.ServeHTTP+0x43 /Users/xargin/sdk/go1.14.2/src/net/http/server.go:2012
70 | # 0x42549d4 net/http.(*ServeMux).ServeHTTP+0x1a4 /Users/xargin/sdk/go1.14.2/src/net/http/server.go:2387
71 | # 0x4255912 net/http.serverHandler.ServeHTTP+0xa2 /Users/xargin/sdk/go1.14.2/src/net/http/server.go:2807
72 | # 0x4251bcb net/http.(*conn).serve+0x86b /Users/xargin/sdk/go1.14.2/src/net/http/server.go:1895
73 | ```
74 |
75 | The req API was blocked by deadlock.
76 |
77 | You should set DumpFullStack to true to locate the deadlock bug.
78 |
79 | ### goroutine explosion caused by channel block
80 |
81 | See this [example](../example/channelblock/channelblock.go)
82 |
83 | after warming up, just wrk -c100 http://localhost:10003/chanblock
84 |
85 | ```
86 | goroutine profile: total 203
87 | 100 @ 0x4037750 0x4007011 0x4006a15 0x42ba3c9 0x4252234 0x4254075 0x4254fb3 0x425126c 0x4065081
88 | # 0x42ba3c8 main.channelBlock+0x38 /Users/xargin/go/src/github.com/mosn/holmes/example/channelblock.go:26
89 | # 0x4252233 net/http.HandlerFunc.ServeHTTP+0x43 /Users/xargin/sdk/go1.14.2/src/net/http/server.go:2012
90 | # 0x4254074 net/http.(*ServeMux).ServeHTTP+0x1a4 /Users/xargin/sdk/go1.14.2/src/net/http/server.go:2387
91 | # 0x4254fb2 net/http.serverHandler.ServeHTTP+0xa2 /Users/xargin/sdk/go1.14.2/src/net/http/server.go:2807
92 | # 0x425126b net/http.(*conn).serve+0x86b /Users/xargin/sdk/go1.14.2/src/net/http/server.go:1895
93 | ```
94 |
95 | It's easy to locate.
96 |
97 | ### process slowly leaks goroutines
98 |
99 | See this [example](../example/slowlyleak/slowlyleak.go)
100 |
101 | The producer forgets to close the task channel after producing finishes, so every request
102 | to this URI leaks a goroutine. We can curl http://localhost:10003/leak several
103 | times and get the following log:
104 |
105 | ```
106 | goroutine profile: total 10
107 | 7 @ 0x4038380 0x4008497 0x400819b 0x42bb129 0x4065cb1
108 | # 0x42bb128 main.leak.func1+0x48 /Users/xargin/go/src/github.com/mosn/holmes/example/slowlyleak.go:26
109 | ```
110 |
111 | It's easy to find the reason for the leak.
112 |
113 | ### large memory allocation caused by business logic
114 |
115 | See this [example](../example/alloc/alloc.go); it is similar to the large slice example above.
116 |
117 | After warming up finished, wrk -c100 http://localhost:10003/alloc:
118 |
119 | ```
120 | pprof memory, config_min : 3, config_diff : 25, config_abs : 80, previous : [0 0 0 4 0 0 0 0 0 0], current : 4
121 | heap profile: 83: 374069984 [3300: 14768402720] @ heap/1048576
122 | 79: 374063104 [3119: 14768390144] @ 0x40104b3 0x401024f 0x42bb1ba 0x4252ff4 0x4254e35 0x4255d73 0x425202c 0x4065e41
123 | # 0x42bb1b9 main.alloc+0x69 /Users/xargin/go/src/github.com/mosn/holmes/example/alloc.go:25
124 | # 0x4252ff3 net/http.HandlerFunc.ServeHTTP+0x43 /Users/xargin/sdk/go1.14.2/src/net/http/server.go:2012
125 | # 0x4254e34 net/http.(*ServeMux).ServeHTTP+0x1a4 /Users/xargin/sdk/go1.14.2/src/net/http/server.go:2387
126 | # 0x4255d72 net/http.serverHandler.ServeHTTP+0xa2 /Users/xargin/sdk/go1.14.2/src/net/http/server.go:2807
127 | # 0x425202b net/http.(*conn).serve+0x86b /Users/xargin/sdk/go1.14.2/src/net/http/server.go:1895
128 | ```
129 |
130 | ### deadloop caused cpu outage
131 |
132 | See this [example](../example/cpu_explode/cpu_explode.go).
133 |
134 | After warming up finished, curl http://localhost:10003/cpuex several times, then you'll
135 | see the cpu profile dump to your dump path.
136 |
137 | Note that the cpu profile currently doesn't support text mode.
138 |
139 | ```
140 | go tool pprof cpu.20201028100641.bin
141 |
142 | (pprof) top
143 | Showing nodes accounting for 19.45s, 99.95% of 19.46s total
144 | Dropped 6 nodes (cum <= 0.10s)
145 | flat flat% sum% cum cum%
146 | 17.81s 91.52% 91.52% 19.45s 99.95% main.cpuex.func1
147 | 1.64s 8.43% 99.95% 1.64s 8.43% runtime.asyncPreempt
148 |
149 | (pprof) list func1
150 | Total: 19.46s
151 | ROUTINE ======================== main.cpuex.func1 in /Users/xargin/go/src/github.com/mosn/holmes/example/cpu_explode.go
152 | 17.81s 19.45s (flat, cum) 99.95% of Total
153 | 80ms 80ms 1:/*
154 | * Licensed to the Apache Software Foundation (ASF) under one or more
155 | * contributor license agreements. See the NOTICE file distributed with
156 | * this work for additional information regarding copyright ownership.
157 | * The ASF licenses this file to You under the Apache License, Version 2.0
158 | * (the "License"); you may not use this file except in compliance with
159 | * the License. You may obtain a copy of the License at
160 | *
161 | * http://www.apache.org/licenses/LICENSE-2.0
162 | *
163 | * Unless required by applicable law or agreed to in writing, software
164 | * distributed under the License is distributed on an "AS IS" BASIS,
165 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
166 | * See the License for the specific language governing permissions and
167 | * limitations under the License.
168 | */
169 |
170 | package main
171 | . . 2:
172 | . . 3:import (
173 | . . 4: "net/http"
174 | . . 5: "time"
175 | . . 6:
176 | . . 7: "github.com/mosn/holmes"
177 | . . 8:)
178 | . . 9:
179 | . . 10:func init() {
180 | . . 11: http.HandleFunc("/cpuex", cpuex)
181 | . . 12: go http.ListenAndServe(":10003", nil)
182 | . . 13:}
183 | . . 14:
184 | . . 15:var h = holmes.New("2s", "1m", "/tmp", false).
185 | . . 16: EnableCPUDump().Config(20, 25, 80)
186 | . . 17:
187 | . . 18:func main() {
188 | . . 19: h.Start()
189 | . . 20: time.Sleep(time.Hour)
190 | . . 21:}
191 | . . 22:
192 | . . 23:func cpuex(wr http.ResponseWriter, req *http.Request) {
193 | . . 24: go func() {
194 | 17.73s 19.37s 25: for {
195 | . . 26: }
196 | . . 27: }()
197 | . . 28:}
198 |
199 | ```
200 |
201 | So we have found the culprit.
202 |
203 | ### large thread allocation caused by cgo block
204 |
205 | See this [example](./example/thread_trigger/thread_trigger.go)
206 |
207 | This is a cgo blocking example: massive cgo blocking causes many threads to be created.
208 |
209 | After warming up, curl http://localhost:10003/leak, then the thread profile and goroutine
210 | profile will be dumped to dumpPath:
211 |
212 | ```
213 | [2020-11-10 19:49:52.145][Holmes] pprof thread, config_min : 10, config_diff : 25, config_abs : 100, previous : [8 8 8 8 8 8 8 8 8 1013], current : 1013
214 | [2020-11-10 19:49:52.146]threadcreate profile: total 1013
215 | 1012 @
216 | # 0x0
217 |
218 | 1 @ 0x403af6e 0x403b679 0x4037e34 0x4037e35 0x40677d1
219 | # 0x403af6d runtime.allocm+0x14d /Users/xargin/sdk/go1.14.2/src/runtime/proc.go:1390
220 | # 0x403b678 runtime.newm+0x38 /Users/xargin/sdk/go1.14.2/src/runtime/proc.go:1704
221 | # 0x4037e33 runtime.startTemplateThread+0x2c3 /Users/xargin/sdk/go1.14.2/src/runtime/proc.go:1768
222 | # 0x4037e34 runtime.main+0x2c4 /Users/xargin/sdk/go1.14.2/src/runtime/proc.go:186
223 |
224 | goroutine profile: total 1002
225 | 999 @ 0x4004f8b 0x4394a61 0x4394f79 0x40677d1
226 | # 0x4394a60 main._Cfunc_output+0x40 _cgo_gotypes.go:70
227 | # 0x4394f78 main.leak.func1.1+0x48 /Users/xargin/go/src/github.com/mosn/holmes/example/thread_trigger.go:45
228 |
229 | 1 @ 0x4038160 0x40317ca 0x4030d35 0x40c6555 0x40c8db4 0x40c8d96 0x41a8f92 0x41c2a52 0x41c1894 0x42d00cd 0x42cfe17 0x4394c57 0x4394c20 0x4037d82 0x40677d1
230 | # 0x4030d34 internal/poll.runtime_pollWait+0x54 /Users/xargin/sdk/go1.14.2/src/runtime/netpoll.go:203
231 | # 0x40c6554 internal/poll.(*pollDesc).wait+0x44 /Users/xargin/sdk/go1.14.2/src/internal/poll/fd_poll_runtime.go:87
232 | # 0x40c8db3 internal/poll.(*pollDesc).waitRead+0x1d3 /Users/xargin/sdk/go1.14.2/src/internal/poll/fd_poll_runtime.go:92
233 | # 0x40c8d95 internal/poll.(*FD).Accept+0x1b5 /Users/xargin/sdk/go1.14.2/src/internal/poll/fd_unix.go:384
234 | # 0x41a8f91 net.(*netFD).accept+0x41 /Users/xargin/sdk/go1.14.2/src/net/fd_unix.go:238
235 | # 0x41c2a51 net.(*TCPListener).accept+0x31 /Users/xargin/sdk/go1.14.2/src/net/tcpsock_posix.go:139
236 | # 0x41c1893 net.(*TCPListener).Accept+0x63 /Users/xargin/sdk/go1.14.2/src/net/tcpsock.go:261
237 | # 0x42d00cc net/http.(*Server).Serve+0x25c /Users/xargin/sdk/go1.14.2/src/net/http/server.go:2901
238 | # 0x42cfe16 net/http.(*Server).ListenAndServe+0xb6 /Users/xargin/sdk/go1.14.2/src/net/http/server.go:2830
239 | # 0x4394c56 net/http.ListenAndServe+0x96 /Users/xargin/sdk/go1.14.2/src/net/http/server.go:3086
240 | # 0x4394c1f main.main+0x5f /Users/xargin/go/src/github.com/mosn/holmes/example/thread_trigger.go:55
241 | # 0x4037d81 runtime.main+0x211 /Users/xargin/sdk/go1.14.2/src/runtime/proc.go:203
242 |
243 | 1 @ 0x4038160 0x4055bea 0x4394ead 0x40677d1
244 | # 0x4055be9 time.Sleep+0xb9 /Users/xargin/sdk/go1.14.2/src/runtime/time.go:188
245 | # 0x4394eac main.init.0.func1+0x1dc /Users/xargin/go/src/github.com/mosn/holmes/example/thread_trigger.go:34
246 |
247 | 1 @ 0x43506d5 0x43504f0 0x434d28a 0x4391872 0x43914cf 0x43902c2 0x40677d1
248 | # 0x43506d4 runtime/pprof.writeRuntimeProfile+0x94 /Users/xargin/sdk/go1.14.2/src/runtime/pprof/pprof.go:694
249 | # 0x43504ef runtime/pprof.writeGoroutine+0x9f /Users/xargin/sdk/go1.14.2/src/runtime/pprof/pprof.go:656
250 | # 0x434d289 runtime/pprof.(*Profile).WriteTo+0x3d9 /Users/xargin/sdk/go1.14.2/src/runtime/pprof/pprof.go:329
251 | # 0x4391871 github.com/mosn/holmes.(*Holmes).threadProfile+0x2e1 /Users/xargin/go/src/github.com/mosn/holmes/holmes.go:260
252 | # 0x43914ce github.com/mosn/holmes.(*Holmes).threadCheckAndDump+0x9e /Users/xargin/go/src/github.com/mosn/holmes/holmes.go:241
253 | # 0x43902c1 github.com/mosn/holmes.(*Holmes).startDumpLoop+0x571 /Users/xargin/go/src/github.com/mosn/holmes/holmes.go:158
254 | ```
255 |
256 | So we know that the threads are blocked by cgo calls.
257 |
--------------------------------------------------------------------------------
/doc/zh.md:
--------------------------------------------------------------------------------
1 |
2 | * [holmes](#holmes)
3 | * [设计](#设计)
4 | * [如何使用](#如何使用)
5 | * [Dump Goroutine profile](#dump-goroutine-profile)
6 | * [Dump cpu profile](#dump-cpu-profile)
7 | * [Dump Heap Memory Profile](#dump-heap-memory-profile)
8 | * [基于Gc周期的Heap Memory Dump](#基于gc周期的heap-memory-dump)
9 | * [动态设置holmes配置](#动态设置holmes配置)
10 | * [Dump事件上报](#dump事件上报)
11 | * [开启全部](#开启全部)
12 | * [在docker 或者cgroup环境下运行 holmes](#在docker-或者cgroup环境下运行-holmes)
13 | * [已知风险](#已知风险)
14 | * [使用示例](#使用示例)
15 |
16 | # holmes
17 |
18 | 基于规则的自动Golang Profile Dumper.
19 |
20 | 作为一名"懒惰"的程序员,如何避免在线上Golang系统半夜宕机
21 | (一般是OOM导致的)时起床保存现场呢?又或者如何dump压测时性能尖刺时刻的profile文件呢?
22 |
23 | holmes 或许能帮助您解决以上问题。
24 |
25 | ## 设计
26 |
27 | holmes 每隔一段时间收集一次以下应用指标:
28 |
29 | * 协程数,通过`runtime.NumGoroutine`。
30 | * 当前应用所占用的RSS,通过[gopsutil](https://github.com/shirou/gopsutil)第三方库。
31 | * CPU使用率,比如8C的机器,如果使用了4C,则使用率为50%,通过[gopsutil](https://github.com/shirou/gopsutil)第三方库。
32 |
33 | 除此之外,holmes还会根据Gc周期收集RSS指标,如果您开启了`GCheap dump`的话。
34 |
35 | 在预热阶段(应用启动后,holmes会收集十次指标)结束后,holmes会比较当前指标是否满足用户所设置的阈值/规则,如果满足的话,则dump profile,
36 | 以日志或者二进制文件的格式保留现场。
37 |
38 | ## 如何使用
39 |
40 | ```shell
41 | go get mosn.io/holmes
42 | ```
43 | 在应用初始化逻辑加上对应的holmes配置。
44 | ```go
45 | func main() {
46 |
47 | h := initHolmes()
48 |
49 | // start the metrics collect and dump loop
50 | h.Start()
51 | ......
52 |
53 | // quit the application and stop the dumper
54 | h.Stop()
55 | }
56 | func initHolmes() *holmes.Holmes {
57 | h, _ := holmes.New(
58 | holmes.WithCollectInterval("5s"),
59 | holmes.WithDumpPath("/tmp"),
60 | holmes.WithCPUDump(20, 25, 80, time.Minute),
61 | holmes.WithCPUMax(90),
62 | )
63 | h.EnableCPUDump()
64 | return h
65 | }
66 |
67 | ```
68 |
69 | holmes 支持对以下几种应用指标进行监控:
70 |
71 | * mem: 内存分配
72 | * cpu: cpu使用率
73 | * thread: 线程数
74 | * goroutine: 协程数
75 | * gcHeap: 基于GC周期的内存分配
76 |
77 |
78 | ### Dump Goroutine profile
79 |
80 | ```go
81 | h, _ := holmes.New(
82 | holmes.WithCollectInterval("5s"),
83 | holmes.WithDumpPath("/tmp"),
84 | holmes.WithTextDump(),
85 | holmes.WithDumpToLogger(true),
86 | holmes.WithGoroutineDump(10, 25, 2000, 10*1000, time.Minute),
87 | )
88 | h.EnableGoroutineDump()
89 |
90 | // start the metrics collect and dump loop
91 | h.Start()
92 |
93 | // stop the dumper
94 | h.Stop()
95 | ```
96 |
97 | * WithCollectInterval("5s") 每5s采集一次当前应用的各项指标,该值建议设置为大于1s。
98 | * WithDumpPath("/tmp") profile文件保存路径。
99 | * WithTextDump() 以文本格式保存profile内容。
100 | * WithDumpToLogger() profile内容将会输出到日志。
101 | * WithGoroutineDump(10, 25, 2000, 10*1000, time.Minute) 当goroutine指标满足以下条件时,将会触发dump操作。
102 | current_goroutine_num > `10` && current_goroutine_num < `10*1000` &&
103 | current_goroutine_num > `125`% * previous_average_goroutine_num or current_goroutine_num > `2000`.
104 | `time.Minute` 是两次dump操作之间最小时间间隔,避免频繁profiling对性能产生的影响。
105 |
106 | > WithGoroutineDump(min int, diff int, abs int, max int, coolDown time.Duration)
107 | > 当应用所启动的goroutine number大于`Max` 时,holmes会跳过dump操作,因为当goroutine number很大时,
108 | > dump goroutine profile操作成本很高(STW && dump),有可能拖垮应用。当`Max`=0 时代表没有限制。
109 |
110 | ### Dump cpu profile
111 |
112 | ```go
113 | h, _ := holmes.New(
114 | holmes.WithCollectInterval("5s"),
115 | holmes.WithDumpPath("/tmp"),
116 | holmes.WithCPUDump(20, 25, 80, time.Minute),
117 | holmes.WithCPUMax(90),
118 | )
119 | h.EnableCPUDump()
120 |
121 | // start the metrics collect and dump loop
122 | h.Start()
123 |
124 | // stop the dumper
125 | h.Stop()
126 | ```
127 |
128 | * WithCollectInterval("5s") 每5s采集一次当前应用的各项指标,该值建议设置为大于1s。
129 | * WithDumpPath("/tmp") profile文件保存路径。
130 | * cpu profile支持保存文件,不支持输出到日志中,所以WithBinaryDump()和 WithTextDump()在这场景会失效。
131 | * WithCPUDump(20, 25, 80, time.Minute) 会在满足以下条件时dump profile cpu usage > `20%` &&
132 | cpu usage > `125%` * previous cpu usage recorded or cpu usage > `80%`.
133 | `time.Minute` 是两次dump操作之间最小时间间隔,避免频繁profiling对性能产生的影响。
134 | * WithCPUMax 当cpu使用率大于`Max`, holmes会跳过dump操作,以防拖垮系统。
135 |
136 | ### Dump Heap Memory Profile
137 |
138 | ```go
139 | h, _ := holmes.New(
140 | holmes.WithCollectInterval("5s"),
141 | holmes.WithDumpPath("/tmp"),
142 | holmes.WithTextDump(),
143 | holmes.WithMemDump(30, 25, 80, time.Minute),
144 | )
145 |
146 | h.EnableMemDump()
147 |
148 | // start the metrics collect and dump loop
149 | h.Start()
150 |
151 | // stop the dumper
152 | h.Stop()
153 | ```
154 | * WithCollectInterval("5s") 每5s采集一次当前应用的各项指标,该值建议设置为大于1s。
155 | * WithDumpPath("/tmp") profile文件保存路径。
156 | * WithTextDump() 以文本格式保存profile内容。
157 | * WithMemDump(30, 25, 80, time.Minute) 会在满足以下条件时抓取heap profile memory usage > `30%` &&
158 | memory usage > `125%` * previous memory usage or memory usage > `80%`,
159 | `time.Minute` 是两次dump操作之间最小时间间隔,避免频繁profiling对性能产生的影响。
160 |
161 | ### 基于Gc周期的Heap Memory Dump
162 |
163 | 在一些场景下,我们无法通过定时的`Memory Dump`保留到现场, 比如应用在一个`CollectInterval`周期内分配了大量内存,
164 | 又快速回收了它们,此时`holmes`在周期前后的采集到内存使用率没有产生过大波动,与实际情况不符。为了解决这种情况,`holmes`开发了基于GC周期的
165 | `Profile`类型,它会在内存使用率飙高的前后两个GC周期内各dump一次profile,然后开发人员可以使用`pprof --base`命令去对比
166 | 两个时刻堆内存之间的差异。 [具体实现介绍](https://uncledou.site/2022/go-pprof-heap/)。
167 |
168 | ```go
169 | h, _ := holmes.New(
170 | holmes.WithDumpPath("/tmp"),
171 | holmes.WithLogger(holmes.NewFileLog("/tmp/holmes.log", mlog.INFO)),
172 | holmes.WithBinaryDump(),
173 | holmes.WithMemoryLimit(100*1024*1024), // 100MB
174 | holmes.WithGCHeapDump(10, 20, 40, time.Minute),
175 | // holmes.WithProfileReporter(reporter),
176 | )
177 | h.EnableGCHeapDump().Start()
178 | time.Sleep(time.Hour)
179 | ```
180 |
181 | ### 动态设置holmes配置
182 |
183 | 您可以通过`Set`在系统运行时更新holmes的配置。它的使用十分简单,和初始化时的`New`方法一样。
184 |
185 | ```go
186 | h.Set(
187 | WithCollectInterval("2s"),
188 | WithGoroutineDump(10, 10, 50, 90, time.Minute))
189 | ```
190 |
191 | ### Dump事件上报
192 |
193 | 您可以通过实现`Reporter` 来实现以下功能:
194 | * 发送包含现场的告警信息,当`holmes`触发`Dump`操作时。
195 | * 将`Profiles`上传到其他地方,以防实例被销毁,从而导致profile丢失,或进行分析。
196 |
197 | ```go
198 | type ReporterImpl struct{}
199 | func (r *ReporterImpl) Report(pType string, filename string, reason ReasonType, eventID string, sampleTime time.Time, pprofBytes []byte, scene Scene) error{
200 | // do something
201 | }
202 | ......
203 | r := &ReporterImpl{} // an implementation of the holmes.ProfileReporter interface.
204 | h, _ := holmes.New(
205 | holmes.WithProfileReporter(r),
206 | holmes.WithDumpPath("/tmp"),
207 | holmes.WithLogger(holmes.NewFileLog("/tmp/holmes.log", mlog.INFO)),
208 | holmes.WithBinaryDump(),
209 | holmes.WithMemoryLimit(100*1024*1024), // 100MB
210 | holmes.WithGCHeapDump(10, 20, 40, time.Minute),
211 | )
212 |
213 | ```
214 |
215 | ### 开启全部
216 |
217 | holmes当然不是只支持一个类型的dump啦,您可以按需选择您需要的dump类型。
218 |
219 | ```go
220 | h, _ := holmes.New(
221 | holmes.WithCollectInterval("5s"),
222 | holmes.WithDumpPath("/tmp"),
223 | holmes.WithTextDump(),
224 |
225 | holmes.WithCPUDump(10, 25, 80, time.Minute),
226 | holmes.WithMemDump(30, 25, 80, time.Minute),
227 | holmes.WithGCHeapDump(10, 20, 40, time.Minute),
228 | holmes.WithGoroutineDump(500, 25, 20000, 0, time.Minute),
229 | )
230 |
231 | h.EnableCPUDump().
232 | EnableGoroutineDump().
233 | EnableMemDump().
234 | EnableGCHeapDump().Start()
235 |
236 | ```
237 |
238 | ### 在docker 或者cgroup环境下运行 holmes
239 |
240 | ```go
241 | h, _ := holmes.New(
242 | holmes.WithCollectInterval("5s"),
243 | holmes.WithDumpPath("/tmp"),
244 | holmes.WithTextDump(),
245 |
246 | holmes.WithCPUDump(10, 25, 80, time.Minute),
247 | holmes.WithCGroup(true), // set cgroup to true
248 | )
249 | ```
250 |
251 | ## 已知风险
252 | Goroutine dump 会导致 STW,[从而导致时延](https://github.com/golang/go/issues/33250)。
253 | > 目前Go官方已经有一个[CL](https://go-review.googlesource.com/c/go/+/387415/)在优化这个问题了。
254 |
255 | ## 使用示例
256 | [点击这里](./example.md)
257 |
258 | ## Contributing
259 | See our [contributor guide](./CONTRIBUTING.md).
--------------------------------------------------------------------------------
/example/1gbslice/1gbslice.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package main
19 |
20 | import (
21 | mlog "mosn.io/pkg/log"
22 | "net/http"
23 | "time"
24 |
25 | "mosn.io/holmes"
26 | )
27 |
28 | // run `curl http://localhost:10003/make1gb` after 15s(warn up)
29 | func init() {
30 | http.HandleFunc("/make1gb", make1gbslice)
31 | go http.ListenAndServe(":10003", nil) //nolint:errcheck
32 | }
33 |
34 | func main() {
35 | h, _ := holmes.New(
36 | holmes.WithCollectInterval("2s"),
37 | holmes.WithDumpPath("./tmp"),
38 | holmes.WithLogger(holmes.NewFileLog("./tmp/holmes.log", mlog.DEBUG)),
39 | holmes.WithTextDump(),
40 | holmes.WithMemDump(3, 25, 80, time.Minute),
41 | )
42 | h.EnableMemDump().Start()
43 | time.Sleep(time.Hour)
44 | }
45 |
// make1gbslice is the /make1gb handler: it allocates a 1 GiB byte slice and
// discards it right away. Neither the response writer nor the request is used.
func make1gbslice(wr http.ResponseWriter, req *http.Request) {
	const oneGiB = 1 << 30 // 1073741824 bytes
	buf := make([]byte, oneGiB)
	_ = buf
}
50 |
--------------------------------------------------------------------------------
/example/alloc/alloc.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package main
19 |
20 | import (
21 | "fmt"
22 | "net/http"
23 | "time"
24 |
25 | mlog "mosn.io/pkg/log"
26 |
27 | "mosn.io/holmes"
28 | )
29 |
30 | // run `curl http://localhost:10003/alloc` after 15s(warn up)
31 | func init() {
32 | http.HandleFunc("/alloc", alloc)
33 | go http.ListenAndServe(":10003", nil) //nolint:errcheck
34 | }
35 |
36 | func main() {
37 | h, _ := holmes.New(
38 | holmes.WithCollectInterval("2s"),
39 | holmes.WithDumpPath("./tmp"),
40 | holmes.WithLogger(holmes.NewFileLog("./tmp/holmes.log", mlog.DEBUG)),
41 | holmes.WithTextDump(),
42 | holmes.WithMemDump(3, 25, 80, time.Minute),
43 | )
44 | h.EnableMemDump().Start()
45 | time.Sleep(time.Hour)
46 | }
47 |
// alloc is the /alloc handler: it creates a map with a very large size hint,
// stores 1000 entries keyed and valued by the decimal loop counter, and then
// drops the map. Neither the response writer nor the request is used.
func alloc(wr http.ResponseWriter, req *http.Request) {
	table := make(map[string]string, 1073741824)
	for n := 0; n < 1000; n++ {
		key := fmt.Sprint(n)
		value := fmt.Sprint(n)
		table[key] = value
	}
	_ = table
}
55 |
--------------------------------------------------------------------------------
/example/channelblock/channelblock.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package main
19 |
20 | import (
21 | mlog "mosn.io/pkg/log"
22 | "net/http"
23 | "time"
24 |
25 | "mosn.io/holmes"
26 | )
27 |
28 | // run `curl http://localhost:10003/chanblock` after 15s(warn up)
29 | func init() {
30 | http.HandleFunc("/chanblock", channelBlock)
31 | go http.ListenAndServe(":10003", nil) //nolint:errcheck
32 | }
33 |
34 | func main() {
35 | h, _ := holmes.New(
36 | holmes.WithCollectInterval("5s"),
37 | holmes.WithDumpPath("./tmp"),
38 | holmes.WithLogger(holmes.NewFileLog("./tmp/holmes.log", mlog.DEBUG)),
39 | holmes.WithTextDump(),
40 | holmes.WithGoroutineDump(10, 25, 2000, 10000, time.Minute),
41 | )
42 | h.EnableGoroutineDump().Start()
43 | time.Sleep(time.Hour)
44 | }
45 |
// nilCh is never initialised, so it stays a nil channel.
var nilCh chan int

// channelBlock is the /chanblock handler. Sending on a nil channel blocks
// forever, so every request leaves its handler goroutine parked on the send.
func channelBlock(wr http.ResponseWriter, req *http.Request) {
	nilCh <- 1
}
51 |
--------------------------------------------------------------------------------
/example/cpu_explode/cpu_explode.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package main
19 |
20 | import (
21 | "net/http"
22 | "time"
23 |
24 | mlog "mosn.io/pkg/log"
25 |
26 | "mosn.io/holmes"
27 | )
28 |
29 | // run `curl http://localhost:10003/cpuex` after 15s(warn up)
30 | func init() {
31 | http.HandleFunc("/cpuex", cpuex)
32 | go http.ListenAndServe(":10003", nil) //nolint:errcheck
33 | }
34 |
35 | func main() {
36 | h, _ := holmes.New(
37 | holmes.WithCollectInterval("2s"),
38 | holmes.WithDumpPath("./tmp"),
39 | holmes.WithLogger(holmes.NewFileLog("./tmp/holmes.log", mlog.DEBUG)),
40 | holmes.WithCPUDump(20, 25, 80, time.Minute),
41 | )
42 | h.EnableCPUDump().Start()
43 | time.Sleep(time.Hour)
44 | }
45 |
// cpuex is the /cpuex handler. Each call launches one goroutine that spins in
// an empty loop forever, then returns immediately without writing a response
// body.
func cpuex(wr http.ResponseWriter, req *http.Request) {
	spin := func() {
		for {
		}
	}
	go spin()
}
53 |
--------------------------------------------------------------------------------
/example/deadlock/deadlock.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package main
19 |
20 | import (
21 | mlog "mosn.io/pkg/log"
22 | "net/http"
23 | "sync"
24 | "time"
25 |
26 | "mosn.io/holmes"
27 | )
28 |
29 | // run `curl http://localhost:10003/lockorder1` after 15s(warn up)
30 | // run `curl http://localhost:10003/lockorder2` after 15s(warn up)
31 | // run `curl http://localhost:10003/req` after 15s(warn up)
32 | func init() {
33 | http.HandleFunc("/lockorder1", lockorder1)
34 | http.HandleFunc("/lockorder2", lockorder2)
35 | http.HandleFunc("/req", req)
36 | go http.ListenAndServe(":10003", nil) //nolint:errcheck
37 | }
38 |
39 | func main() {
40 | h, _ := holmes.New(
41 | holmes.WithCollectInterval("5s"),
42 | holmes.WithDumpPath("./tmp"),
43 | holmes.WithLogger(holmes.NewFileLog("./tmp/holmes.log", mlog.DEBUG)),
44 | holmes.WithTextDump(),
45 | holmes.WithGoroutineDump(10, 25, 2000, 10000, time.Minute),
46 | )
47 | h.EnableGoroutineDump().Start()
48 | time.Sleep(time.Hour)
49 | }
50 |
// l1 and l2 are the two mutexes that the handlers below acquire in opposite
// orders.
var l1 sync.Mutex
var l2 sync.Mutex

// req is the /req handler: it takes l1 and releases it on return, so it
// blocks for as long as another goroutine holds l1.
func req(wr http.ResponseWriter, req *http.Request) {
	l1.Lock()
	defer l1.Unlock()
}

// lockorder1 is the /lockorder1 handler: it takes l1, sleeps for a minute
// while holding it, and then takes l2. Both locks are released on return.
func lockorder1(wr http.ResponseWriter, req *http.Request) {
	l1.Lock()
	defer l1.Unlock()

	// Hold l1 long enough for a concurrent lockorder2 call to grab l2.
	time.Sleep(time.Minute)

	l2.Lock()
	defer l2.Unlock()
}

// lockorder2 is the /lockorder2 handler: it takes l2, sleeps for a minute
// while holding it, and then takes l1, the reverse order of lockorder1.
// Running both handlers concurrently therefore deadlocks them.
func lockorder2(wr http.ResponseWriter, req *http.Request) {
	l2.Lock()
	defer l2.Unlock()

	// Hold l2 long enough for a concurrent lockorder1 call to grab l1.
	time.Sleep(time.Minute)

	l1.Lock()
	defer l1.Unlock()
}
78 |
--------------------------------------------------------------------------------
/example/deadloop/deadloop.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package main
19 |
20 | import (
21 | mlog "mosn.io/pkg/log"
22 | "net/http"
23 | "time"
24 |
25 | "mosn.io/holmes"
26 | )
27 |
28 | // run `curl http://localhost:10003/alldeadloopoc` after 15s(warn up)
29 | func init() {
30 | http.HandleFunc("/deadloop", deadloop)
31 | go http.ListenAndServe(":10003", nil) //nolint:errcheck
32 | }
33 |
34 | func main() {
35 | h, _ := holmes.New(
36 | holmes.WithCollectInterval("2s"),
37 | holmes.WithDumpPath("./tmp"),
38 | holmes.WithLogger(holmes.NewFileLog("./tmp/holmes.log", mlog.DEBUG)),
39 | holmes.WithCPUDump(10, 25, 80, time.Minute),
40 | )
41 | h.EnableCPUDump().Start()
42 | time.Sleep(time.Hour)
43 | }
44 |
// deadloop is the /deadloop handler. It loops, sleeping one millisecond per
// iteration, until the request's context is done, and then returns.
func deadloop(wr http.ResponseWriter, req *http.Request) {
	done := req.Context().Done()
	for {
		select {
		case <-done:
			// A bare break here would only leave the select, not the for
			// loop, so the handler would keep spinning after the client
			// went away. Return to actually stop.
			return
		default:
			time.Sleep(time.Millisecond)
		}
	}
}
55 |
--------------------------------------------------------------------------------
/example/gcheap/.gitignore:
--------------------------------------------------------------------------------
1 | gcheap
2 | vendor
3 |
--------------------------------------------------------------------------------
/example/gcheap/README.md:
--------------------------------------------------------------------------------
1 |
2 | ## How to test
3 |
4 | 1. change to the current gcheap directory
5 | ```
6 | cd /path/to/gcheap/
7 | ```
8 |
9 | 2. compile
10 | ```
11 | go build gcheap.go
12 | ```
13 |
14 | 3. start the sample http server with the gctrace enabled.
15 | ```
16 | GODEBUG=gctrace=1 ./gcheap
17 | ```
18 |
19 | 4. start the rand allocation case
20 | ```
21 | ./rand.sh
22 | ```
23 |
24 | The `/rand` API allocates some memory that will be recycled soon,
25 | the internal `heapMarked` will stay about 10 MB, and the GC Goal will stay about 21MB.
26 |
27 | we can see the following gctrace log from stdout:
28 | ```
29 | gc 28 @11.666s 0%: 0.17+0.19+0.017 ms clock, 2.0+0.085/0.19/0.13+0.20 ms cpu, 20->20->10 MB, 21 MB goal, 12 P
30 | gc 29 @12.121s 0%: 0.065+0.21+0.015 ms clock, 0.78+0.11/0.23/0.13+0.18 ms cpu, 20->20->10 MB, 21 MB goal, 12 P
31 | ```
32 |
33 | Also, we can see the following holmes log from /tmp/holmes.log:
34 | ```
35 | [Holmes] NODUMP GCHeap, config_min : 10, config_diff : 20, config_abs : 40, config_max : 0, previous : [10 10 10 10 10 10 10 10 10 10], current: 10
36 | ```
37 |
38 | Everything works well now.
39 |
40 | 5. memory spike
41 | ```bash
42 | curl http://localhost:10024/spike
43 | ```
44 |
45 | The `/spike` API will allocate 10 MB of memory and keep it for a while.
46 |
47 | We can see the GC goal increased from the gctrace log:
48 | ```
49 | gc 432 @191.430s 0%: 0.14+0.40+0.004 ms clock, 1.7+0.26/0.41/0.52+0.051 ms cpu, 21->22->19 MB, 22 MB goal, 12 P
50 | gc 433 @192.079s 0%: 0.042+0.22+0.002 ms clock, 0.51+0.097/0.38/0.21+0.024 ms cpu, 37->37->10 MB, 38 MB goal, 12 P
51 | ```
52 |
53 | And we see that we got two profiles from holmes log:
54 | ```
55 | [2022-02-09 14:48:23.103][Holmes] pprof GCHeap, config_min : 10, config_diff : 20, config_abs : 40, config_max : 0, previous : [10 10 10 10 10 10 10 10 10 19], current: 19
56 | [2022-02-09 14:48:23.751][Holmes] pprof GCHeap, config_min : 10, config_diff : 20, config_abs : 40, config_max : 0, previous : [10 10 10 10 10 10 10 10 10 19], current: 10
57 | ```
58 |
59 | 6. generate flamegraph
60 |
61 | We can find out exactly what caused the GC goal to increase by using the following command.
62 | (The profile names are derived from the timestamps in the holmes log.)
63 | ```
64 | go tool pprof -http=:8000 -base GCHeap.20220209144823.103.bin GCHeap.20220209144823.751.bin
65 | ```
66 |
67 | It shows the reason for memory spike clearly.
68 |
69 | 
--------------------------------------------------------------------------------
/example/gcheap/gcheap.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package main
19 |
20 | import (
21 | "fmt"
22 |
23 | mlog "mosn.io/pkg/log"
24 |
25 | //"mosn.io/holmes/reporters/http_reporter"
26 | "math/rand"
27 | "net/http"
28 | "time"
29 |
30 | "mosn.io/holmes"
31 | )
32 |
33 | // run `curl http://localhost:10024/rand` after 15s(warn up)
34 | // run `curl http://localhost:10024/spike` after 15s(warn up)
35 | func init() {
36 | http.HandleFunc("/rand", randAlloc)
37 | http.HandleFunc("/spike", spikeAlloc)
38 | go http.ListenAndServe(":10024", nil)
39 | }
40 |
41 | func main() {
42 | // reporter := http_reporter.NewReporter("TOKEN", "URL")
43 | h, _ := holmes.New(
44 | holmes.WithDumpPath("./tmp"),
45 | holmes.WithLogger(holmes.NewFileLog("./tmp/holmes.log", mlog.DEBUG)),
46 | holmes.WithBinaryDump(),
47 | holmes.WithMemoryLimit(100*1024*1024), // 100MB
48 | holmes.WithGCHeapDump(10, 20, 40, time.Minute),
49 | // holmes.WithProfileReporter(reporter),
50 | )
51 | h.EnableGCHeapDump().Start()
52 | time.Sleep(time.Hour)
53 | }
54 |
// base is allocated once at package initialisation and stays referenced for
// the life of the process.
var (
	base = make([]byte, 1024*1024*10) // 10 MB long live memory.
)
58 |
// randAlloc is the /rand handler. It performs 1024 rounds, each allocating a
// byte slice of random size in [0, 1024) and appending it to a short-lived
// list; whenever the random size is 0 the list is replaced by a fresh empty
// one. After a 10ms pause it writes the final list length to the response.
func randAlloc(wr http.ResponseWriter, req *http.Request) {
	chunks := make([][]byte, 0) // short live
	for round := 0; round < 1024; round++ {
		size := rand.Intn(1024)
		chunks = append(chunks, make([]byte, size))

		if size == 0 {
			chunks = make([][]byte, 0)
		}
	}
	time.Sleep(10 * time.Millisecond)
	fmt.Fprintf(wr, "slice current length: %v\n", len(chunks))
}
74 |
// spikeAlloc is the /spike handler. It allocates ten 1 MiB byte slices
// (10 MB in total), keeps them referenced for 500ms, and then writes the
// number of slices held to the response.
func spikeAlloc(wr http.ResponseWriter, req *http.Request) {
	const chunkSize = 1024 * 1024

	held := make([][]byte, 0, 1024) // spike, 10MB
	for len(held) < 10 {
		held = append(held, make([]byte, chunkSize))
	}

	// live for a while
	time.Sleep(500 * time.Millisecond)
	fmt.Fprintf(wr, "spike slice length: %v\n", len(held))
}
85 |
--------------------------------------------------------------------------------
/example/gcheap/go.mod:
--------------------------------------------------------------------------------
1 | module example.com/m
2 |
3 | go 1.17
4 |
5 | require (
6 | mosn.io/holmes v0.0.0-20220125114618-8cb365eb42ac
7 | mosn.io/pkg v0.0.0-20220308091858-ea728aacbe63
8 | )
9 |
10 | require (
11 | github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d // indirect
12 | github.com/go-ole/go-ole v1.2.4 // indirect
13 | github.com/hashicorp/go-syslog v1.0.0 // indirect
14 | github.com/rcrowley/go-metrics v0.0.0-20200313005456-10cdbea86bc0 // indirect
15 | github.com/shirou/gopsutil v3.20.11+incompatible // indirect
16 | golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e // indirect
17 | gopkg.in/natefinch/lumberjack.v2 v2.0.0 // indirect
18 | mosn.io/api v0.0.0-20210204052134-5b9a826795fd // indirect
19 | )
20 |
21 | replace mosn.io/holmes => ../../
22 |
--------------------------------------------------------------------------------
/example/gcheap/memory-spike.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mosn/holmes/efb1a7768843e83b645f6e683f7b6c5d826651ab/example/gcheap/memory-spike.png
--------------------------------------------------------------------------------
/example/gcheap/rand.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Drives the gcheap example by requesting its /rand endpoint in an endless
# loop. Stop it with Ctrl-C.

# Echo every command before it runs.
set -x

while true; do
    curl 'http://localhost:10024/rand'
done
8 |
--------------------------------------------------------------------------------
/example/pyroscope_rideshare/README.md:
--------------------------------------------------------------------------------
1 |
2 | Enable holmes as a Pyroscope client that reports pprof
3 | events to the Pyroscope server.
4 |
5 | Note: do NOT set TextDump while using holmes as a Pyroscope client,
6 | because Pyroscope needs profiles in proto format.
7 |
8 | Step 1
9 |
10 | ``docker run -it -p 4040:4040 pyroscope/pyroscope:latest server``
11 |
12 | open browser on [pyroscope admin page](http://localhost:4040/)
13 |
14 | Step 2
15 | Run the script `start_client.sh` from the `example/pyroscope_rideshare/` directory.
16 |
17 | Step 3
18 | Wait 15 seconds, refresh the pyroscope admin page, and select
19 | `holmes-client` in the `Application` box as shown below.
20 | ![pyroscope admin page](./admin.png)
21 |
22 |
--------------------------------------------------------------------------------
/example/pyroscope_rideshare/admin.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mosn/holmes/efb1a7768843e83b645f6e683f7b6c5d826651ab/example/pyroscope_rideshare/admin.png
--------------------------------------------------------------------------------
/example/pyroscope_rideshare/bike/bike.go:
--------------------------------------------------------------------------------
1 | package bike
2 |
3 | import "rideshare/utility"
4 |
5 | func OrderBike(searchRadius int64) {
6 | utility.FindNearestVehicle(searchRadius, "bike")
7 | for i := 0; i < 3; i++ {
8 | go utility.AllocMem()
9 | }
10 | }
11 |
--------------------------------------------------------------------------------
/example/pyroscope_rideshare/car/car.go:
--------------------------------------------------------------------------------
1 | package car
2 |
3 | import (
4 | "rideshare/utility"
5 | )
6 |
// OrderCar searches for the nearest car within searchRadius by delegating to
// utility.FindNearestVehicle with the "car" vehicle type.
func OrderCar(searchRadius int64) {
	utility.FindNearestVehicle(searchRadius, "car")
}
10 |
--------------------------------------------------------------------------------
/example/pyroscope_rideshare/go.mod:
--------------------------------------------------------------------------------
1 | module rideshare
2 |
3 | go 1.14
4 |
5 | require mosn.io/holmes v1.1.0
6 |
7 | replace mosn.io/holmes => ../../
8 |
--------------------------------------------------------------------------------
/example/pyroscope_rideshare/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "net/http"
6 | "os"
7 | "time"
8 |
9 | "mosn.io/holmes"
10 | "mosn.io/holmes/reporters/pyroscope_reporter"
11 | "rideshare/bike"
12 | "rideshare/car"
13 | "rideshare/scooter"
14 | )
15 |
16 | func bikeRoute(w http.ResponseWriter, r *http.Request) {
17 | bike.OrderBike(1)
18 | w.Write([]byte("
Bike ordered
"))
19 | }
20 |
21 | func scooterRoute(w http.ResponseWriter, r *http.Request) {
22 | scooter.OrderScooter(2)
23 | w.Write([]byte("Scooter ordered
"))
24 | }
25 |
26 | func carRoute(w http.ResponseWriter, r *http.Request) {
27 | car.OrderCar(3)
28 |
29 | w.Write([]byte("Car ordered
"))
30 | }
31 |
// index handles /: it responds with a heading line followed by every
// environment variable of the process (KEY=value), one per line.
func index(w http.ResponseWriter, r *http.Request) {
	// Line breaks are \n escapes: an interpreted ("...") string literal must
	// not contain a raw newline. The page is assembled in a byte slice so the
	// cost stays linear in the size of the environment.
	page := []byte("environment vars:\n")
	for _, env := range os.Environ() {
		page = append(page, env...)
		page = append(page, '\n')
	}
	w.Write(page)
}
39 |
40 | var h *holmes.Holmes
41 |
42 | func InitHolmes() {
43 | fmt.Println("holmes initialing")
44 | h, _ = holmes.New(
45 | holmes.WithCollectInterval("1s"),
46 | holmes.WithDumpPath("./log/"),
47 | // can not set text in pyroscope client
48 | )
49 | fmt.Println("holmes initial success")
50 | h.
51 | EnableCPUDump().
52 | EnableGoroutineDump().
53 | EnableMemDump().
54 | Start()
55 | time.Sleep(11 * time.Second)
56 | fmt.Println("on running")
57 | }
58 |
59 | func main() {
60 | InitHolmes()
61 | region := os.Getenv("region")
62 | port := os.Getenv("port")
63 | fmt.Printf("region is %v port is %v \n", region, port)
64 | cfg := pyroscope_reporter.RemoteConfig{
65 | //AuthToken: "",
66 | //UpstreamThreads: 4,
67 | UpstreamAddress: "http://localhost:4040",
68 | UpstreamRequestTimeout: 3 * time.Second,
69 | }
70 |
71 | tags := map[string]string{
72 | "region": region,
73 | }
74 |
75 | pReporter, err := pyroscope_reporter.NewPyroscopeReporter("holmes-client", tags, cfg, holmes.NewStdLogger())
76 | if err != nil {
77 | fmt.Printf("NewPyroscopeReporter error %v\n", err)
78 | return
79 | }
80 |
81 | err = h.Set(
82 | holmes.WithProfileReporter(pReporter),
83 | holmes.WithGoroutineDump(2, 2, 20, 90, 20*time.Second),
84 | holmes.WithCPUDump(2, 2, 80, 20*time.Second),
85 | holmes.WithMemDump(1, 2, 80, 20*time.Second),
86 | holmes.WithCollectInterval("5s"),
87 | )
88 | if err != nil {
89 | fmt.Printf("fail to set opts on running time.\n")
90 | return
91 | }
92 |
93 | http.HandleFunc("/", index)
94 | http.HandleFunc("/bike", bikeRoute)
95 | http.HandleFunc("/scooter", scooterRoute)
96 | http.HandleFunc("/car", carRoute)
97 | err = http.ListenAndServe(":"+port, nil)
98 | if err != nil {
99 | panic(err)
100 | }
101 |
102 | time.Sleep(1 * time.Minute)
103 |
104 | }
105 |
--------------------------------------------------------------------------------
/example/pyroscope_rideshare/requests.py:
--------------------------------------------------------------------------------
"""Load generator for the rideshare example.

Sends an endless stream of GET requests for a random vehicle to a random
local rideshare instance, pausing 0.2-0.4 seconds between requests.
"""
import random
import time

import requests

PORTS = [
    '15011',
    '15012',
    '15013',
]

VEHICLES = [
    'bike',
    'scooter',
    'car',
]

# Seconds to wait for a response. Kept generous because the handlers
# busy-wait on purpose; without a timeout a hung server would block the
# generator forever.
REQUEST_TIMEOUT = 60

if __name__ == "__main__":
    print("starting load generator")
    time.sleep(3)
    while True:
        port = random.choice(PORTS)
        vehicle = random.choice(VEHICLES)
        print(f"requesting {vehicle} from {port}")
        try:
            resp = requests.get(f'http://localhost:{port}/{vehicle}',
                                timeout=REQUEST_TIMEOUT)
            print(f"received {resp}")
        except requests.RequestException as exc:
            # One unreachable or slow instance must not stop the generator.
            print(f"request failed: {exc}")
        time.sleep(random.uniform(0.2, 0.4))
27 |
--------------------------------------------------------------------------------
/example/pyroscope_rideshare/scooter/scooter.go:
--------------------------------------------------------------------------------
1 | package scooter
2 |
3 | import "rideshare/utility"
4 |
// OrderScooter searches for the nearest scooter within searchRadius by
// delegating to utility.FindNearestVehicle with the "scooter" vehicle type.
func OrderScooter(searchRadius int64) {
	utility.FindNearestVehicle(searchRadius, "scooter")
}
8 |
--------------------------------------------------------------------------------
/example/pyroscope_rideshare/start_client.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Starts three rideshare instances (one per region/port) in the background,
# waits for holmes to initialise, then runs the load generator.

# The assignments must share the command line with `go run` (no `;`), otherwise
# `region` is only a shell variable and never reaches the Go process.
region=us-east port=15011 go run main.go &
region=eu-north port=15012 go run main.go &
region=ap-south port=15013 go run main.go &

echo "wait holmes client init"
sleep 15
echo "init done, start to send request"

python3 requests.py
11 |
--------------------------------------------------------------------------------
/example/pyroscope_rideshare/utility/utility.go:
--------------------------------------------------------------------------------
1 | package utility
2 |
3 | import (
4 | "os"
5 | "time"
6 | )
7 |
// durationConstant is the base unit of simulated work used by this package.
const durationConstant = 200 * time.Millisecond

// mutexLock busy-loops for n*30 units of durationConstant. Despite its name it
// takes no lock; it only burns CPU time.
func mutexLock(n int64) {
	// This changes the amplitude of cpu bars
	budget := time.Duration(n*30) * durationConstant

	var spins int64
	for began := time.Now(); time.Since(began) < budget; {
		spins++
	}
}
21 |
22 | func checkDriverAvailability(n int64) {
23 | var i int64 = 0
24 |
25 | // start time is number of seconds since epoch
26 | startTime := time.Now()
27 |
28 | for time.Since(startTime) < time.Duration(n)*durationConstant {
29 | i++
30 | }
31 |
32 | // Every other minute this will artificially create make requests.py in eu-north region slow
33 | // this is just for demonstration purposes to show how performance impacts show up in the
34 | // flamegraph
35 | force_mutex_lock := time.Now().Minute()%2 == 0
36 | if os.Getenv("REGION") == "eu-north" && force_mutex_lock {
37 | mutexLock(n)
38 | }
39 |
40 | }
41 |
42 | func FindNearestVehicle(searchRadius int64, vehicle string) {
43 | //pyroscope.TagWrapper(context.Background(), pyroscope.Labels("vehicle", vehicle), func(ctx context.Context) {
44 | var i int64 = 0
45 |
46 | startTime := time.Now()
47 | for time.Since(startTime) < time.Duration(searchRadius)*durationConstant {
48 | i++
49 | }
50 |
51 | if vehicle == "car" {
52 | checkDriverAvailability(searchRadius)
53 | go func() {
54 | go AllocMem()
55 | }()
56 | }
57 | if vehicle == "bike" {
58 | for i := 1; i < 10; i++ {
59 | go func() {
60 | time.Sleep(15 * time.Second)
61 | }()
62 | }
63 | }
64 | //})
65 | }
66 |
// AllocMem allocates a 1 GiB byte slice and then sleeps for ten seconds
// before returning.
func AllocMem() {
	const oneGiB = 1 << 30 // 1073741824 bytes

	block := make([]byte, oneGiB)
	_ = block
	time.Sleep(10 * time.Second)
}
72 |
--------------------------------------------------------------------------------
/example/run_in_docker/run_in_docker.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package main
19 |
20 | import (
21 | "net/http"
22 | "time"
23 |
24 | "mosn.io/holmes"
25 | )
26 |
27 | func init() {
28 | http.HandleFunc("/docker", dockermake1gb)
29 | http.HandleFunc("/docker/cpu", cpuex)
30 | http.HandleFunc("/docker/cpu_multi_core", cpuMulticore)
31 | go http.ListenAndServe(":10003", nil) //nolint:errcheck
32 | }
33 |
34 | func main() {
35 | h, _ := holmes.New(
36 | holmes.WithCollectInterval("2s"),
37 | holmes.WithDumpPath("/tmp"),
38 | holmes.WithTextDump(),
39 | holmes.WithMemDump(3, 25, 80, time.Minute),
40 | holmes.WithCPUDump(60, 10, 80, time.Minute),
41 | holmes.WithCGroup(true),
42 | )
43 | h.EnableCPUDump()
44 | h.EnableMemDump()
45 | h.Start()
46 | time.Sleep(time.Hour)
47 | }
48 |
// cpuex keeps the handler goroutine busy, sleeping one millisecond per
// iteration, until the request's context is done, and then returns.
func cpuex(wr http.ResponseWriter, req *http.Request) {
	ctx := req.Context()
	for {
		select {
		case <-ctx.Done():
			// A bare `break` here would only leave the select and the
			// loop would run forever; return ends the handler.
			return
		default:
			time.Sleep(time.Millisecond)
		}
	}
}
59 |
// cpuMulticore starts 100 worker goroutines that each loop with a one
// millisecond sleep per iteration, and blocks until the request's context is
// done. The workers stop at that point as well.
func cpuMulticore(wr http.ResponseWriter, req *http.Request) {
	ctx := req.Context()

	for i := 1; i <= 100; i++ {
		go func() {
			for {
				select {
				case <-ctx.Done():
					// Without this return the closed Done channel is
					// always ready and the worker spins at full speed
					// forever.
					return
				default:
					time.Sleep(time.Millisecond)
				}
			}
		}()
	}

	<-ctx.Done()
}
75 |
// dockermake1gb allocates a 1 GiB byte slice and returns immediately; it
// writes nothing to the response.
func dockermake1gb(wr http.ResponseWriter, req *http.Request) {
	const oneGiB = 1 << 30 // 1073741824 bytes

	block := make([]byte, oneGiB)
	_ = block
}
80 |
--------------------------------------------------------------------------------
/example/slowlyleak/slowlyleak.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package main
19 |
20 | import (
21 | "net/http"
22 | "time"
23 |
24 | "mosn.io/holmes"
25 | )
26 |
// init registers the /leak handler on the default mux and starts the HTTP
// server on :10003 in a background goroutine.
func init() {
	http.HandleFunc("/leak", leak)
	// The server's error is deliberately ignored in this example.
	go http.ListenAndServe(":10003", nil) //nolint:errcheck
}
31 |
32 | func main() {
33 | h, _ := holmes.New(
34 | holmes.WithCollectInterval("2s"),
35 | holmes.WithDumpPath("/tmp"),
36 | holmes.WithTextDump(),
37 | holmes.WithGoroutineDump(10, 25, 80, 10000, time.Minute),
38 | )
39 | h.EnableGoroutineDump().Start()
40 | time.Sleep(time.Hour)
41 | }
42 |
// leak demonstrates a goroutine leak: every call starts a producer that sends
// ten tasks and a consumer that ranges over the same unbuffered channel.
// Because the channel is never closed, the consumer stays blocked forever once
// the producer has finished.
func leak(wr http.ResponseWriter, req *http.Request) {
	tasks := make(chan int)

	// consumer: drains tasks until the channel is closed (which never happens)
	go func() {
		for range tasks {
			// do some tasks
		}
	}()

	// producer: generates some tasks
	go func() {
		for n := 0; n < 10; n++ {
			tasks <- n
		}
		// forget to close the channel here
	}()
}
61 |
--------------------------------------------------------------------------------
/example/thread_trigger/thread_trigger.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package main
19 |
20 | /*
21 | #include
22 | #include
23 | #include
24 | void output(char *str) {
25 | sleep(10000);
26 | printf("%s\n", str);
27 | }
28 | */
29 | import "C"
30 | import (
31 | "fmt"
32 | "net/http"
33 | "time"
34 | "unsafe"
35 |
36 | _ "net/http/pprof"
37 |
38 | "mosn.io/holmes"
39 | )
40 |
41 | func init() {
42 | go func() {
43 | h, _ := holmes.New(
44 | holmes.WithCollectInterval("2s"),
45 | holmes.WithDumpPath("/tmp"),
46 | holmes.WithTextDump(),
47 | holmes.WithThreadDump(10, 25, 100, time.Minute),
48 | )
49 | h.EnableThreadDump().Start()
50 | time.Sleep(time.Hour)
51 | }()
52 | }
53 |
54 | func leak(wr http.ResponseWriter, req *http.Request) {
55 | go func() {
56 | for i := 0; i < 1000; i++ {
57 | go func() {
58 | str := "hello cgo"
59 | //change to char*
60 | cstr := C.CString(str)
61 | C.output(cstr)
62 | C.free(unsafe.Pointer(cstr))
63 |
64 | }()
65 | }
66 | }()
67 | }
68 |
69 | func main() {
70 | http.HandleFunc("/leak", leak)
71 | err := http.ListenAndServe(":10003", nil)
72 | if err != nil {
73 | fmt.Println(err)
74 | return
75 | }
76 | select {}
77 | }
78 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module mosn.io/holmes
2 |
3 | go 1.14
4 |
5 | require (
6 | github.com/gin-gonic/gin v1.7.7
7 | github.com/shirou/gopsutil v3.20.11+incompatible
8 | github.com/stretchr/testify v1.7.0
9 | mosn.io/pkg v1.6.0
10 | )
11 |
--------------------------------------------------------------------------------
/holmes.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package holmes
19 |
20 | import (
21 | "bytes"
22 | "fmt"
23 | "io/ioutil"
24 | "runtime"
25 | "runtime/pprof"
26 | "sync"
27 | "sync/atomic"
28 | "time"
29 | )
30 |
// Holmes is a self-aware profile dumper. It periodically samples resource
// usage, compares each sample with recent history, and dumps/reports a
// profile when a configured rule matches.
type Holmes struct {
	// opts holds the configuration; created by New and updated through the
	// Enable*/Disable* methods.
	opts *options

	// stats
	// collectCount counts completed collect cycles of the dump loop; the
	// *TriggerCount fields count how often the matching dump was triggered.
	collectCount             int
	gcCycleCount             int
	threadTriggerCount       int
	cpuTriggerCount          int
	memTriggerCount          int
	grTriggerCount           int
	gcHeapTriggerCount       int
	shrinkThreadTriggerCount int

	// cooldown
	// Each field is the earliest time at which the matching check may
	// trigger again.
	threadCoolDownTime    time.Time
	cpuCoolDownTime       time.Time
	memCoolDownTime       time.Time
	gcHeapCoolDownTime    time.Time
	grCoolDownTime        time.Time
	shrinkThrCoolDownTime time.Time

	// GC heap triggered, need to dump next time.
	gcHeapTriggered bool

	// stats ring
	// Recent samples per metric, used as history when matching rules.
	memStats    ring
	cpuStats    ring
	grNumStats  ring
	threadStats ring
	gcHeapStats ring

	// switch
	// stopped is 1 while holmes is stopped and 0 while it is running; it is
	// read and written with sync/atomic.
	stopped int64

	// The mutex protects the fields below; Start and Stop hold it while they
	// replace the channels.
	sync.Mutex
	// channel for GC sweep finalizer event
	gcEventsCh chan struct{}
	// profiler reporter channels
	rptEventsCh chan rptEvent
}
73 |
74 | // New creates a holmes dumper.
75 | func New(opts ...Option) (*Holmes, error) {
76 | holmes := &Holmes{
77 |
78 | opts: newOptions(),
79 | stopped: 1, // Initialization should be off
80 | }
81 |
82 | for _, opt := range opts {
83 | if err := opt.apply(holmes.opts); err != nil {
84 | return nil, err
85 | }
86 | }
87 |
88 | return holmes, nil
89 | }
90 |
// EnableThreadDump enables the thread dump. It returns h so calls can be
// chained.
func (h *Holmes) EnableThreadDump() *Holmes {
	h.opts.threadOpts.Enable = true
	return h
}

// DisableThreadDump disables the thread dump. It returns h so calls can be
// chained.
func (h *Holmes) DisableThreadDump() *Holmes {
	h.opts.threadOpts.Enable = false
	return h
}

// EnableGoroutineDump enables the goroutine dump. It returns h so calls can
// be chained.
func (h *Holmes) EnableGoroutineDump() *Holmes {
	h.opts.grOpts.Enable = true
	return h
}

// DisableGoroutineDump disables the goroutine dump. It returns h so calls can
// be chained.
func (h *Holmes) DisableGoroutineDump() *Holmes {
	h.opts.grOpts.Enable = false
	return h
}

// EnableCPUDump enables the CPU dump. It returns h so calls can be chained.
func (h *Holmes) EnableCPUDump() *Holmes {
	h.opts.cpuOpts.Enable = true
	return h
}

// DisableCPUDump disables the CPU dump. It returns h so calls can be chained.
func (h *Holmes) DisableCPUDump() *Holmes {
	h.opts.cpuOpts.Enable = false
	return h
}

// EnableMemDump enables the mem dump. It returns h so calls can be chained.
func (h *Holmes) EnableMemDump() *Holmes {
	h.opts.memOpts.Enable = true
	return h
}

// DisableMemDump disables the mem dump. It returns h so calls can be chained.
func (h *Holmes) DisableMemDump() *Holmes {
	h.opts.memOpts.Enable = false
	return h
}

// EnableGCHeapDump enables the GC heap dump. It returns h so calls can be
// chained.
func (h *Holmes) EnableGCHeapDump() *Holmes {
	h.opts.gCHeapOpts.Enable = true
	return h
}

// DisableGCHeapDump disables the GC heap dump. It returns h so calls can be
// chained.
func (h *Holmes) DisableGCHeapDump() *Holmes {
	h.opts.gCHeapOpts.Enable = false
	return h
}

// EnableShrinkThread enables thread shrinking. It returns h so calls can be
// chained.
func (h *Holmes) EnableShrinkThread() *Holmes {
	h.opts.ShrinkThrOptions.Enable = true
	return h
}

// DisableShrinkThread disables thread shrinking. It returns h so calls can be
// chained.
func (h *Holmes) DisableShrinkThread() *Holmes {
	h.opts.ShrinkThrOptions.Enable = false
	return h
}
162 |
// finalizerCallback is the finalizer installed on a gcHeapFinalizer. Each time
// it runs it re-registers itself on gc and tries to post one event to the
// holmes GC events channel without blocking. It does nothing once holmes is
// stopped. Any panic raised inside is recovered and logged.
func finalizerCallback(gc *gcHeapFinalizer) {
	defer func() {
		if r := recover(); r != nil {
			gc.h.Errorf("Panic in finalizer callback: %v", r)
		}
	}()
	// holmes is disabled or stopped: return without re-registering, so gc can
	// be cleaned up normally.
	if atomic.LoadInt64(&gc.h.stopped) == 1 {
		return
	}

	// register the finalizer again
	runtime.SetFinalizer(gc, finalizerCallback)

	// The channel field is read without holding the holmes mutex.
	// NOTE(review): Stop sets it to nil under the lock - confirm this
	// unsynchronized read is acceptable.
	ch := gc.h.gcEventsCh
	if ch == nil {
		return
	}
	// Notice: there is a small race here: the send panics if ch has been
	// closed in the meantime. It is left as is because the window is very
	// small and the deferred recover above handles the panic.
	select {
	case ch <- struct{}{}:
	default:
		gc.h.Errorf("can not send event to finalizer channel immediately, may be analyzer blocked?")
	}
}
190 |
// gcHeapFinalizer is the object that finalizerCallback is attached to; h is
// the Holmes instance the callback reports to.
// It won't fit into a tiny span since this struct contains a pointer.
type gcHeapFinalizer struct {
	h *Holmes
}
195 |
196 | func (h *Holmes) startGCCycleLoop(ch chan struct{}) {
197 | h.gcHeapStats = newRing(minCollectCyclesBeforeDumpStart)
198 |
199 | gc := &gcHeapFinalizer{
200 | h,
201 | }
202 |
203 | runtime.SetFinalizer(gc, finalizerCallback)
204 |
205 | go gc.h.gcHeapCheckLoop(ch)
206 | }
207 |
208 | // Start starts the dump loop of holmes.
209 | func (h *Holmes) Start() {
210 | h.Lock()
211 | defer h.Unlock()
212 |
213 | if !atomic.CompareAndSwapInt64(&h.stopped, 1, 0) {
214 | //nolint
215 | h.Errorf("Holmes has started, please don't start it again.")
216 | return
217 | }
218 |
219 | gcEventsCh := make(chan struct{}, 1)
220 | rptCh := make(chan rptEvent, 32)
221 | h.gcEventsCh = gcEventsCh
222 | h.rptEventsCh = rptCh
223 |
224 | h.initEnvironment()
225 | go h.startDumpLoop()
226 | go h.startReporter(rptCh)
227 |
228 | h.startGCCycleLoop(gcEventsCh)
229 | }
230 |
231 | // Stop the dump loop.
232 | func (h *Holmes) Stop() {
233 | h.Lock()
234 | defer h.Unlock()
235 |
236 | if !atomic.CompareAndSwapInt64(&h.stopped, 0, 1) {
237 | //nolint
238 | fmt.Println("Holmes has stop, please don't stop it again.")
239 | return
240 | }
241 |
242 | if gcEventsCh := h.gcEventsCh; gcEventsCh != nil {
243 | h.gcEventsCh = nil
244 | close(gcEventsCh)
245 | }
246 | if rptEventsCh := h.rptEventsCh; rptEventsCh != nil {
247 | h.rptEventsCh = nil
248 | close(rptEventsCh)
249 | }
250 | }
251 |
252 | func (h *Holmes) startDumpLoop() {
253 | // init previous cool down time
254 | now := time.Now()
255 | h.cpuCoolDownTime = now
256 | h.memCoolDownTime = now
257 | h.grCoolDownTime = now
258 |
259 | // init stats ring
260 | h.cpuStats = newRing(minCollectCyclesBeforeDumpStart)
261 | h.memStats = newRing(minCollectCyclesBeforeDumpStart)
262 | h.grNumStats = newRing(minCollectCyclesBeforeDumpStart)
263 | h.threadStats = newRing(minCollectCyclesBeforeDumpStart)
264 |
265 | // dump loop
266 | ticker := time.NewTicker(h.opts.CollectInterval)
267 | defer ticker.Stop()
268 |
269 | for {
270 | select {
271 | case <-h.opts.intervalResetting:
272 | // wait for go version update to 1.15
273 | // can use Reset API directly here. pkg.go.dev/time#Ticker.Reset
274 | // we can't use the `for-range` here, because the range loop
275 | // caches the variable to be lopped and then it can't be overwritten
276 | itv := h.opts.CollectInterval
277 | h.Infof("[Holmes] collect interval is resetting to [%v]\n", itv) //nolint:forbidigo
278 | ticker = time.NewTicker(itv)
279 |
280 | default:
281 | // bug fix: https://github.com/mosn/holmes/issues/63
282 | // make sure that the message inside intervalResetting channel
283 | // would be consumed before ticker.C.
284 | <-ticker.C
285 | if atomic.LoadInt64(&h.stopped) == 1 {
286 | h.Infof("[Holmes] dump loop stopped") //nolint:forbidigo
287 | return
288 | }
289 |
290 | cpuCore, err := h.getCPUCore()
291 | if cpuCore == 0 || err != nil {
292 | h.Errorf("[Holmes] get CPU core failed, CPU core: %v, error: %v", cpuCore, err)
293 | return
294 | }
295 |
296 | memoryLimit, err := h.getMemoryLimit()
297 | if memoryLimit == 0 || err != nil {
298 | h.Errorf("[Holmes] get memory limit failed, memory limit: %v, error: %v", memoryLimit, err)
299 | return
300 | }
301 |
302 | cpu, mem, gNum, tNum, err := collect(cpuCore, memoryLimit)
303 | if err != nil {
304 | h.Errorf("failed to collect resource usage: %v", err.Error())
305 |
306 | continue
307 | }
308 |
309 | h.cpuStats.push(cpu)
310 | h.memStats.push(mem)
311 | h.grNumStats.push(gNum)
312 | h.threadStats.push(tNum)
313 |
314 | h.collectCount++
315 | if h.collectCount < minCollectCyclesBeforeDumpStart {
316 | // at least collect some cycles
317 | // before start to judge and dump
318 | h.Debugf("[Holmes] warming up cycle : %d", h.collectCount)
319 |
320 | continue
321 | }
322 |
323 | if err := h.EnableDump(cpu); err != nil {
324 | h.Infof("[Holmes] unable to dump: %v", err)
325 |
326 | continue
327 | }
328 |
329 | h.memCheckAndDump(mem)
330 | h.cpuCheckAndDump(cpu)
331 | h.threadCheckAndDump(tNum)
332 | h.threadCheckAndShrink(tNum)
333 | h.goroutineCheckAndDump(gNum)
334 | }
335 | }
336 | }
337 |
338 | // goroutine start.
339 | func (h *Holmes) goroutineCheckAndDump(gNum int) {
340 | // get a copy instead of locking it
341 | grOpts := h.opts.GetGrOpts()
342 | if !grOpts.Enable {
343 | return
344 | }
345 |
346 | if h.grCoolDownTime.After(time.Now()) {
347 | h.Debugf("[Holmes] goroutine dump is in cooldown")
348 | return
349 | }
350 | // grOpts is a struct, no escape.
351 | if triggered := h.goroutineProfile(gNum, grOpts); triggered {
352 | h.grCoolDownTime = time.Now().Add(grOpts.CoolDown)
353 | h.grTriggerCount++
354 | }
355 | }
356 |
357 | func (h *Holmes) goroutineProfile(gNum int, c grOptions) bool {
358 | match, reason := matchRule(h.grNumStats, gNum, c.TriggerMin, c.TriggerAbs, c.TriggerDiff, c.GoroutineTriggerNumMax)
359 | if !match {
360 | h.Infof(UniformLogFormat, "NODUMP", check2name[goroutine],
361 | c.TriggerMin, c.TriggerDiff, c.TriggerAbs,
362 | c.GoroutineTriggerNumMax, h.grNumStats.sequentialData(), gNum)
363 | return false
364 | }
365 |
366 | h.Alertf("holmes.goroutine", UniformLogFormat, "pprof ", check2name[goroutine],
367 | c.TriggerMin, c.TriggerDiff, c.TriggerAbs,
368 | c.GoroutineTriggerNumMax,
369 | h.grNumStats.sequentialData(), gNum)
370 |
371 | var buf bytes.Buffer
372 | _ = pprof.Lookup("goroutine").WriteTo(&buf, int(h.opts.DumpProfileType)) // nolint: errcheck
373 |
374 | scene := Scene{
375 | typeOption: *c.typeOption,
376 | CurVal: gNum,
377 | Avg: h.grNumStats.avg(),
378 | }
379 |
380 | h.ReportProfile(type2name[goroutine], h.writeProfileDataToFile(buf, goroutine, ""),
381 | reason, "", time.Now(), buf.Bytes(), scene)
382 | return true
383 | }
384 |
385 | // memory start.
386 | func (h *Holmes) memCheckAndDump(mem int) {
387 | // get a copy instead of locking it
388 | memOpts := h.opts.GetMemOpts()
389 | if !memOpts.Enable {
390 | return
391 | }
392 |
393 | if h.memCoolDownTime.After(time.Now()) {
394 | h.Debugf("[Holmes] mem dump is in cooldown")
395 | return
396 | }
397 | // memOpts is a struct, no escape.
398 | if triggered := h.memProfile(mem, memOpts); triggered {
399 | h.memCoolDownTime = time.Now().Add(memOpts.CoolDown)
400 | h.memTriggerCount++
401 | }
402 | }
403 |
404 | func (h *Holmes) memProfile(rss int, c typeOption) bool {
405 | match, reason := matchRule(h.memStats, rss, c.TriggerMin, c.TriggerAbs, c.TriggerDiff, NotSupportTypeMaxConfig)
406 | if !match {
407 | // let user know why this should not dump
408 | h.Infof(UniformLogFormat, "NODUMP", check2name[mem],
409 | c.TriggerMin, c.TriggerDiff, c.TriggerAbs, NotSupportTypeMaxConfig,
410 | h.memStats.sequentialData(), rss)
411 |
412 | return false
413 | }
414 |
415 | h.Alertf("holmes.memory", UniformLogFormat, "pprof", check2name[mem],
416 | c.TriggerMin, c.TriggerDiff, c.TriggerAbs,
417 | NotSupportTypeMaxConfig, h.memStats, rss)
418 |
419 | var buf bytes.Buffer
420 | _ = pprof.Lookup("heap").WriteTo(&buf, int(h.opts.DumpProfileType)) // nolint: errcheck
421 |
422 | scene := Scene{
423 | typeOption: c,
424 | CurVal: rss,
425 | Avg: h.memStats.avg(),
426 | }
427 |
428 | h.ReportProfile(type2name[mem], h.writeProfileDataToFile(buf, mem, ""), reason, "", time.Now(), buf.Bytes(), scene)
429 | return true
430 | }
431 |
432 | func (h *Holmes) threadCheckAndShrink(threadNum int) {
433 | opts := h.opts.GetShrinkThreadOpts()
434 |
435 | if !opts.Enable {
436 | return
437 | }
438 |
439 | if h.shrinkThrCoolDownTime.After(time.Now()) {
440 | return
441 | }
442 |
443 | if threadNum > opts.Threshold {
444 | // 100x Delay time a cooldown time as default
445 | delay := opts.Delay * 100
446 | // one hour at least
447 | if delay < time.Hour {
448 | delay = time.Hour
449 | }
450 | if delay > time.Hour*24 {
451 | delay = time.Hour * 24
452 | }
453 | h.shrinkThrCoolDownTime = time.Now().Add(delay)
454 |
455 | h.Alertf("holmes.thread", "current thread number(%v) larger than threshold(%v), will start to shrink thread after %v", threadNum, opts.Threshold, opts.Delay)
456 |
457 | // do not shrink thread immediately
458 | time.AfterFunc(opts.Delay, func() {
459 | h.startShrinkThread()
460 | })
461 | }
462 | }
463 |
464 | // thread start.
465 | func (h *Holmes) threadCheckAndDump(threadNum int) {
466 | threadOpts := h.opts.GetThreadOpts()
467 | if !threadOpts.Enable {
468 | return
469 | }
470 |
471 | if h.threadCoolDownTime.After(time.Now()) {
472 | h.Debugf("[Holmes] thread dump is in cooldown")
473 | return
474 | }
475 | // threadOpts is a struct, no escape.
476 | if triggered := h.threadProfile(threadNum, threadOpts); triggered {
477 | h.threadCoolDownTime = time.Now().Add(threadOpts.CoolDown)
478 | h.threadTriggerCount++
479 |
480 | // optimize: https://github.com/mosn/holmes/issues/84
481 | // Thread dump information contains goroutine information
482 | // skip goroutine dump
483 | h.goroutineCoolDownByThread()
484 | }
485 | }
486 |
487 | // The thread dump is triggered while operating goroutine dump CoolDown .
488 | // Thread dump information contains goroutine information .
489 | func (h *Holmes) goroutineCoolDownByThread() {
490 | grOpts := h.opts.GetGrOpts()
491 | if !grOpts.Enable {
492 | return
493 | }
494 |
495 | h.grCoolDownTime = time.Now().Add(grOpts.CoolDown)
496 | }
497 |
498 | // TODO: better only shrink the threads that are idle.
499 | func (h *Holmes) startShrinkThread() {
500 |
501 | curThreadNum := getThreadNum()
502 | opts := h.opts.GetShrinkThreadOpts()
503 |
504 | n := curThreadNum - opts.Threshold
505 |
506 | // check again after the timer triggered
507 | if opts.Enable && n > 0 {
508 | h.shrinkThreadTriggerCount++
509 | h.Infof("[holmes] start to shrink %v threads, now: %v", n, curThreadNum)
510 |
511 | var wg sync.WaitGroup
512 | wg.Add(n)
513 | for i := 0; i < n; i++ {
514 | // avoid close too much thread in batch.
515 | time.Sleep(time.Millisecond * 100)
516 |
517 | go func() {
518 | defer wg.Done()
519 | runtime.LockOSThread()
520 | }()
521 | }
522 | wg.Wait()
523 |
524 | h.Infof("[holmes] finished shrink threads, now: %v", getThreadNum())
525 | }
526 | }
527 |
528 | func (h *Holmes) threadProfile(curThreadNum int, c typeOption) bool {
529 | match, reason := matchRule(h.threadStats, curThreadNum, c.TriggerMin, c.TriggerAbs, c.TriggerDiff, NotSupportTypeMaxConfig)
530 | if !match {
531 | // let user know why this should not dump
532 | h.Infof(UniformLogFormat, "NODUMP", check2name[thread],
533 | c.TriggerMin, c.TriggerDiff, c.TriggerAbs, NotSupportTypeMaxConfig,
534 | h.threadStats.sequentialData(), curThreadNum)
535 |
536 | return false
537 | }
538 |
539 | h.Alertf("holmes.thread", UniformLogFormat, "pprof", check2name[thread],
540 | c.TriggerMin, c.TriggerDiff, c.TriggerAbs,
541 | NotSupportTypeMaxConfig, h.threadStats, curThreadNum)
542 |
543 | eventID := fmt.Sprintf("thr-%d", h.threadTriggerCount)
544 | var buf bytes.Buffer
545 |
546 | _ = pprof.Lookup("threadcreate").WriteTo(&buf, int(h.opts.DumpProfileType)) // nolint: errcheck
547 |
548 | scene := Scene{
549 | typeOption: c,
550 | CurVal: curThreadNum,
551 | Avg: h.threadStats.avg(),
552 | }
553 |
554 | h.ReportProfile(type2name[thread], h.writeProfileDataToFile(buf, thread, eventID),
555 | reason, eventID, time.Now(), buf.Bytes(), scene)
556 |
557 | buf.Reset()
558 | _ = pprof.Lookup("goroutine").WriteTo(&buf, int(h.opts.DumpProfileType)) // nolint: errcheck
559 |
560 | h.ReportProfile(type2name[goroutine], h.writeProfileDataToFile(buf, goroutine, eventID),
561 | reason, eventID, time.Now(), buf.Bytes(), scene)
562 |
563 | return true
564 | }
565 |
566 | // thread end.
567 |
568 | // cpu start.
569 | func (h *Holmes) cpuCheckAndDump(cpu int) {
570 | cpuOpts := h.opts.GetCPUOpts()
571 | if !cpuOpts.Enable {
572 | return
573 | }
574 |
575 | if h.cpuCoolDownTime.After(time.Now()) {
576 | h.Debugf("[Holmes] cpu dump is in cooldown")
577 | return
578 | }
579 | // cpuOpts is a struct, no escape.
580 | if triggered := h.cpuProfile(cpu, cpuOpts); triggered {
581 | h.cpuCoolDownTime = time.Now().Add(cpuOpts.CoolDown)
582 | h.cpuTriggerCount++
583 | }
584 | }
585 |
586 | func (h *Holmes) cpuProfile(curCPUUsage int, c typeOption) bool {
587 | match, reason := matchRule(h.cpuStats, curCPUUsage, c.TriggerMin, c.TriggerAbs, c.TriggerDiff, NotSupportTypeMaxConfig)
588 | if !match {
589 | // let user know why this should not dump
590 | h.Infof(UniformLogFormat, "NODUMP", check2name[cpu],
591 | c.TriggerMin, c.TriggerDiff, c.TriggerAbs, NotSupportTypeMaxConfig,
592 | h.cpuStats.sequentialData(), curCPUUsage)
593 |
594 | return false
595 | }
596 |
597 | h.Alertf("holmes.cpu", UniformLogFormat, "pprof dump", check2name[cpu],
598 | c.TriggerMin, c.TriggerDiff, c.TriggerAbs, NotSupportTypeMaxConfig,
599 | h.cpuStats.sequentialData(), curCPUUsage)
600 |
601 | bf, binFileName, err := getBinaryFileNameAndCreate(h.opts.DumpPath, cpu, "")
602 | if err != nil {
603 | h.Errorf("[Holmes] failed to create cpu profile file: %v", err.Error())
604 | return false
605 | }
606 | defer bf.Close() // nolint: errcheck
607 |
608 | err = pprof.StartCPUProfile(bf)
609 | if err != nil {
610 | h.Errorf("[Holmes] failed to profile cpu: %v", err.Error())
611 | return false
612 | }
613 |
614 | time.Sleep(h.opts.CPUSamplingTime)
615 | pprof.StopCPUProfile()
616 |
617 | rptOpts, bfCpy := h.opts.GetReporterOpts(), []byte{}
618 | if h.opts.DumpToLogger || rptOpts.active == 1 {
619 | bfCpy, err = ioutil.ReadFile(binFileName)
620 | if err != nil {
621 | h.Errorf("encounter error when dumping profile to logger, failed to read cpu profile file: %v", err)
622 | return true
623 | }
624 | }
625 |
626 | if h.opts.DumpToLogger {
627 | h.Infof("[Holmes] CPU profile name : " + "::" + binFileName + " \n" + string(bfCpy))
628 | }
629 |
630 | scene := Scene{
631 | typeOption: c,
632 | CurVal: curCPUUsage,
633 | Avg: h.cpuStats.avg(),
634 | }
635 |
636 | if rptOpts.active == 1 {
637 | h.ReportProfile(type2name[cpu], binFileName,
638 | reason, "", time.Now(), bfCpy, scene)
639 | }
640 |
641 | return true
642 | }
643 |
644 | func (h *Holmes) gcHeapCheckLoop(ch chan struct{}) {
645 | for range ch {
646 | h.gcHeapCheckAndDump()
647 | }
648 | }
649 |
650 | func (h *Holmes) gcHeapCheckAndDump() {
651 | gcHeapOpts := h.opts.GetGcHeapOpts()
652 |
653 | if !gcHeapOpts.Enable || atomic.LoadInt64(&h.stopped) == 1 {
654 | return
655 | }
656 |
657 | memStats := new(runtime.MemStats)
658 | runtime.ReadMemStats(memStats)
659 |
660 | // TODO: we can only use NextGC for now since runtime haven't expose heapmarked yet
661 | // and we hard code the gcPercent is 100 here.
662 | // may introduce a new API debug.GCHeapMarked? it can also has better performance(no STW).
663 | nextGC := memStats.NextGC
664 | prevGC := nextGC / 2 //nolint:gomnd
665 |
666 | memoryLimit, err := h.getMemoryLimit()
667 | if memoryLimit == 0 || err != nil {
668 | h.Errorf("[Holmes] get memory limit failed, memory limit: %v, error: %v", memoryLimit, err)
669 | return
670 | }
671 |
672 | ratio := int(100 * float64(prevGC) / float64(memoryLimit))
673 | h.gcHeapStats.push(ratio)
674 |
675 | h.gcCycleCount++
676 | if h.gcCycleCount < minCollectCyclesBeforeDumpStart {
677 | // at least collect some cycles
678 | // before start to judge and dump
679 | h.Debugf("[Holmes] GC cycle warming up : %d", h.gcCycleCount)
680 | return
681 | }
682 |
683 | if h.gcHeapCoolDownTime.After(time.Now()) {
684 | h.Debugf("[Holmes] GC heap dump is in cooldown")
685 | return
686 | }
687 |
688 | if triggered := h.gcHeapProfile(ratio, h.gcHeapTriggered, gcHeapOpts); triggered {
689 | if h.gcHeapTriggered {
690 | // already dump twice, mark it false
691 | h.gcHeapTriggered = false
692 | h.gcHeapCoolDownTime = time.Now().Add(gcHeapOpts.CoolDown)
693 | h.gcHeapTriggerCount++
694 | } else {
695 | // force dump next time
696 | h.gcHeapTriggered = true
697 | }
698 | }
699 | }
700 |
701 | func (h *Holmes) getCPUCore() (float64, error) {
702 | if h.opts.cpuCore > 0 {
703 | return h.opts.cpuCore, nil
704 | }
705 |
706 | if h.opts.UseGoProcAsCPUCore {
707 | return float64(runtime.GOMAXPROCS(-1)), nil
708 | }
709 |
710 | if h.opts.UseCGroup {
711 | return getCGroupCPUCore()
712 | }
713 |
714 | return float64(runtime.NumCPU()), nil
715 | }
716 |
717 | func (h *Holmes) getMemoryLimit() (uint64, error) {
718 | if h.opts.memoryLimit > 0 {
719 | return h.opts.memoryLimit, nil
720 | }
721 |
722 | if h.opts.UseCGroup {
723 | return getCGroupMemoryLimit()
724 | }
725 |
726 | return getNormalMemoryLimit()
727 | }
728 |
729 | // gcHeapProfile will dump profile twice when triggered once.
730 | // since the current memory profile will be merged after next GC cycle.
731 | // And we assume the finalizer will be called before next GC cycle(it will be usually).
732 | func (h *Holmes) gcHeapProfile(gc int, force bool, c typeOption) bool {
733 | match, reason := matchRule(h.gcHeapStats, gc, c.TriggerMin, c.TriggerAbs, c.TriggerDiff, NotSupportTypeMaxConfig)
734 | if !force && !match {
735 | // let user know why this should not dump
736 | h.Infof(UniformLogFormat, "NODUMP", check2name[gcHeap],
737 | c.TriggerMin, c.TriggerDiff, c.TriggerAbs,
738 | NotSupportTypeMaxConfig,
739 | h.gcHeapStats.sequentialData(), gc)
740 |
741 | return false
742 | }
743 |
744 | h.Alertf("holmes.gcheap", UniformLogFormat, "pprof", check2name[gcHeap],
745 | c.TriggerMin, c.TriggerDiff, c.TriggerAbs,
746 | NotSupportTypeMaxConfig, h.gcHeapStats, gc)
747 |
748 | // gcHeapTriggerCount only increased after got both two profiles
749 | eventID := fmt.Sprintf("heap-%d", h.gcHeapTriggerCount)
750 |
751 | var buf bytes.Buffer
752 | _ = pprof.Lookup("heap").WriteTo(&buf, int(h.opts.DumpProfileType)) // nolint: errcheck
753 |
754 | scene := Scene{
755 | typeOption: c,
756 | CurVal: gc,
757 | Avg: h.gcHeapStats.avg(),
758 | }
759 |
760 | h.ReportProfile(type2name[gcHeap], h.writeProfileDataToFile(buf, gcHeap, eventID),
761 | reason, eventID, time.Now(), buf.Bytes(), scene)
762 | return true
763 | }
764 |
765 | func (h *Holmes) writeProfileDataToFile(data bytes.Buffer, dumpType configureType, eventID string) string {
766 | fileName, err := writeFile(data, dumpType, h.opts.DumpOptions, eventID)
767 | if err != nil {
768 | h.Errorf("failed to write profile to file(%v), err: %s", fileName, err.Error())
769 | return ""
770 | }
771 |
772 | if h.opts.DumpOptions.DumpToLogger {
773 | h.Infof(fmt.Sprintf("[Holmes] %v profile: \n", check2name[dumpType]) + data.String())
774 | }
775 |
776 | h.Infof("[Holmes] pprof %v profile write to file %v successfully", check2name[dumpType], fileName)
777 | return fileName
778 | }
779 |
780 | func (h *Holmes) initEnvironment() {
781 | // whether the max memory is limited by cgroup
782 | if h.opts.UseCGroup {
783 | h.Infof("[Holmes] use cgroup to limit memory")
784 | } else {
785 | h.Infof("[Holmes] use the default memory percent calculated by gopsutil")
786 | }
787 | }
788 |
789 | func (h *Holmes) EnableDump(curCPU int) (err error) {
790 | if h.opts.CPUMaxPercent != 0 && curCPU >= h.opts.CPUMaxPercent {
791 | return fmt.Errorf("current cpu percent [%v] is greater than the CPUMaxPercent [%v]", curCPU, h.opts.CPUMaxPercent)
792 | }
793 | return nil
794 | }
795 |
796 | // Set sets holmes's optional after initialing.
797 | func (h *Holmes) Set(opts ...Option) error {
798 | h.opts.L.Lock()
799 | defer h.opts.L.Unlock()
800 |
801 | for _, opt := range opts {
802 | if err := opt.apply(h.opts); err != nil {
803 | return err
804 | }
805 | }
806 | return nil
807 | }
808 |
809 | func (h *Holmes) DisableProfileReporter() {
810 | atomic.StoreInt32(&h.opts.rptOpts.active, 0)
811 | }
812 |
813 | func (h *Holmes) EnableProfileReporter() {
814 | opt := h.opts.GetReporterOpts()
815 | if opt.reporter == nil {
816 | h.Infof("failed to enable profile reporter since reporter is empty")
817 | return
818 | }
819 | atomic.StoreInt32(&h.opts.rptOpts.active, 1)
820 | }
821 |
822 | func (h *Holmes) ReportProfile(pType string, filename string, reason ReasonType, eventID string, sampleTime time.Time, pprofBytes []byte, scene Scene) {
823 | if filename == "" {
824 | h.Errorf("dump name is empty, type:%s, reason:%s, eventID:%s", pType, reason.String(), eventID)
825 | return
826 | }
827 |
828 | defer func() {
829 | if r := recover(); r != nil {
830 | h.Errorf("Panic during report profile: %v", r)
831 | }
832 | }()
833 |
834 | if atomic.LoadInt64(&h.stopped) == 1 {
835 | return
836 | }
837 |
838 | opts := h.opts.GetReporterOpts()
839 | if opts.active == 0 {
840 | return
841 | }
842 |
843 | msg := rptEvent{
844 | PType: pType,
845 | FileName: filename,
846 | Reason: reason,
847 | EventID: eventID,
848 | SampleTime: sampleTime,
849 | PprofBytes: pprofBytes,
850 | Scene: scene,
851 | }
852 |
853 | // read channel should be atomic.
854 | ch := h.rptEventsCh
855 | if ch == nil {
856 | return
857 | }
858 | // Notice: here may be a litte race, will panic when ch is closed now.
859 | // we just leave it since it is very small and there is a recover.
860 | select {
861 | case ch <- msg:
862 | default:
863 | h.Warnf("reporter channel is full, will ignore it")
864 | }
865 | }
866 |
867 | // startReporter starts a background goroutine to consume event channel,
868 | // and finish it at after receive from cancel channel.
869 | func (h *Holmes) startReporter(ch chan rptEvent) {
870 | go func() {
871 | for evt := range ch {
872 | opts := h.opts.GetReporterOpts()
873 | if opts.reporter == nil {
874 | h.Infof("reporter is nil, please initial it before startReporter")
875 | // drop the event
876 | continue
877 | }
878 |
879 | // It's supposed to be sending judgment, isn't it?
880 | err := opts.reporter.Report(evt.PType, evt.FileName, evt.Reason, evt.EventID, evt.SampleTime, evt.PprofBytes, evt.Scene) // nolint: errcheck
881 | if err != nil {
882 | h.Infof("reporter err:%v", err)
883 |
884 | }
885 | }
886 | }()
887 | }
888 |
--------------------------------------------------------------------------------
/holmes_test.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package holmes
19 |
20 | import (
21 | "log"
22 | "os"
23 | "runtime"
24 | "testing"
25 | "time"
26 | )
27 |
// h is the Holmes instance shared by all tests in this file; it is assigned in
// TestMain before the tests run.
var h *Holmes
29 |
30 | func TestMain(m *testing.M) {
31 | log.Println("holmes initialing")
32 | h, _ = New(
33 | WithCollectInterval("1s"),
34 | WithTextDump(),
35 | WithGoroutineDump(10, 25, 80, 90, time.Minute),
36 | )
37 | log.Println("holmes initial success")
38 | h.EnableGoroutineDump().Start()
39 | time.Sleep(10 * time.Second)
40 | log.Println("on running")
41 | os.Exit(m.Run())
42 | }
43 |
44 | // -gcflags=all=-l
45 | func TestResetCollectInterval(t *testing.T) {
46 | before := h.collectCount
47 | go func() {
48 | h.Set(WithCollectInterval("2s")) //nolint:errcheck
49 | defer h.Set(WithCollectInterval("1s")) //nolint:errcheck
50 | time.Sleep(6 * time.Second)
51 | // if collect interval not change, collectCount would increase 5 at least
52 | if h.collectCount-before >= 5 {
53 | log.Fatalf("fail, before %v, now %v", before, h.collectCount)
54 | }
55 | }()
56 | time.Sleep(8 * time.Second)
57 | }
58 |
59 | func TestSetGrOpts(t *testing.T) {
60 | // decrease min trigger, if our set api is effective,
61 | // gr profile would be trigger and grCoolDown increase.
62 | min, diff, abs := 3, 10, 1
63 | before := h.grCoolDownTime
64 |
65 | err := h.Set(
66 | WithGoroutineDump(min, diff, abs, 90, time.Minute))
67 | if err != nil {
68 | log.Fatalf("fail to set opts on running time.")
69 | }
70 |
71 | time.Sleep(5 * time.Second)
72 | if before.Equal(h.grCoolDownTime) {
73 | log.Fatalf("fail")
74 | }
75 | }
76 |
77 | func TestCpuCore(t *testing.T) {
78 | _ = h.Set(
79 | WithCGroup(false),
80 | WithGoProcAsCPUCore(false),
81 | )
82 | cpuCore1, _ := h.getCPUCore()
83 | goProc1 := runtime.GOMAXPROCS(-1)
84 |
85 | // system cpu core matches go procs
86 | if cpuCore1 != float64(goProc1) {
87 | log.Fatalf("cpuCore1 %v not equal goProc1 %v", cpuCore1, goProc1)
88 | }
89 |
90 | // go proc = system cpu core + 1
91 | runtime.GOMAXPROCS(goProc1 + 1)
92 |
93 | cpuCore2, _ := h.getCPUCore()
94 | goProc2 := runtime.GOMAXPROCS(-1)
95 | if cpuCore2 != float64(goProc2)-1 {
96 | log.Fatalf("cpuCore2 %v not equal goProc2-1 %v", cpuCore2, goProc2)
97 | }
98 |
99 | // set cpu core directly
100 | _ = h.Set(
101 | WithCPUCore(cpuCore1 + 5),
102 | )
103 |
104 | cpuCore3, _ := h.getCPUCore()
105 | if cpuCore3 != cpuCore1+5 {
106 | log.Fatalf("cpuCore3 %v not equal cpuCore1+5 %v", cpuCore3, cpuCore1+5)
107 | }
108 | }
109 |
// createThread starts n goroutines; each locks itself to its OS thread, sleeps
// for blockTime and then unlocks the thread again. It does not wait for the
// goroutines to finish.
func createThread(n int, blockTime time.Duration) {
	holdThread := func() {
		runtime.LockOSThread()
		defer runtime.UnlockOSThread()

		time.Sleep(blockTime)
	}

	for started := 0; started < n; started++ {
		go holdThread()
	}
}
120 |
121 | func TestWithShrinkThread(t *testing.T) {
122 | before := h.shrinkThreadTriggerCount
123 |
124 | err := h.Set(
125 | // delay 5 seconds, after the 50 threads unlocked
126 | WithThreadDump(10, 10, 10, time.Minute),
127 | WithShrinkThread(20, time.Second*5),
128 | WithCollectInterval("1s"),
129 | )
130 | h.EnableShrinkThread()
131 | if err != nil {
132 | log.Fatalf("fail to set opts on running time.")
133 | }
134 |
135 | threadNum1 := getThreadNum()
136 | // 50 threads exists 3 seconds
137 | createThread(50, time.Second*3)
138 |
139 | time.Sleep(time.Second)
140 | threadNum2 := getThreadNum()
141 | if threadNum2-threadNum1 < 40 {
142 | log.Fatalf("create thread failed, before: %v, now: %v", threadNum1, threadNum2)
143 | }
144 | log.Printf("created 50 threads, before: %v, now: %v", threadNum1, threadNum2)
145 |
146 | time.Sleep(10 * time.Second)
147 |
148 | if before+1 != h.shrinkThreadTriggerCount {
149 | log.Fatalf("shrink thread not triggered, before: %v, now: %v", before, h.shrinkThreadTriggerCount)
150 | }
151 |
152 | threadNum3 := getThreadNum()
153 | if threadNum2-threadNum3 < 30 {
154 | log.Fatalf("shrink thread failed, before: %v, now: %v", threadNum2, threadNum3)
155 | }
156 |
157 | h.DisableShrinkThread()
158 | }
159 |
--------------------------------------------------------------------------------
/log.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package holmes
19 |
20 | import (
21 | mlog "mosn.io/pkg/log"
22 | )
23 |
24 | func (h *Holmes) getLogger() mlog.ErrorLogger {
25 | h.opts.L.RLock()
26 | defer h.opts.L.RUnlock()
27 | return h.opts.logger
28 | }
29 |
30 | func (h *Holmes) Debugf(format string, args ...interface{}) {
31 | logger := h.getLogger()
32 | if logger == nil {
33 | return
34 | }
35 | logger.Debugf(format, args...)
36 | }
37 |
38 | func (h *Holmes) Infof(format string, args ...interface{}) {
39 | logger := h.getLogger()
40 | if logger == nil {
41 | return
42 | }
43 | logger.Infof(format, args...)
44 | }
45 |
46 | func (h *Holmes) Warnf(format string, args ...interface{}) {
47 | logger := h.getLogger()
48 | if logger == nil {
49 | return
50 | }
51 | logger.Warnf(format, args...)
52 | }
53 |
54 | func (h *Holmes) Errorf(format string, args ...interface{}) {
55 | logger := h.getLogger()
56 | if logger == nil {
57 | return
58 | }
59 | logger.Errorf(format, args...)
60 | }
61 |
62 | func (h *Holmes) Alertf(alert string, format string, args ...interface{}) {
63 | logger := h.getLogger()
64 | if logger == nil {
65 | return
66 | }
67 | logger.Alertf(alert, format, args...)
68 | }
69 |
70 | // NewStdLogger create an ErrorLogger interface value that writing to os.Stdout
71 | func NewStdLogger() mlog.ErrorLogger {
72 | logger, _ := mlog.GetOrCreateLogger("stdout", nil)
73 | return &mlog.SimpleErrorLog{
74 | Logger: logger,
75 | Level: mlog.DEBUG,
76 | }
77 | }
78 |
79 | func NewFileLog(path string, level mlog.Level) mlog.ErrorLogger {
80 | logger, _ := mlog.GetOrCreateLogger(path, nil)
81 | return &mlog.SimpleErrorLog{
82 | Logger: logger,
83 | Level: level,
84 | }
85 | }
86 |
--------------------------------------------------------------------------------
/options.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package holmes
19 |
20 | import (
21 | "sync"
22 | "sync/atomic"
23 | "time"
24 |
25 | mlog "mosn.io/pkg/log"
26 | )
27 |
// options holds the whole holmes configuration. Fields are changed through
// Option values; L guards concurrent access.
type options struct {
	// logger receives all holmes log output; nil disables logging.
	logger mlog.ErrorLogger

	UseGoProcAsCPUCore bool // use the go max procs number as the CPU core number when it's true
	UseCGroup          bool // use the CGroup to calc cpu/memory when it's true

	// overwrite the system level memory limitation when > 0.
	memoryLimit uint64
	// overwrite the system level CPU core number when > 0.
	cpuCore float64

	*ShrinkThrOptions

	*DumpOptions

	// interval for dump loop, default 5s
	CollectInterval time.Duration
	// intervalResetting is signalled when CollectInterval has been changed.
	intervalResetting chan struct{}

	// if current cpu usage percent is greater than CPUMaxPercent,
	// holmes would not dump any type of profile, because doing so
	// may crash the system.
	CPUMaxPercent int

	// cpu sampling time
	CPUSamplingTime time.Duration

	// if the write lock is held, holmes's
	// configuration is being modified.
	L *sync.RWMutex

	// the cooldown time after every type of dump
	// interval for cooldown,default 1m
	// each check type has its own cooldown

	grOpts *grOptions

	memOpts    *typeOption
	gCHeapOpts *typeOption
	cpuOpts    *typeOption
	threadOpts *typeOption

	// profile reporter
	rptOpts *ReporterOptions
}
72 |
// ReporterOptions holds the profile reporter and its on/off switch.
type ReporterOptions struct {
	reporter ProfileReporter
	active   int32 // switch: 1 means reporting is enabled, 0 disabled
}
77 |
78 | // newReporterOpts returns ReporterOptions。
79 | func newReporterOpts() *ReporterOptions {
80 | opts := &ReporterOptions{}
81 |
82 | return opts
83 | }
84 |
// DumpOptions contains configuration about dump file.
type DumpOptions struct {
	// full path to put the profile files, default /tmp
	DumpPath string
	// profile output format; see WithBinaryDump and WithTextDump
	DumpProfileType dumpProfileType
	// only dump top 10 if set to false, otherwise dump all, only effective in text dump mode
	DumpFullStack bool
	// dump profile to logger. Enabling DumpToLogger produces huge log output. issues/90
	DumpToLogger bool
}
96 |
// ShrinkThrOptions contains the configuration about shrink thread
type ShrinkThrOptions struct {
	// shrink the thread number when it exceeds the max threshold that is specified in Threshold
	Enable    bool
	Threshold int
	Delay     time.Duration // start to shrink thread after the delay time.
}
104 |
105 | // GetReporterOpts returns a copy of rptOpts.
106 | func (o *options) GetReporterOpts() ReporterOptions {
107 | o.L.RLock()
108 | defer o.L.RUnlock()
109 | return *o.rptOpts
110 | }
111 |
112 | // GetShrinkThreadOpts return a copy of ShrinkThrOptions.
113 | func (o *options) GetShrinkThreadOpts() ShrinkThrOptions {
114 | o.L.RLock()
115 | defer o.L.RUnlock()
116 | return *o.ShrinkThrOptions
117 | }
118 |
119 | // GetMemOpts return a copy of typeOption.
120 | func (o *options) GetMemOpts() typeOption {
121 | o.L.RLock()
122 | defer o.L.RUnlock()
123 | return *o.memOpts
124 | }
125 |
126 | // GetCPUOpts return a copy of typeOption
127 | // if cpuOpts not exist return a empty typeOption and false.
128 | func (o *options) GetCPUOpts() typeOption {
129 | o.L.RLock()
130 | defer o.L.RUnlock()
131 | return *o.cpuOpts
132 | }
133 |
134 | // GetGrOpts return a copy of grOptions
135 | // if grOpts not exist return a empty grOptions and false.
136 | func (o *options) GetGrOpts() grOptions {
137 | o.L.RLock()
138 | defer o.L.RUnlock()
139 | return *o.grOpts
140 | }
141 |
142 | // GetThreadOpts return a copy of typeOption
143 | // if threadOpts not exist return a empty typeOption and false.
144 | func (o *options) GetThreadOpts() typeOption {
145 | o.L.RLock()
146 | defer o.L.RUnlock()
147 | return *o.threadOpts
148 | }
149 |
150 | // GetGcHeapOpts return a copy of typeOption
151 | // if gCHeapOpts not exist return a empty typeOption and false.
152 | func (o *options) GetGcHeapOpts() typeOption {
153 | o.L.RLock()
154 | defer o.L.RUnlock()
155 | return *o.gCHeapOpts
156 | }
157 |
// Option is a single holmes configuration change. It is applied to the
// options by New-style constructors and by Holmes.Set.
type Option interface {
	// apply mutates the given options and reports any validation error.
	apply(*options) error
}
162 |
// optionFunc adapts a plain function to the Option interface.
type optionFunc func(*options) error

// apply calls f with opts and returns its result.
func (f optionFunc) apply(opts *options) error {
	return f(opts)
}
168 |
169 | func newOptions() *options {
170 | o := &options{
171 | logger: NewStdLogger(),
172 | grOpts: newGrOptions(),
173 | memOpts: newMemOptions(),
174 | gCHeapOpts: newGCHeapOptions(),
175 | cpuOpts: newCPUOptions(),
176 | threadOpts: newThreadOptions(),
177 | CollectInterval: defaultInterval,
178 | intervalResetting: make(chan struct{}, 1),
179 | CPUSamplingTime: defaultCPUSamplingTime,
180 | DumpOptions: &DumpOptions{
181 | DumpPath: defaultDumpPath,
182 | DumpProfileType: defaultDumpProfileType,
183 | DumpFullStack: false,
184 | },
185 | ShrinkThrOptions: &ShrinkThrOptions{
186 | Enable: false,
187 | },
188 | L: &sync.RWMutex{},
189 | rptOpts: newReporterOpts(),
190 | }
191 | return o
192 | }
193 |
194 | // WithLogger set the logger
195 | // logger can be created by: NewFileLog("/path/to/log/file", level)
196 | func WithLogger(logger mlog.ErrorLogger) Option {
197 | return optionFunc(func(opts *options) (err error) {
198 | opts.logger = logger
199 | return
200 | })
201 | }
202 |
203 | // WithDumpPath set the dump path for holmes.
204 | func WithDumpPath(dumpPath string) Option {
205 | return optionFunc(func(opts *options) (err error) {
206 | opts.DumpPath = dumpPath
207 | return
208 | })
209 | }
210 |
211 | // WithCollectInterval : interval must be valid time duration string,
212 | // eg. "ns", "us" (or "µs"), "ms", "s", "m", "h".
213 | func WithCollectInterval(interval string) Option {
214 | return optionFunc(func(opts *options) (err error) {
215 | // CollectInterval wouldn't be zero value, because it
216 | // will be initialized as defaultInterval at newOptions()
217 | newInterval, err := time.ParseDuration(interval)
218 | if err != nil || opts.CollectInterval.Seconds() == newInterval.Seconds() {
219 | return
220 | }
221 |
222 | opts.CollectInterval = newInterval
223 | opts.intervalResetting <- struct{}{}
224 |
225 | return
226 | })
227 | }
228 |
229 | // WithCPUMax : set the CPUMaxPercent parameter as max
230 | func WithCPUMax(max int) Option {
231 | return optionFunc(func(opts *options) (err error) {
232 | opts.CPUMaxPercent = max
233 | return
234 | })
235 | }
236 |
237 | // WithCPUSamplingTime set cpu sampling time
238 | func WithCPUSamplingTime(duration string) Option {
239 | return optionFunc(func(opts *options) (err error) {
240 | // CPUSamplingTime wouldn't be zero value, because it
241 | // will be initialized as defaultCPUSamplingTime at newOptions()
242 | newDuration, err := time.ParseDuration(duration)
243 | if err != nil {
244 | return
245 | }
246 |
247 | if newDuration <= 0 {
248 | newDuration = defaultCPUSamplingTime
249 | }
250 |
251 | opts.CPUSamplingTime = newDuration
252 |
253 | return
254 | })
255 | }
256 |
257 | // WithBinaryDump set dump mode to binary.
258 | func WithBinaryDump() Option {
259 | return withDumpProfileType(binaryDump)
260 | }
261 |
262 | // WithTextDump set dump mode to text.
263 | func WithTextDump() Option {
264 | return withDumpProfileType(textDump)
265 | }
266 |
267 | // WithFullStack set to dump full stack or top 10 stack, when dump in text mode.
268 | func WithFullStack(isFull bool) Option {
269 | return optionFunc(func(opts *options) (err error) {
270 | opts.DumpFullStack = isFull
271 | return
272 | })
273 | }
274 |
275 | func withDumpProfileType(profileType dumpProfileType) Option {
276 | return optionFunc(func(opts *options) (err error) {
277 | opts.DumpProfileType = profileType
278 | return
279 | })
280 | }
281 |
// grOptions holds the goroutine dump configuration: the common trigger
// settings plus an upper bound on the goroutine number.
type grOptions struct {
	// enable the goroutine dumper, should dump if one of the following requirements is matched
	//   1. goroutine_num > TriggerMin && goroutine_num < GoroutineTriggerNumMax && goroutine diff percent > TriggerDiff
	//   2. goroutine_num > TriggerAbs && goroutine_num < GoroutineTriggerNumMax
	*typeOption
	GoroutineTriggerNumMax int // goroutine trigger max in number
}
289 |
290 | func newGrOptions() *grOptions {
291 | base := newTypeOpts(
292 | defaultGoroutineTriggerMin,
293 | defaultGoroutineTriggerAbs,
294 | defaultGoroutineTriggerDiff,
295 | defaultGoroutineCoolDown,
296 | )
297 | return &grOptions{typeOption: base}
298 | }
299 |
300 | // WithGoroutineDump set the goroutine dump options.
301 | func WithGoroutineDump(min int, diff int, abs int, max int, coolDown time.Duration) Option {
302 | return optionFunc(func(opts *options) (err error) {
303 | opts.grOpts.Set(min, abs, diff, coolDown)
304 | opts.grOpts.GoroutineTriggerNumMax = max
305 | return
306 | })
307 | }
308 |
309 | func WithDumpToLogger(new bool) Option {
310 | return optionFunc(func(opts *options) (err error) {
311 | opts.DumpToLogger = new
312 | return
313 | })
314 | }
315 |
// typeOption is the trigger configuration shared by all check types
// (mem, cpu, gcheap, goroutine, thread).
type typeOption struct {
	// Enable turns the check for this type on or off.
	Enable bool
	// mem/cpu/gcheap trigger minimum in percent, goroutine/thread trigger minimum in number
	TriggerMin int

	// mem/cpu/gcheap trigger abs in percent, goroutine/thread trigger abs in number
	TriggerAbs int

	// mem/cpu/gcheap/goroutine/thread trigger diff in percent
	TriggerDiff int

	// CoolDown is the time during which profiling is skipped after a profile was taken
	CoolDown time.Duration
}
330 |
331 | func newTypeOpts(triggerMin, triggerAbs, triggerDiff int, coolDown time.Duration) *typeOption {
332 | return &typeOption{
333 | Enable: false,
334 | TriggerMin: triggerMin,
335 | TriggerAbs: triggerAbs,
336 | TriggerDiff: triggerDiff,
337 | CoolDown: coolDown,
338 | }
339 | }
340 |
341 | func (base *typeOption) Set(min, abs, diff int, coolDown time.Duration) {
342 | base.TriggerMin, base.TriggerAbs, base.TriggerDiff, base.CoolDown = min, abs, diff, coolDown
343 | }
344 |
345 | // newMemOptions
346 | // enable the heap dumper, should dump if one of the following requirements is matched
347 | // 1. memory usage > TriggerMin && memory usage diff > TriggerDiff
348 | // 2. memory usage > TriggerAbs.
349 | func newMemOptions() *typeOption {
350 | return newTypeOpts(
351 | defaultMemTriggerMin,
352 | defaultMemTriggerAbs,
353 | defaultMemTriggerDiff,
354 | defaultCooldown,
355 | )
356 | }
357 |
358 | // WithMemDump set the memory dump options.
359 | func WithMemDump(min int, diff int, abs int, coolDown time.Duration) Option {
360 | return optionFunc(func(opts *options) (err error) {
361 | opts.memOpts.Set(min, abs, diff, coolDown)
362 | return
363 | })
364 | }
365 |
366 | // newGCHeapOptions
367 | // enable the heap dumper, should dump if one of the following requirements is matched
368 | // 1. GC heap usage > TriggerMin && GC heap usage diff > TriggerDiff
369 | // 2. GC heap usage > TriggerAbs
370 | //
371 | // in percent.
372 | func newGCHeapOptions() *typeOption {
373 | return newTypeOpts(
374 | defaultGCHeapTriggerMin,
375 | defaultGCHeapTriggerAbs,
376 | defaultGCHeapTriggerDiff,
377 | defaultCooldown,
378 | )
379 | }
380 |
381 | // WithGCHeapDump set the GC heap dump options.
382 | func WithGCHeapDump(min int, diff int, abs int, coolDown time.Duration) Option {
383 | return optionFunc(func(opts *options) (err error) {
384 | opts.gCHeapOpts.Set(min, abs, diff, coolDown)
385 | return
386 | })
387 | }
388 |
389 | // WithCPUCore overwrite the system level CPU core number when it > 0.
390 | // it's not a good idea to modify it on fly since it affects the CPU percent caculation.
391 | func WithCPUCore(cpuCore float64) Option {
392 | return optionFunc(func(opts *options) (err error) {
393 | opts.cpuCore = cpuCore
394 | return
395 | })
396 | }
397 |
398 | // WithMemoryLimit overwrite the system level memory limit when it > 0.
399 | func WithMemoryLimit(limit uint64) Option {
400 | return optionFunc(func(opts *options) (err error) {
401 | opts.memoryLimit = limit
402 | return
403 | })
404 | }
405 |
406 | func newThreadOptions() *typeOption {
407 | return newTypeOpts(
408 | defaultThreadTriggerMin,
409 | defaultThreadTriggerAbs,
410 | defaultThreadTriggerDiff,
411 | defaultThreadCoolDown,
412 | )
413 | }
414 |
415 | // WithThreadDump set the thread dump options.
416 | func WithThreadDump(min, diff, abs int, coolDown time.Duration) Option {
417 | return optionFunc(func(opts *options) (err error) {
418 | opts.threadOpts.Set(min, abs, diff, coolDown)
419 | return
420 | })
421 | }
422 |
423 | // newCPUOptions
424 | // enable the cpu dumper, should dump if one of the following requirements is matched
425 | // in percent
426 | // 1. cpu usage > CPUTriggerMin && cpu usage diff > CPUTriggerDiff
427 | // 2. cpu usage > CPUTriggerAbs
428 | //
429 | // in percent.
430 | func newCPUOptions() *typeOption {
431 | return newTypeOpts(
432 | defaultCPUTriggerMin,
433 | defaultCPUTriggerAbs,
434 | defaultCPUTriggerDiff,
435 | defaultCooldown,
436 | )
437 | }
438 |
439 | // WithCPUDump set the cpu dump options.
440 | func WithCPUDump(min int, diff int, abs int, coolDown time.Duration) Option {
441 | return optionFunc(func(opts *options) (err error) {
442 | opts.cpuOpts.Set(min, abs, diff, coolDown)
443 | return
444 | })
445 | }
446 |
447 | // WithGoProcAsCPUCore set holmes use cgroup or not.
448 | func WithGoProcAsCPUCore(enabled bool) Option {
449 | return optionFunc(func(opts *options) (err error) {
450 | opts.UseGoProcAsCPUCore = enabled
451 | return
452 | })
453 | }
454 |
455 | // WithCGroup set holmes use cgroup or not.
456 | // Use CGroup are best used when resource limits are set.
457 | // refer to: https://github.com/mosn/holmes/issues/135
458 | func WithCGroup(useCGroup bool) Option {
459 | return optionFunc(func(opts *options) (err error) {
460 | opts.UseCGroup = useCGroup
461 | return
462 | })
463 | }
464 |
465 | // WithShrinkThread enable/disable shrink thread when the thread number exceed the max threshold.
466 | func WithShrinkThread(threshold int, delay time.Duration) Option {
467 | return optionFunc(func(opts *options) (err error) {
468 | if threshold > 0 {
469 | opts.ShrinkThrOptions.Threshold = threshold
470 | }
471 | opts.ShrinkThrOptions.Delay = delay
472 | return
473 | })
474 | }
475 |
476 | // WithProfileReporter will enable reporter
477 | // reopens profile reporter through WithProfileReporter(h.opts.rptOpts.reporter)
478 | func WithProfileReporter(r ProfileReporter) Option {
479 | return optionFunc(func(opts *options) (err error) {
480 | if r == nil {
481 | return nil
482 | }
483 |
484 | opts.rptOpts.reporter = r
485 | atomic.StoreInt32(&opts.rptOpts.active, 1)
486 | return
487 | })
488 | }
489 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | 
2 |
3 | * [Holmes](#holmes)
4 | * [Design](#design)
5 | * [How to use](#how-to-use)
6 | * [Dump goroutine when goroutine number spikes](#dump-goroutine-when-goroutine-number-spikes)
7 | * [dump cpu profile when cpu load spikes](#dump-cpu-profile-when-cpu-load-spikes)
8 | * [dump heap profile when RSS spikes](#dump-heap-profile-when-rss-spikes)
9 | * [Dump heap profile when RSS spikes based GC cycle](#dump-heap-profile-when-rss-spikes-based-gc-cycle)
10 | * [Set holmes configurations on fly](#set-holmes-configurations-on-fly)
11 | * [Reporter dump event](#reporter-dump-event)
12 | * [Enable them all\!](#enable-them-all)
13 | * [Running in docker or other cgroup limited environment](#running-in-docker-or-other-cgroup-limited-environment)
14 | * [known risks](#known-risks)
15 | * [Show cases](#show-cases)
16 |
17 | # Holmes
18 | [中文版](./doc/zh.md)
19 |
20 | Self-aware Golang profile dumper.
21 |
22 | Our online system often crashes at midnight (usually killed by the OS due to OOM).
23 | As lazy developers, we don't want to be woken up at midnight and wait for the online error to recur.
24 |
25 | holmes comes to rescue.
26 |
27 | ## Design
28 |
29 | Holmes collects the following stats once every collect interval:
30 |
31 | * Goroutine number by `runtime.NumGoroutine`.
32 | * RSS used by the current process with [gopsutil](https://github.com/shirou/gopsutil)
33 | * CPU usage as a percentage of the total, e.g. using 4 cores out of 8 = 50%, with [gopsutil](https://github.com/shirou/gopsutil)
34 |
35 | In addition, holmes will collect `RSS` based on GC cycle, if you enable `GC heap`.
36 |
37 | After the warm-up phase has finished (10 collections after the application starts),
38 | Holmes will compare the current stats with the average
39 | of the previously collected stats (10 cycles). If the dump rule is matched, Holmes will dump
40 | the related profile to log (text mode) or binary file (binary mode).
41 |
42 | When you get warning messages sent by your own monitoring system, e.g. memory usage exceeds 80%,
43 | OOM killed, CPU usage exceeds 80%, goroutine number exceeds 100k, the profile is already dumped
44 | to your dump path. You can just fetch the profile and see what actually happened without pressure.
45 |
46 |
47 | ## How to use
48 |
49 | ```shell
50 | go get mosn.io/holmes
51 | ```
52 |
53 | ### Dump goroutine when goroutine number spikes
54 |
55 | ```go
56 | h, _ := holmes.New(
57 | holmes.WithCollectInterval("5s"),
58 | holmes.WithDumpPath("/tmp"),
59 | holmes.WithTextDump(),
60 | holmes.WithDumpToLogger(true),
61 | holmes.WithGoroutineDump(10, 25, 2000, 100*1000,time.Minute),
62 | )
63 | h.EnableGoroutineDump()
64 |
65 | // start the metrics collect and dump loop
66 | h.Start()
67 |
68 | // stop the dumper
69 | h.Stop()
70 | ```
71 |
72 | * WithCollectInterval("5s") means the system metrics are collected once 5 seconds
73 | * WithDumpPath("/tmp") means the dump binary file(binary mode) will write content to `/tmp` dir.
74 | * WithTextDump() means not in binary mode, so it's text mode profiles
75 | * WithDumpToLogger() means profiles information will be outputted to logger.
76 | * WithGoroutineDump(10, 25, 2000, 100*1000,time.Minute) means dump will happen when current_goroutine_num > 10 &&
77 | current_goroutine_num < `100*1000` && current_goroutine_num > `125%` * previous_average_goroutine_num or current_goroutine_num > `2000`,
78 | `time.Minute` means once a dump happened, the next dump will not happen before the cooldown
79 | (1 minute) has finished.
80 | > WithGoroutineDump(min int, diff int, abs int, max int, coolDown time.Duration)
81 | > 100*1000 is the max goroutine number: when the current goroutine number is greater than 100k, holmes will not
82 | > dump the goroutine profile, because with a huge number of goroutines (e.g. 100k) the dump itself becomes a
83 | > heavy action: stw && stack dump. Max = 0 means no limit.
84 |
85 | ### dump cpu profile when cpu load spikes
86 |
87 | ```go
88 | h, _ := holmes.New(
89 | holmes.WithCollectInterval("5s"),
90 | holmes.WithDumpPath("/tmp"),
91 | holmes.WithCPUDump(20, 25, 80, time.Minute),
92 | holmes.WithCPUMax(90),
93 | )
94 | h.EnableCPUDump()
95 |
96 | // start the metrics collect and dump loop
97 | h.Start()
98 |
99 | // stop the dumper
100 | h.Stop()
101 | ```
102 |
103 | * WithCollectInterval("5s") means the system metrics are collected once 5 seconds
104 | * WithDumpPath("/tmp") means the dump binary file(binary mode) will write content to `/tmp` dir.
105 | * WithBinaryDump() or WithTextDump() doesn't affect the CPU profile dump, because the pprof
106 | standard library doesn't support text mode dump.
107 | * WithCPUDump(20, 25, 80,time.Minute) means dump will happen when cpu usage > `20%` &&
108 | cpu usage > `125%` * previous cpu usage recorded or cpu usage > `80%`.
109 | `time.Minute` means once a dump happened, the next dump will not happen before
110 | the cooldown (1 minute) has finished.
111 | * WithCPUMax means holmes will not dump any type of profile when the current cpu
112 | usage percent is greater than CPUMaxPercent.
113 |
114 | ### dump heap profile when RSS spikes
115 |
116 | ```go
117 | h, _ := holmes.New(
118 | holmes.WithCollectInterval("5s"),
119 | holmes.WithDumpPath("/tmp"),
120 | holmes.WithTextDump(),
121 | holmes.WithMemDump(30, 25, 80,time.Minute),
122 | )
123 |
124 | h.EnableMemDump()
125 |
126 | // start the metrics collect and dump loop
127 | h.Start()
128 |
129 | // stop the dumper
130 | h.Stop()
131 | ```
132 |
133 | * WithCollectInterval("5s") means the system metrics are collected once 5 seconds
134 | * WithDumpPath("/tmp") means the dump binary file(binary mode) will write content to `/tmp` dir.
135 | * WithTextDump() means not in binary mode, so it's text mode profiles
136 | * WithMemDump(30, 25, 80, time.Minute) means dump will happen when memory usage > `30%` &&
137 | memory usage > `125%` * previous memory usage or memory usage > `80%`.
138 | `time.Minute` means once a dump happened, the next dump will not happen before
139 | the cooldown (1 minute) has finished.
140 |
141 | ### Dump heap profile when RSS spikes based GC cycle
142 |
143 | In some situations we can not get useful information, such as when the application allocates heap memory and
144 | collects it within one `CollectInterval`. So we designed a new heap memory monitor rule, which is based on
145 | the GC cycle, to control holmes dumps. It will dump the heap profile twice in a row while RSS spikes, so that devs
146 | can compare the profiles through the `pprof base` command.
147 |
148 |
149 | ```go
150 | h, _ := holmes.New(
151 | holmes.WithDumpPath("/tmp"),
152 | holmes.WithLogger(holmes.NewFileLog("/tmp/holmes.log", mlog.INFO)),
153 | holmes.WithBinaryDump(),
154 | holmes.WithMemoryLimit(100*1024*1024), // 100MB
155 | holmes.WithGCHeapDump(10, 20, 40, time.Minute),
156 | // holmes.WithProfileReporter(reporter),
157 | )
158 | h.EnableGCHeapDump().Start()
159 | time.Sleep(time.Hour)
160 | ```
161 | ### Set holmes configurations on fly
162 | You can use `Set` method to modify holmes' configurations when the application is running.
163 | ```go
164 | h.Set(
165 | WithCollectInterval("2s"),
166 | WithGoroutineDump(min, diff, abs, 90, time.Minute))
167 | ```
168 |
169 | ### Reporter dump event
170 |
171 | You can use `Reporter` to implement the following features:
172 | * Send alarm messages that include the scene information when holmes dump profiles.
173 | * Send profiles to the data center for saving or analyzing.
174 |
175 | ```go
176 | type ReporterImpl struct{}
177 | func (r *ReporterImpl) Report(pType string, filename string, reason ReasonType, eventID string, sampleTime time.Time, pprofBytes []byte, scene Scene) error{
178 | // do something
179 | }
180 | ......
181 | r := &ReporterImpl{} // an implementation of the holmes.ProfileReporter interface.
182 | h, _ := holmes.New(
183 | holmes.WithProfileReporter(r),
184 | holmes.WithDumpPath("/tmp"),
185 | holmes.WithLogger(holmes.NewFileLog("/tmp/holmes.log", mlog.INFO)),
186 | holmes.WithBinaryDump(),
187 | holmes.WithMemoryLimit(100*1024*1024), // 100MB
188 | holmes.WithGCHeapDump(10, 20, 40, time.Minute),
189 | )
190 |
191 | ```
192 |
193 | #### Enable holmes as pyroscope client
194 |
195 | Holmes supports uploading your profiles to a [pyroscope](https://github.com/pyroscope-io/pyroscope) server. For more details,
196 | please click [here](./example/pyroscope_rideshare/README.md).
197 |
198 | Note: do **NOT** set `TextDump` when you enable holmes as a pyroscope client.
199 |
200 | ### Enable them all!
201 |
202 | It's easy.
203 |
204 | ```go
205 | h, _ := holmes.New(
206 | holmes.WithCollectInterval("5s"),
207 | holmes.WithDumpPath("/tmp"),
208 | holmes.WithTextDump(),
209 |
210 | holmes.WithCPUDump(10, 25, 80, time.Minute),
211 | holmes.WithMemDump(30, 25, 80, time.Minute),
212 | holmes.WithGCHeapDump(10, 20, 40, time.Minute),
213 | holmes.WithGoroutineDump(500, 25, 20000, 0, time.Minute),
214 | )
215 |
216 | h.EnableCPUDump().
217 | EnableGoroutineDump().
218 | EnableMemDump().
219 | EnableGCHeapDump().Start()
220 |
221 | ```
222 |
223 | ### Running in docker or other cgroup limited environment
224 |
225 | ```go
226 | h, _ := holmes.New(
227 | holmes.WithCollectInterval("5s"),
228 | holmes.WithDumpPath("/tmp"),
229 | holmes.WithTextDump(),
230 |
231 | holmes.WithCPUDump(10, 25, 80,time.Minute),
232 | holmes.WithCGroup(true), // set cgroup to true
233 | )
234 | ```
235 |
236 | ## known risks
237 |
238 | If golang version < 1.19, collecting a goroutine profile itself [may cause latency spike](https://github.com/golang/go/issues/33250) because of the long STW time.
239 | Since golang 1.19, it has been optimized to run concurrently by this [CL](https://go-review.googlesource.com/c/go/+/387415/).
240 |
241 | ## Show cases
242 | [Click here](./doc/example.md)
243 |
244 | ## Contributing
245 | See our [contributor guide](./CONTRIBUTING.md).
246 |
247 | ## Community
248 |
249 | Scan the QR code below with DingTalk(钉钉) to join the Holmes user group.
250 |
251 |
252 |
253 |
254 |
--------------------------------------------------------------------------------
/report.go:
--------------------------------------------------------------------------------
1 | package holmes
2 |
3 | import "time"
4 |
// ProfileReporter is implemented by types that want to receive dumped
// profiles.
//
// Report is given the profile type, the name of the dumped file, the reason
// for the dump, an event ID, the sample time, the raw pprof bytes and the
// Scene describing the values and configuration at the time of the dump.
type ProfileReporter interface {
	Report(pType string, filename string, reason ReasonType, eventID string, sampleTime time.Time, pprofBytes []byte, scene Scene) error
}
8 |
// rptEvent holds the arguments of a single report event; its fields mirror
// the parameters of ProfileReporter.Report one to one.
type rptEvent struct {
	PType      string
	FileName   string
	Reason     ReasonType
	EventID    string
	SampleTime time.Time
	PprofBytes []byte
	Scene      Scene
}
19 |
// Scene contains the scene information when a profile is triggered,
// including the current value, the average value and the configuration.
type Scene struct {
	// typeOption is the embedded configuration part of the scene.
	typeOption

	// CurVal is the current value when the dump event occurs.
	CurVal int
	// Avg is the average of the past values.
	Avg int
}
30 |
// ReasonType identifies how the current value relates to the configured
// thresholds when a dump rule is evaluated.
type ReasonType uint8

const (
	// ReasonCurlLessMin: the current value is less than the min value.
	ReasonCurlLessMin ReasonType = iota
	// ReasonCurlGreaterMin: the current value is greater than the min value,
	// but no trigger condition is met.
	ReasonCurlGreaterMin
	// ReasonCurGreaterMax: the current value is greater than the max value.
	ReasonCurGreaterMax
	// ReasonCurGreaterAbs: the trigger condition "current value is greater
	// than the abs value" is met.
	ReasonCurGreaterAbs
	// ReasonDiff: the current value is greater than (diff + 1) * avg.
	ReasonDiff
)

// String returns the human readable description of rt, or the empty string
// when rt is not one of the declared reasons.
func (rt ReasonType) String() string {
	switch rt {
	case ReasonCurlLessMin:
		return "curVal < ruleMin"
	case ReasonCurlGreaterMin:
		return "curVal >= ruleMin, but don't meet diff trigger condition"
	case ReasonCurGreaterMax:
		return "curVal >= ruleMax"
	case ReasonCurGreaterAbs:
		return "curVal > ruleAbs"
	case ReasonDiff:
		return "curVal >= ruleMin, and meet diff trigger condition"
	}
	return ""
}
66 |
--------------------------------------------------------------------------------
/reporters/http_reporter/http_reporter.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package http_reporter
19 |
20 | import (
21 | "bytes"
22 | "encoding/json"
23 | "fmt"
24 | "io/ioutil"
25 | "mime/multipart"
26 | "net/http"
27 | "time"
28 |
29 | "mosn.io/holmes"
30 | )
31 |
// HttpReporter reports profiles by uploading them to an HTTP endpoint as a
// multipart form.
type HttpReporter struct {
	// token is sent with every upload as the "token" form field.
	token string
	// url is the endpoint the upload is POSTed to.
	url string
}
36 |
// Response is the JSON document expected back from the upload endpoint.
type Response struct {
	// Code is the result code; the reporter treats 1 as success.
	Code int `json:"code"`
	// Message is the text returned alongside the code.
	Message string `json:"message"`
}
41 |
42 | func NewReporter(token string, url string) holmes.ProfileReporter {
43 | return &HttpReporter{
44 | token: token,
45 | url: url,
46 | }
47 | }
48 |
49 | func (r *HttpReporter) Report(ptype string, filename string, reason holmes.ReasonType, eventID string, tt time.Time, bts []byte, scene holmes.Scene) error {
50 | body := &bytes.Buffer{}
51 | writer := multipart.NewWriter(body)
52 |
53 | // read filename
54 | if filename == "" {
55 | return fmt.Errorf("file name is empty")
56 | }
57 | data, err := ioutil.ReadFile(filename)
58 | if err != nil {
59 | return fmt.Errorf("read form File: %s err: %v", filename, err)
60 | }
61 | part, err := writer.CreateFormFile("profile", "go-pprof-profile")
62 | if err != nil {
63 | return fmt.Errorf("create form File err: %v", err)
64 | }
65 |
66 | if _, err := part.Write(data); err != nil {
67 | return fmt.Errorf("write buf to file part err: %v", err)
68 | }
69 |
70 | writer.WriteField("token", r.token) // nolint: errcheck
71 | writer.WriteField("profile_type", ptype) // nolint: errcheck
72 | writer.WriteField("event_id", eventID) // nolint: errcheck
73 | writer.WriteField("comment", reason.String()) // nolint: errcheck
74 | writer.Close() // nolint: errcheck
75 | request, err := http.NewRequest("POST", r.url, body)
76 | if err != nil {
77 | return fmt.Errorf("NewRequest err: %v", err)
78 | }
79 |
80 | request.Header.Add("Content-Type", writer.FormDataContentType())
81 | client := &http.Client{}
82 | response, err := client.Do(request)
83 | if err != nil {
84 | return fmt.Errorf("do Request err: %v", err)
85 | }
86 | defer response.Body.Close() // nolint: errcheck
87 |
88 | respContent, err := ioutil.ReadAll(response.Body)
89 | if err != nil {
90 | return fmt.Errorf("read response err: %v", err)
91 | }
92 |
93 | rsp := &Response{}
94 | if err := json.Unmarshal(respContent, rsp); err != nil {
95 | return fmt.Errorf("failed to decode resp json: %v", err)
96 | }
97 |
98 | if rsp.Code != 1 {
99 | return fmt.Errorf("code: %d, msg: %s", rsp.Code, rsp.Message)
100 | }
101 | return nil
102 | }
103 |
--------------------------------------------------------------------------------
/reporters/http_reporter/http_reporter_test.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package http_reporter
19 |
20 | import (
21 | "log"
22 | "mosn.io/holmes"
23 | "testing"
24 | "time"
25 |
26 | "github.com/gin-gonic/gin"
27 | )
28 |
29 | func TestHttpReporter_Report(t *testing.T) {
30 | newMockServer()
31 |
32 | reporter := NewReporter("test", "http://127.0.0.1:8080/profile/upload")
33 |
34 | if err := reporter.Report("goroutine", "reporter_filename_test", holmes.ReasonCurlGreaterMin, "test-id", time.Now(), []byte{}, holmes.Scene{}); err != nil {
35 | log.Fatalf("failed to report: %v", err)
36 | }
37 | }
38 |
39 | func newMockServer() {
40 | r := gin.New()
41 | r.POST("/profile/upload", ProfileUploadHandler)
42 | go r.Run() //nolint:errcheck // listen and serve on 0.0.0.0:8080 (for windows "localhost:8080")
43 |
44 | time.Sleep(time.Millisecond * 100)
45 | }
46 |
47 | func ProfileUploadHandler(c *gin.Context) {
48 | ret := map[string]interface{}{}
49 | ret["code"] = 1
50 | ret["message"] = "success"
51 | c.JSON(200, ret)
52 | }
53 |
--------------------------------------------------------------------------------
/reporters/http_reporter/reporter_filename_test:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mosn/holmes/efb1a7768843e83b645f6e683f7b6c5d826651ab/reporters/http_reporter/reporter_filename_test
--------------------------------------------------------------------------------
/reporters/pyroscope_reporter/client_config.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 | package pyroscope_reporter
18 |
19 | import (
20 | "errors"
21 | "time"
22 |
23 | "mosn.io/holmes/reporters/pyroscope_reporter/flameql"
24 | )
25 |
/*
Copied from pyroscope-io/client
*/

// Sentinel errors of the pyroscope client.
var (
	// ErrCloudTokenRequired reports a missing authentication token.
	ErrCloudTokenRequired = errors.New("Please provide an authentication token. You can find it here: https://pyroscope.io/cloud")
	// ErrUpload reports a failed profile upload.
	ErrUpload = errors.New("Failed to upload a profile")
	// ErrUpgradeServer reports that the pyroscope server is too old (>= v0.3.1 is required).
	ErrUpgradeServer = errors.New("Newer version of pyroscope server required (>= v0.3.1). Visit https://pyroscope.io/docs/golang/ for more information")
)
34 |
const (
	// Pprof is the "pprof" upload format.
	Pprof UploadFormat = "pprof"
	// Trie is the "trie" upload format.
	// NOTE(review): unlike Pprof this is an untyped string constant, not an
	// UploadFormat — confirm whether that is intended.
	Trie = "trie"
	// DefaultUploadRate is an interval of 10 seconds.
	DefaultUploadRate = 10 * time.Second
)

// UploadFormat names the format of an uploaded profile.
type UploadFormat string

// Payload is anything that can be rendered as raw bytes.
type Payload interface {
	Bytes() []byte
}

// ParserState is an integer state type.
type ParserState int
47 |
const (
	// ReservedTagKeyName is the reserved tag key "__name__".
	ReservedTagKeyName = "__name__"
)
51 |
var (
	// heapSampleTypes is the sample type configuration for heap profiles,
	// keyed by sample type name.
	heapSampleTypes = map[string]*SampleType{
		"alloc_objects": {
			Units:      "objects",
			Cumulative: false,
		},
		"alloc_space": {
			Units:      "bytes",
			Cumulative: false,
		},
		"inuse_space": {
			Units:       "bytes",
			Aggregation: "average",
			Cumulative:  false,
		},
		"inuse_objects": {
			Units:       "objects",
			Aggregation: "average",
			Cumulative:  false,
		},
	}
	// goroutineSampleTypes is the sample type configuration for goroutine
	// profiles, keyed by sample type name.
	goroutineSampleTypes = map[string]*SampleType{
		"goroutine": {
			DisplayName: "goroutines",
			Units:       "goroutines",
			Aggregation: "average",
		},
	}
)
81 |
// SampleType describes one profile sample type. Every field is omitted from
// the JSON encoding when it holds its zero value.
type SampleType struct {
	Units       string `json:"units,omitempty"`
	Aggregation string `json:"aggregation,omitempty"`
	DisplayName string `json:"display-name,omitempty"`
	Sampled     bool   `json:"sampled,omitempty"`
	Cumulative  bool   `json:"cumulative,omitempty"`
}
89 |
// UploadJob bundles the data of one profile upload: the profile bytes
// together with its name, time range, format and sample type configuration.
type UploadJob struct {
	Name            string
	StartTime       time.Time
	EndTime         time.Time
	SpyName         string
	SampleRate      uint32
	Units           string
	AggregationType string
	Format          UploadFormat
	// Profile holds the profile bytes; PrevProfile presumably holds the
	// previous profile of the same kind — confirm against the uploader.
	Profile          []byte
	PrevProfile      []byte
	SampleTypeConfig map[string]*SampleType
}
103 |
// RemoteConfig is the configuration of the remote (upstream) pyroscope
// server. Fields marked "holmes not used" are not used by holmes.
type RemoteConfig struct {
	AuthToken              string // holmes not used
	UpstreamThreads        int    // holmes not used
	UpstreamAddress        string
	UpstreamRequestTimeout time.Duration

	ManualStart bool // holmes not used
}
112 |
113 | // mergeTagsWithAppName validates user input and merges explicitly specified
114 | // tags with tags from app name.
115 | //
116 | // App name may be in the full form including tags (app.name{foo=bar,baz=qux}).
117 | // Returned application name is always short, any tags that were included are
118 | // moved to tags map. When merged with explicitly provided tags (config/CLI),
119 | // last take precedence.
120 | //
121 | // App name may be an empty string. Tags must not contain reserved keys,
122 | // the map is modified in place.
123 | func mergeTagsWithAppName(appName string, tags map[string]string) (string, error) {
124 | k, err := flameql.ParseKey(appName)
125 | if err != nil {
126 | return "", err
127 | }
128 | for tagKey, tagValue := range tags {
129 | if flameql.IsTagKeyReserved(tagKey) {
130 | continue
131 | }
132 | if err = flameql.ValidateTagKey(tagKey); err != nil {
133 | return "", err
134 | }
135 | k.Add(tagKey, tagValue)
136 | }
137 | return k.Normalized(), nil
138 | }
139 |
--------------------------------------------------------------------------------
/reporters/pyroscope_reporter/flameql/error.go:
--------------------------------------------------------------------------------
1 | package flameql
2 |
3 | import (
4 | "errors"
5 | "fmt"
6 | )
7 |
/*
Copied from pyroscope-io/client
*/

// Sentinel errors of the flameql package. Validation functions wrap some of
// them in *Error to attach the offending expression.
var (
	ErrInvalidQuerySyntax    = errors.New("invalid query syntax")
	ErrInvalidAppName        = errors.New("invalid application name")
	ErrInvalidMatchersSyntax = errors.New("invalid tag matchers syntax")
	ErrInvalidTagKey         = errors.New("invalid tag key")
	ErrInvalidTagValueSyntax = errors.New("invalid tag value syntax")

	ErrAppNameIsRequired = errors.New("application name is required")
	ErrTagKeyIsRequired  = errors.New("tag key is required")
	ErrTagKeyReserved    = errors.New("tag key is reserved")

	ErrMatchOperatorIsRequired = errors.New("match operator is required")
	ErrUnknownOp               = errors.New("unknown tag match operator")
)
25 |
// Error pairs an underlying error with the expression that caused it.
type Error struct {
	Inner error
	Expr  string
	// TODO: add offset?
}

// newErr wraps err together with the offending expression expr.
func newErr(err error, expr string) *Error {
	wrapped := new(Error)
	wrapped.Inner = err
	wrapped.Expr = expr
	return wrapped
}

// Error formats the error as "<inner message>: <expression>".
func (e *Error) Error() string {
	msg := e.Inner.Error()
	return msg + ": " + e.Expr
}

// Unwrap returns the underlying error so errors.Is / errors.As can see it.
func (e *Error) Unwrap() error {
	return e.Inner
}
37 |
38 | func newInvalidTagKeyRuneError(k string, r rune) *Error {
39 | return newInvalidRuneError(ErrInvalidTagKey, k, r)
40 | }
41 |
42 | func newInvalidAppNameRuneError(k string, r rune) *Error {
43 | return newInvalidRuneError(ErrInvalidAppName, k, r)
44 | }
45 |
46 | func newInvalidRuneError(err error, k string, r rune) *Error {
47 | return newErr(err, fmt.Sprintf("%s: character is not allowed: %q", k, r))
48 | }
49 |
--------------------------------------------------------------------------------
/reporters/pyroscope_reporter/flameql/flameql.go:
--------------------------------------------------------------------------------
1 | package flameql
2 |
3 | import "regexp"
4 |
5 | /*
6 | Copied from pyroscope-io/client
7 | */
8 | type Query struct {
9 | AppName string
10 | Matchers []*TagMatcher
11 |
12 | q string // The original query string.
13 | }
14 |
15 | func (q *Query) String() string { return q.q }
16 |
// TagMatcher matches a tag value against Value using the embedded operator.
type TagMatcher struct {
	Key   string
	Value string
	Op

	// R is the compiled pattern used by the regex operators.
	R *regexp.Regexp
}
24 |
// Op is a tag match operator.
type Op int

const (
	// The order should respect operator priority and cost.
	// Negating operators go first. See IsNegation.
	_               Op = iota
	OpNotEqual         // !=
	OpNotEqualRegex    // !~
	OpEqual            // =
	OpEqualRegex       // =~
)
36 |
const (
	// ReservedTagKeyName is the reserved tag key "__name__".
	ReservedTagKeyName = "__name__"
)

// reservedTagKeys lists the tag keys reported as reserved by IsTagKeyReserved.
var reservedTagKeys = []string{
	ReservedTagKeyName,
}
44 |
45 | // IsNegation reports whether the operator assumes negation.
46 | func (o Op) IsNegation() bool { return o < OpEqual }
47 |
48 | // ByPriority is a supplemental type for sorting tag matchers.
49 | type ByPriority []*TagMatcher
50 |
51 | func (p ByPriority) Len() int { return len(p) }
52 | func (p ByPriority) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
53 | func (p ByPriority) Less(i, j int) bool { return p[i].Op < p[j].Op }
54 |
55 | func (m *TagMatcher) Match(v string) bool {
56 | switch m.Op {
57 | case OpEqual:
58 | return m.Value == v
59 | case OpNotEqual:
60 | return m.Value != v
61 | case OpEqualRegex:
62 | return m.R.Match([]byte(v))
63 | case OpNotEqualRegex:
64 | return !m.R.Match([]byte(v))
65 | default:
66 | panic("invalid match operator")
67 | }
68 | }
69 |
70 | // ValidateTagKey report an error if the given key k violates constraints.
71 | //
72 | // The function should be used to validate user input. The function returns
73 | // ErrTagKeyReserved if the key is valid but reserved for internal use.
74 | func ValidateTagKey(k string) error {
75 | if len(k) == 0 {
76 | return ErrTagKeyIsRequired
77 | }
78 | for _, r := range k {
79 | if !IsTagKeyRuneAllowed(r) {
80 | return newInvalidTagKeyRuneError(k, r)
81 | }
82 | }
83 | if IsTagKeyReserved(k) {
84 | return newErr(ErrTagKeyReserved, k)
85 | }
86 | return nil
87 | }
88 |
89 | // ValidateAppName report an error if the given app name n violates constraints.
90 | func ValidateAppName(n string) error {
91 | if len(n) == 0 {
92 | return ErrAppNameIsRequired
93 | }
94 | for _, r := range n {
95 | if !IsAppNameRuneAllowed(r) {
96 | return newInvalidAppNameRuneError(n, r)
97 | }
98 | }
99 | return nil
100 | }
101 |
// IsTagKeyRuneAllowed reports whether r may appear in a tag key: ASCII
// letters, ASCII digits and the underscore.
func IsTagKeyRuneAllowed(r rune) bool {
	switch {
	case 'a' <= r && r <= 'z':
		return true
	case 'A' <= r && r <= 'Z':
		return true
	case '0' <= r && r <= '9':
		return true
	}
	return r == '_'
}
105 |
106 | func IsAppNameRuneAllowed(r rune) bool {
107 | return r == '-' || r == '.' || IsTagKeyRuneAllowed(r)
108 | }
109 |
110 | func IsTagKeyReserved(k string) bool {
111 | for _, s := range reservedTagKeys {
112 | if s == k {
113 | return true
114 | }
115 | }
116 | return false
117 | }
118 |
--------------------------------------------------------------------------------
/reporters/pyroscope_reporter/flameql/key.go:
--------------------------------------------------------------------------------
1 | package flameql
2 |
3 | import (
4 | "errors"
5 | "strconv"
6 | "strings"
7 | "time"
8 | )
9 |
/*
Copied from pyroscope-io/client
*/

// Key is a set of labels. The application name is stored under the
// "__name__" label.
type Key struct {
	labels map[string]string
}
16 |
// ParserState is the state of the key parser used by ParseKey.
type ParserState int

const (
	// nameParserState: reading the application name (the initial state).
	nameParserState ParserState = iota
	// tagKeyParserState: reading a tag key.
	tagKeyParserState
	// tagValueParserState: reading a tag value.
	tagValueParserState
	// doneParserState: the closing '}' was seen; ParseKey ignores the rest.
	doneParserState
)
25 |
26 | func NewKey(labels map[string]string) *Key { return &Key{labels: labels} }
27 |
28 | func ParseKey(name string) (*Key, error) {
29 | k := &Key{labels: make(map[string]string)}
30 | p := parser{parserState: nameParserState}
31 | var err error
32 | for _, r := range name + "{" {
33 | switch p.parserState {
34 | case nameParserState:
35 | err = p.nameParserCase(r, k)
36 | case tagKeyParserState:
37 | p.tagKeyParserCase(r)
38 | case tagValueParserState:
39 | err = p.tagValueParserCase(r, k)
40 | }
41 | if err != nil {
42 | return nil, err
43 | }
44 | }
45 | return k, nil
46 | }
47 |
// parser is the scanning state of ParseKey.
type parser struct {
	// parserState selects which case handler receives the next rune.
	parserState ParserState
	// key accumulates the runes of the tag key being read.
	key string
	// value accumulates the runes of the app name or of the tag value being read.
	value string
}
53 |
54 | // ParseKey's nameParserState switch case
55 | func (p *parser) nameParserCase(r int32, k *Key) error {
56 | switch r {
57 | case '{':
58 | p.parserState = tagKeyParserState
59 | appName := strings.TrimSpace(p.value)
60 | if err := ValidateAppName(appName); err != nil {
61 | return err
62 | }
63 | k.labels["__name__"] = appName
64 | default:
65 | p.value += string(r)
66 | }
67 | return nil
68 | }
69 |
70 | // ParseKey's tagKeyParserState switch case
71 | func (p *parser) tagKeyParserCase(r int32) {
72 | switch r {
73 | case '}':
74 | p.parserState = doneParserState
75 | case '=':
76 | p.parserState = tagValueParserState
77 | p.value = ""
78 | default:
79 | p.key += string(r)
80 | }
81 | }
82 |
83 | // ParseKey's tagValueParserState switch case
84 | func (p *parser) tagValueParserCase(r int32, k *Key) error {
85 | switch r {
86 | case ',', '}':
87 | p.parserState = tagKeyParserState
88 | key := strings.TrimSpace(p.key)
89 | if !IsTagKeyReserved(key) {
90 | if err := ValidateTagKey(key); err != nil {
91 | return err
92 | }
93 | }
94 | k.labels[key] = strings.TrimSpace(p.value)
95 | p.key = ""
96 | default:
97 | p.value += string(r)
98 | }
99 | return nil
100 | }
101 |
// SegmentKey returns the segment key for k, which is its normalized form
// (see Normalized).
func (k *Key) SegmentKey() string {
	return k.Normalized()
}
105 |
// TreeKey builds a tree key of the form "<k>:<depth>:<unixTime>", with both
// numbers rendered in base 10.
func TreeKey(k string, depth int, unixTime int64) string {
	buf := make([]byte, 0, len(k)+24)
	buf = append(buf, k...)
	buf = append(buf, ':')
	buf = strconv.AppendInt(buf, int64(depth), 10)
	buf = append(buf, ':')
	buf = strconv.AppendInt(buf, unixTime, 10)
	return string(buf)
}
109 |
110 | func (k *Key) TreeKey(depth int, t time.Time) string {
111 | return TreeKey(k.Normalized(), depth, t.Unix())
112 | }
113 |
// errKeyInvalid is returned by ParseTreeKey when the key does not have at
// least three ':'-separated fields.
var errKeyInvalid = errors.New("invalid key")

// ParseTreeKey retrieves tree time and depth level from the given key.
//
// The key is split on ':'; the second field is the depth and the third is the
// time in unix seconds (the layout produced by TreeKey). It returns
// errKeyInvalid when there are fewer than three fields, or the strconv error
// when either number cannot be parsed.
func ParseTreeKey(k string) (time.Time, int, error) {
	fields := strings.Split(k, ":")
	if len(fields) < 3 {
		return time.Time{}, 0, errKeyInvalid
	}
	level, err := strconv.Atoi(fields[1])
	if err != nil {
		return time.Time{}, 0, err
	}
	// Parse as int64 explicitly: unix timestamps do not fit in int on
	// platforms where int is 32 bits wide.
	sec, err := strconv.ParseInt(fields[2], 10, 64)
	if err != nil {
		return time.Time{}, 0, err
	}
	return time.Unix(sec, 0), level, nil
}
132 |
// DictKey returns the dictionary key for k, which is the value stored under
// the "__name__" label (empty if that label is absent).
func (k *Key) DictKey() string {
	return k.labels["__name__"]
}
136 |
// FromTreeToDictKey returns app name from tree key k: given tree key
// "foo{}:0:1234567890", the call returns "foo".
//
// Before tags support, segment key form (i.e. app name + tags: foo{key=value})
// has been used to reference a dictionary (trie).
//
// If k contains no '{', k is returned unchanged instead of panicking on an
// out-of-range slice bound.
func FromTreeToDictKey(k string) string {
	brace := strings.IndexAny(k, "{")
	if brace < 0 {
		return k
	}
	return k[:brace]
}
145 |
146 | func (k *Key) Normalized() string {
147 | var sb strings.Builder
148 |
149 | sortedMap := New()
150 | for k, v := range k.labels {
151 | if k == "__name__" {
152 | sb.WriteString(v)
153 | } else {
154 | sortedMap.Put(k, v)
155 | }
156 | }
157 |
158 | sb.WriteString("{")
159 | for i, k := range sortedMap.Keys() {
160 | v := sortedMap.Get(k).(string)
161 | if i != 0 {
162 | sb.WriteString(",")
163 | }
164 | sb.WriteString(k)
165 | sb.WriteString("=")
166 | sb.WriteString(v)
167 | }
168 | sb.WriteString("}")
169 |
170 | return sb.String()
171 | }
172 |
// AppName returns the application name, i.e. the value of the "__name__"
// label (empty if that label is absent).
func (k *Key) AppName() string {
	return k.labels["__name__"]
}
176 |
// Labels returns the key's underlying label map, including "__name__".
// The map is not copied: changes made by the caller affect k.
func (k *Key) Labels() map[string]string {
	return k.labels
}
180 |
181 | func (k *Key) Add(key, value string) {
182 | if value == "" {
183 | delete(k.labels, key)
184 | } else {
185 | k.labels[key] = value
186 | }
187 | }
188 |
189 | // Match reports whether the key matches the query.
190 | func (k *Key) Clone() *Key {
191 | newMap := make(map[string]string)
192 | for k, v := range k.labels {
193 | newMap[k] = v
194 | }
195 | return &Key{labels: newMap}
196 | }
197 |
// Match reports whether the key matches the query.
//
// The key's app name must equal q.AppName. Each matcher in q.Matchers is then
// checked against the label whose key equals the matcher's Key:
//   - a non-negation matcher requires that label to exist and m.Match to
//     accept its value;
//   - a negation matcher fails only when the label exists and m.Match rejects
//     its value; a missing label passes.
func (k *Key) Match(q *Query) bool {
	if k.AppName() != q.AppName {
		return false
	}
	for _, m := range q.Matchers {
		// ok records that a non-negation matcher found an accepted label.
		var ok bool
		for labelKey, labelValue := range k.labels {
			if m.Key != labelKey {
				continue
			}
			if m.Match(labelValue) {
				if !m.IsNegation() {
					ok = true
					break
				}
			} else if m.IsNegation() {
				// Negation matcher whose Match rejected the value.
				return false
			}
		}
		// A non-negation matcher with no accepted label fails the key.
		if !ok && !m.IsNegation() {
			return false
		}
	}
	return true
}
223 |
--------------------------------------------------------------------------------
/reporters/pyroscope_reporter/flameql/parse.go:
--------------------------------------------------------------------------------
1 | package flameql
2 |
3 | import (
4 | "regexp"
5 | "sort"
6 | "strings"
7 | )
8 |
9 | /*
10 | Copied from pyroscope-io/client
11 | */
12 |
13 | // ParseQuery parses a string of $app_name<{<$tag_matchers>}> form.
14 | func ParseQuery(s string) (*Query, error) {
15 | s = strings.TrimSpace(s)
16 | q := Query{q: s}
17 |
18 | for offset, c := range s {
19 | switch c {
20 | case '{':
21 | if offset == 0 {
22 | return nil, ErrAppNameIsRequired
23 | }
24 | if s[len(s)-1] != '}' {
25 | return nil, newErr(ErrInvalidQuerySyntax, "expected } at the end")
26 | }
27 | m, err := ParseMatchers(s[offset+1 : len(s)-1])
28 | if err != nil {
29 | return nil, err
30 | }
31 | q.AppName = s[:offset]
32 | q.Matchers = m
33 | return &q, nil
34 | default:
35 | if !IsAppNameRuneAllowed(c) {
36 | return nil, newErr(ErrInvalidAppName, s[:offset+1])
37 | }
38 | }
39 | }
40 |
41 | if len(s) == 0 {
42 | return nil, ErrAppNameIsRequired
43 | }
44 |
45 | q.AppName = s
46 | return &q, nil
47 | }
48 |
49 | // ParseMatchers parses a string of $tag_matcher<,$tag_matchers> form.
50 | func ParseMatchers(s string) ([]*TagMatcher, error) {
51 | var matchers []*TagMatcher
52 | for _, t := range split(s) {
53 | if t == "" {
54 | continue
55 | }
56 | m, err := ParseMatcher(strings.TrimSpace(t))
57 | if err != nil {
58 | return nil, err
59 | }
60 | matchers = append(matchers, m)
61 | }
62 | if len(matchers) == 0 && len(s) != 0 {
63 | return nil, newErr(ErrInvalidMatchersSyntax, s)
64 | }
65 | sort.Sort(ByPriority(matchers))
66 | return matchers, nil
67 | }
68 |
// ParseMatcher parses a string of $tag_key$op"$tag_value" form,
// where $op is one of the supported match operators.
//
// The operators recognised here are = (OpEqual), =~ (OpEqualRegex),
// != (OpNotEqual) and !~ (OpNotEqualRegex). The value must be enclosed in
// double quotes. For the regex operators the value is compiled and stored in
// the matcher's R field. Reserved tag keys are rejected.
func ParseMatcher(s string) (*TagMatcher, error) {
	var tm TagMatcher
	var offset int
	var c rune

	// Scan for the first operator rune; offset is left at its byte position.
loop:
	for offset, c = range s {
		// r is the number of bytes in s after position offset.
		r := len(s) - (offset + 1)
		switch c {
		case '=':
			switch {
			case r <= 2:
				// Too short to hold a quoted, non-empty value.
				return nil, newErr(ErrInvalidTagValueSyntax, s)
			case s[offset+1] == '"':
				tm.Op = OpEqual
			case s[offset+1] == '~':
				// One extra byte is needed for the '~'.
				if r <= 3 {
					return nil, newErr(ErrInvalidTagValueSyntax, s)
				}
				tm.Op = OpEqualRegex
			default:
				// Just for more meaningful error message.
				if s[offset+2] != '"' {
					return nil, newErr(ErrInvalidTagValueSyntax, s)
				}
				return nil, newErr(ErrUnknownOp, s)
			}
			break loop
		case '!':
			if r <= 3 {
				return nil, newErr(ErrInvalidTagValueSyntax, s)
			}
			switch s[offset+1] {
			case '=':
				tm.Op = OpNotEqual
			case '~':
				tm.Op = OpNotEqualRegex
			default:
				return nil, newErr(ErrUnknownOp, s)
			}
			break loop
		default:
			// Still inside the tag key: validate each rune as it is read.
			if !IsTagKeyRuneAllowed(c) {
				return nil, newInvalidTagKeyRuneError(s, c)
			}
		}
	}

	// Everything before the operator is the tag key.
	k := s[:offset]
	if IsTagKeyReserved(k) {
		return nil, newErr(ErrTagKeyReserved, k)
	}

	var v string
	var ok bool
	switch tm.Op {
	default:
		// tm.Op was never set: the loop ended without finding an operator.
		return nil, newErr(ErrMatchOperatorIsRequired, s)
	case OpEqual:
		// One-byte operator: the value starts right after '='.
		v, ok = unquote(s[offset+1:])
	case OpNotEqual, OpEqualRegex, OpNotEqualRegex:
		// Two-byte operator: skip both operator bytes.
		v, ok = unquote(s[offset+2:])
	}
	if !ok {
		return nil, newErr(ErrInvalidTagValueSyntax, v)
	}

	// Compile regex, if applicable.
	switch tm.Op {
	case OpEqualRegex, OpNotEqualRegex:
		r, err := regexp.Compile(v)
		if err != nil {
			return nil, newErr(err, v)
		}
		tm.R = r
	}

	tm.Key = k
	tm.Value = v
	return &tm, nil
}
152 |
// unquote strips one pair of surrounding double quotes from s.
//
// It returns the inner text and true when s both starts and ends with '"'.
// Otherwise it returns s unchanged and false. Inputs shorter than two bytes
// (empty, or a lone quote) cannot be a quoted string and are rejected rather
// than indexed out of range.
func unquote(s string) (string, bool) {
	if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
		return s, false
	}
	return s[1 : len(s)-1], true
}
159 |
// split cuts s at every ',' that is not inside a double-quoted section.
// A '"' preceded by a backslash does not close a quoted section. The result
// always has at least one element (the remainder after the last cut).
func split(s string) []string {
	var parts []string
	start := 0
	quoted := false
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c == ',' && !quoted {
			parts = append(parts, s[start:i])
			start = i + 1
			continue
		}
		if c != '"' {
			continue
		}
		// A quote closes the section only when one is open and the quote is
		// not escaped; in every other case it opens (or stays inside) one.
		closes := quoted && i > 0 && s[i-1] != '\\'
		quoted = !closes
	}
	return append(parts, s[start:])
}
179 |
--------------------------------------------------------------------------------
/reporters/pyroscope_reporter/flameql/sortedmap.go:
--------------------------------------------------------------------------------
1 | package flameql
2 |
3 | import (
4 | "sort"
5 | )
6 |
// SortedMap is a string-keyed map that also keeps its keys in ascending
// order, so they can be iterated deterministically.
type SortedMap struct {
	// data holds the values by key.
	data map[string]interface{}
	// keys holds every key of data exactly once, in ascending order.
	keys []string
}

// Put stores v under k. If k is already present only its value is replaced;
// otherwise k is inserted into the ordered key list at its sorted position.
func (s *SortedMap) Put(k string, v interface{}) {
	_, exists := s.data[k]
	s.data[k] = v
	if exists {
		// Re-inserting would leave a duplicate entry in keys.
		return
	}
	i := sort.Search(len(s.keys), func(i int) bool { return s.keys[i] >= k })
	// Grow by one, shift the tail right, and drop k into the gap.
	s.keys = append(s.keys, "")
	copy(s.keys[i+1:], s.keys[i:])
	s.keys[i] = k
}

// Get returns the value stored under k, or nil if k is absent.
func (s *SortedMap) Get(k string) (v interface{}) {
	return s.data[k]
}

// Keys returns the keys in ascending order. The returned slice is the map's
// internal slice and must not be modified by the caller.
func (s *SortedMap) Keys() []string {
	return s.keys
}

// New returns an empty, ready-to-use SortedMap.
func New() *SortedMap {
	return &SortedMap{
		data: make(map[string]interface{}),
		keys: make([]string, 0),
	}
}
34 |
--------------------------------------------------------------------------------
/reporters/pyroscope_reporter/pyroscope_client.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package pyroscope_reporter
19 |
20 | import (
21 | "bytes"
22 | "encoding/json"
23 | "fmt"
24 | "io/ioutil"
25 | "mime/multipart"
26 | "net/http"
27 | "net/url"
28 | "path/filepath"
29 | "strconv"
30 | "time"
31 |
32 | "mosn.io/holmes"
33 |
34 | mlog "mosn.io/pkg/log"
35 | )
36 |
37 | /*
38 | Enable holmes to report pprof events to pyroscope as its client.
39 | */
40 |
// PyroscopeReporter uploads holmes profiles to a pyroscope server over HTTP.
type PyroscopeReporter struct {
	// AppName is the name sent as the upload's "name" query parameter.
	// NewPyroscopeReporter sets it to the result of mergeTagsWithAppName.
	AppName string
	// Tags is not populated by NewPyroscopeReporter.
	// NOTE(review): appears unused in this file — confirm before relying on it.
	Tags map[string]string

	// cfg holds the upstream address, auth token and request limits.
	cfg RemoteConfig
	// client is the HTTP client used for every upload.
	client *http.Client
	// Logger receives debug output from uploadProfile.
	Logger mlog.ErrorLogger
}
49 |
50 | func NewPyroscopeReporter(AppName string, tags map[string]string, cfg RemoteConfig, logger mlog.ErrorLogger) (*PyroscopeReporter, error) {
51 | appName, err := mergeTagsWithAppName(AppName, tags)
52 | if err != nil {
53 | return nil, err
54 | }
55 |
56 | reporter := &PyroscopeReporter{
57 | cfg: cfg,
58 | client: &http.Client{
59 | Transport: &http.Transport{
60 | MaxConnsPerHost: cfg.UpstreamThreads,
61 | },
62 | Timeout: cfg.UpstreamRequestTimeout,
63 | },
64 | Logger: logger,
65 | AppName: appName,
66 | }
67 |
68 | // todo: holmes doesn't support auth token temporary
69 |
70 | return reporter, nil
71 | }
72 |
73 | // uploadProfile copied from pyroscope client
74 | func (r *PyroscopeReporter) uploadProfile(j *UploadJob) error {
75 | u, err := url.Parse(r.cfg.UpstreamAddress)
76 | if err != nil {
77 | return fmt.Errorf("url parse: %v", err)
78 | }
79 |
80 | body := &bytes.Buffer{}
81 |
82 | writer := multipart.NewWriter(body)
83 | fw, err := writer.CreateFormFile("profile", "profile.pprof")
84 | fw.Write(j.Profile) // nolint: errcheck
85 | if err != nil {
86 | return err
87 | }
88 | if j.PrevProfile != nil {
89 | fw, err = writer.CreateFormFile("prev_profile", "profile.pprof")
90 | fw.Write(j.PrevProfile) // nolint: errcheck
91 | if err != nil {
92 | return err
93 | }
94 | }
95 | if j.SampleTypeConfig != nil {
96 | fw, err = writer.CreateFormFile("sample_type_config", "sample_type_config.json")
97 | if err != nil {
98 | return err
99 | }
100 | b, err := json.Marshal(j.SampleTypeConfig)
101 | if err != nil {
102 | return err
103 | }
104 | fw.Write(b)
105 | }
106 | writer.Close() // nolint: errcheck
107 |
108 | q := u.Query()
109 | q.Set("name", j.Name)
110 | // TODO: I think these should be renamed to startTime / endTime
111 | q.Set("from", strconv.Itoa(int(j.StartTime.Unix())))
112 | q.Set("until", strconv.Itoa(int(j.EndTime.Unix())))
113 | q.Set("spyName", j.SpyName)
114 | q.Set("sampleRate", strconv.Itoa(int(j.SampleRate)))
115 | q.Set("units", j.Units)
116 | q.Set("aggregationType", j.AggregationType)
117 |
118 | u.Path = filepath.Join(u.Path, "/ingest")
119 | u.RawQuery = q.Encode()
120 |
121 | r.Logger.Debugf("uploading at %s", u.String())
122 |
123 | // new a request for the job
124 | request, err := http.NewRequest("POST", u.String(), body)
125 | //r.Logger.Debugf("body is %s", body.String())
126 | if err != nil {
127 | return fmt.Errorf("new http request: %v", err)
128 | }
129 | contentType := writer.FormDataContentType()
130 | r.Logger.Debugf("content type: %s", contentType)
131 | request.Header.Set("Content-Type", contentType)
132 | // request.Header.Set("Content-Type", "binary/octet-stream+"+string(j.Format))
133 |
134 | if r.cfg.AuthToken != "" {
135 | request.Header.Set("Authorization", "Bearer "+r.cfg.AuthToken)
136 | }
137 |
138 | // do the request and get the response
139 | response, err := r.client.Do(request)
140 | if err != nil {
141 | return fmt.Errorf("do http request: %v", err)
142 | }
143 | defer response.Body.Close() // nolint: errcheck
144 |
145 | // read all the response body
146 | _, err = ioutil.ReadAll(response.Body)
147 | if err != nil {
148 | return fmt.Errorf("read response body: %v", err)
149 | }
150 |
151 | if response.StatusCode == 422 {
152 | return ErrUpgradeServer
153 | }
154 | if response.StatusCode != 200 {
155 | return ErrUpload
156 | }
157 |
158 | return nil
159 | }
160 |
161 | func (r *PyroscopeReporter) Report(ptype string, filename string, reason holmes.ReasonType, eventID string, sampleTime time.Time, pprofBytes []byte, scene holmes.Scene) error {
162 | endTime := sampleTime.Truncate(DefaultUploadRate)
163 | startTime := endTime.Add(-DefaultUploadRate)
164 | _, _, _, _, _ = ptype, filename, reason, eventID, scene
165 | stc := sampleTypeCfg(ptype)
166 | j := &UploadJob{
167 | Name: r.AppName,
168 | StartTime: startTime,
169 | EndTime: endTime,
170 | SpyName: "gospy",
171 | SampleRate: 100,
172 | Units: "samples",
173 | AggregationType: "sum",
174 | Format: Pprof,
175 | Profile: pprofBytes,
176 | SampleTypeConfig: stc,
177 | }
178 |
179 | if err := r.uploadProfile(j); err != nil {
180 | return err
181 | }
182 | return nil
183 | }
184 |
185 | func sampleTypeCfg(ptype string) map[string]*SampleType {
186 | switch ptype {
187 | case "heap":
188 | return heapSampleTypes
189 | case "goroutine":
190 | return goroutineSampleTypes
191 | }
192 | return nil
193 | }
194 |
--------------------------------------------------------------------------------
/reporters/pyroscope_reporter/pyroscope_client_test.go:
--------------------------------------------------------------------------------
1 | package pyroscope_reporter
2 |
3 | import (
4 | "log"
5 | "os"
6 | "testing"
7 | "time"
8 |
9 | "github.com/gin-gonic/gin"
10 |
11 | "mosn.io/holmes"
12 | )
13 |
// h is the holmes instance shared by the tests in this package; it is created
// and started in TestMain.
var h *holmes.Holmes
15 |
16 | func TestMain(m *testing.M) {
17 | log.Println("holmes initialing")
18 | h, _ = holmes.New(
19 | holmes.WithCollectInterval("1s"),
20 | )
21 | log.Println("holmes initial success")
22 | h.EnableMemDump().EnableGoroutineDump().EnableCPUDump().Start()
23 | time.Sleep(11 * time.Second)
24 | log.Println("on running")
25 | newMockServer()
26 | os.Exit(m.Run())
27 | }
28 |
// received is set to true by ProfileUploadHandler once the mock server gets
// an upload.
// NOTE(review): written from the HTTP handler goroutine and read from the
// test goroutine without synchronization — confirm this is acceptable.
var received = false
30 |
31 | func TestPyroscopeClient(t *testing.T) {
32 |
33 | cfg := RemoteConfig{
34 | //AuthToken: "",
35 | //UpstreamThreads: 4,
36 | UpstreamAddress: "http://localhost:8080",
37 | UpstreamRequestTimeout: 3 * time.Second,
38 | }
39 | tags := map[string]string{
40 | "region": "zh",
41 | }
42 | pReporter, err := NewPyroscopeReporter("holmes-client-01", tags, cfg, holmes.NewStdLogger())
43 | if err != nil {
44 | log.Fatalf("NewPyroscopeReporter error %v", err)
45 | }
46 |
47 | err = h.Set(
48 | holmes.WithProfileReporter(pReporter),
49 | holmes.WithGoroutineDump(0, 0, 1, 2, time.Second),
50 | holmes.WithCPUDump(0, 2, 80, time.Second),
51 | holmes.WithMemDump(0, 1, 1, time.Second),
52 | holmes.WithCollectInterval("1s"),
53 | )
54 | if err != nil {
55 | log.Fatalf("fail to set opts on running time.")
56 | }
57 | go cpuex()
58 | time.Sleep(20 * time.Second)
59 | if !received {
60 | t.Errorf("mock pyroscope server didn't received request")
61 | }
62 | }
63 |
// cpuex starts a background goroutine that loops forever, sleeping one
// millisecond per iteration. The goroutine is never stopped.
func cpuex() {
	go func() {
		for {
			time.Sleep(time.Millisecond)
		}
	}()
}
71 |
// newMockServer starts a gin server in the background that handles
// POST /ingest with ProfileUploadHandler, then waits 100ms before returning.
// NOTE(review): the wait is presumably to let the listener come up — confirm.
func newMockServer() {
	r := gin.New()
	r.POST("/ingest", ProfileUploadHandler)
	go r.Run() //nolint:errcheck // listen and serve on 0.0.0.0:8080 (for windows "localhost:8080")

	time.Sleep(time.Millisecond * 100)
}
79 |
80 | func ProfileUploadHandler(c *gin.Context) {
81 | ret := map[string]interface{}{}
82 | ret["code"] = 1
83 | ret["message"] = "success"
84 | c.JSON(200, ret)
85 | received = true
86 | }
87 |
--------------------------------------------------------------------------------
/reporters/reporter_test.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package reporters
19 |
20 | import (
21 | "fmt"
22 | "log"
23 | "os"
24 | "testing"
25 | "time"
26 |
27 | "mosn.io/holmes"
28 | )
29 |
// h is the holmes instance shared by the tests in this package; it is created
// and started in TestMain.
var h *holmes.Holmes
31 |
32 | func TestMain(m *testing.M) {
33 | log.Println("holmes initialing")
34 | h, _ = holmes.New(
35 | holmes.WithCollectInterval("1s"),
36 | holmes.WithDumpPath("./"),
37 | holmes.WithTextDump(),
38 | )
39 | log.Println("holmes initial success")
40 | h.EnableGoroutineDump().EnableCPUDump().Start()
41 | time.Sleep(11 * time.Second)
42 | log.Println("on running")
43 | os.Exit(m.Run())
44 | }
45 |
// Results recorded by the mock reporters and inspected by the tests.
// grReportCount and cpuReportCount count "goroutine" and "cpu" reports seen
// by mockReporter; the two errors hold the last failure either mock detected.
var grReportCount int
var cpuReportCount int
var unknownReasonTypeErr error
var sceneException error
50 |
51 | type mockReporter struct {
52 | }
53 |
54 | func (m *mockReporter) Report(pType string, filename string, reason holmes.ReasonType, eventID string, sampleTime time.Time, pprofBytes []byte, scene holmes.Scene) error {
55 | log.Printf("call %s , filename %s report \n", pType, filename)
56 |
57 | // read filename
58 | switch pType {
59 | case "goroutine":
60 | grReportCount++
61 | case "cpu":
62 | cpuReportCount++
63 |
64 | }
65 |
66 | if len(reason.String()) == 0 { // unknown reason type
67 | unknownReasonTypeErr = fmt.Errorf("reporter: unknown reason type")
68 | return unknownReasonTypeErr
69 | }
70 |
71 | { // test scene
72 | errPrefix := "reporter: scene exception ==> "
73 | if scene.TriggerAbs == 0 {
74 | sceneException = fmt.Errorf(errPrefix + "abs in configuration is 0")
75 | return sceneException
76 | }
77 | if scene.TriggerDiff == 0 {
78 | sceneException = fmt.Errorf(errPrefix + "diff in configuration is 0")
79 | return sceneException
80 | }
81 | }
82 | return nil
83 | }
84 |
85 | var grReopenReportCount int
86 |
87 | type mockReopenReporter struct {
88 | }
89 |
90 | func (m *mockReopenReporter) Report(pType string, filename string, reason holmes.ReasonType, eventID string, sampleTime time.Time, pprofBytes []byte, scene holmes.Scene) error {
91 | log.Printf("call %s report \n", pType)
92 |
93 | switch pType {
94 | case "goroutine":
95 | grReopenReportCount++
96 | }
97 |
98 | if len(reason.String()) == 0 { // unknown reason type
99 | unknownReasonTypeErr = fmt.Errorf("reopen reporter: unknown reason type")
100 | return unknownReasonTypeErr
101 | }
102 |
103 | { // test scene
104 | errPrefix := "reopen reporter: scene exception ==> "
105 | if scene.TriggerAbs == 0 {
106 | sceneException = fmt.Errorf(errPrefix + "abs in configuration is 0")
107 | return sceneException
108 | }
109 | if scene.TriggerDiff == 0 {
110 | sceneException = fmt.Errorf(errPrefix + "diff in configuration is 0")
111 | return sceneException
112 | }
113 | }
114 | return nil
115 | }
116 |
117 | func TestReporter(t *testing.T) {
118 | grReportCount = 0
119 | cpuReportCount = 0
120 | unknownReasonTypeErr = nil
121 | sceneException = nil
122 |
123 | r := &mockReporter{}
124 | err := h.Set(
125 | holmes.WithProfileReporter(r),
126 | holmes.WithGoroutineDump(5, 10, 20, 90, time.Second),
127 | holmes.WithCPUDump(0, 2, 80, time.Second),
128 | holmes.WithCollectInterval("5s"),
129 | )
130 | if err != nil {
131 | log.Fatalf("fail to set opts on running time.")
132 | }
133 | go cpuex()
134 | time.Sleep(10 * time.Second)
135 |
136 | if grReportCount == 0 {
137 | log.Fatalf("not grReport")
138 | }
139 |
140 | if cpuReportCount == 0 {
141 | log.Fatalf("not cpuReport")
142 | }
143 |
144 | if unknownReasonTypeErr != nil {
145 | log.Fatalf(unknownReasonTypeErr.Error())
146 | }
147 |
148 | if sceneException != nil {
149 | log.Fatalf(sceneException.Error())
150 | }
151 |
152 | // test reopen feature
153 | h.Stop()
154 | h.Start()
155 | grReopenReportCount = 0
156 | _ = h.Set(
157 | holmes.WithProfileReporter(&mockReopenReporter{}))
158 | time.Sleep(10 * time.Second)
159 |
160 | time.Sleep(5 * time.Second)
161 |
162 | if grReopenReportCount == 0 {
163 | log.Fatalf("fail to reopen")
164 | }
165 | }
166 |
167 | func TestReporterReopen(t *testing.T) {
168 | grReportCount = 0
169 | cpuReportCount = 0
170 | r := &mockReporter{}
171 | err := h.Set(
172 | holmes.WithProfileReporter(r),
173 | holmes.WithGoroutineDump(5, 10, 20, 90, time.Second),
174 | holmes.WithCPUDump(0, 2, 80, time.Second),
175 | holmes.WithCollectInterval("5s"),
176 | holmes.WithDumpToLogger(true),
177 | )
178 | if err != nil {
179 | log.Fatalf("fail to set opts on running time.")
180 | }
181 | go cpuex()
182 | time.Sleep(10 * time.Second)
183 |
184 | if grReportCount == 0 {
185 | log.Fatalf("not grReport")
186 | }
187 |
188 | if cpuReportCount == 0 {
189 | log.Fatalf("not cpuReport")
190 | }
191 |
192 | // test reopen feature
193 | h.DisableProfileReporter()
194 |
195 | h.EnableProfileReporter()
196 |
197 | grReopenReportCount = 0
198 | _ = h.Set(
199 | holmes.WithProfileReporter(&mockReopenReporter{}))
200 | time.Sleep(10 * time.Second)
201 |
202 | time.Sleep(5 * time.Second)
203 |
204 | if grReopenReportCount == 0 {
205 | log.Fatalf("fail to reopen")
206 | }
207 | }
208 |
// cpuex starts a background goroutine that spins forever to generate CPU
// load. The goroutine is never stopped.
func cpuex() {
	go func() {
		// Nothing ever sends on ch, so the select always takes the default
		// branch and the loop never blocks.
		var ch = make(chan struct{})
		for {
			select {
			case <-ch:
				// do nothing
			default:
				continue
			}
		}
	}()
}
222 |
--------------------------------------------------------------------------------
/ring.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package holmes
19 |
// ring is a fixed-capacity buffer of ints that keeps a running sum, so the
// average of the most recent maxLen values is available cheaply.
type ring struct {
	data   []int // stored values; grows up to maxLen, then is overwritten in place
	idx    int   // position of the oldest value once data is full
	sum    int   // sum of the values currently in data
	maxLen int   // capacity; zero disables the ring
}

// newRing returns an empty ring that holds at most maxLen values.
func newRing(maxLen int) ring {
	var r ring
	r.maxLen = maxLen
	r.data = make([]int, 0, maxLen)
	return r
}

// push adds i to the ring. While the ring is filling the value is appended;
// once full, the oldest value is replaced and the running sum adjusted.
// It does nothing when maxLen is zero.
func (r *ring) push(i int) {
	switch {
	case r.maxLen == 0:
		return
	case len(r.data) < r.maxLen:
		// the first round
		r.data = append(r.data, i)
		r.sum += i
	default:
		// the ring is expanded, just write to the position
		evicted := r.data[r.idx]
		r.data[r.idx] = i
		r.sum += i - evicted
		r.idx = (r.idx + 1) % r.maxLen
	}
}

// avg returns the integer average of the stored values, or 0 when the ring
// is empty or disabled.
func (r *ring) avg() int {
	n := len(r.data)
	if r.maxLen == 0 || n == 0 {
		return 0
	}
	return r.sum / n
}

// sequentialData returns a new slice of length maxLen with the stored values
// ordered oldest to newest. Slots not yet filled are zero.
func (r *ring) sequentialData() []int {
	out := make([]int, r.maxLen)
	// idx stays at 0 until the ring has filled up and started to wrap, so in
	// that case data is already in insertion order.
	if r.idx == 0 {
		copy(out, r.data)
		return out
	}
	// Oldest values sit at data[idx:], the newest have wrapped to data[:idx].
	n := copy(out, r.data[r.idx:])
	copy(out[n:], r.data[:r.idx])
	return out
}
74 |
--------------------------------------------------------------------------------
/ring_test.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package holmes
19 |
20 | import (
21 | "testing"
22 |
23 | "github.com/stretchr/testify/assert"
24 | )
25 |
26 | func TestEmptyRing(t *testing.T) {
27 | var r = newRing(0)
28 | assert.Equal(t, r.avg(), 0)
29 |
30 | r = newRing(1)
31 | assert.Equal(t, r.avg(), 0)
32 | }
33 |
34 | func TestRing(t *testing.T) {
35 | var cases = []struct {
36 | slice []int
37 | maxLen int
38 | avg int
39 | }{
40 | {
41 | slice: []int{1, 2, 3},
42 | maxLen: 10,
43 | avg: 2,
44 | },
45 | {
46 | slice: []int{1, 2, 3},
47 | maxLen: 1,
48 | avg: 3,
49 | },
50 | }
51 |
52 | for _, cas := range cases {
53 | var r = newRing(cas.maxLen)
54 | for _, elem := range cas.slice {
55 | r.push(elem)
56 | }
57 | assert.Equal(t, r.avg(), cas.avg)
58 | }
59 | }
60 |
61 | func Test_ring_humanData(t *testing.T) {
62 | r := newRing(5)
63 | var cases = []struct {
64 | except []int
65 | }{
66 | {
67 | except: []int{1, 0, 0, 0, 0},
68 | },
69 | {
70 | except: []int{1, 2, 0, 0, 0},
71 | },
72 | {
73 | except: []int{1, 2, 3, 0, 0},
74 | },
75 | {
76 | except: []int{1, 2, 3, 4, 0},
77 | },
78 | {
79 | except: []int{1, 2, 3, 4, 5},
80 | },
81 | {
82 | except: []int{2, 3, 4, 5, 6},
83 | },
84 | {
85 | except: []int{3, 4, 5, 6, 7},
86 | },
87 | {
88 | except: []int{4, 5, 6, 7, 8},
89 | },
90 | {
91 | except: []int{5, 6, 7, 8, 9},
92 | },
93 | {
94 | except: []int{6, 7, 8, 9, 10},
95 | },
96 | }
97 | for i := 0; i < 10; i++ {
98 | r.push(i + 1)
99 | assert.Equal(t, r.sequentialData(), cases[i].except)
100 | }
101 | }
102 |
--------------------------------------------------------------------------------
/tool/build-example.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Make sure every example under example/ can be compiled.
# Run from the repository root.

set -e
set -x

# Iterate with a glob instead of parsing `ls`, and quote every expansion so
# directory names with spaces or glob characters are handled correctly.
for dir in example/*/; do
    # If example/ has no sub-directories the pattern stays literal; skip it.
    [ -d "$dir" ] || continue

    echo "$(basename "$dir")"

    # Build in a subshell so the working directory is restored automatically;
    # with `set -e` a failure inside still aborts the whole script.
    (
        cd "$dir"

        go mod tidy
        rm -rf vendor
        go build .
    )
done
20 |
--------------------------------------------------------------------------------
/util.go:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package holmes
19 |
20 | import (
21 | "bytes"
22 | "fmt"
23 | "io/ioutil"
24 | "math"
25 | "os"
26 | "path/filepath"
27 | "runtime"
28 | "runtime/pprof"
29 | "strconv"
30 | "strings"
31 | "time"
32 |
33 | mem_util "github.com/shirou/gopsutil/mem"
34 | "github.com/shirou/gopsutil/process"
35 | )
36 |
// copied from https://github.com/containerd/cgroups/blob/318312a373405e5e91134d8063d04d59768a1bff/utils.go#L251
//
// parseUint parses s as an unsigned integer. Negative inputs — including
// ones too small for a signed integer of bitSize — are clamped to 0 instead
// of being reported as errors. Any other failure returns the ParseUint error.
func parseUint(s string, base, bitSize int) (uint64, error) {
	v, err := strconv.ParseUint(s, base, bitSize)
	if err == nil {
		return v, nil
	}

	signed, signedErr := strconv.ParseInt(s, base, bitSize)
	if signed >= 0 {
		// Not a negative number: the original failure stands.
		return 0, err
	}
	// 1. Handle negative values greater than MinInt64 (and)
	if signedErr == nil {
		return 0, nil
	}
	// 2. Handle negative values lesser than MinInt64
	if signedErr.(*strconv.NumError).Err == strconv.ErrRange {
		return 0, nil
	}
	return 0, err
}
55 |
56 | // copied from https://github.com/containerd/cgroups/blob/318312a373405e5e91134d8063d04d59768a1bff/utils.go#L243
57 | func readUint(path string) (uint64, error) {
58 | v, err := ioutil.ReadFile(path)
59 | if err != nil {
60 | return 0, err
61 | }
62 | return parseUint(strings.TrimSpace(string(v)), 10, 64)
63 | }
64 |
65 | // only reserve the top n.
66 | func trimResultTop(buffer bytes.Buffer) []byte {
67 | index := TrimResultTopN
68 | arr := strings.SplitN(buffer.String(), "\n\n", TrimResultTopN+1)
69 |
70 | if len(arr) <= TrimResultTopN {
71 | index = len(arr) - 1
72 | }
73 |
74 | return []byte(strings.Join(arr[:index], "\n\n"))
75 | }
76 |
77 | // only reserve the front n bytes
78 | func trimResultFront(buffer bytes.Buffer) []byte {
79 | if buffer.Len() <= TrimResultMaxBytes {
80 | return buffer.Bytes()
81 | }
82 | return buffer.Bytes()[:TrimResultMaxBytes-1]
83 | }
84 |
85 | // return values:
86 | // 1. cpu percent, not division cpu cores yet,
87 | // 2. RSS mem in bytes,
88 | // 3. goroutine num,
89 | // 4. thread num
90 | func getUsage() (float64, uint64, int, int, error) {
91 | p, err := process.NewProcess(int32(os.Getpid()))
92 | if err != nil {
93 | return 0, 0, 0, 0, err
94 | }
95 | cpuPercent, err := p.Percent(time.Second)
96 | if err != nil {
97 | return 0, 0, 0, 0, err
98 | }
99 |
100 | mem, err := p.MemoryInfo()
101 | if err != nil {
102 | return 0, 0, 0, 0, err
103 | }
104 |
105 | rss := mem.RSS
106 | gNum := runtime.NumGoroutine()
107 | tNum := getThreadNum()
108 |
109 | return cpuPercent, rss, gNum, tNum, nil
110 | }
111 |
112 | // get cpu core number limited by CGroup.
113 | func getCGroupCPUCore() (float64, error) {
114 | var cpuQuota uint64
115 |
116 | cpuPeriod, err := readUint(cgroupCpuPeriodPath)
117 | if cpuPeriod == 0 || err != nil {
118 | return 0, err
119 | }
120 |
121 | if cpuQuota, err = readUint(cgroupCpuQuotaPath); err != nil {
122 | return 0, err
123 | }
124 |
125 | return float64(cpuQuota) / float64(cpuPeriod), nil
126 | }
127 |
128 | func getCGroupMemoryLimit() (uint64, error) {
129 | usage, err := readUint(cgroupMemLimitPath)
130 | if err != nil {
131 | return 0, err
132 | }
133 | machineMemory, err := mem_util.VirtualMemory()
134 | if err != nil {
135 | return 0, err
136 | }
137 | limit := uint64(math.Min(float64(usage), float64(machineMemory.Total)))
138 | return limit, nil
139 | }
140 |
141 | func getNormalMemoryLimit() (uint64, error) {
142 | machineMemory, err := mem_util.VirtualMemory()
143 | if err != nil {
144 | return 0, err
145 | }
146 | return machineMemory.Total, nil
147 | }
148 |
// getThreadNum returns the entry count of the runtime's "threadcreate"
// pprof profile, which is used here as the process thread number.
func getThreadNum() int {
	threadProfile := pprof.Lookup("threadcreate")
	return threadProfile.Count()
}
152 |
153 | // cpu mem goroutine thread err.
154 | func collect(cpuCore float64, memoryLimit uint64) (int, int, int, int, error) {
155 | cpu, mem, gNum, tNum, err := getUsage()
156 | if err != nil {
157 | return 0, 0, 0, 0, err
158 | }
159 |
160 | // The default percent is from all cores, multiply by cpu core
161 | // but it's inconvenient to calculate the proper percent
162 | // here we divide by core number, so we can set a percent bar more intuitively
163 | cpuPercent := cpu / cpuCore
164 |
165 | memPercent := float64(mem) / float64(memoryLimit) * 100
166 |
167 | return int(cpuPercent), int(memPercent), gNum, tNum, nil
168 | }
169 |
170 | func matchRule(history ring, curVal, ruleMin, ruleAbs, ruleDiff, ruleMax int) (bool, ReasonType) {
171 | // should bigger than rule min
172 | if curVal < ruleMin {
173 | return false, ReasonCurlLessMin
174 | //fmt.Sprintf("curVal [%d]< ruleMin [%d]", curVal, ruleMin)
175 | }
176 |
177 | // if ruleMax is enable and current value bigger max, skip dumping
178 | if ruleMax != NotSupportTypeMaxConfig && curVal >= ruleMax {
179 | return false, ReasonCurGreaterMax
180 | }
181 |
182 | // the current peak load exceed the absolute value
183 | if curVal > ruleAbs {
184 | return true, ReasonCurGreaterAbs
185 | // fmt.Sprintf("curVal [%d] > ruleAbs [%d]", curVal, ruleAbs)
186 | }
187 |
188 | // the peak load matches the rule
189 | avg := history.avg()
190 | if curVal >= avg*(100+ruleDiff)/100 {
191 | return true, ReasonDiff
192 | // fmt.Sprintf("curVal[%d] >= avg[%d]*(100+ruleDiff)/100", curVal, avg)
193 | }
194 | return false, ReasonCurlGreaterMin
195 | }
196 |
197 | func getBinaryFileName(filePath string, dumpType configureType, eventID string) string {
198 | suffix := time.Now().Format("20060102150405.000") + ".log"
199 | if len(eventID) == 0 {
200 | return filepath.Join(filePath, check2name[dumpType]+"."+suffix)
201 | }
202 |
203 | return filepath.Join(filePath, check2name[dumpType]+"."+eventID+"."+suffix)
204 | }
205 |
206 | // fix #89
207 | func getBinaryFileNameAndCreate(dump string, dumpType configureType, eventID string) (*os.File, string, error) {
208 | filePath := getBinaryFileName(dump, dumpType, eventID)
209 | f, err := os.OpenFile(filePath, defaultLoggerFlags, defaultLoggerPerm)
210 | if err != nil && os.IsNotExist(err) {
211 | if err = os.MkdirAll(dump, 0o755); err != nil {
212 | return nil, filePath, err
213 | }
214 | f, err = os.OpenFile(filePath, defaultLoggerFlags, defaultLoggerPerm)
215 | if err != nil {
216 | return nil, filePath, err
217 | }
218 | }
219 | return f, filePath, err
220 | }
221 |
222 | func writeFile(data bytes.Buffer, dumpType configureType, dumpOpts *DumpOptions, eventID string) (string, error) {
223 | var buf []byte
224 | if dumpOpts.DumpProfileType == textDump && !dumpOpts.DumpFullStack {
225 | switch dumpType {
226 | case mem, gcHeap, goroutine:
227 | buf = trimResultTop(data)
228 | case thread:
229 | buf = trimResultFront(data)
230 | default:
231 | buf = data.Bytes()
232 | }
233 | } else {
234 | buf = data.Bytes()
235 | }
236 |
237 | file, fileName, err := getBinaryFileNameAndCreate(dumpOpts.DumpPath, dumpType, eventID)
238 | if err != nil {
239 | return fileName, fmt.Errorf("pprof %v open file failed : %w", type2name[dumpType], err)
240 | }
241 | defer file.Close() //nolint:errcheck,gosec
242 |
243 | if _, err = file.Write(buf); err != nil {
244 | return fileName, fmt.Errorf("pprof %v write to file failed : %w", type2name[dumpType], err)
245 | }
246 | return fileName, nil
247 | }
248 |
--------------------------------------------------------------------------------