├── .gitignore ├── LICENSE ├── Makefile ├── README.md └── trace_irqoff.c /.gitignore: -------------------------------------------------------------------------------- 1 | # 2 | # NOTE! Don't add files that are generated in specific 3 | # subdirectories here. Add them in the ".gitignore" file 4 | # in that subdirectory instead. 5 | # 6 | # NOTE! Please use 'git ls-files -i --exclude-standard' 7 | # command after changing this file, to see if there are 8 | # any tracked files which get ignored after the change. 9 | # 10 | # Normal rules (sorted alphabetically) 11 | # 12 | .* 13 | *.a 14 | *.asn1.[ch] 15 | *.bin 16 | *.bz2 17 | *.c.[012]*.* 18 | *.dt.yaml 19 | *.dtb 20 | *.dtb.S 21 | *.dwo 22 | *.elf 23 | *.gcno 24 | *.gz 25 | *.i 26 | *.ko 27 | *.lex.c 28 | *.ll 29 | *.lst 30 | *.lz4 31 | *.lzma 32 | *.lzo 33 | *.mod 34 | *.mod.c 35 | *.ns_deps 36 | *.o 37 | *.o.* 38 | *.patch 39 | *.s 40 | *.so 41 | *.so.dbg 42 | *.su 43 | *.symtypes 44 | *.tab.[ch] 45 | *.tar 46 | *.xz 47 | Module.symvers 48 | modules.builtin 49 | modules.order 50 | 51 | # 52 | # Top-level generic files 53 | # 54 | /tags 55 | /TAGS 56 | /linux 57 | /vmlinux 58 | /vmlinux.32 59 | /vmlinux-gdb.py 60 | /vmlinuz 61 | /System.map 62 | /Module.markers 63 | /modules.builtin.modinfo 64 | 65 | # 66 | # RPM spec file (make rpm-pkg) 67 | # 68 | /*.spec 69 | 70 | # 71 | # Debian directory (make deb-pkg) 72 | # 73 | /debian/ 74 | 75 | # 76 | # Snap directory (make snap-pkg) 77 | # 78 | /snap/ 79 | 80 | # 81 | # tar directory (make tar*-pkg) 82 | # 83 | /tar-install/ 84 | 85 | # 86 | # We don't want to ignore the following even if they are dot-files 87 | # 88 | !.clang-format 89 | !.cocciconfig 90 | !.get_maintainer.ignore 91 | !.gitattributes 92 | !.gitignore 93 | !.mailmap 94 | 95 | # 96 | # Generated include files 97 | # 98 | /include/config/ 99 | /include/generated/ 100 | /include/ksym/ 101 | /arch/*/include/generated/ 102 | 103 | # stgit generated dirs 104 | patches-* 105 | 106 | # quilt's files 107 | 
patches 108 | series 109 | 110 | # cscope files 111 | cscope.* 112 | ncscope.* 113 | 114 | # gnu global files 115 | GPATH 116 | GRTAGS 117 | GSYMS 118 | GTAGS 119 | 120 | # id-utils files 121 | ID 122 | 123 | *.orig 124 | *~ 125 | \#*# 126 | 127 | # 128 | # Leavings from module signing 129 | # 130 | extra_certificates 131 | signing_key.pem 132 | signing_key.priv 133 | signing_key.x509 134 | x509.genkey 135 | 136 | # Kconfig presets 137 | /all.config 138 | /alldef.config 139 | /allmod.config 140 | /allno.config 141 | /allrandom.config 142 | /allyes.config 143 | 144 | # Kdevelop4 145 | *.kdev4 146 | 147 | # Clang's compilation database file 148 | /compile_commands.json 149 | 150 | # Python cache files 151 | *__pycache__* 152 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. 
Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 
55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. 
You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. 
But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. 
(This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. 
These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 
214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. 
If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | 294 | Copyright (C) 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 
305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | , 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 
340 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | obj-m += trace_irqoff.o 2 | 3 | KERNELDIR := /lib/modules/$(shell uname -r)/build 4 | PWD := $(shell pwd) 5 | all: 6 | $(MAKE) -C $(KERNELDIR) M=$(PWD) modules 7 | 8 | clean: 9 | rm -rf *.ko *.mod* *.o modules.* Module.symvers 10 | 11 | install: 12 | insmod trace_irqoff.ko 13 | 14 | remove: 15 | rmmod trace_irqoff 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Trace-irqoff 2 | 3 | ## 我们的需求是什么 4 | 5 | 在实际问题中,业务经常会遇到网络延迟高问题,这种问题分析下来,基本是如下几种可能原因: 6 | 7 | - 中断关闭时间太长 8 | - softirq 关闭时间太长 9 | 10 | 以上是我们根据经验猜测可能出现的原因,实际问题中,我们迫切地需要确定是否是以上原因导致了问题。如果是的话,具体是什么原因导致以上两种情况发生呢?因此,我们迫切需要定位具体的元凶,使其现形。所以,我们的需求是开发一个工具可以追踪和定位中断或者软中断关闭的时间。**这款工具现在已经开发完成,名为:Interrupts-off or softirqs-off latency tracer,简称 trace-irqoff。** 11 | 12 | ## 如何安装 13 | 14 | 安装 trace-irqoff 工具很简单,git clone 代码后执行如下命令即可安装。 15 | 16 | ```bash 17 | make -j8 18 | make install 19 | ``` 20 | 21 | ## 如何使用 22 | 23 | 安装 trace-irqoff 工具成功后,会创建如下 **/proc/trace_irqoff** 目录。 24 | 25 | ```bash 26 | root@n18-061-206:/proc/trace_irqoff# ls 27 | distribute enable sampling_period trace_latency 28 | ``` 29 | 30 | /proc/trace_irqoff 目录下存在 4 个文件,分别是:distribute, enable, sampling_period 和 trace_latency。工具安装后,默认是关闭状态,我们需要手动打开 trace。 31 | 32 | ##### 1. 打开 trace 33 | 34 | ```bash 35 | echo 1 > /proc/trace_irqoff/enable 36 | ``` 37 | 38 | ##### 2. 关闭 trace 39 | 40 | ```bash 41 | echo 0 > /proc/trace_irqoff/enable 42 | ``` 43 | 44 | ##### 3. 
设置 trace 阈值 45 | 46 | trace-irqoff 工具只会针对关闭中断或者软中断时间超过阈值的情况下记录堆栈信息。因此我们可以通过如下命令查看当前 trace 的阈值: 47 | 48 | ```bash 49 | cat /proc/trace_irqoff/trace_latency 50 | trace_irqoff_latency: 50ms 51 | hardirq: 52 | softirq: 53 | ``` 54 | 55 | 默认阈值是 50ms,如第 2 行所示。第 3 行输出 hardirq: 代表下面的栈是可能关闭中断超过阈值的栈。同理,第 4 行 softirq: 下面是软中断关闭时间超过阈值的栈。 56 | 57 | 如果需要修改阈值至 100ms 可通过如下命令(写入值单位是 ms): 58 | 59 | ```bash 60 | echo 100 > /proc/trace_irqoff/trace_latency 61 | ``` 62 | 63 | ##### 4. 清除栈信息 64 | 65 | 当然,如果需要清除 /proc/trace_irqoff 记录的栈信息,可以执行如下命令(不会修改阈值为 0): 66 | 67 | ```bash 68 | echo 0 > /proc/trace_irqoff/trace_latency 69 | ``` 70 | 71 | ##### 5. 查看中断关闭次数的统计信息 72 | 73 | 如果我们需要知道中断被关闭一定的时间的次数,可以通过如下命令获取统计信息。 74 | 75 | ```bash 76 | root@n18-061-206:/proc/trace_irqoff# cat distribute 77 | hardirq-off: 78 | msecs : count distribution 79 | 20 -> 39 : 1 |********** | 80 | 40 -> 79 : 0 | | 81 | 80 -> 159 : 4 |****************************************| 82 | 160 -> 319 : 2 |******************** | 83 | 320 -> 639 : 1 |********** | 84 | softirq-off: 85 | msecs : count distribution 86 | 20 -> 39 : 0 | | 87 | 40 -> 79 : 0 | | 88 | 80 -> 159 : 0 | | 89 | 160 -> 319 : 1 |****************************************| 90 | ``` 91 | 92 | > 在这个例子中,我们看到 hardirq 被关闭时间 x ∈ [80, 159] ms,次数 4 次。softirq 被关闭时间 x ∈ [160, 319] ms,次数 1 次。 93 | 94 | 如果没有任何信息输出,这说明没有任何地方关闭中断时间超过 20ms。 95 | 96 | ##### 6. 修改采样周期 97 | 98 | 从上面一节我们可以看到,中断关闭时间分布图最小粒度是 20ms。这是因为采样周期是 10ms。根据采样定理,大于等于 2 倍采样周期时间才能反映真实情况。如果需要提高统计粒度,可修改采样周期时间。例如修改采样周期为 1ms,可执行如下命令(必须在 tracer 关闭的情况下操作有效): 99 | 100 | ```bash 101 | # 单位 ms,可设置最小的采样周期是 1ms。 102 | echo 1 > /proc/trace_irqoff/sampling_period 103 | ``` 104 | 105 | ## 案例分析 106 | 107 | ##### 1. 
hardirq 关闭 108 | 109 | 我们使用如下示意测试程序,关闭中断 100ms。查看 trace_irqoff 文件内容。 110 | 111 | ```c 112 | static void disable_hardirq(unsigned long latency) 113 | { 114 | local_irq_disable(); 115 | mdelay(latency); 116 | local_irq_enable(); 117 | } 118 | ``` 119 | 120 | 通过模块测试以上代码,然后查看栈信息。 121 | 122 | ```bash 123 | cat /proc/trace_irqoff/trace_latency 124 | trace_irqoff_latency: 50ms 125 | hardirq: 126 | cpu: 17 127 | COMMAND: bash PID: 22840 LATENCY: 107ms 128 | trace_irqoff_hrtimer_handler+0x39/0x99 [trace_irqoff] 129 | __hrtimer_run_queues+0xfa/0x270 130 | hrtimer_interrupt+0x101/0x240 131 | smp_apic_timer_interrupt+0x5e/0x120 132 | apic_timer_interrupt+0xf/0x20 133 | disable_hardirq+0x5b/0x70 134 | proc_reg_write+0x36/0x60 135 | __vfs_write+0x33/0x190 136 | vfs_write+0xb0/0x190 137 | ksys_write+0x52/0xc0 138 | do_syscall_64+0x4f/0xe0 139 | entry_SYSCALL_64_after_hwframe+0x44/0xa9 140 | softirq: 141 | ``` 142 | 143 | 我们可以看到 hardirq 一栏记录 cpu17 执行 bash 命令,关闭中断 107ms(误差 10ms 之内)。其栈信息对应 disable_hardirq() 函数。第 18 行 softirq 一栏没有信息,说明没有记录 softirq 被关闭的栈。 144 | 145 | ##### 2. 
softirq 关闭 146 | 147 | 我们使用如下示意测试程序,关闭 softirq 100ms。查看 trace_irqoff 文件内容。 148 | 149 | ```c 150 | static void disable_softirq(unsigned long latency) 151 | { 152 | local_bh_disable(); 153 | mdelay(latency); 154 | local_bh_enable(); 155 | } 156 | ``` 157 | 158 | 通过模块测试以上代码,然后查看栈信息。 159 | 160 | ```bash 161 | cat /proc/trace_irqoff/trace_latency 162 | trace_irqoff_latency: 50ms 163 | hardirq: 164 | softirq: 165 | cpu: 17 166 | COMMAND: bash PID: 22840 LATENCY: 51+ms 167 | trace_irqoff_hrtimer_handler+0x97/0x99 [trace_irqoff] 168 | __hrtimer_run_queues+0xfa/0x270 169 | hrtimer_interrupt+0x101/0x240 170 | smp_apic_timer_interrupt+0x5e/0x120 171 | apic_timer_interrupt+0xf/0x20 172 | delay_tsc+0x3c/0x50 173 | disable_softirq+0x4b/0x80 174 | proc_reg_write+0x36/0x60 175 | __vfs_write+0x33/0x190 176 | vfs_write+0xb0/0x190 177 | ksys_write+0x52/0xc0 178 | do_syscall_64+0x4f/0xe0 179 | entry_SYSCALL_64_after_hwframe+0x44/0xa9 180 | 181 | COMMAND: bash PID: 22840 LATENCY: 106ms 182 | trace_irqoff_timer_handler+0x3a/0x60 [trace_irqoff] 183 | call_timer_fn+0x29/0x120 184 | run_timer_softirq+0x16c/0x400 185 | __do_softirq+0x108/0x2b8 186 | do_softirq_own_stack+0x2a/0x40 187 | do_softirq.part.21+0x56/0x60 188 | __local_bh_enable_ip+0x60/0x70 189 | disable_softirq+0x62/0x80 190 | proc_reg_write+0x36/0x60 191 | __vfs_write+0x33/0x190 192 | vfs_write+0xb0/0x190 193 | ksys_write+0x52/0xc0 194 | do_syscall_64+0x4f/0xe0 195 | entry_SYSCALL_64_after_hwframe+0x44/0xa9 196 | ``` 197 | 198 | 针对 softirq 关闭情况,有 2 个栈与之对应。我们注意到第 7 行的函数名称和第 22 行的函数名称是不一样的。第 7 行的栈是硬件中断 handler 捕捉到软中断关闭,第 22 行是软中断 handler 捕捉到软中断被关闭。正常情况下,我们以第 22 行开始的栈为分析目标即可。当第 22 行的栈是无效的时候,可以看第 7 行的栈。这里注意:第 7 行的栈的 latency 提示信息 **51+ms** 是阈值信息,并非实际 latency(所以我在后面添加一个'+'字符,表示 latency 大于 51ms)。实际的 latency 是第 22 行的栈显示的 106ms。下面就看下为什么 2 个栈是有必要的。 199 | 200 | ##### 3. 
ksoftirqd 延迟 201 | 202 | 我们看一个曾经处理的一个实际问题。 203 | 204 | ```bash 205 | cat /proc/trace_irqoff/trace_latency 206 | trace_irqoff_latency: 300ms 207 | hardirq: 208 | softirq: 209 | cpu: 4 210 | COMMAND: lxcfs PID: 4058797 LATENCY: 303+ms 211 | trace_irqoff_record+0x12b/0x1b0 [trace_irqoff] 212 | trace_irqoff_hrtimer_handler+0x97/0x99 [trace_irqoff] 213 | __hrtimer_run_queues+0xdc/0x220 214 | hrtimer_interrupt+0xa6/0x1f0 215 | smp_apic_timer_interrupt+0x62/0x120 216 | apic_timer_interrupt+0x7d/0x90 217 | memcg_sum_events.isra.26+0x3f/0x60 218 | memcg_stat_show+0x323/0x460 219 | seq_read+0x11f/0x3f0 220 | __vfs_read+0x33/0x160 221 | vfs_read+0x91/0x130 222 | SyS_read+0x52/0xc0 223 | do_syscall_64+0x68/0x100 224 | entry_SYSCALL_64_after_hwframe+0x3d/0xa2 225 | 226 | COMMAND: ksoftirqd/4 PID: 34 LATENCY: 409ms 227 | trace_irqoff_record+0x12b/0x1b0 [trace_irqoff] 228 | trace_irqoff_timer_handler+0x3a/0x60 [trace_irqoff] 229 | call_timer_fn+0x2e/0x130 230 | run_timer_softirq+0x1d4/0x420 231 | __do_softirq+0x108/0x2a9 232 | run_ksoftirqd+0x1e/0x40 233 | smpboot_thread_fn+0xfe/0x150 234 | kthread+0xfc/0x130 235 | ret_from_fork+0x1f/0x30 236 | ``` 237 | 238 | 我们看到下面的进程 ksoftirqd/4 的栈,延迟时间是 409ms。ksoftirqd 进程是 kernel 中处理 softirq 的进程。因此这段栈对我们是没有意义的,因为元凶已经错过了。所以此时,我们可以借鉴上面的栈信息,我们看到当 softirq 被延迟 303ms 的时候,当前 CPU 正在执行的进程是 lxcfs。并且栈是 memory cgroup 相关。因此,我们基本可以判断 lxcfs 进程执行时间过长,由于 kernel 态不支持抢占,因此导致 ksoftirqd 进程没有机会得到运行。 239 | 240 | -------------------------------------------------------------------------------- /trace_irqoff.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* 3 | * Trace Irqsoff 4 | * 5 | * Copyright (C) 2020 Bytedance, Inc., Muchun Song 6 | * 7 | * The main authors of the trace irqsoff code are: 8 | * 9 | * Muchun Song 10 | */ 11 | #define pr_fmt(fmt) "trace-irqoff: " fmt 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 
| #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) 30 | #include 31 | #else 32 | #include 33 | #endif 34 | /* Number of unsigned long slots that fit in SZ_1K bytes. */ 35 | #define MAX_TRACE_ENTRIES (SZ_1K / sizeof(unsigned long)) 36 | #define PER_TRACE_ENTRIES_AVERAGE (8 + 8) 37 | /* MAX_TRACE_ENTRIES divided by the assumed average number of entries per trace. */ 38 | #define MAX_STACE_TRACE_ENTRIES \ 39 | (MAX_TRACE_ENTRIES / PER_TRACE_ENTRIES_AVERAGE) 40 | /* Length of the latency_count[] array in struct stack_trace_metadata. */ 41 | #define MAX_LATENCY_RECORD 10 42 | /* Before 5.6 the generated ops table is struct file_operations; from 5.6 on it is struct proc_ops. */ 43 | #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 6, 0) 44 | #ifndef DEFINE_SHOW_ATTRIBUTE 45 | #define DEFINE_SHOW_ATTRIBUTE(__name) \ 46 | static int __name ## _open(struct inode *inode, struct file *file) \ 47 | { \ 48 | return single_open(file, __name ## _show, inode->i_private); \ 49 | } \ 50 | \ 51 | static const struct file_operations __name ## _fops = { \ 52 | .owner = THIS_MODULE, \ 53 | .open = __name ## _open, \ 54 | .read = seq_read, \ 55 | .llseek = seq_lseek, \ 56 | .release = single_release, \ 57 | } 58 | #endif /* DEFINE_SHOW_ATTRIBUTE */ 59 | #define IRQ_OFF_DEFINE_SHOW_ATTRIBUTE DEFINE_SHOW_ATTRIBUTE 60 | 61 | #else /* LINUX_VERSION_CODE */ 62 | #define IRQ_OFF_DEFINE_SHOW_ATTRIBUTE(__name) \ 63 | static int __name ## _open(struct inode *inode, struct file *file) \ 64 | { \ 65 | return single_open(file, __name ## _show, inode->i_private); \ 66 | } \ 67 | \ 68 | static const struct proc_ops __name ## _fops = { \ 69 | .proc_open = __name ## _open, \ 70 | .proc_read = seq_read, \ 71 | .proc_lseek = seq_lseek, \ 72 | .proc_release = single_release, \ 73 | } 74 | #endif /* LINUX_VERSION_CODE */ 75 | /* NOTE(review): presumably toggled through /proc/trace_irqoff/enable as the README describes - confirm against the proc handlers. */ 76 | static bool trace_enable; 77 | 78 | /** 79 | * Default sampling period is 10000000ns. The minimum value is 1000000ns. 80 | */ 81 | static u64 sampling_period = 10 * 1000 * 1000UL; 82 | 83 | /** 84 | * Latency threshold: per the README, a stack trace is recorded only when irqs or softirqs stay off longer than this. 85 | * Default is 50000000ns. 
86 | */ 87 | static u64 trace_irqoff_latency = 50 * 1000 * 1000UL; 88 | 89 | struct irqoff_trace { 90 | unsigned int nr_entries; 91 | unsigned long *entries; 92 | }; 93 | 94 | struct stack_trace_metadata { 95 | u64 last_timestamp; 96 | unsigned long nr_irqoff_trace; 97 | struct irqoff_trace trace[MAX_STACE_TRACE_ENTRIES]; 98 | unsigned long nr_entries; 99 | unsigned long entries[MAX_TRACE_ENTRIES]; 100 | unsigned long latency_count[MAX_LATENCY_RECORD]; 101 | 102 | /* Task command names*/ 103 | char comms[MAX_STACE_TRACE_ENTRIES][TASK_COMM_LEN]; 104 | 105 | /* Task pids*/ 106 | pid_t pids[MAX_STACE_TRACE_ENTRIES]; 107 | 108 | struct { 109 | u64 nsecs:63; 110 | u64 more:1; 111 | } latency[MAX_STACE_TRACE_ENTRIES]; 112 | }; 113 | 114 | struct per_cpu_stack_trace { 115 | struct timer_list timer; 116 | struct hrtimer hrtimer; 117 | struct stack_trace_metadata hardirq_trace; 118 | struct stack_trace_metadata softirq_trace; 119 | 120 | bool softirq_delayed; 121 | }; 122 | 123 | static struct per_cpu_stack_trace __percpu *cpu_stack_trace; 124 | 125 | #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 0) 126 | static void (*save_stack_trace_skip_hardirq)(struct pt_regs *regs, 127 | struct stack_trace *trace); 128 | 129 | static inline void stack_trace_skip_hardirq_init(void) 130 | { 131 | save_stack_trace_skip_hardirq = 132 | (void *)kallsyms_lookup_name("save_stack_trace_regs"); 133 | } 134 | 135 | static inline void store_stack_trace(struct pt_regs *regs, 136 | struct irqoff_trace *trace, 137 | unsigned long *entries, 138 | unsigned int max_entries, int skip) 139 | { 140 | struct stack_trace stack_trace; 141 | 142 | stack_trace.nr_entries = 0; 143 | stack_trace.max_entries = max_entries; 144 | stack_trace.entries = entries; 145 | stack_trace.skip = skip; 146 | 147 | if (regs && save_stack_trace_skip_hardirq) 148 | save_stack_trace_skip_hardirq(regs, &stack_trace); 149 | else 150 | save_stack_trace(&stack_trace); 151 | 152 | trace->entries = entries; 153 | trace->nr_entries = 
stack_trace.nr_entries; 154 | 155 | /* 156 | * Some daft arches put -1 at the end to indicate its a full trace. 157 | * 158 | * this is buggy anyway, since it takes a whole extra entry so a 159 | * complete trace that maxes out the entries provided will be reported 160 | * as incomplete, friggin useless . 161 | */ 162 | if (trace->nr_entries != 0 && 163 | trace->entries[trace->nr_entries - 1] == ULONG_MAX) 164 | trace->nr_entries--; 165 | } 166 | #else 167 | static unsigned int (*stack_trace_save_skip_hardirq)(struct pt_regs *regs, 168 | unsigned long *store, 169 | unsigned int size, 170 | unsigned int skipnr); 171 | 172 | #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 6, 0) 173 | static inline void stack_trace_skip_hardirq_init(void) 174 | { 175 | stack_trace_save_skip_hardirq = 176 | (void *)kallsyms_lookup_name("stack_trace_save_regs"); 177 | } 178 | #else /* LINUX_VERSION_CODE */ 179 | 180 | static int noop_pre_handler(struct kprobe *p, struct pt_regs *regs){ 181 | return 0; 182 | } 183 | 184 | /** 185 | * Since commit 0bd476e6c671 ("kallsyms: unexport kallsyms_lookup_name() 186 | * and kallsyms_on_each_symbol()"), kallsyms_lookup_name is unexported. 187 | * 188 | * We can only find the kallsyms_lookup_name's addr by using kprobes, then use 189 | * the unexported kallsyms_lookup_name to find symbols. 
190 | */ 191 | static void stack_trace_skip_hardirq_init(void) 192 | { 193 | int ret; 194 | struct kprobe kp; 195 | unsigned long (*kallsyms_lookup_name_fun)(const char *name); 196 | 197 | 198 | ret = -1; 199 | kp.symbol_name = "kallsyms_lookup_name"; 200 | kp.pre_handler = noop_pre_handler; 201 | stack_trace_save_skip_hardirq = NULL; 202 | 203 | ret = register_kprobe(&kp); 204 | if (ret < 0) { 205 | return; 206 | } 207 | 208 | kallsyms_lookup_name_fun = (void*)kp.addr; 209 | unregister_kprobe(&kp); 210 | 211 | stack_trace_save_skip_hardirq = 212 | (void *)kallsyms_lookup_name_fun("stack_trace_save_regs"); 213 | } 214 | #endif /* LINUX_VERSION_CODE */ 215 | 216 | static inline void store_stack_trace(struct pt_regs *regs, 217 | struct irqoff_trace *trace, 218 | unsigned long *entries, 219 | unsigned int max_entries, int skip) 220 | { 221 | trace->entries = entries; 222 | if (regs && stack_trace_save_skip_hardirq) 223 | trace->nr_entries = stack_trace_save_skip_hardirq(regs, entries, 224 | max_entries, 225 | skip); 226 | else 227 | trace->nr_entries = stack_trace_save(entries, max_entries, 228 | skip); 229 | } 230 | #endif 231 | 232 | /** 233 | * Note: Must be called with irq disabled. 234 | */ 235 | static bool save_trace(struct pt_regs *regs, bool hardirq, u64 latency) 236 | { 237 | unsigned long nr_entries, nr_irqoff_trace; 238 | struct irqoff_trace *trace; 239 | struct stack_trace_metadata *stack_trace; 240 | 241 | stack_trace = hardirq ? 
this_cpu_ptr(&cpu_stack_trace->hardirq_trace) : 242 | this_cpu_ptr(&cpu_stack_trace->softirq_trace); 243 | 244 | nr_irqoff_trace = stack_trace->nr_irqoff_trace; 245 | if (unlikely(nr_irqoff_trace >= MAX_STACE_TRACE_ENTRIES)) 246 | return false; 247 | 248 | nr_entries = stack_trace->nr_entries; 249 | if (unlikely(nr_entries >= MAX_TRACE_ENTRIES - 1)) 250 | return false; 251 | 252 | strlcpy(stack_trace->comms[nr_irqoff_trace], current->comm, 253 | TASK_COMM_LEN); 254 | stack_trace->pids[nr_irqoff_trace] = current->pid; 255 | stack_trace->latency[nr_irqoff_trace].nsecs = latency; 256 | stack_trace->latency[nr_irqoff_trace].more = !hardirq && regs; 257 | 258 | trace = stack_trace->trace + nr_irqoff_trace; 259 | store_stack_trace(regs, trace, stack_trace->entries + nr_entries, 260 | MAX_TRACE_ENTRIES - nr_entries, 0); 261 | stack_trace->nr_entries += trace->nr_entries; 262 | 263 | /** 264 | * Ensure that the initialisation of @trace is complete before we 265 | * update the @nr_irqoff_trace. 266 | */ 267 | smp_store_release(&stack_trace->nr_irqoff_trace, nr_irqoff_trace + 1); 268 | 269 | if (unlikely(stack_trace->nr_entries >= MAX_TRACE_ENTRIES - 1)) { 270 | pr_info("BUG: MAX_TRACE_ENTRIES too low!"); 271 | 272 | return false; 273 | } 274 | 275 | return true; 276 | } 277 | 278 | static bool trace_irqoff_record(u64 delta, bool hardirq, bool skip) 279 | { 280 | int index = 0; 281 | u64 throttle = sampling_period << 1; 282 | u64 delta_old; 283 | 284 | if (delta < throttle) 285 | return false; 286 | 287 | delta -= sampling_period; 288 | delta_old = delta; 289 | delta >>= 1; 290 | while (delta > sampling_period) { 291 | index++; 292 | delta >>= 1; 293 | } 294 | 295 | if (unlikely(index >= MAX_LATENCY_RECORD)) 296 | index = MAX_LATENCY_RECORD - 1; 297 | 298 | if (hardirq) 299 | __this_cpu_inc(cpu_stack_trace->hardirq_trace.latency_count[index]); 300 | else if (!skip) 301 | __this_cpu_inc(cpu_stack_trace->softirq_trace.latency_count[index]); 302 | 303 | if (unlikely(delta_old 
>= trace_irqoff_latency)) 304 | save_trace(skip ? get_irq_regs() : NULL, hardirq, delta_old); 305 | 306 | return true; 307 | } 308 | 309 | static enum hrtimer_restart trace_irqoff_hrtimer_handler(struct hrtimer *hrtimer) 310 | { 311 | u64 now = local_clock(), delta; 312 | 313 | delta = now - __this_cpu_read(cpu_stack_trace->hardirq_trace.last_timestamp); 314 | __this_cpu_write(cpu_stack_trace->hardirq_trace.last_timestamp, now); 315 | 316 | if (trace_irqoff_record(delta, true, true)) { 317 | __this_cpu_write(cpu_stack_trace->softirq_trace.last_timestamp, 318 | now); 319 | } else if (!__this_cpu_read(cpu_stack_trace->softirq_delayed)) { 320 | u64 delta_soft; 321 | 322 | delta_soft = now - 323 | __this_cpu_read(cpu_stack_trace->softirq_trace.last_timestamp); 324 | 325 | if (unlikely(delta_soft >= trace_irqoff_latency + sampling_period)) { 326 | __this_cpu_write(cpu_stack_trace->softirq_delayed, true); 327 | trace_irqoff_record(delta_soft, false, true); 328 | } 329 | } 330 | 331 | hrtimer_forward_now(hrtimer, ns_to_ktime(sampling_period)); 332 | 333 | return HRTIMER_RESTART; 334 | } 335 | 336 | #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0) 337 | static void trace_irqoff_timer_handler(unsigned long data) 338 | #else 339 | static void trace_irqoff_timer_handler(struct timer_list *timer) 340 | #endif 341 | { 342 | u64 now = local_clock(), delta; 343 | #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0) 344 | struct timer_list *timer = (struct timer_list *)data; 345 | #endif 346 | 347 | delta = now - __this_cpu_read(cpu_stack_trace->softirq_trace.last_timestamp); 348 | __this_cpu_write(cpu_stack_trace->softirq_trace.last_timestamp, now); 349 | 350 | __this_cpu_write(cpu_stack_trace->softirq_delayed, false); 351 | 352 | trace_irqoff_record(delta, false, false); 353 | 354 | #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0) 355 | mod_timer_pinned(timer, 356 | jiffies + msecs_to_jiffies(sampling_period / 1000000UL)); 357 | #else 358 | mod_timer(timer, 359 | jiffies + 
msecs_to_jiffies(sampling_period / 1000000UL)); 360 | #endif 361 | } 362 | 363 | static void smp_clear_stack_trace(void *info) 364 | { 365 | int i; 366 | struct per_cpu_stack_trace *stack_trace = info; 367 | 368 | stack_trace->hardirq_trace.nr_entries = 0; 369 | stack_trace->hardirq_trace.nr_irqoff_trace = 0; 370 | stack_trace->softirq_trace.nr_entries = 0; 371 | stack_trace->softirq_trace.nr_irqoff_trace = 0; 372 | 373 | for (i = 0; i < MAX_LATENCY_RECORD; i++) { 374 | stack_trace->hardirq_trace.latency_count[i] = 0; 375 | stack_trace->softirq_trace.latency_count[i] = 0; 376 | } 377 | } 378 | 379 | static void smp_timers_start(void *info) 380 | { 381 | u64 now = local_clock(); 382 | struct per_cpu_stack_trace *stack_trace = info; 383 | struct hrtimer *hrtimer = &stack_trace->hrtimer; 384 | struct timer_list *timer = &stack_trace->timer; 385 | 386 | stack_trace->hardirq_trace.last_timestamp = now; 387 | stack_trace->softirq_trace.last_timestamp = now; 388 | 389 | hrtimer_start_range_ns(hrtimer, ns_to_ktime(sampling_period), 390 | 0, HRTIMER_MODE_REL_PINNED); 391 | 392 | timer->expires = jiffies + msecs_to_jiffies(sampling_period / 1000000UL); 393 | add_timer_on(timer, smp_processor_id()); 394 | } 395 | 396 | #define NUMBER_CHARACTER 40 397 | 398 | static bool histogram_show(struct seq_file *m, const char *header, 399 | const unsigned long *hist, unsigned long size, 400 | unsigned int factor) 401 | { 402 | int i, zero_index = 0; 403 | unsigned long count_max = 0; 404 | 405 | for (i = 0; i < size; i++) { 406 | unsigned long count = hist[i]; 407 | 408 | if (count > count_max) 409 | count_max = count; 410 | 411 | if (count) 412 | zero_index = i + 1; 413 | } 414 | if (count_max == 0) 415 | return false; 416 | 417 | /* print header */ 418 | if (header) 419 | seq_printf(m, "%s\n", header); 420 | seq_printf(m, "%*c%s%*c : %-9s %s\n", 9, ' ', "msecs", 10, ' ', "count", 421 | "distribution"); 422 | 423 | for (i = 0; i < zero_index; i++) { 424 | int num; 425 | int scale_min, 
scale_max; 426 | char str[NUMBER_CHARACTER + 1]; 427 | 428 | scale_max = 2 << i; 429 | scale_min = unlikely(i == 0) ? 1 : scale_max / 2; 430 | 431 | num = hist[i] * NUMBER_CHARACTER / count_max; 432 | memset(str, '*', num); 433 | memset(str + num, ' ', NUMBER_CHARACTER - num); 434 | str[NUMBER_CHARACTER] = '\0'; 435 | 436 | seq_printf(m, "%10d -> %-10d : %-8lu |%s|\n", 437 | scale_min * factor, scale_max * factor - 1, 438 | hist[i], str); 439 | } 440 | 441 | return true; 442 | } 443 | 444 | static void distribute_show_one(struct seq_file *m, void *v, bool hardirq) 445 | { 446 | int cpu; 447 | unsigned long latency_count[MAX_LATENCY_RECORD] = { 0 }; 448 | 449 | for_each_online_cpu(cpu) { 450 | int i; 451 | unsigned long *count; 452 | 453 | count = hardirq ? 454 | per_cpu_ptr(cpu_stack_trace->hardirq_trace.latency_count, cpu) : 455 | per_cpu_ptr(cpu_stack_trace->softirq_trace.latency_count, cpu); 456 | 457 | for (i = 0; i < MAX_LATENCY_RECORD; i++) 458 | latency_count[i] += count[i]; 459 | } 460 | 461 | histogram_show(m, hardirq ? 
"hardirq-off:" : "softirq-off:", 462 | latency_count, MAX_LATENCY_RECORD, 463 | sampling_period / (1000 * 1000UL)); 464 | } 465 | 466 | static int distribute_show(struct seq_file *m, void *v) 467 | { 468 | distribute_show_one(m, v, true); 469 | distribute_show_one(m, v, false); 470 | 471 | return 0; 472 | } 473 | 474 | IRQ_OFF_DEFINE_SHOW_ATTRIBUTE(distribute); 475 | 476 | static void seq_print_stack_trace(struct seq_file *m, struct irqoff_trace *trace) 477 | { 478 | int i; 479 | 480 | if (WARN_ON(!trace->entries)) 481 | return; 482 | 483 | for (i = 0; i < trace->nr_entries; i++) 484 | seq_printf(m, "%*c%pS\n", 5, ' ', (void *)trace->entries[i]); 485 | } 486 | 487 | static ssize_t trace_latency_write(struct file *file, const char __user *buf, 488 | size_t count, loff_t *ppos) 489 | { 490 | unsigned long latency; 491 | 492 | if (kstrtoul_from_user(buf, count, 0, &latency)) 493 | return -EINVAL; 494 | 495 | if (latency == 0) { 496 | int cpu; 497 | 498 | for_each_online_cpu(cpu) 499 | smp_call_function_single(cpu, smp_clear_stack_trace, 500 | per_cpu_ptr(cpu_stack_trace, cpu), 501 | true); 502 | return count; 503 | } else if (latency < (sampling_period << 1) / (1000 * 1000UL)) 504 | return -EINVAL; 505 | 506 | trace_irqoff_latency = latency * 1000 * 1000UL; 507 | 508 | return count; 509 | } 510 | 511 | static void trace_latency_show_one(struct seq_file *m, void *v, bool hardirq) 512 | { 513 | int cpu; 514 | 515 | for_each_online_cpu(cpu) { 516 | int i; 517 | unsigned long nr_irqoff_trace; 518 | struct stack_trace_metadata *stack_trace; 519 | 520 | stack_trace = hardirq ? 521 | per_cpu_ptr(&cpu_stack_trace->hardirq_trace, cpu) : 522 | per_cpu_ptr(&cpu_stack_trace->softirq_trace, cpu); 523 | 524 | /** 525 | * Paired with smp_store_release() in the save_trace(). 
526 | */ 527 | nr_irqoff_trace = smp_load_acquire(&stack_trace->nr_irqoff_trace); 528 | 529 | if (!nr_irqoff_trace) 530 | continue; 531 | 532 | seq_printf(m, " cpu: %d\n", cpu); 533 | 534 | for (i = 0; i < nr_irqoff_trace; i++) { 535 | struct irqoff_trace *trace = stack_trace->trace + i; 536 | 537 | seq_printf(m, "%*cCOMMAND: %s PID: %d LATENCY: %lu%s\n", 538 | 5, ' ', stack_trace->comms[i], 539 | stack_trace->pids[i], 540 | stack_trace->latency[i].nsecs / (1000 * 1000UL), 541 | stack_trace->latency[i].more ? "+ms" : "ms"); 542 | seq_print_stack_trace(m, trace); 543 | seq_putc(m, '\n'); 544 | 545 | cond_resched(); 546 | } 547 | } 548 | } 549 | 550 | static int trace_latency_show(struct seq_file *m, void *v) 551 | { 552 | seq_printf(m, "trace_irqoff_latency: %llums\n\n", 553 | trace_irqoff_latency / (1000 * 1000UL)); 554 | 555 | seq_puts(m, " hardirq:\n"); 556 | trace_latency_show_one(m, v, true); 557 | 558 | seq_putc(m, '\n'); 559 | 560 | seq_puts(m, " softirq:\n"); 561 | trace_latency_show_one(m, v, false); 562 | 563 | return 0; 564 | } 565 | 566 | static int trace_latency_open(struct inode *inode, struct file *file) 567 | { 568 | return single_open(file, trace_latency_show, inode->i_private); 569 | } 570 | 571 | #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 6, 0) 572 | static const struct file_operations trace_latency_fops = { 573 | .owner = THIS_MODULE, 574 | .open = trace_latency_open, 575 | .read = seq_read, 576 | .write = trace_latency_write, 577 | .llseek = seq_lseek, 578 | .release = single_release, 579 | }; 580 | #else 581 | static const struct proc_ops trace_latency_fops = { 582 | .proc_open = trace_latency_open, 583 | .proc_read = seq_read, 584 | .proc_write = trace_latency_write, 585 | .proc_lseek = seq_lseek, 586 | .proc_release = single_release, 587 | }; 588 | #endif 589 | 590 | static int enable_show(struct seq_file *m, void *ptr) 591 | { 592 | seq_printf(m, "%s\n", trace_enable ? 
"enabled" : "disabled"); 593 | 594 | return 0; 595 | } 596 | 597 | static int enable_open(struct inode *inode, struct file *file) 598 | { 599 | return single_open(file, enable_show, inode->i_private); 600 | } 601 | 602 | static void trace_irqoff_start_timers(void) 603 | { 604 | int cpu; 605 | 606 | for_each_online_cpu(cpu) { 607 | struct hrtimer *hrtimer; 608 | struct timer_list *timer; 609 | 610 | hrtimer = per_cpu_ptr(&cpu_stack_trace->hrtimer, cpu); 611 | hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_PINNED); 612 | hrtimer->function = trace_irqoff_hrtimer_handler; 613 | 614 | timer = per_cpu_ptr(&cpu_stack_trace->timer, cpu); 615 | #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0) 616 | __setup_timer(timer, trace_irqoff_timer_handler, 617 | (unsigned long)timer, TIMER_IRQSAFE); 618 | #elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0) 619 | timer->flags = TIMER_PINNED | TIMER_IRQSAFE; 620 | setup_timer(timer, trace_irqoff_timer_handler, 621 | (unsigned long)timer); 622 | #else 623 | timer_setup(timer, trace_irqoff_timer_handler, 624 | TIMER_PINNED | TIMER_IRQSAFE); 625 | #endif 626 | 627 | smp_call_function_single(cpu, smp_timers_start, 628 | per_cpu_ptr(cpu_stack_trace, cpu), 629 | true); 630 | } 631 | } 632 | 633 | static void trace_irqoff_cancel_timers(void) 634 | { 635 | int cpu; 636 | 637 | for_each_online_cpu(cpu) { 638 | struct hrtimer *hrtimer; 639 | struct timer_list *timer; 640 | 641 | hrtimer = per_cpu_ptr(&cpu_stack_trace->hrtimer, cpu); 642 | hrtimer_cancel(hrtimer); 643 | 644 | timer = per_cpu_ptr(&cpu_stack_trace->timer, cpu); 645 | del_timer_sync(timer); 646 | } 647 | } 648 | 649 | #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) 650 | #include 651 | 652 | static int kstrtobool_from_user(const char __user *s, size_t count, bool *res) 653 | { 654 | /* Longest string needed to differentiate, newline, terminator */ 655 | char buf[4]; 656 | 657 | count = min(count, sizeof(buf) - 1); 658 | if (copy_from_user(buf, s, count)) 659 | return -EFAULT; 
660 | buf[count] = '\0'; 661 | return strtobool(buf, res); 662 | } 663 | #endif 664 | 665 | static ssize_t enable_write(struct file *file, const char __user *buf, 666 | size_t count, loff_t *ppos) 667 | { 668 | bool enable; 669 | 670 | if (kstrtobool_from_user(buf, count, &enable)) 671 | return -EINVAL; 672 | 673 | if (!!enable == !!trace_enable) 674 | return count; 675 | 676 | if (enable) 677 | trace_irqoff_start_timers(); 678 | else 679 | trace_irqoff_cancel_timers(); 680 | 681 | trace_enable = enable; 682 | 683 | return count; 684 | } 685 | 686 | #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 6, 0) 687 | static const struct file_operations enable_fops = { 688 | .open = enable_open, 689 | .read = seq_read, 690 | .write = enable_write, 691 | .llseek = seq_lseek, 692 | .release = single_release, 693 | }; 694 | #else 695 | static const struct proc_ops enable_fops = { 696 | .proc_open = enable_open, 697 | .proc_read = seq_read, 698 | .proc_write = enable_write, 699 | .proc_lseek = seq_lseek, 700 | .proc_release = single_release, 701 | }; 702 | #endif 703 | 704 | static int sampling_period_show(struct seq_file *m, void *ptr) 705 | { 706 | seq_printf(m, "%llums\n", sampling_period / (1000 * 1000UL)); 707 | 708 | return 0; 709 | } 710 | 711 | static int sampling_period_open(struct inode *inode, struct file *file) 712 | { 713 | return single_open(file, sampling_period_show, inode->i_private); 714 | } 715 | 716 | static ssize_t sampling_period_write(struct file *file, const char __user *buf, 717 | size_t count, loff_t *ppos) 718 | { 719 | unsigned long period; 720 | 721 | if (trace_enable) 722 | return -EINVAL; 723 | 724 | if (kstrtoul_from_user(buf, count, 0, &period)) 725 | return -EINVAL; 726 | 727 | period *= 1000 * 1000UL; 728 | if (period > (trace_irqoff_latency >> 1)) 729 | trace_irqoff_latency = period << 1; 730 | 731 | sampling_period = period; 732 | 733 | return count; 734 | } 735 | 736 | #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 6, 0) 737 | static const struct 
file_operations sampling_period_fops = { 738 | .open = sampling_period_open, 739 | .read = seq_read, 740 | .write = sampling_period_write, 741 | .llseek = seq_lseek, 742 | .release = single_release, 743 | }; 744 | #else 745 | static const struct proc_ops sampling_period_fops = { 746 | .proc_open = sampling_period_open, 747 | .proc_read = seq_read, 748 | .proc_write = sampling_period_write, 749 | .proc_lseek = seq_lseek, 750 | .proc_release = single_release, 751 | }; 752 | #endif 753 | 754 | static int __init trace_irqoff_init(void) 755 | { 756 | struct proc_dir_entry *parent_dir; 757 | 758 | cpu_stack_trace = alloc_percpu(struct per_cpu_stack_trace); 759 | if (!cpu_stack_trace) 760 | return -ENOMEM; 761 | 762 | stack_trace_skip_hardirq_init(); 763 | 764 | parent_dir = proc_mkdir("trace_irqoff", NULL); 765 | if (!parent_dir) 766 | goto free_percpu; 767 | 768 | if (!proc_create("distribute", S_IRUSR, parent_dir, &distribute_fops)) 769 | goto remove_proc; 770 | 771 | if (!proc_create("trace_latency", S_IRUSR | S_IWUSR, parent_dir, 772 | &trace_latency_fops)) 773 | goto remove_proc; 774 | 775 | if (!proc_create("enable", S_IRUSR | S_IWUSR, parent_dir, &enable_fops)) 776 | goto remove_proc; 777 | 778 | if (!proc_create("sampling_period", S_IRUSR | S_IWUSR, parent_dir, 779 | &sampling_period_fops)) 780 | goto remove_proc; 781 | 782 | return 0; 783 | 784 | remove_proc: 785 | remove_proc_subtree("trace_irqoff", NULL); 786 | free_percpu: 787 | free_percpu(cpu_stack_trace); 788 | 789 | return -ENOMEM; 790 | } 791 | 792 | static void __exit trace_irqoff_exit(void) 793 | { 794 | if (trace_enable) 795 | trace_irqoff_cancel_timers(); 796 | remove_proc_subtree("trace_irqoff", NULL); 797 | free_percpu(cpu_stack_trace); 798 | } 799 | 800 | module_init(trace_irqoff_init); 801 | module_exit(trace_irqoff_exit); 802 | MODULE_LICENSE("GPL v2"); 803 | MODULE_AUTHOR("Muchun Song "); 804 | --------------------------------------------------------------------------------