├── .gitmodules ├── CMakeLists.txt ├── LICENSE ├── README.md ├── attic └── tsc.h ├── bench_syscalls.cc ├── doc ├── osjitter-atom-C3758-half-hz.txt ├── osjitter-atom-C3758-hz.txt ├── pingpong-atom-C3758-half-hz.txt ├── pingpong-atom-C3758-write-early-enabled.txt └── pingpong-atom-C3758.txt ├── helper ├── bench2tidy.py └── bench_playbook.py ├── makefile ├── osjitter.c ├── pingpong.c ├── ptp-clock-future.h ├── ptp-clock-offset.c ├── tsc.h ├── tuned ├── gs-isol-cpus-half-hz │ ├── tuned.conf │ └── vars.conf ├── gs-isol-cpus-hz │ ├── tuned.conf │ └── vars.conf ├── gs-isol-cpus │ ├── tuned.conf │ └── vars.conf └── gs-latency │ └── tuned.conf ├── util.c └── util.h /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "benchmark"] 2 | path = benchmark 3 | url = https://github.com/google/benchmark.git 4 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.1) 2 | project(bench_syscalls CXX) 3 | 4 | 5 | add_subdirectory(benchmark) 6 | 7 | add_executable(bench_syscalls 8 | bench_syscalls.cc 9 | ) 10 | target_link_libraries(bench_syscalls benchmark::benchmark) 11 | 12 | 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. 
By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. 
For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 
83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 
117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 
146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 
180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 
216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 
246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. 
If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 
309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 
336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 
360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. 
If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 
428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | 635 | Copyright (C) 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see . 649 | 650 | Also add information on how to contact you by electronic and paper mail. 
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | Copyright (C) 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | . 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | . 675 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This repository contains OSjitter, Pingpong and other 2 | latency/performance related utilities. 3 | 4 | OSjitter is a tool for measuring how much 5 | the operating system interrupts programs. Such interruptions 6 | increase the latency of a program while the variation in latency 7 | is called jitter. 8 | 9 | This tool can be used to quickly measure a lower bound for the 10 | latency of a given system configuration. Note that the OS jitter 11 | depends on the kind of load a real-time program is applying to a 12 | system. 
Thus, one still needs to run a domain-specific 13 | test suite against the real-time program of interest after a tool like 14 | OSjitter shows good results. 15 | 16 | The Pingpong utility measures the overhead of several thread 17 | notification mechanisms such as spinning on an atomic variable 18 | (with/without pauses), POSIX condition variables, semaphores, 19 | pipes and raw Linux futexes. 20 | 21 | The ptp-clock-offset utility is a small program for checking 22 | the availability of different PTP offset ioctls and how they 23 | perform. Rule of thumb: using any PTP offset ioctl is better than 24 | having to use `clock_gettime()` and smaller delays are better. 25 | 26 | There is also a microbenchmark (`bench_syscalls.cc`) that measures 27 | some (seemingly) low-overhead syscalls in order to measure the 28 | userspace to kernelspace mode-switch costs. See also a [related 29 | blog post](https://gms.tf/on-the-costs-of-syscalls.html) for some results. 30 | 31 | 32 | 2019, Georg Sauthoff , GPLv3+ 33 | 34 | ## Example Session 35 | 36 | Check out the help: 37 | 38 | $ ./osjitter -h 39 | 40 | Isolating the last 3 cores on an 8-core system: 41 | 42 | $ cat /proc/cmdline 43 | [..]
isolcpus=5-7 nohz=on nohz_full=5-7 rcu_nocbs=5-7 rcu_nocb_poll \ 44 | nowatchdog mce=ignore_ce acpi_irq_nobalance pcie_aspm=off tsc=reliable 45 | 46 | This system is a Supermicro one (running Fedora 29) with an Atom CPU: 47 | 48 | $ cat /proc/cpuinfo | grep model' name' | head -n 1 49 | model name : Intel(R) Atom(TM) CPU C3758 @ 2.20GHz 50 | 51 | First OSjitter run: 52 | 53 | $ ./osjitter -t 60 54 | CPU TSC_khz #intr #delta ovfl_ns invol_ctx sum_intr_ns iratio rt_s loop_ns median_ns p20_ns p80_ns p90_ns p99_ns p99.9_ns max_ns mad_ns 55 | 0 2200000 60240 60240 0 8065 283228653 0.005 60 22 3151 2989 4354 6047 7218 443376 9380037 220 56 | 1 2200000 60192 60192 0 9809 216975033 0.004 60 22 2710 2339 3740 5314 6322 11774 4614206 432 57 | 2 2200000 60199 60199 0 5942 180783353 0.003 60 22 2424 2219 3399 4847 7888 14611 1465586 223 58 | 3 2200000 60193 60193 0 5465 171929486 0.003 60 22 2426 2236 3087 4246 6388 11487 592769 187 59 | 4 2200000 60320 60320 0 6173 212338516 0.004 60 22 2548 2358 3468 5005 6280 40044 2262400 211 60 | 5 2200000 156 156 0 1 576392 0.000 60 22 3681 2801 4044 4388 11667 12138 12286 428 61 | 6 2200000 156 156 0 1 581260 0.000 60 22 3565 2788 3964 4270 12278 20279 28125 451 62 | 7 2200000 126 126 0 1 450470 0.000 60 22 3703 2467 4003 4205 9163 11859 12198 352 63 | 64 | => The threads on the isolated CPUs are much less interrupted than the 65 | other ones.
66 | 67 | Move all interrupts away from the isolated CPUs: 68 | 69 | # tuna -q '*' -c 0-4 -m -x 70 | 71 | OSjitter: 72 | 73 | $ ./osjitter -t 60 74 | CPU TSC_khz #intr #delta ovfl_ns invol_ctx sum_intr_ns iratio rt_s loop_ns median_ns p20_ns p80_ns p90_ns p99_ns p99.9_ns max_ns mad_ns 75 | 0 2200000 60342 60342 0 6207 272600031 0.005 60 22 3105 2980 4141 5898 7205 442155 4772690 144 76 | 1 2200000 60334 60334 0 6483 196708372 0.003 60 22 2488 2293 3530 5014 6335 13491 4684815 236 77 | 2 2200000 60330 60330 0 8479 211832782 0.004 60 22 2528 2296 3651 5269 9299 15708 5513140 347 78 | 3 2200000 60256 60256 0 7973 237326578 0.004 60 22 2477 2261 3617 5155 7186 39479 5602172 325 79 | 4 2200000 60280 60280 0 5149 197355746 0.003 60 22 2532 2345 3020 4026 6309 16298 2630389 175 80 | 5 2200000 8 8 0 1 41371 0.000 60 22 3340 1869 8570 11288 11288 11288 11616 1470 81 | 6 2200000 8 8 0 1 41025 0.000 60 22 3291 1706 8616 11429 11429 11429 11609 1585 82 | 7 2200000 10 10 0 1 46852 0.000 60 22 2886 1927 8794 11968 11968 11968 12126 959 83 | 84 | => Even less interruptions on the isolated CPU's 85 | 86 | Move all moveable kernel threads away from the isolated CPUs: 87 | 88 | # tuna -U -t '*' -c 0-4 -m 89 | 90 | OSjitter: 91 | 92 | $ ./osjitter -t 60 93 | CPU TSC_khz #intr #delta ovfl_ns invol_ctx sum_intr_ns iratio rt_s loop_ns median_ns p20_ns p80_ns p90_ns p99_ns p99.9_ns max_ns mad_ns 94 | 0 2200000 60246 60246 0 4333 231374600 0.004 60 22 3177 3040 3595 4465 10924 29714 469030 134 95 | 1 2200000 60403 60403 0 5965 198823307 0.003 60 22 2490 2274 3425 4865 6387 16643 4743847 229 96 | 2 2200000 60445 60445 0 5020 186508000 0.003 60 22 2402 2172 2959 3740 5762 12846 1716645 209 97 | 3 2200000 60490 60490 0 10195 234402816 0.004 60 22 2825 2308 4398 5358 6915 112854 3997080 576 98 | 4 2200000 60276 60276 0 7274 212001750 0.004 60 22 2531 2328 3668 5061 5747 13550 6431210 275 99 | 5 2200000 8 8 0 1 34188 0.000 60 22 3197 1765 5095 8923 8923 8923 11685 1114 100 | 6 2200000 8 8 0 1 
39910 0.000 60 22 3218 1616 8130 11231 11231 11231 11793 1601 101 | 7 2200000 5 5 0 0 16998 0.000 60 22 2091 2079 8506 8506 8506 8506 8506 574 102 | 103 | => Isolated CPUs: Improvements in interruptions, few improvements 104 | in median, max and median absolute deviation (MAD). 105 | 106 | Switch from throughput-performance based tuned profile to a latency-performance 107 | based one (i.e. disable CPU frequency scaling, longer stat interval, writeback 108 | cpumask etc.): 109 | 110 | # tuned-adm profile gs-latency 111 | 112 | OSjitter: 113 | 114 | $ ./osjitter -t 60 115 | CPU TSC_khz #intr #delta ovfl_ns invol_ctx sum_intr_ns iratio rt_s loop_ns median_ns p20_ns p80_ns p90_ns p99_ns p99.9_ns max_ns mad_ns 116 | 0 2200000 60250 60250 0 686 213519597 0.004 60 22 3125 3008 3250 3323 13616 37892 1871887 97 117 | 1 2200000 60223 60223 0 26628 287996052 0.005 60 22 3118 2914 6182 6266 7117 17085 5240030 777 118 | 2 2200000 60241 60241 0 26289 272751612 0.005 60 22 3079 2889 6183 6260 6480 9952 1231324 728 119 | 3 2200000 60193 60193 0 167 163954807 0.003 60 22 2360 2123 2470 2526 3210 13830 8119388 124 120 | 4 2200000 60223 60223 0 120 161220610 0.003 60 22 2427 2231 2514 2566 3060 13410 1885120 99 121 | 5 2200000 5 5 0 1 14843 0.000 60 21 2255 1897 6112 6112 6112 6112 6112 402 122 | 6 2200000 5 5 0 0 17074 0.000 60 22 2144 1852 8859 8859 8859 8859 8859 389 123 | 7 2200000 5 5 0 0 16665 0.000 60 22 1922 1808 8630 8630 8630 8630 8630 234 124 | 125 | => Isolated CPUs: less interruptions, less total interruptions, improvements in median, max and MAD 126 | 127 | ## How it works 128 | 129 | OSjitter creates a measurement thread for each selected CPU that 130 | polls the CPU's [Time Stamp Counter (TSC)][tsc]. In each 131 | iteration the previous counter value is subtracted from the 132 | current one and if that duration is above the threshold 133 | (default: 100 ns) it's counted as an interruption.
134 | 135 | Since the 1990s, x86 CPUs have featured a TSC, which can be read with 136 | a special instruction from any user-space program. The TSC on 137 | relatively modern CPUs is supposed to run at a constant rate and reliably, 138 | i.e. even during CPU-frequency changes and power-saving state 139 | changes. That means that the TSC frequency (although constant) 140 | may be different from the base frequency of the CPU. Since the 141 | TSC is integrated into the CPU, can be accessed like a register 142 | (with low overhead) and has a high accuracy, it's well suited for 143 | measuring even short interruptions. 144 | 145 | When a program is interrupted by the operating system the TSC 146 | ticks continue and thus after the program execution continues 147 | (otherwise transparently to the program) it can derive how long 148 | it was interrupted by looking at the current TSC value. 149 | 150 | The actual TSC frequency is required to convert TSC counts to 151 | nanoseconds. OSjitter obtains the TSC frequency from the kernel, 152 | i.e. from `/sys/devices/system/cpu/cpu0/tsc_freq_khz` (if 153 | available) or it parses it from `journalctl --boot` ([relevant 154 | stackoverflow answer][2]). 155 | 156 | ## How to build 157 | 158 | For most utilities: 159 | 160 | $ make 161 | 162 | The syscall benchmark: 163 | 164 | $ git submodule update --init 165 | $ mkdir build 166 | $ cd build 167 | $ CXXFLAGS='-Wall -O3 -g' cmake .. -DCMAKE_BUILD_TYPE=Release -DBENCHMARK_ENABLE_GTEST_TESTS=0 -GNinja 168 | $ ninja 169 | 170 | (or a similar cmake invocation) 171 | 172 | ## Related Work 173 | 174 | There is [sysjitter][sj] (1.4, GPLv3) which also reads the [TSC][tsc] in 175 | a loop to detect external interruptions.
Some differences are: 176 | 177 | - Sysjitter calibrates the TSC frequency against `gettimeofday()` 178 | whereas OSJitter just obtains the Kernel's TSC frequency 179 | (the Kernel is in a better position to calibrate the TSC 180 | frequency and Linux contains a well-engineered calibration 181 | logic including possible refinements after the first 182 | calibration) 183 | - Sysjitter just invokes the RDTSC instruction while OSjitter 184 | invokes RDTSC and RDTSCP in combination with fencing 185 | instructions 186 | - OSjitter uses ISO C atomic operations while Sysjitter uses GCC 187 | atomic intrinsics 188 | - In contrast to OSjitter, sysjitter doesn't allow specifying the 189 | scheduling class/priority of the measurement threads 190 | - OSjitter's output includes a measure for dispersion (MAD) 191 | - Besides TSC on x86, sysjitter also supports reading a timestamp 192 | counter on POWER CPUs. 193 | 194 | The Linux Kernel contains a [hardware latency detector][hwl] to 195 | check for interruptions caused outside of the operating system 196 | such as the [System Management Mode][smm] (SMM). It also queries 197 | the TSC in a loop. 198 | 199 | The SMM is triggered by System Management Interrupts (SMI) 200 | which are transparent to the kernel and can only be detected 201 | indirectly. An alternative to the TSC approach for detecting and 202 | measuring SMIs is to query CPU counters that an SMI changes 203 | ([relevant stackoverflow answer][1]). 204 | 205 | [Cyclictest][cyc] measures OS latency by [setting 206 | timers][cyc2] and comparing the actual sleep time with the 207 | configured one. 208 | 209 | Erik Rigtorp has published 210 | [hiccups](https://github.com/rigtorp/hiccups) to measure 'system 211 | induced jitter', 212 | [ipc-bench](https://github.com/rigtorp/ipc-bench) as a ping-pong 213 | latency benchmark and [c2clat](https://github.com/rigtorp/c2clat) 214 | to measure inter-core latency.
The hiccups repository references 215 | [osnoise](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/trace/osnoise-tracer.rst), 216 | an OS jitter detector built into the Linux kernel which appeared 217 | in Linux 5.14 or so that complements the above mentioned hardware 218 | latency detector. 219 | 220 | ## Pingpong Results 221 | 222 | The doc directory contains some example Pingpong results for 223 | different configurations. 224 | 225 | The results for condition variable, semaphore and futex are quite 226 | similar because, on Linux, condition variables and semaphores are 227 | implemented in terms of futex. 228 | 229 | Notifying via a traditional UNIX pipe is more expensive than 230 | using a futex but it's the same order of magnitude. 231 | 232 | Inserting a PAUSE instruction while spinning on an atomic 233 | variable increases the median absolute deviation (MAD) just a 234 | little bit, but yields a similar median while reducing the number 235 | of executed instructions. 236 | 237 | As documented in the kernel documentation, comparing the results 238 | with and without `nohz_full=` shows how this feature increases 239 | context-switch overhead and thus increases latency for the 240 | syscall methods (e.g. by 0.6 us or so in the median, a few us in 241 | the other percentiles and maximum). On the other hand, more 242 | context-switch overhead isn't relevant for spinning on an atomic 243 | variable, thus, `nohz_full=` really pays off for this use-case 244 | because the process is interrupted much less.
245 | 246 | 247 | [sj]: https://www.openonload.org/download.html 248 | [hwl]: https://www.kernel.org/doc/html/latest/trace/hwlat_detector.html 249 | [smm]: https://en.wikipedia.org/wiki/System_Management_Mode 250 | [1]: https://stackoverflow.com/a/57961772/427158 251 | [tsc]: https://en.wikipedia.org/wiki/Time_Stamp_Counter 252 | [cyc]: https://git.kernel.org/pub/scm/linux/kernel/git/clrkwllms/rt-tests.git 253 | [cyc2]: http://people.redhat.com/williams/latency-howto/rt-latency-howto.txt 254 | [2]: https://stackoverflow.com/a/57835630/427158 255 | -------------------------------------------------------------------------------- /attic/tsc.h: -------------------------------------------------------------------------------- 1 | 2 | #include // __rdtsc(), _mm_lfence(), ... 3 | 4 | 5 | extern __inline uint64_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 6 | double_fenced_rdtsc(void) 7 | { 8 | // https://www.felixcloutier.com/x86/rdtsc 9 | // If software requires RDTSC to be executed only after all previous 10 | // instructions have executed and all previous loads and stores are 11 | // globally visible, it can execute the sequence MFENCE;LFENCE immediately 12 | // before RDTSC. 13 | // If software requires RDTSC to be executed prior to execution of any 14 | // subsequent instruction (including any memory accesses), it can execute 15 | // the sequence LFENCE immediately after RDTSC. 16 | _mm_mfence(); 17 | _mm_lfence(); 18 | uint64_t r = __rdtsc(); 19 | _mm_lfence(); 20 | return r; 21 | } 22 | 23 | extern __inline uint64_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 24 | far_fenced_rdtsc(void) 25 | { 26 | // https://www.felixcloutier.com/x86/rdtsc 27 | // If software requires RDTSC to be executed prior to execution of any 28 | // subsequent instruction (including any memory accesses), it can execute 29 | // the sequence LFENCE immediately after RDTSC. 
30 | uint64_t r = __rdtsc(); 31 | _mm_lfence(); 32 | return r; 33 | } 34 | 35 | -------------------------------------------------------------------------------- /bench_syscalls.cc: -------------------------------------------------------------------------------- 1 | 2 | // SPDX-License-Identifier: GPL-3.0-or-later 3 | // SPDX-FileCopyrightText: © 2021 Georg Sauthoff 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | 17 | 18 | static void bench_getuid(benchmark::State& state) { 19 | for (auto _ : state) { 20 | getuid(); 21 | } 22 | } 23 | 24 | BENCHMARK(bench_getuid); 25 | 26 | static void bench_getpid(benchmark::State& state) { 27 | for (auto _ : state) { 28 | getpid(); 29 | } 30 | } 31 | 32 | BENCHMARK(bench_getpid); 33 | 34 | static void bench_close(benchmark::State& state) { 35 | for (auto _ : state) { 36 | close(999); 37 | } 38 | } 39 | 40 | BENCHMARK(bench_close); 41 | 42 | static void bench_syscall(benchmark::State& state) { 43 | for (auto _ : state) { 44 | syscall(423); 45 | } 46 | } 47 | 48 | BENCHMARK(bench_syscall); 49 | 50 | static void bench_sched_yield(benchmark::State& state) { 51 | for (auto _ : state) { 52 | sched_yield(); 53 | } 54 | } 55 | 56 | BENCHMARK(bench_sched_yield); 57 | 58 | static void bench_clock_gettime(benchmark::State& state) { 59 | struct timespec ts = {0}; 60 | for (auto _ : state) { 61 | clock_gettime(CLOCK_REALTIME, &ts); 62 | } 63 | } 64 | 65 | BENCHMARK(bench_clock_gettime); 66 | 67 | static void bench_clock_gettime_tai(benchmark::State& state) { 68 | struct timespec ts = {0}; 69 | for (auto _ : state) { 70 | clock_gettime(CLOCK_TAI, &ts); 71 | } 72 | } 73 | 74 | BENCHMARK(bench_clock_gettime_tai); 75 | 76 | static void bench_clock_gettime_monotonic(benchmark::State& state) { 77 | struct timespec ts = {0}; 78 | for (auto _ : state) { 79 | clock_gettime(CLOCK_MONOTONIC, &ts); 80 | } 81 | } 82 | 83 | BENCHMARK(bench_clock_gettime_monotonic); 
84 | 85 | static void bench_clock_gettime_monotonic_raw(benchmark::State& state) { 86 | struct timespec ts = {0}; 87 | for (auto _ : state) { 88 | clock_gettime(CLOCK_MONOTONIC_RAW, &ts); 89 | } 90 | } 91 | 92 | BENCHMARK(bench_clock_gettime_monotonic_raw); 93 | 94 | static void bench_nanosleep0(benchmark::State& state) { 95 | struct timespec ts = {0}; 96 | for (auto _ : state) { 97 | int r = nanosleep(&ts, 0); 98 | assert(!r); 99 | } 100 | } 101 | 102 | BENCHMARK(bench_nanosleep0); 103 | 104 | static void bench_nanosleep0_slack1(benchmark::State& state) { 105 | int r = prctl(PR_SET_TIMERSLACK, 1, 0, 0, 0); 106 | assert(!r); 107 | struct timespec ts = {0}; 108 | for (auto _ : state) { 109 | int r = nanosleep(&ts, 0); 110 | assert(!r); 111 | } 112 | } 113 | 114 | BENCHMARK(bench_nanosleep0_slack1); 115 | 116 | static void bench_nanosleep1_slack1(benchmark::State& state) { 117 | int r = prctl(PR_SET_TIMERSLACK, 1, 0, 0, 0); 118 | assert(!r); 119 | struct timespec ts = { .tv_nsec = 1 }; 120 | for (auto _ : state) { 121 | int r = nanosleep(&ts, 0); 122 | assert(!r); 123 | } 124 | } 125 | 126 | BENCHMARK(bench_nanosleep1_slack1); 127 | 128 | static void bench_pthread_cond_signal(benchmark::State& state) { 129 | pthread_cond_t cv = PTHREAD_COND_INITIALIZER; 130 | for (auto _ : state) { 131 | int r = pthread_cond_signal(&cv); 132 | assert(!r); 133 | } 134 | } 135 | 136 | BENCHMARK(bench_pthread_cond_signal); 137 | 138 | static void bench_assign(benchmark::State& state) { 139 | double f = 0; 140 | for (auto _ : state) { 141 | f = 23; 142 | benchmark::DoNotOptimize(f); 143 | } 144 | } 145 | 146 | BENCHMARK(bench_assign); 147 | 148 | static void bench_sqrt(benchmark::State& state) { 149 | double f = 23; 150 | double g = 0; 151 | for (auto _ : state) { 152 | benchmark::DoNotOptimize(f); 153 | g = sqrt(f); 154 | benchmark::DoNotOptimize(g); 155 | } 156 | } 157 | 158 | BENCHMARK(bench_sqrt); 159 | 160 | static void bench_sqrtrec(benchmark::State& state) { 161 | double f = 23; 
162 | for (auto _ : state) { 163 | f = sqrt(f); 164 | } 165 | } 166 | 167 | BENCHMARK(bench_sqrtrec); 168 | 169 | static void bench_nothing(benchmark::State& state) { 170 | unsigned i = 0; 171 | for (auto _ : state) { 172 | ++i; 173 | } 174 | } 175 | 176 | BENCHMARK(bench_nothing); 177 | 178 | BENCHMARK_MAIN(); 179 | -------------------------------------------------------------------------------- /doc/osjitter-atom-C3758-half-hz.txt: -------------------------------------------------------------------------------- 1 | $ ./osjitter -t 60 2 | CPU TSC_khz #intr #delta ovfl_ns invol_ctx sum_intr_ns iratio rt_s loop_ns median_ns p20_ns p80_ns p90_ns p99_ns p99.9_ns max_ns mad_ns 3 | 0 2200000 60273 60273 0 122 229869307 0.004 60 20 3482 3322 3740 3940 4648 36907 3929568 145 4 | 1 2200000 60249 60249 0 617 170593054 0.003 60 20 2685 2339 2970 3101 7809 14140 2288695 249 5 | 2 2200000 60307 60307 0 216 219753975 0.004 60 21 2581 2298 2872 3009 3909 453060 1941053 230 6 | 3 2200000 60206 60206 0 25792 310482349 0.005 60 20 3426 2748 7840 7960 8382 13177 2528739 876 7 | 4 2200000 60244 60244 0 450 176518457 0.003 60 20 2642 2310 2957 3093 4880 11532 4537616 256 8 | 5 2200000 60061 60061 0 1 156952003 0.003 60 20 2606 2299 2907 3044 3396 4388 15060 242 9 | 6 2200000 60061 60061 0 1 154872338 0.003 60 20 2566 2270 2870 3010 3333 4434 14995 241 10 | 7 2200000 60061 60061 0 1 154459207 0.003 60 20 2560 2260 2865 3000 3332 4420 12925 241 11 | -------------------------------------------------------------------------------- /doc/osjitter-atom-C3758-hz.txt: -------------------------------------------------------------------------------- 1 | $ ./osjitter -t 60 2 | CPU TSC_khz #intr #delta ovfl_ns invol_ctx sum_intr_ns iratio rt_s loop_ns median_ns p20_ns p80_ns p90_ns p99_ns p99.9_ns max_ns mad_ns 3 | 0 2200000 71112 71112 0 30615 1307556496 0.022 60 22 5938 4131 7280 71342 122061 197748 32007581 1554 4 | 1 2200000 69271 69271 0 31437 734304556 0.012 60 22 5454 2960 6852 12580 92996 
114692 14820361 2171 5 | 2 2200000 66890 66890 0 1130 868183998 0.014 60 22 2745 2383 3327 4860 18203 1733105 22469364 328 6 | 3 2200000 70697 70697 0 14009 552432760 0.009 60 22 2847 2490 6605 12346 69640 190076 2848408 419 7 | 4 2200000 67158 67158 0 1837 343931516 0.006 60 22 2820 2491 3488 5195 50712 440682 2845008 311 8 | 5 2200000 60062 60062 0 1 166677345 0.003 60 22 2725 2428 3069 3299 4287 5413 12129 251 9 | 6 2200000 60062 60062 0 1 161701916 0.003 60 22 2653 2360 2975 3191 4100 5133 13279 244 10 | 7 2200000 60062 60062 0 1 161126550 0.003 60 21 2644 2351 2964 3183 4066 5123 13364 241 11 | -------------------------------------------------------------------------------- /doc/pingpong-atom-C3758-half-hz.txt: -------------------------------------------------------------------------------- 1 | $ ./pingpong --pin 0 6 --pin 1 5 --sem 2 | Thread TSC_khz #delta min_ns max_ns median_ns p20_ns p80_ns p90_ns p99_ns p99.9_ns mad_ns 3 | 0 2200000 500000 4130 10298 4471 4370 4708 4801 5152 8041 123 4 | 1 2200000 500000 4058 10873 4484 4377 4738 4829 5201 8121 126 5 | $ ./pingpong --pin 0 6 --pin 1 5 --futex 6 | Thread TSC_khz #delta min_ns max_ns median_ns p20_ns p80_ns p90_ns p99_ns p99.9_ns mad_ns 7 | 0 2200000 500000 4003 8964 4270 4200 4527 4608 4966 7720 73 8 | 1 2200000 500000 2631 9746 4263 4186 4533 4692 5024 7660 91 9 | $ ./pingpong --pin 0 6 --pin 1 5 --pipe 10 | Thread TSC_khz #delta min_ns max_ns median_ns p20_ns p80_ns p90_ns p99_ns p99.9_ns mad_ns 11 | 0 2200000 500000 5290 12497 5862 5660 6405 6560 7006 9944 210 12 | 1 2200000 500000 5246 12132 5730 5533 6209 6531 6985 9837 211 13 | $ ./pingpong --pin 0 6 --pin 1 5 --cv 14 | Thread TSC_khz #delta min_ns max_ns median_ns p20_ns p80_ns p90_ns p99_ns p99.9_ns mad_ns 15 | 0 2200000 500000 4498 9666 4845 4776 5055 5181 5549 8419 79 16 | 1 2200000 500000 4390 13333 4765 4669 5026 5174 5521 8239 107 17 | $ ./pingpong --pin 0 6 --pin 1 5 --spin 18 | Thread TSC_khz #delta min_ns max_ns median_ns p20_ns p80_ns 
p90_ns p99_ns p99.9_ns mad_ns 19 | 0 2200000 500000 173 3540 179 178 179 180 186 259 0 20 | 1 2200000 500000 170 3455 175 174 176 176 181 186 0 21 | $ ./pingpong --pin 0 6 --pin 1 5 --spin-pause 22 | Thread TSC_khz #delta min_ns max_ns median_ns p20_ns p80_ns p90_ns p99_ns p99.9_ns mad_ns 23 | 0 2200000 500000 168 3667 176 170 177 180 202 276 3 24 | 1 2200000 500000 166 3376 170 166 173 177 201 209 3 25 | $ ./pingpong --pin 0 6 --pin 1 5 --spin-pause -p 2 26 | Thread TSC_khz #delta min_ns max_ns median_ns p20_ns p80_ns p90_ns p99_ns p99.9_ns mad_ns 27 | 0 2200000 500000 169 3965 191 178 241 255 271 455 12 28 | 1 2200000 500000 166 3719 198 181 245 261 271 444 16 29 | -------------------------------------------------------------------------------- /doc/pingpong-atom-C3758-write-early-enabled.txt: -------------------------------------------------------------------------------- 1 | $ ./pingpong --pin 0 6 --pin 1 5 --spin 2 | Thread TSC_khz #delta min_ns max_ns median_ns p20_ns p80_ns p90_ns p99_ns p99.9_ns mad_ns 3 | 0 2200000 500000 171 538 178 178 179 179 184 201 0 4 | 1 2200000 500000 169 531 175 174 175 175 175 186 0 5 | $ ./pingpong --pin 0 6 --pin 1 5 --spin 6 | Thread TSC_khz #delta min_ns max_ns median_ns p20_ns p80_ns p90_ns p99_ns p99.9_ns mad_ns 7 | 0 2200000 500000 176 551 178 178 179 179 185 251 0 8 | 1 2200000 500000 169 568 175 174 175 175 175 198 0 9 | $ ./pingpong --pin 0 6 --pin 1 5 --spin 10 | Thread TSC_khz #delta min_ns max_ns median_ns p20_ns p80_ns p90_ns p99_ns p99.9_ns mad_ns 11 | 0 2200000 500000 170 538 178 178 179 179 184 201 0 12 | 1 2200000 500000 169 525 175 174 175 175 175 187 0 13 | $ ./pingpong --pin 0 6 --pin 1 5 --spin-pause 14 | Thread TSC_khz #delta min_ns max_ns median_ns p20_ns p80_ns p90_ns p99_ns p99.9_ns mad_ns 15 | 0 2200000 500000 169 540 172 170 176 176 182 219 2 16 | 1 2200000 500000 164 530 170 166 172 173 176 196 2 17 | $ ./pingpong --pin 0 6 --pin 1 5 --spin-pause -p 2 18 | Thread TSC_khz #delta min_ns max_ns median_ns 
p20_ns p80_ns p90_ns p99_ns p99.9_ns mad_ns 19 | 0 2200000 500000 169 548 175 171 178 179 241 269 2 20 | 1 2200000 500000 166 535 201 198 205 205 255 271 2 21 | -------------------------------------------------------------------------------- /doc/pingpong-atom-C3758.txt: -------------------------------------------------------------------------------- 1 | $ ./pingpong --pin 0 6 --pin 1 5 --sem 2 | Thread TSC_khz #delta min_ns max_ns median_ns p20_ns p80_ns p90_ns p99_ns p99.9_ns mad_ns 3 | 0 2200000 500000 4704 12329 5123 4916 5660 6095 6548 10087 216 4 | 1 2200000 500000 4654 16630 5105 4906 5690 6093 6562 10080 210 5 | $ ./pingpong --pin 0 6 --pin 1 5 --futex 6 | Thread TSC_khz #delta min_ns max_ns median_ns p20_ns p80_ns p90_ns p99_ns p99.9_ns mad_ns 7 | 0 2200000 500000 4552 12166 5003 4870 5660 5936 6350 10100 160 8 | 1 2200000 500000 4482 11956 4863 4721 5542 5897 6349 10038 164 9 | $ ./pingpong --pin 0 6 --pin 1 5 --pipe 10 | Thread TSC_khz #delta min_ns max_ns median_ns p20_ns p80_ns p90_ns p99_ns p99.9_ns mad_ns 11 | 0 2200000 500000 6047 831490 6654 6394 7539 7928 8394 12050 272 12 | 1 2200000 500000 5900 14528 6551 6264 7387 7913 8324 11926 300 13 | $ ./pingpong --pin 0 6 --pin 1 5 --cv 14 | Thread TSC_khz #delta min_ns max_ns median_ns p20_ns p80_ns p90_ns p99_ns p99.9_ns mad_ns 15 | 0 2200000 500000 5290 13729 5700 5528 6548 6771 7224 10766 192 16 | 1 2200000 500000 5088 15968 5541 5369 6338 6559 6959 10613 193 17 | $ ./pingpong --pin 0 6 --pin 1 5 --spin 18 | Thread TSC_khz #delta min_ns max_ns median_ns p20_ns p80_ns p90_ns p99_ns p99.9_ns mad_ns 19 | 0 2200000 500000 176 538 178 178 179 179 185 268 0 20 | 1 2200000 500000 172 535 175 174 175 175 181 205 0 21 | $ ./pingpong --pin 0 6 --pin 1 5 --spin-pause 22 | Thread TSC_khz #delta min_ns max_ns median_ns p20_ns p80_ns p90_ns p99_ns p99.9_ns mad_ns 23 | 0 2200000 500000 171 612 180 179 180 180 186 206 0 24 | 1 2200000 500000 167 616 176 176 176 176 176 200 0 25 | $ ./pingpong --pin 0 6 --pin 1 5 
--spin-pause -p 2 26 | Thread TSC_khz #delta min_ns max_ns median_ns p20_ns p80_ns p90_ns p99_ns p99.9_ns mad_ns 27 | 0 2200000 500000 169 549 175 171 178 179 244 269 2 28 | 1 2200000 500000 166 535 201 198 205 205 255 271 2 29 | -------------------------------------------------------------------------------- /helper/bench2tidy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # SPDX-License-Identifier: GPL-3.0-or-later 4 | # SPDX-FileCopyrightText: © 2021 Georg Sauthoff 5 | 6 | import argparse 7 | 8 | def is_not_a_point(line): 9 | for k in ('_mean"', '_median"', '_stddev"'): 10 | if k in line: 11 | return True 12 | return False 13 | 14 | def dump_csv(filename, host, o): 15 | with open(filename) as f: 16 | state = 0 17 | for line in f: 18 | if state == 0: 19 | if line.startswith('name,iterations,real_time,cpu_time,time_unit'): 20 | state = 1 21 | elif state == 1: 22 | if is_not_a_point(line): 23 | continue 24 | i = line.rindex(',ns,') 25 | o.write(f'{host},{line[:i]}\n') 26 | 27 | 28 | def main(filenames, ofilename): 29 | with open(ofilename, 'w') as f: 30 | f.write('host,name,iterations,real_ns,cpu_ns\n') 31 | for fn in filenames: 32 | host = fn[fn.rindex('-')+1:-4] 33 | dump_csv(fn, host, f) 34 | 35 | def parse_args(): 36 | p = argparse.ArgumentParser() 37 | p.add_argument('filenames', metavar='CSV_FILENAME', nargs='+', 38 | help='hosts under test') 39 | p.add_argument('--out', '-o', default='all.csv', 40 | help='resulting CSV filename (default: %(default)s)') 41 | return p.parse_args() 42 | 43 | if __name__ == '__main__': 44 | args = parse_args() 45 | main(args.filenames, args.out) 46 | 47 | -------------------------------------------------------------------------------- /helper/bench_playbook.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Distribute and run a benchmark on a bunch of hosts. 
4 | # 5 | # SPDX-License-Identifier: GPL-3.0-or-later 6 | # SPDX-FileCopyrightText: © 2021 Georg Sauthoff 7 | 8 | import mitogen 9 | import mitogen.select 10 | import mitogen.utils 11 | 12 | import argparse 13 | import logging 14 | import os 15 | import platform 16 | import subprocess 17 | import tempfile 18 | 19 | 20 | log = logging.getLogger(__name__) 21 | 22 | def bench(exe, bcmd): 23 | with tempfile.TemporaryDirectory() as d: 24 | exe_path = f'{d}/bench' 25 | with open(exe_path, 'wb') as f: 26 | f.write(exe) 27 | os.chmod(exe_path, 0o755) 28 | core = min(int(os.cpu_count()/2*1.5), os.cpu_count()-1) 29 | ts = [ 'taskset', '-c', str(core) ] 30 | subprocess.check_output(ts + bcmd, cwd=d) 31 | hostname = platform.node().split('.', 1)[0] 32 | with open(f'{d}/out.csv') as f: 33 | csv = f.read() 34 | with open('/proc/cmdline') as f: 35 | cmdline = f.read().strip() 36 | try: 37 | tuned = subprocess.check_output(['/usr/sbin/tuned-adm', 'active'], universal_newlines=True) 38 | tuned = tuned.split()[-1] 39 | except: 40 | tuned = '' 41 | with open('/proc/cpuinfo') as f: 42 | cpuinfo = f.read().splitlines() 43 | cpuinfo = [ l.split(': ')[-1] for l in cpuinfo if l.startswith('model name') ][0] 44 | return hostname, cpuinfo, cmdline, tuned, csv 45 | 46 | 47 | def main(router, hosts, exe_path, bcmd, out_dir): 48 | with open(exe_path, 'rb') as f: 49 | exe = f.read() 50 | 51 | cns = [ (router.ssh(hostname=h, python_path='/usr/bin/python3'), h) for h in hosts ] 52 | 53 | fs = [] 54 | for c, host in cns: 55 | log.info(f'Starting bench on {host} ...') 56 | fs.append(c.call_async(bench, exe, bcmd)) 57 | 58 | with open(f'{out_dir}/hosts.csv', 'w') as g: 59 | g.write('hostname,cpuinfo,cmdline,tuned\n') 60 | for i, res in enumerate(mitogen.select.Select(fs)): 61 | log.info(f'Receiving from {res.router._stream_by_id[res.src_id].conn.options.hostname} ...') 62 | r = res.unpickle() 63 | g.write(f'{r[0]},{r[1]},"{r[2]}",{r[3]}\n') 64 | with open(f'{out_dir}/bench-{r[0]}.csv', 'w') as f: 65 
| f.write(r[4]) 66 | 67 | def parse_args(): 68 | p = argparse.ArgumentParser() 69 | p.add_argument('hosts', metavar='HOST', nargs='+', 70 | help='hosts under test') 71 | p.add_argument('--out', '-o', default='out', 72 | help='local directory for storing collected benchmark results (default: %(default)s)') 73 | p.add_argument('--exe', '-e', default='bench_syscalls', 74 | help='executable to transfer and execute remotely (default: %(default)s)') 75 | p.add_argument('-n', type=int, default=3, 76 | help='benchmark repetitions (default: %(default)s)') 77 | p.add_argument('--log', default='pb.log', 78 | help='logfile (is more verbose than the console log) (default: %(default)s)') 79 | args = p.parse_args() 80 | return args 81 | 82 | if __name__ == '__main__': 83 | args = parse_args() 84 | bcmd = [ './bench', '--benchmark_out_format=csv', '--benchmark_out=out.csv', 85 | f'--benchmark_repetitions={args.n}' ] 86 | os.makedirs(args.out, exist_ok=True) 87 | mitogen.utils.log_to_file(args.log) 88 | h = logging.StreamHandler() 89 | h.setFormatter(logging.Formatter( 90 | '%(asctime)s - %(levelname)-8s - %(message)s [%(name)s]', 91 | '%Y-%m-%d %H:%M:%S')) 92 | log.addHandler(h) 93 | mitogen.utils.run_with_router(main, args.hosts, args.exe, bcmd, args.out) 94 | 95 | -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | 2 | CFLAGSW_GCC = -Wall -Wextra -Wno-missing-field-initializers \ 3 | -Wno-parentheses -Wno-missing-braces \ 4 | -Wmissing-prototypes -Wfloat-equal \ 5 | -Wwrite-strings -Wpointer-arith -Wcast-align \ 6 | -Wnull-dereference \ 7 | -Werror=multichar -Werror=sizeof-pointer-memaccess -Werror=return-type \ 8 | -fstrict-aliasing 9 | 10 | CFLAGS0 = -pthread -g 11 | CFLAGS1 = -O3 12 | 13 | CFLAGS = $(CFLAGSW_GCC) $(CFLAGS0) $(CFLAGS1) 14 | 15 | .PHONY: all 16 | all: osjitter pingpong 17 | 18 | osjitter: util.o 19 | 20 | pingpong: util.o 21 | 22 | 
ptp-clock-offset: util.o 23 | 24 | .PHONY: clean 25 | clean: 26 | rm -f osjitter osjitter.o util.o pingpong pingpong.o ptp-clock-offset 27 | -------------------------------------------------------------------------------- /osjitter.c: -------------------------------------------------------------------------------- 1 | // OSjitter - measure program interruptions 2 | // 3 | // 2019, Georg Sauthoff 4 | // 5 | // SPDX-License-Identifier: GPL-3.0-or-later 6 | 7 | #define _GNU_SOURCE 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #include // __mm_pause() 26 | 27 | #include "util.h" 28 | #include "tsc.h" 29 | 30 | static atomic_bool start_work = false; 31 | static atomic_bool quit_thread = false; 32 | 33 | 34 | struct Args { 35 | uint32_t cpus; 36 | cpu_set_t cpu_set; 37 | 38 | int sched_policy; 39 | int sched_prio; 40 | 41 | uint32_t runtime_s; 42 | uint32_t thresh_ns; 43 | 44 | uint32_t tsc_khz; 45 | uint32_t mult; 46 | uint32_t shift; 47 | uint32_t tsc_thresh; 48 | uint64_t tsc_runtime; 49 | uint64_t samples; 50 | 51 | unsigned pid; 52 | size_t tid_off; 53 | }; 54 | typedef struct Args Args; 55 | 56 | static void help(FILE *f, const char *argv0) 57 | { 58 | fprintf(f, "%s - measure involuntary program interruptions\n" 59 | "\n" 60 | "Options:\n" 61 | " -t SEC measurement period in s (default: 10 s)\n" 62 | " -d NS threshold for an interruption in ns (default: 100 ns)\n" 63 | " --cpu X CPU (Cores) that are part of the measurement (default: all);\n" 64 | " --cpu X-Y count from zero, single core or range\n" 65 | " --sched X scheduling policy for measurement threads (default: OTHER);\n" 66 | " 1:FIFO, 2:RR etc. 
WARNING: only specify a subset with --cpu\n" 67 | " when setting a realtime policy\n" 68 | " --prio X realtime priority (default: 1)\n" 69 | " --khz X frequency of TSC in kHz (default: read from\n" 70 | " /sys/devices/system/cpu/cpu0/tsc_freq_khz if available or\n" 71 | " journalctl --boot)\n" 72 | "\n" 73 | "How it works: a measurement thread is pinned on each selected CPU\n" 74 | "where it loops without making system calls and periodically reads\n" 75 | "the TSC to detect external interruptions. Thus, it detects latency\n" 76 | "introducing interruptions by the OS and possibly even by the SMM.\n" 77 | "\n" 78 | "Output columns:\n" 79 | " CPU - CPU/Core number, count from 0, cf. /proc/cpuinfo and lscpu\n" 80 | " TSC_KHZ - frequency of the Time Stamp Counter (TSC)\n" 81 | " might be different from the CPU's base frequency\n" 82 | " #intr - number of interruptions (above the threshold, cf. -d)\n" 83 | " #delta - number of recorded interruptions (might overflow)\n" 84 | " ovfl_ns - time after which interrupt recording overflowed\n" 85 | " invol_ctx - number of involuntary context switches\n" 86 | " (i.e. due to scheduling)\n" 87 | " sum_intr_ns - sum of all interruptions in ns\n" 88 | " iratio - ratio of interruption time to runtime\n" 89 | " (IOW off-program to program time)\n" 90 | " rt_s - measurement time in s (cf. -t)\n" 91 | " loop_ns - smallest loop runtime (likely of an uninterrupted iteration\n" 92 | " is used to better approximate interruption time\n" 93 | " median_ns - Median of all recorded interruptions\n" 94 | " pX_ns - X/100 percentile\n" 95 | " max_ns - the longest interruption\n" 96 | " mad_ns - median absolute deviation of all recorded interruptions\n" 97 | "\n" 98 | "How much happens in a nanosecond?\n" 99 | "A CPU running at 3.6 GHz progresses by 3.6 cycles in 1 ns. 
And a\n" 100 | "modern pipelined super-scalar CPU may execute up to 3 instructions\n" 101 | "or so per cycle, on average.\n" 102 | "\n" 103 | "2019, Georg Sauthoff , GPLv3+\n" 104 | , argv0); 105 | } 106 | 107 | static int parse_args(Args *args, int argc, char **argv) 108 | { 109 | *args = (const Args){0}; 110 | CPU_ZERO(&args->cpu_set); 111 | 112 | for (int i = 1; i < argc; ++i) { 113 | if (!strcmp(argv[i], "--cpu")) { 114 | ++i; 115 | if (i >= argc) { 116 | fprintf(stderr, "--cpu argument is missing\n"); 117 | return -1; 118 | } 119 | char *p = strchr(argv[i], '-'); 120 | if (p) { 121 | *p = 0; 122 | unsigned b = atoi(argv[i]); 123 | unsigned e = atoi(p+1); 124 | if (b >= 1024 || e >= 1024) { 125 | fprintf(stderr, "--cpu range out of range\n"); 126 | return -1; 127 | } 128 | for (unsigned k = b; k <= e; ++k) { 129 | CPU_SET(k, &args->cpu_set); 130 | } 131 | } else { 132 | CPU_SET(atoi(argv[i]), &args->cpu_set); 133 | } 134 | } else if (!strcmp(argv[i], "-t")) { 135 | ++i; 136 | if (i >= argc) { 137 | fprintf(stderr, "-t argument is missing\n"); 138 | return -1; 139 | } 140 | args->runtime_s = atoi(argv[i]); 141 | } else if (!strcmp(argv[i], "-d")) { 142 | ++i; 143 | if (i >= argc) { 144 | fprintf(stderr, "-d argument is missing\n"); 145 | return -1; 146 | } 147 | args->thresh_ns = atoi(argv[i]); 148 | } else if (!strcmp(argv[i], "--sched")) { 149 | ++i; 150 | if (i >= argc) { 151 | fprintf(stderr, "--sched argument is missing\n"); 152 | return -1; 153 | } 154 | args->sched_policy = atoi(argv[i]); 155 | if (!args->sched_prio) 156 | args->sched_prio = 1; 157 | } else if (!strcmp(argv[i], "--prio")) { 158 | ++i; 159 | if (i >= argc) { 160 | fprintf(stderr, "--prio argument is missing\n"); 161 | return -1; 162 | } 163 | args->sched_prio = atoi(argv[i]); 164 | } else if (!strcmp(argv[i], "--khz")) { 165 | ++i; 166 | if (i >= argc) { 167 | fprintf(stderr, "--khz argument is missing\n"); 168 | return -1; 169 | } 170 | args->tsc_khz = atoi(argv[i]); 171 | } else if 
(!strcmp(argv[i], "-h") || !strcmp(argv[i], "--help")) { 172 | help(stdout, argv[0]); 173 | exit(0); 174 | } else { 175 | fprintf(stderr, "unknown option: %s\n", argv[i]); 176 | return -1; 177 | } 178 | } 179 | 180 | if (!args->runtime_s) 181 | args->runtime_s = 10; 182 | if (!args->thresh_ns) 183 | args->thresh_ns = 100; 184 | if (!args->samples) 185 | args->samples = args->runtime_s * 105000; 186 | 187 | return 0; 188 | } 189 | 190 | 191 | 192 | static int is_cpu_online(uint32_t cpu, bool *b) 193 | { 194 | char filename[64]; 195 | snprintf(filename, sizeof filename, "/sys/devices/system/cpu/cpu%u/online", 196 | cpu); 197 | int fd = open(filename, O_RDONLY); 198 | if (fd == -1) { 199 | // CPU not hot-plugable 200 | if (errno == ENOENT) { 201 | *b = true; 202 | return 0; 203 | } 204 | perror("opening /sys/devices/system/cpu/cpu%u/online"); 205 | return -1; 206 | } 207 | char buf[2] = {0}; 208 | ssize_t r = read(fd, buf, sizeof buf); 209 | if (r == -1) { 210 | perror("reading /sys/devices/system/cpu/cpu0/tsc_freq_khz"); 211 | close(fd); 212 | return -1; 213 | } 214 | *b = buf[0] == '1' && buf[1] == '\n'; 215 | int t = close(fd); 216 | if (t == -1) { 217 | perror("closing /sys/devices/system/cpu/cpu%u/online"); 218 | return -1; 219 | } 220 | return 0; 221 | } 222 | 223 | 224 | // cf. 
gdb> ptype pthread 225 | // (requires glibc debuginfo installed) 226 | static size_t get_tid_off(void) 227 | { 228 | pthread_t t = pthread_self(); 229 | const char *b; 230 | memcpy(&b, (void*)t, sizeof b); 231 | const char *e = b + 1024; 232 | unsigned pid = getpid(); 233 | for (const char *p = b + 128; p < e; p+=4) { 234 | unsigned x; 235 | memcpy(&x, p, sizeof x); 236 | if (x == pid) 237 | return p - b; 238 | } 239 | return 0; 240 | } 241 | 242 | // alternative to calling gettid() in each child 243 | static unsigned pthread_to_tid(pthread_t t, size_t off) 244 | { 245 | const char *p; 246 | memcpy(&p, (void*)t, sizeof p); 247 | unsigned tid; 248 | memcpy(&tid, p + off, sizeof tid); 249 | return tid; 250 | } 251 | 252 | 253 | static int set_params(Args *args) 254 | { 255 | args->pid = getpid(); 256 | args->tid_off = get_tid_off(); 257 | 258 | args->cpus = sysconf(_SC_NPROCESSORS_CONF); 259 | if (!CPU_COUNT(&args->cpu_set)) { 260 | for (unsigned k = 0; k <= args->cpus; ++k) { 261 | bool b = false; 262 | int r = is_cpu_online(k, &b); 263 | if (r) 264 | return r; 265 | if (b) 266 | CPU_SET(k, &args->cpu_set); 267 | } 268 | } 269 | 270 | if (!args->tsc_khz) { 271 | int r = get_tsc_khz(&args->tsc_khz); 272 | if (r < 0) 273 | return r; 274 | } 275 | clocks_calc_mult_shift(&args->mult, &args->shift, 276 | args->tsc_khz, 1000000l, 0); 277 | { 278 | double d = 1000000000l; 279 | d /= args->thresh_ns; 280 | double e = args->tsc_khz; 281 | e *= 1000; 282 | e /= d; 283 | args->tsc_thresh = (uint32_t) e; 284 | } 285 | { 286 | double d = args->tsc_khz; 287 | d *= 1000; 288 | d *= args->runtime_s; 289 | args->tsc_runtime = (uint64_t) d; 290 | } 291 | return 0; 292 | } 293 | 294 | static Args global_args; 295 | 296 | struct Worker { 297 | pthread_t worker_id; 298 | unsigned tid; 299 | uint32_t cpu_id; 300 | 301 | uint32_t *deltas; // array of interruptions 302 | uint64_t samples; // #used array entries 303 | uint64_t thresh_cnt; // counted interruptions 304 | 305 | uint64_t 
tsc_start; // start of measurements 306 | uint64_t tsc_overflow; // when it overflowed (or 0 for no overflow) 307 | uint64_t tsc_total_int; // sum of interruptions 308 | uint64_t tsc_delta_min; // minimum loop time 309 | 310 | uint64_t invol_switch; // involuntary context switches 311 | }; 312 | typedef struct Worker Worker; 313 | 314 | static int check_cpuinfo(void) 315 | { 316 | FILE *f = popen("grep '^flags' /proc/cpuinfo | tr ' ' '\\n'" 317 | " | grep '^\\(constant\\|nonstop\\)_tsc$'", "re"); 318 | if (!f) { 319 | perror("popen"); 320 | return -1; 321 | } 322 | char *line = 0; 323 | size_t n = 0; 324 | bool constant_tsc = false; 325 | bool nonstop_tsc = false; 326 | for (;;) { 327 | ssize_t l = getline(&line, &n, f); 328 | if (l == -1) { 329 | if (feof(f)) { 330 | break; 331 | } else { 332 | perror("getline"); 333 | pclose(f); 334 | return -1; 335 | } 336 | } 337 | if (!strcmp(line, "constant_tsc\n")) 338 | constant_tsc = true; 339 | if (!strcmp(line, "nonstop_tsc\n")) 340 | nonstop_tsc = true; 341 | } 342 | int r = pclose(f); 343 | if (r == -1) { 344 | perror("pclose"); 345 | return -1; 346 | } 347 | r = 0; 348 | if (!constant_tsc) { 349 | fprintf(stderr, "CPU doesn't support a constant TSC\n"); 350 | r = 1; 351 | } 352 | if (!nonstop_tsc) { 353 | fprintf(stderr, "CPU's TSC stops in sleep states\n"); 354 | r = 1; 355 | } 356 | return r; 357 | } 358 | 359 | // Note that /proc/%u/task/%u/sched is gone after the thread 360 | // returned from its main function, 361 | // i.e. 
even before the parent called pthread_join() 362 | static int read_proc_sched(unsigned pid, unsigned tid, Worker *w) 363 | { 364 | char filename[64]; 365 | snprintf(filename, sizeof filename, "/proc/%u/task/%u/sched", pid, tid); 366 | int fd = open(filename, O_RDONLY); 367 | if (fd == -1) { 368 | perror("opening /proc/%u/task/%u/sched"); 369 | return -1; 370 | } 371 | char buf[4*1024] = {0}; 372 | ssize_t n = read(fd, buf, sizeof buf); 373 | if (n == -1) { 374 | perror("reading /proc/%u/task/%u/sched"); 375 | close(fd); 376 | return -1; 377 | } 378 | const char q[] = "nr_involuntary_switches"; 379 | char *p = memmem(buf, n, q, sizeof q - 1); 380 | if (!p) { 381 | fprintf(stderr, 382 | "Couldn't find involuntary switches in /proc/.../sched\n"); 383 | return -1; 384 | } 385 | p += sizeof q - 1; 386 | char *e = memchr(p, '\n', n - (p-buf)); 387 | if (!e) { 388 | fprintf(stderr, "Couldn't find end in /proc/.../sched\n"); 389 | return -1; 390 | } 391 | *e = 0; 392 | char *m = memrchr(p, ' ', e-p); 393 | if (!m) { 394 | fprintf(stderr, "Couldn't find begin in /proc/.../sched\n"); 395 | return -1; 396 | } 397 | ++m; 398 | w->invol_switch = atol(m); 399 | int r = close(fd); 400 | if (r == -1) { 401 | perror("closing /proc/%u/task/%u/sched"); 402 | return -1; 403 | } 404 | return 0; 405 | } 406 | 407 | 408 | static void *worker_main(void *p) 409 | { 410 | Worker *w = p; 411 | Args args = global_args; 412 | size_t n = args.samples; 413 | // uint32_t is big enough to store interruptions of up to ~ 1 s 414 | // when using a TSC that runs at 4 GHz 415 | uint32_t *ds = calloc(n, sizeof ds[0]); 416 | if (!ds) { 417 | fprintf(stderr, "Failed to allocate delta array on core %" PRIu32 "\n", 418 | w->cpu_id); 419 | return NULL; 420 | } 421 | size_t i = 0; 422 | while(!atomic_load_explicit(&start_work, memory_order_consume)) { 423 | _mm_pause(); 424 | } 425 | for (unsigned i = 0; i < 1000; ++i) 426 | _mm_pause(); 427 | 428 | uint64_t tsc_total_int = 0; 429 | uint64_t tsc_overflow = 0; 
430 | uint64_t tsc_thresh = args.tsc_thresh; 431 | uint64_t tsc_delta_min = UINT64_MAX; 432 | 433 | uint64_t start = fenced_rdtsc(); 434 | uint64_t limit = start + args.tsc_runtime; 435 | uint64_t tsc = start; 436 | 437 | // unroll the loop one time for a more 'realistic' tsc_delta_min 438 | if (tsc < limit) { 439 | uint64_t t = fenced_rdtscp(); 440 | uint64_t delta = t - tsc; 441 | tsc = t; 442 | if (delta > tsc_thresh) { 443 | tsc_total_int += delta; 444 | if (i < n) { 445 | ds[i] = delta > UINT32_MAX ? UINT32_MAX : delta; 446 | } else if (!tsc_overflow) { 447 | tsc_overflow = t; 448 | } 449 | ++i; 450 | } 451 | if (delta < tsc_delta_min) 452 | tsc_delta_min = delta; 453 | } 454 | tsc_delta_min = UINT64_MAX; // throw the first tsc_delta_min away 455 | while (tsc < limit) { 456 | uint64_t t = fenced_rdtscp(); 457 | uint32_t delta = t - tsc; 458 | tsc = t; 459 | if (delta > tsc_thresh) { 460 | tsc_total_int += delta; 461 | if (i < n) { 462 | ds[i] = delta > UINT32_MAX ? UINT32_MAX : delta; 463 | } else if (!tsc_overflow) { 464 | tsc_overflow = t; 465 | } 466 | ++i; 467 | } 468 | if (delta < tsc_delta_min) 469 | tsc_delta_min = delta; 470 | } 471 | 472 | while(!atomic_load_explicit(&quit_thread, memory_order_consume)) { 473 | _mm_pause(); 474 | } 475 | 476 | w->deltas = ds; 477 | w->samples = i < n ? 
i : n; 478 | w->thresh_cnt = i; 479 | w->tsc_start = start; 480 | w->tsc_overflow = tsc_overflow; 481 | w->tsc_total_int = tsc_total_int - (tsc_delta_min*i); 482 | w->tsc_delta_min = tsc_delta_min; 483 | 484 | for (size_t i = 0; i < w->samples; ++i) { 485 | // Assuming that we have some loop iterations without any interruption 486 | w->deltas[i] -= w->tsc_delta_min; 487 | } 488 | qsort(w->deltas, w->samples, sizeof w->deltas[0], cmp_u32); 489 | 490 | // no need release/consume/aquire those values because 491 | // the main thread calls pthread_join() before reading those values 492 | // which acts as a memory barrier 493 | 494 | return w; 495 | } 496 | 497 | 498 | static int pp_results(const Worker *ws, FILE *f) 499 | { 500 | Args *args = &global_args; 501 | fprintf(f, " CPU TSC_khz #intr #delta ovfl_ns invol_ctx sum_intr_ns iratio rt_s loop_ns median_ns p20_ns p80_ns p90_ns p99_ns p99.9_ns max_ns mad_ns\n"); 502 | uint32_t *ys = 0; 503 | for (unsigned cpu = 0; cpu < args->cpus; ++cpu) { 504 | if (!CPU_ISSET(cpu, &args->cpu_set)) 505 | continue; 506 | const Worker *w = ws+cpu; 507 | uint64_t intr_ns = mul_u64_u32_shr(w->tsc_total_int, 508 | args->mult, args->shift); 509 | ys = realloc(ys, (w->samples ? w->samples : 1) * sizeof ys[0]); 510 | if (!ys) { 511 | fprintf(stderr, "realloc in pp_results failed\n"); 512 | return -1; 513 | } 514 | uint32_t mad = mad_u32(w->deltas, ys, w->samples); 515 | fprintf(f, "%4u %8" PRIu32 " %6" PRIu64 " %7" PRIu64 516 | " %8" PRIu64 517 | " %10" PRIu64 518 | " %12" PRIu64 " %7.3f" 519 | " %5" PRIu32 520 | " %8" PRIu64 521 | " %10" PRIu64 522 | " %7" PRIu64 523 | " %7" PRIu64 524 | " %7" PRIu64 525 | " %7" PRIu64 526 | " %9" PRIu64 527 | " %8" PRIu64 528 | " %7" PRIu64 529 | "\n", 530 | cpu, args->tsc_khz, w->thresh_cnt, w->samples, 531 | w->tsc_overflow ? 
mul_u64_u32_shr(w->tsc_overflow - w->tsc_start, 532 | args->mult, args->shift) : 0, 533 | w->invol_switch, 534 | intr_ns, (double)intr_ns/((double)args->runtime_s*1000000000), 535 | args->runtime_s, 536 | mul_u64_u32_shr(w->tsc_delta_min, args->mult, args->shift), 537 | mul_u64_u32_shr(percentile_u32(w->deltas, w->samples, 1, 2), 538 | args->mult, args->shift), 539 | mul_u64_u32_shr(percentile_u32(w->deltas, w->samples, 1, 5), 540 | args->mult, args->shift), 541 | mul_u64_u32_shr(percentile_u32(w->deltas, w->samples, 4, 5), 542 | args->mult, args->shift), 543 | mul_u64_u32_shr(percentile_u32(w->deltas, w->samples, 90, 100), 544 | args->mult, args->shift), 545 | mul_u64_u32_shr(percentile_u32(w->deltas, w->samples, 99, 100), 546 | args->mult, args->shift), 547 | mul_u64_u32_shr(percentile_u32(w->deltas, w->samples, 999, 1000), 548 | args->mult, args->shift), 549 | mul_u64_u32_shr(w->samples ? w->deltas[w->samples - 1] : 0, 550 | args->mult, args->shift), 551 | mul_u64_u32_shr(mad, args->mult, args->shift) 552 | ); 553 | } 554 | free(ys); 555 | return 0; 556 | } 557 | 558 | static int create_workers(Worker *ws) 559 | { 560 | Args *args = &global_args; 561 | for (unsigned cpu = 0; cpu < args->cpus; ++cpu) { 562 | ws[cpu].cpu_id = cpu; 563 | // => no need to synchronize this thread parameter because pthread_join 564 | // acts as a memory barrier 565 | if (!CPU_ISSET(cpu, &args->cpu_set)) 566 | continue; 567 | 568 | pthread_attr_t attr; 569 | int r = pthread_attr_init(&attr); 570 | if (r) { 571 | perror_e(r, "pthread_attr_init failed"); 572 | return 1; 573 | } 574 | cpu_set_t cpus; 575 | CPU_ZERO(&cpus); 576 | CPU_SET(cpu, &cpus); 577 | r = pthread_attr_setaffinity_np(&attr, sizeof cpus, &cpus); 578 | if (r) { 579 | perror_e(r, "pthread_attr_setaffinity_np failed"); 580 | return 1; 581 | } 582 | if (args->sched_policy) { 583 | r = pthread_attr_setschedpolicy(&attr, args->sched_policy); 584 | if (r) { 585 | perror_e(r, "pthread_attr_setschedpolicy failed"); 586 | return 
1; 587 | } 588 | // without any prio pthread_create complains about 'Invalid argument' 589 | struct sched_param param = { .sched_priority = args->sched_prio }; 590 | r = pthread_attr_setschedparam(&attr, ¶m); 591 | if (r) { 592 | perror_e(r, "pthread_attr_setschedparam failed"); 593 | return 1; 594 | } 595 | r = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED); 596 | if (r) { 597 | perror_e(r, "pthread_attr_setinheritsched failed"); 598 | return 1; 599 | } 600 | } 601 | r = pthread_create(&ws[cpu].worker_id, &attr, worker_main, ws+cpu); 602 | if (r) { 603 | perror_e(r, "pthread_create failed"); 604 | return 1; 605 | } 606 | ws[cpu].tid = pthread_to_tid(ws[cpu].worker_id, args->tid_off); 607 | if (!ws[cpu].tid) { 608 | fprintf(stderr, "Couldn't get TID of created thread\n"); 609 | return 1; 610 | } 611 | r = pthread_attr_destroy(&attr); 612 | if (r) { 613 | perror_e(r, "pthread_attr_init failed"); 614 | return 1; 615 | } 616 | } 617 | return 0; 618 | } 619 | 620 | static int join_workers(Worker *ws) 621 | { 622 | Args *args = &global_args; 623 | bool error_in_thread = false; 624 | for (unsigned cpu = 0; cpu < args->cpus; ++cpu) { 625 | if (!CPU_ISSET(cpu, &args->cpu_set)) 626 | continue; 627 | void *w_ret = 0; 628 | int r = pthread_join(ws[cpu].worker_id, &w_ret); 629 | if (r) { 630 | perror_e(r, "pthread_join failed"); 631 | return 1; 632 | } 633 | if (!w_ret) 634 | error_in_thread = true; 635 | } 636 | if (error_in_thread) { 637 | fprintf(stderr, "One thread reported an error\n"); 638 | return 1; 639 | } 640 | return 0; 641 | } 642 | 643 | 644 | int main(int argc, char **argv) 645 | { 646 | int r = check_cpuinfo(); 647 | if (r) { 648 | fprintf(stderr, "CPU doesn't have constant_tsc+nonstop_tsc features\n"); 649 | return 1; 650 | } 651 | Args *args = &global_args; 652 | r = parse_args(args, argc, argv); 653 | if (r) { 654 | fprintf(stderr, "Parsing arguments failed\n"); 655 | return 1; 656 | } 657 | r = set_params(args); 658 | if (r) { 659 | 
fprintf(stderr, "Setting parameters failed\n"); 660 | return 1; 661 | } 662 | 663 | 664 | Worker *ws = calloc(args->cpus, sizeof ws[0]); 665 | if (!ws) { 666 | perror("workers allocation"); 667 | return 1; 668 | } 669 | r = create_workers(ws); 670 | if (r) { 671 | return 1; 672 | } 673 | 674 | atomic_store_explicit(&start_work, true, memory_order_release); 675 | 676 | struct timespec ts = { .tv_sec = args->runtime_s, .tv_nsec = 100 * 1000}; 677 | r = nanosleep(&ts, NULL); 678 | if (r == -1) { 679 | perror("sleep of control thread was interrupted"); 680 | return 1; 681 | } 682 | 683 | for (unsigned cpu = 0; cpu < args->cpus; ++cpu) { 684 | if (!CPU_ISSET(cpu, &args->cpu_set)) 685 | continue; 686 | int r = read_proc_sched(args->pid, ws[cpu].tid, ws + cpu); 687 | if (r) { 688 | return 1; 689 | } 690 | } 691 | 692 | atomic_store_explicit(&quit_thread, true, memory_order_release); 693 | 694 | r = join_workers(ws); 695 | if (r) { 696 | return 1; 697 | } 698 | 699 | r = pp_results(ws, stdout); 700 | if (r) { 701 | return 1; 702 | } 703 | 704 | free(ws); 705 | 706 | return 0; 707 | } 708 | -------------------------------------------------------------------------------- /pingpong.c: -------------------------------------------------------------------------------- 1 | // pingpong - measure thread notification overhead 2 | // 3 | // 2019, Georg Sauthoff 4 | // 5 | // SPDX-License-Identifier: GPL-3.0-or-later 6 | 7 | #define _GNU_SOURCE 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include // __rdtsc(), _mm_lfence(), ... 
21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include "util.h" 27 | #include "tsc.h" 28 | 29 | static atomic_bool start_work; 30 | 31 | // make sure that both variables go into different cachelines 32 | // (intel/amd CPUs have 64 byte cache lines) 33 | // without C11 support 34 | //static _Atomic uint64_t g_tsc __attribute__ ((aligned (64))); 35 | //static alignas(64) _Atomic uint64_t g_tsc; 36 | 37 | 38 | struct Cell { 39 | alignas(64) _Atomic uint64_t tsc; 40 | }; 41 | typedef struct Cell Cell; 42 | 43 | static Cell g_cell[2]; 44 | 45 | // without C11 support: 46 | // struct Item { ... } __attribute__ ((aligned (64))); 47 | 48 | struct Item { 49 | // aligning the first field is equivalent to aligning the struct itself 50 | alignas(64) pthread_mutex_t mutex; 51 | pthread_cond_t cond_var; 52 | uint64_t tsc; 53 | }; 54 | typedef struct Item Item; 55 | 56 | static_assert(sizeof(Item) % 64 == 0, "Item is not aligned"); 57 | 58 | static Item g_item[2] = { 59 | { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER }, 60 | { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER } 61 | }; 62 | 63 | static_assert(alignof(g_item) == 64, "Item array is not aligned"); 64 | 65 | static int g_pipes[2][2]; 66 | 67 | 68 | struct Follicle { 69 | alignas(64) _Atomic int futex; 70 | uint64_t tsc; 71 | }; 72 | typedef struct Follicle Follicle; 73 | static Follicle g_follicle[2]; 74 | 75 | static int 76 | atomic_futex(_Atomic int *uaddr, int futex_op, int val, 77 | const struct timespec *timeout, int *uaddr2, int val3) 78 | { 79 | (void)uaddr2; 80 | return syscall(SYS_futex, uaddr, futex_op, val, timeout, uaddr, val3); 81 | } 82 | 83 | static int futex_lock(_Atomic int *f) 84 | { 85 | for (;;) { 86 | int zero = 0; 87 | if (atomic_compare_exchange_weak(f, &zero, 1)) 88 | return 0; 89 | int r = atomic_futex(f, FUTEX_WAIT_PRIVATE, 1, NULL, NULL, 0); 90 | if (r == -1) { 91 | if (errno != EAGAIN) 92 | return r; 93 | } 94 | } 95 | return 0; 96 | } 97 | 98 | // returns 1 
if one thread was woken up 99 | static int futex_unlock(_Atomic int *f) 100 | { 101 | int one = 1; 102 | if (atomic_compare_exchange_strong(f, &one, 0)) { 103 | int r = atomic_futex(f, FUTEX_WAKE_PRIVATE, 1, NULL, NULL, 0); 104 | return r; 105 | } else { 106 | return -2; 107 | } 108 | return 0; 109 | } 110 | 111 | struct Stripe { 112 | alignas(64) sem_t sem; 113 | uint64_t tsc; 114 | }; 115 | typedef struct Stripe Stripe; 116 | static Stripe g_stripe[2]; 117 | 118 | enum Method { 119 | METHOD_SPIN, 120 | METHOD_SPIN_PAUSE, 121 | METHOD_SPIN_PAUSE_MORE, 122 | METHOD_COND_VAR, 123 | METHOD_NULL, 124 | METHOD_PIPE, 125 | METHOD_FUTEX, 126 | METHOD_SEMAPHORE 127 | }; 128 | typedef enum Method Method; 129 | struct Args { 130 | uint32_t tsc_khz; 131 | uint32_t mult; 132 | uint32_t shift; 133 | unsigned n; // number of iterations 134 | unsigned k; // number of pause iterations before each store 135 | unsigned p; // number of pause iterations after each test 136 | unsigned pin[2]; 137 | bool json; 138 | Method method; 139 | }; 140 | typedef struct Args Args; 141 | 142 | static void help(FILE *f, const char *argv0) 143 | { 144 | fprintf(f, "pingpong - measure inter thread notification overhead\n" 145 | "\n" 146 | "call: %s [OPT..]\n" 147 | "\n" 148 | "Options:\n" 149 | " --khz KHZ TSC frequency (default: parse journalctl, read /proc)\n" 150 | " -n ping-pong iterations (default: 10^6)\n" 151 | " -k #iterations pause before storing (default: 1000)\n" 152 | " --pin THREAD CPU 0 <= THREAD <= 1, pin each thread to a CPU/core\n" 153 | " (default: no pinning)\n" 154 | " --json write raw values to JSON file (default: false)\n" 155 | " --spin loop on an atomic variable (default)\n" 156 | " --spin-pause pause after each atomic load\n" 157 | " -p #pauses after each atomic load\n" 158 | " --cv use a condition variable for ping pong\n" 159 | " --pipe use a UNIX pipe for ping pong\n" 160 | " --futex use a Linux futex for ping pong\n" 161 | " --sem use a POSIX semaphore for ping ping\n" 
162 | " --null signal nothing\n" 163 | "\n" 164 | "2019, Georg Sauthoff , GPLv3+\n" 165 | , argv0); 166 | } 167 | 168 | static int parse_args(Args *args, int argc, char **argv) 169 | { 170 | *args = (const Args){0}; 171 | for (int i = 1; i < argc; ++i) { 172 | if (!strcmp(argv[i], "-h") || !strcmp(argv[i], "--help")) { 173 | help(stdout, argv[0]); 174 | exit(0); 175 | } else if (!strcmp(argv[i], "--khz")) { 176 | ++i; 177 | if (i >= argc) { 178 | fprintf(stderr, "--khz argument is missing\n"); 179 | return -1; 180 | } 181 | args->tsc_khz = atoi(argv[i]); 182 | } else if (!strcmp(argv[i], "-n")) { 183 | ++i; 184 | if (i >= argc) { 185 | fprintf(stderr, "-n argument is missing\n"); 186 | return -1; 187 | } 188 | args->n = atoi(argv[i]); 189 | } else if (!strcmp(argv[i], "-k")) { 190 | ++i; 191 | if (i >= argc) { 192 | fprintf(stderr, "-k argument is missing\n"); 193 | return -1; 194 | } 195 | args->k = atoi(argv[i]); 196 | } else if (!strcmp(argv[i], "-p")) { 197 | ++i; 198 | if (i >= argc) { 199 | fprintf(stderr, "-p argument is missing\n"); 200 | return -1; 201 | } 202 | args->p = atoi(argv[i]); 203 | } else if (!strcmp(argv[i], "--pin")) { 204 | if (i+2 >= argc) { 205 | fprintf(stderr, "--pin THREAD CPU arguments are missing\n"); 206 | return -1; 207 | } 208 | unsigned j = atoi(argv[++i]); 209 | unsigned cpu = atoi(argv[++i]); 210 | if (j > 1) { 211 | fprintf(stderr, "--pin THREAD CPU - 0 <= THREAD <= 1\n"); 212 | return -1; 213 | } 214 | args->pin[j] = cpu + 1; 215 | } else if (!strcmp(argv[i], "--json")) { 216 | args->json = true; 217 | } else if (!strcmp(argv[i], "--spin")) { 218 | args->method = METHOD_SPIN; 219 | } else if (!strcmp(argv[i], "--spin-pause")) { 220 | args->method = METHOD_SPIN_PAUSE; 221 | } else if (!strcmp(argv[i], "--cv")) { 222 | args->method = METHOD_COND_VAR; 223 | } else if (!strcmp(argv[i], "--null")) { 224 | args->method = METHOD_NULL; 225 | } else if (!strcmp(argv[i], "--pipe")) { 226 | args->method = METHOD_PIPE; 227 | } else if 
(!strcmp(argv[i], "--futex")) { 228 | args->method = METHOD_FUTEX; 229 | } else if (!strcmp(argv[i], "--sem")) { 230 | args->method = METHOD_SEMAPHORE; 231 | } else { 232 | fprintf(stderr, "Unknown argument: %s\n", argv[i]); 233 | exit(1); 234 | } 235 | } 236 | if (!args->n) 237 | args-> n = 1000 * 1000; 238 | if (!args->k) 239 | args-> k = 1000; 240 | if (args->method == METHOD_SPIN_PAUSE && args->p) 241 | args->method = METHOD_SPIN_PAUSE_MORE; 242 | return 0; 243 | } 244 | 245 | struct Worker { 246 | pthread_t worker_id; 247 | unsigned init; // 0 -> start with send, 1 -> start with receive 248 | unsigned n; // number of iterations 249 | unsigned k; 250 | unsigned p; 251 | uint32_t *raw_ds; // delta values 252 | uint32_t *ds; // delta values 253 | unsigned ds_size; // #delta values 254 | }; 255 | typedef struct Worker Worker; 256 | 257 | 258 | static void *spin_main_finalize(Worker *x, uint32_t *ds, unsigned j) 259 | { 260 | assert(j <= x->n/2); 261 | uint32_t *raw_ds = malloc(j * sizeof raw_ds[0]); 262 | if (!raw_ds) { 263 | fprintf(stderr, "Failed to allocate delta array in thread\n"); 264 | return 0; 265 | } 266 | memcpy(raw_ds, ds, j * sizeof ds[0]); 267 | qsort(ds, j, sizeof ds[0], cmp_u32); 268 | x->ds = ds; 269 | x->raw_ds = raw_ds; 270 | x->ds_size = j; 271 | return x; 272 | } 273 | 274 | static void *spin_main(void *p) 275 | { 276 | Worker *x = (Worker*) p; 277 | Worker w = *x; 278 | 279 | uint64_t tsc = 1; 280 | unsigned j = 0; 281 | uint32_t *ds = calloc(w.n/2, sizeof ds[0]); 282 | if (!ds) { 283 | fprintf(stderr, "Failed to allocate delta array in thread\n"); 284 | return 0; 285 | } 286 | 287 | while(!atomic_load_explicit(&start_work, memory_order_consume)) { 288 | _mm_pause(); 289 | } 290 | 291 | for (unsigned i = 0; i < w.n; ++i) { 292 | if (i % 2 == w.init) { // sender 293 | unsigned k = i < 2 ? 
w.k : w.k * 2; 294 | for (unsigned j = 0; j < k; ++j) 295 | _mm_pause(); 296 | uint64_t t; 297 | for (;;) { 298 | t = fenced_rdtsc(); 299 | if (t <= tsc) 300 | continue; 301 | atomic_store_explicit(&g_cell[!w.init].tsc, t, 302 | memory_order_release); 303 | break; 304 | } 305 | } else { // receiver 306 | uint64_t new_tsc; 307 | for (;;) { 308 | new_tsc = atomic_load_explicit(&g_cell[w.init].tsc, 309 | memory_order_consume); 310 | if (new_tsc > tsc) { 311 | break; 312 | } 313 | } 314 | uint64_t now = fenced_rdtscp(); 315 | uint64_t delta = now - new_tsc; 316 | ds[j++] = delta; 317 | tsc = new_tsc; 318 | } 319 | } 320 | return spin_main_finalize(x, ds, j); 321 | } 322 | 323 | 324 | static void *spin_null_main(void *p) 325 | { 326 | Worker *x = (Worker*) p; 327 | Worker w = *x; 328 | 329 | unsigned j = 0; 330 | uint32_t *ds = calloc(w.n/2, sizeof ds[0]); 331 | if (!ds) { 332 | fprintf(stderr, "Failed to allocate delta array in thread\n"); 333 | return 0; 334 | } 335 | 336 | while(!atomic_load_explicit(&start_work, memory_order_consume)) { 337 | _mm_pause(); 338 | } 339 | 340 | for (unsigned i = 0; i < w.n/2; ++i) { 341 | uint64_t new_tsc = fenced_rdtsc(); 342 | uint64_t now = fenced_rdtscp(); 343 | uint64_t delta = now - new_tsc; 344 | ds[j++] = delta; 345 | } 346 | return spin_main_finalize(x, ds, j); 347 | } 348 | 349 | static void *spin_pause_main(void *p) 350 | { 351 | Worker *x = (Worker*) p; 352 | Worker w = *x; 353 | 354 | uint64_t tsc = 1; 355 | unsigned j = 0; 356 | uint32_t *ds = calloc(w.n/2, sizeof ds[0]); 357 | if (!ds) { 358 | fprintf(stderr, "Failed to allocate delta array in thread\n"); 359 | return 0; 360 | } 361 | 362 | while(!atomic_load_explicit(&start_work, memory_order_consume)) { 363 | _mm_pause(); 364 | } 365 | 366 | for (unsigned i = 0; i < w.n; ++i) { 367 | if (i % 2 == w.init) { // sender 368 | unsigned k = i < 2 ? 
// Ping-pong worker that spins on an atomic variable and executes w.p pause
// instructions after each unsuccessful load (cf. -p option).
//
// p points to this thread's Worker. The thread alternates between sending
// (i % 2 == init) and receiving: a sender publishes a fresh TSC value in the
// peer's cell, the receiver polls its own cell until a newer value shows up
// and records how many cycles passed since it was taken.
//
// Returns the result of spin_main_finalize(), or NULL if the sample array
// can't be allocated.
//
// NOTE(review): the sample array has n/2 elements but for an odd n the
// thread with init == 1 receives (n+1)/2 times - looks like an
// out-of-bounds write for odd n, confirm that n is always even.
static void *spin_pause_more_main(void *p)
{
    Worker *x = (Worker*) p;
    // work on a private copy of the parameters
    Worker w = *x;

    // last TSC value seen, anything newer counts as a new message
    uint64_t tsc = 1;
    // number of recorded deltas
    unsigned j = 0;
    uint32_t *ds = calloc(w.n/2, sizeof ds[0]);
    if (!ds) {
        fprintf(stderr, "Failed to allocate delta array in thread\n");
        return 0;
    }

    // wait for the start signal of the creating thread
    while(!atomic_load_explicit(&start_work, memory_order_consume)) {
        _mm_pause();
    }

    for (unsigned i = 0; i < w.n; ++i) {
        if (i % 2 == w.init) { // sender
            // pause k times before the very first send, 2k times afterwards
            unsigned k = i < 2 ? w.k : w.k * 2;
            for (unsigned j = 0; j < k; ++j)
                _mm_pause();
            uint64_t t;
            for (;;) {
                t = fenced_rdtsc();
                // only publish a time stamp that is newer than the last
                // received one, otherwise the peer wouldn't notice it
                if (t <= tsc)
                    continue;
                atomic_store_explicit(&g_cell[!w.init].tsc, t,
                        memory_order_release);
                break;
            }
        } else { // receiver
            uint64_t new_tsc;
            for (;;) {
                new_tsc = atomic_load_explicit(&g_cell[w.init].tsc,
                        memory_order_consume);
                if (new_tsc > tsc) {
                    break;
                }
                // back off for p pause instructions before the next load
                for (unsigned j = 0; j < w.p; ++j)
                    _mm_pause();
            }
            uint64_t now = fenced_rdtscp();
            // cycles between the sender's time stamp and its arrival here
            uint64_t delta = now - new_tsc;
            ds[j++] = delta;
            tsc = new_tsc;
        }
    }
    return spin_main_finalize(x, ds, j);
}
w.k : w.k * 2; 471 | for (unsigned j = 0; j < k; ++j) 472 | _mm_pause(); 473 | uint64_t t; 474 | for (;;) { 475 | t = fenced_rdtsc(); 476 | if (t <= tsc) 477 | continue; 478 | int r = pthread_mutex_lock(&g_item[!w.init].mutex); 479 | if (r) { 480 | perror_e(r, "sender: mutex lock"); 481 | return 0; 482 | } 483 | g_item[!w.init].tsc = t; 484 | r = pthread_mutex_unlock(&g_item[!w.init].mutex); 485 | if (r) { 486 | perror_e(r, "sender: mutex unlock"); 487 | return 0; 488 | } 489 | r = pthread_cond_signal(&g_item[!w.init].cond_var); 490 | if (r) { 491 | perror_e(r, "cond signal: mutex lock"); 492 | return 0; 493 | } 494 | break; 495 | } 496 | } else { // receiver 497 | int r = pthread_mutex_lock(&g_item[w.init].mutex); 498 | if (r) { 499 | perror_e(r, "retrieve: mutex lock"); 500 | return 0; 501 | } 502 | while (g_item[w.init].tsc <= tsc) { 503 | r = pthread_cond_wait(&g_item[w.init].cond_var, 504 | &g_item[w.init].mutex); 505 | if (r) { 506 | perror_e(r, "cond_wait"); 507 | return 0; 508 | } 509 | } 510 | uint64_t new_tsc = g_item[w.init].tsc; 511 | r = pthread_mutex_unlock(&g_item[w.init].mutex); 512 | if (r) { 513 | perror_e(r, "retrieve: mutex unlock"); 514 | return 0; 515 | } 516 | uint64_t now = fenced_rdtscp(); 517 | uint64_t delta = now - new_tsc; 518 | ds[j++] = delta; 519 | tsc = new_tsc; 520 | } 521 | } 522 | return spin_main_finalize(x, ds, j); 523 | } 524 | 525 | static void *pipe_main(void *p) 526 | { 527 | Worker *x = (Worker*) p; 528 | Worker w = *x; 529 | 530 | uint64_t tsc = 1; 531 | unsigned j = 0; 532 | uint32_t *ds = calloc(w.n/2, sizeof ds[0]); 533 | if (!ds) { 534 | fprintf(stderr, "Failed to allocate delta array in thread\n"); 535 | return 0; 536 | } 537 | 538 | while(!atomic_load_explicit(&start_work, memory_order_consume)) { 539 | _mm_pause(); 540 | } 541 | 542 | for (unsigned i = 0; i < w.n; ++i) { 543 | if (i % 2 == w.init) { // sender 544 | unsigned k = i < 2 ? 
w.k : w.k * 2; 545 | for (unsigned j = 0; j < k; ++j) 546 | _mm_pause(); 547 | uint64_t t; 548 | for (;;) { 549 | t = fenced_rdtsc(); 550 | if (t <= tsc) 551 | continue; 552 | ssize_t l = write(g_pipes[!w.init][1], &t, sizeof t); 553 | if (l == -1) { 554 | perror("pipe write"); 555 | return 0; 556 | } 557 | if (l != sizeof t) { 558 | fprintf(stderr, "written into pipe less than expected\n"); 559 | return 0; 560 | } 561 | break; 562 | } 563 | } else { // receiver 564 | uint64_t new_tsc; 565 | ssize_t l = read(g_pipes[w.init][0], &new_tsc, sizeof new_tsc); 566 | if (l == -1) { 567 | perror("pipe read"); 568 | return 0; 569 | } 570 | if (l != sizeof new_tsc) { 571 | fprintf(stderr, "read from pipe less than expected\n"); 572 | return 0; 573 | } 574 | uint64_t now = fenced_rdtscp(); 575 | uint64_t delta = now - new_tsc; 576 | ds[j++] = delta; 577 | tsc = new_tsc; 578 | } 579 | } 580 | return spin_main_finalize(x, ds, j); 581 | } 582 | 583 | static void *semaphore_main(void *p) 584 | { 585 | Worker *x = (Worker*) p; 586 | Worker w = *x; 587 | 588 | uint64_t tsc = 1; 589 | unsigned j = 0; 590 | uint32_t *ds = calloc(w.n/2, sizeof ds[0]); 591 | if (!ds) { 592 | fprintf(stderr, "Failed to allocate delta array in thread\n"); 593 | return 0; 594 | } 595 | 596 | while(!atomic_load_explicit(&start_work, memory_order_consume)) { 597 | _mm_pause(); 598 | } 599 | 600 | for (unsigned i = 0; i < w.n; ++i) { 601 | if (i % 2 == w.init) { // sender 602 | int r = sem_wait(&g_stripe[w.init].sem); 603 | if (r == -1) { 604 | perror("sem wait"); 605 | return 0; 606 | } 607 | 608 | unsigned k = i < 2 ? 
w.k : w.k * 2; 609 | for (unsigned j = 0; j < k; ++j) 610 | _mm_pause(); 611 | uint64_t t; 612 | for (;;) { 613 | t = fenced_rdtsc(); 614 | if (t <= tsc) 615 | continue; 616 | g_stripe[!w.init].tsc = t; 617 | 618 | int r = sem_post(&g_stripe[!w.init].sem); 619 | if (r == -1) { 620 | perror("sem post"); 621 | return 0; 622 | } 623 | 624 | break; 625 | } 626 | } else { // receiver 627 | uint64_t new_tsc; 628 | 629 | int r = sem_wait(&g_stripe[w.init].sem); 630 | if (r == -1) { 631 | perror("sem wait"); 632 | return 0; 633 | } 634 | new_tsc = g_stripe[w.init].tsc; 635 | 636 | uint64_t now = fenced_rdtscp(); 637 | uint64_t delta = now - new_tsc; 638 | ds[j++] = delta; 639 | tsc = new_tsc; 640 | 641 | r = sem_post(&g_stripe[w.init].sem); 642 | if (r == -1) { 643 | perror("sem post"); 644 | return 0; 645 | } 646 | } 647 | } 648 | return spin_main_finalize(x, ds, j); 649 | } 650 | 651 | // note that this lock/unlock scheme doesn't work with posix mutexes 652 | // because unlocking a locked posix mutex from a different thread 653 | // is undefined behaviour 654 | static void *futex_main(void *p) 655 | { 656 | Worker *x = (Worker*) p; 657 | Worker w = *x; 658 | 659 | uint64_t tsc = 1; 660 | unsigned j = 0; 661 | uint32_t *ds = calloc(w.n/2, sizeof ds[0]); 662 | if (!ds) { 663 | fprintf(stderr, "Failed to allocate delta array in thread\n"); 664 | return 0; 665 | } 666 | 667 | while(!atomic_load_explicit(&start_work, memory_order_consume)) { 668 | _mm_pause(); 669 | } 670 | 671 | for (unsigned i = 0; i < w.n; ++i) { 672 | if (i % 2 == w.init) { // sender 673 | int r = futex_lock(&g_follicle[w.init].futex); 674 | if (r == -1 ) { 675 | perror("futex wait"); 676 | return 0; 677 | } 678 | 679 | unsigned k = i < 2 ? 
w.k : w.k * 2; 680 | for (unsigned j = 0; j < k; ++j) 681 | _mm_pause(); 682 | uint64_t t; 683 | for (;;) { 684 | t = fenced_rdtsc(); 685 | if (t <= tsc) 686 | continue; 687 | g_follicle[!w.init].tsc = t; 688 | int r = futex_unlock(&g_follicle[!w.init].futex); 689 | if (r == -1) { 690 | perror("futex wake"); 691 | return 0; 692 | } 693 | if (r == -2) { 694 | fprintf(stderr, "%u: unexpectedly unlocked\n", w.init); 695 | abort(); 696 | } 697 | break; 698 | } 699 | } else { // receiver 700 | uint64_t new_tsc; 701 | 702 | int r = futex_lock(&g_follicle[w.init].futex); 703 | if (r == -1 ) { 704 | perror("futex wait"); 705 | return 0; 706 | } 707 | new_tsc = g_follicle[w.init].tsc; 708 | 709 | uint64_t now = fenced_rdtscp(); 710 | uint64_t delta = now - new_tsc; 711 | ds[j++] = delta; 712 | tsc = new_tsc; 713 | 714 | r = futex_unlock(&g_follicle[w.init].futex); 715 | if (r == -1 ) { 716 | perror("futex wake"); 717 | return 0; 718 | } 719 | if (r == -2) { 720 | fprintf(stderr, "%u: unexpectedly unlocked\n", w.init); 721 | abort(); 722 | } 723 | } 724 | } 725 | return spin_main_finalize(x, ds, j); 726 | } 727 | 728 | static int print_json(const Args *args, const Worker *ws, FILE *f) 729 | { 730 | fprintf(f, "[\n"); 731 | for (unsigned i = 0; i < 2; ++i) { 732 | const Worker *w = ws + i; 733 | fprintf(f, " ["); 734 | if (w->ds_size) { 735 | fprintf(f, " %" PRIu64, 736 | mul_u64_u32_shr(w->raw_ds[0], args->mult, args->shift)); 737 | } 738 | for (unsigned j = 1; j < w->ds_size; ++j) { 739 | fprintf(f, ", %" PRIu64, 740 | mul_u64_u32_shr(w->raw_ds[j], args->mult, args->shift)); 741 | } 742 | fprintf(f, "]"); 743 | if (!i) 744 | fprintf(f, ",\n"); 745 | } 746 | fprintf(f, "\n]\n"); 747 | return 0; 748 | } 749 | 750 | static int pp_results(const Args *args, const Worker *ws, FILE *f) 751 | { 752 | fprintf(f, "Thread TSC_khz #delta min_ns max_ns median_ns p20_ns p80_ns p90_ns p99_ns p99.9_ns mad_ns\n"); 753 | uint32_t *ys = 0; 754 | for (unsigned i = 0; i < 2; ++i) { 755 | const 
Worker *w = ws + i; 756 | ys = realloc(ys, w->ds_size * sizeof ys[0]); 757 | if (!ys) { 758 | fprintf(stderr, "realloc in pp_results failed\n"); 759 | return -1; 760 | } 761 | uint32_t mad = mad_u32(w->ds, ys, w->ds_size); 762 | if (!w->ds_size) 763 | continue; 764 | fprintf(f, "%6u %8" PRIu32 " %7u " 765 | "%7" PRIu64 " " 766 | "%7" PRIu64 " " 767 | "%10" PRIu64 " " 768 | "%7" PRIu64 " " 769 | "%7" PRIu64 " " 770 | "%7" PRIu64 " " 771 | "%7" PRIu64 " " 772 | "%9" PRIu64 " " 773 | "%7" PRIu64 " " 774 | "\n", 775 | i, args->tsc_khz, w->ds_size, 776 | mul_u64_u32_shr(w->ds[0], 777 | args->mult, args->shift), 778 | mul_u64_u32_shr(w->ds[w->ds_size - 1], 779 | args->mult, args->shift), 780 | mul_u64_u32_shr(percentile_u32(w->ds, w->ds_size, 1, 2), 781 | args->mult, args->shift), 782 | mul_u64_u32_shr(percentile_u32(w->ds, w->ds_size, 1, 5), 783 | args->mult, args->shift), 784 | mul_u64_u32_shr(percentile_u32(w->ds, w->ds_size, 4, 5), 785 | args->mult, args->shift), 786 | mul_u64_u32_shr(percentile_u32(w->ds, w->ds_size, 90, 100), 787 | args->mult, args->shift), 788 | mul_u64_u32_shr(percentile_u32(w->ds, w->ds_size, 99, 100), 789 | args->mult, args->shift), 790 | mul_u64_u32_shr(percentile_u32(w->ds, w->ds_size, 999, 1000), 791 | args->mult, args->shift), 792 | mul_u64_u32_shr(mad, args->mult, args->shift) 793 | ); 794 | } 795 | free(ys); 796 | return 0; 797 | } 798 | 799 | static int spin_pingpong(const Args *args) 800 | { 801 | Worker ws[2] = {0}; 802 | for (unsigned i = 0; i < 2; ++i) { 803 | ws[i].n = args->n; 804 | ws[i].k = args->k; 805 | ws[i].p = args->p; 806 | ws[i].init = i; 807 | pthread_attr_t attr; 808 | int r = pthread_attr_init(&attr); 809 | if (r) { 810 | perror_e(r, "pthread_attr_init failed"); 811 | return 1; 812 | } 813 | if (args->pin[i]) { 814 | cpu_set_t cpus; 815 | CPU_ZERO(&cpus); 816 | CPU_SET(args->pin[i] - 1, &cpus); 817 | r = pthread_attr_setaffinity_np(&attr, sizeof cpus, &cpus); 818 | if (r) { 819 | perror_e(r, "pthread_attr_setaffinity_np 
failed"); 820 | return 1; 821 | } 822 | } 823 | switch (args->method) { 824 | case METHOD_SPIN: 825 | r = pthread_create(&ws[i].worker_id, &attr, spin_main, ws+i); 826 | break; 827 | case METHOD_SPIN_PAUSE: 828 | r = pthread_create(&ws[i].worker_id, &attr, spin_pause_main, 829 | ws+i); 830 | break; 831 | case METHOD_SPIN_PAUSE_MORE: 832 | r = pthread_create(&ws[i].worker_id, &attr, 833 | spin_pause_more_main, ws+i); 834 | break; 835 | case METHOD_COND_VAR: 836 | r = pthread_create(&ws[i].worker_id, &attr, cv_main, ws+i); 837 | break; 838 | case METHOD_PIPE: 839 | r = pipe(g_pipes[i]); 840 | if (r == -1) { 841 | perror("pipe"); 842 | return 1; 843 | } 844 | r = pthread_create(&ws[i].worker_id, &attr, pipe_main, ws+i); 845 | break; 846 | case METHOD_FUTEX: 847 | g_follicle[i].futex = i; 848 | r = pthread_create(&ws[i].worker_id, &attr, futex_main, ws+i); 849 | break; 850 | case METHOD_SEMAPHORE: 851 | r = sem_init(&g_stripe[i].sem, 0, !i); 852 | if (r == -1) { 853 | perror("sem_init"); 854 | return 1; 855 | } 856 | r = pthread_create(&ws[i].worker_id, &attr, semaphore_main, ws+i); 857 | break; 858 | case METHOD_NULL: 859 | r = pthread_create(&ws[i].worker_id, &attr, spin_null_main, 860 | ws+i); 861 | break; 862 | } 863 | if (r) { 864 | perror_e(r, "pthread_create failed"); 865 | return 1; 866 | } 867 | r = pthread_attr_destroy(&attr); 868 | if (r) { 869 | perror_e(r, "pthread_attr_init failed"); 870 | return 1; 871 | } 872 | } 873 | 874 | atomic_store_explicit(&start_work, true, memory_order_release); 875 | 876 | bool error_in_thread = false; 877 | for (unsigned i = 0; i < 2; ++i) { 878 | void *w_ret = 0; 879 | int r = pthread_join(ws[i].worker_id, &w_ret); 880 | if (r) { 881 | perror_e(r, "pthread_join failed"); 882 | return 1; 883 | } 884 | if (!w_ret) 885 | error_in_thread = true; 886 | } 887 | if (error_in_thread) { 888 | fprintf(stderr, "One thread reported an error\n"); 889 | return 1; 890 | } 891 | if (args->json) 892 | print_json(args, ws, stdout); 893 | else 
894 | pp_results(args, ws, stdout); 895 | for (unsigned i = 0; i < 2; ++i) { 896 | free(ws[i].ds); 897 | free(ws[i].raw_ds); 898 | } 899 | return 0; 900 | } 901 | 902 | 903 | int main(int argc, char **argv) 904 | { 905 | Args args; 906 | int r = parse_args(&args, argc, argv); 907 | if (r) { 908 | return 1; 909 | } 910 | if (!args.tsc_khz) { 911 | int r = get_tsc_khz(&args.tsc_khz); 912 | if (r < 0) 913 | return 1; 914 | } 915 | clocks_calc_mult_shift(&args.mult, &args.shift, 916 | args.tsc_khz, 1000000l, 0); 917 | 918 | r = spin_pingpong(&args); 919 | if (r) 920 | return 1; 921 | return 0; 922 | } 923 | -------------------------------------------------------------------------------- /ptp-clock-future.h: -------------------------------------------------------------------------------- 1 | #ifndef PTP_CLOCK_FUTURE_H 2 | #define PTP_CLOCK_FUTURE_H 3 | 4 | #include 5 | 6 | 7 | // Note that PTP_SYS_OFFSET_EXTENDED is missing on some RHEL 7 versions although 8 | // PTP_SYS_OFFSET_PRECISE is even available. 9 | 10 | 11 | // imported from https://sourceforge.net/p/linuxptp/code/ci/61c6a708980217119e829e4b41ea2504e673e4fb/ 12 | #ifndef PTP_SYS_OFFSET_EXTENDED 13 | 14 | #define PTP_SYS_OFFSET_EXTENDED \ 15 | _IOWR(PTP_CLK_MAGIC, 9, struct ptp_sys_offset_extended) 16 | 17 | struct ptp_sys_offset_extended { 18 | unsigned int n_samples; /* Desired number of measurements. */ 19 | unsigned int rsv[3]; /* Reserved for future use. */ 20 | /* 21 | * Array of [system, phc, system] time stamps. The kernel will provide 22 | * 3*n_samples time stamps. 23 | */ 24 | struct ptp_clock_time ts[PTP_MAX_SAMPLES][3]; 25 | }; 26 | 27 | #endif /* PTP_SYS_OFFSET_EXTENDED */ 28 | 29 | 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /ptp-clock-offset.c: -------------------------------------------------------------------------------- 1 | // Check what methods are available for PTP offset calculation 2 | // and how they perform. 
// Convert a struct timespec into nanoseconds.
//
// The seconds are widened to 64 bit before they are scaled, otherwise the
// multiplication is carried out in time_t and overflows on platforms where
// time_t is a 32 bit type.
static int64_t ts2ns(const struct timespec *ts)
{
    return (int64_t) ts->tv_sec * INT64_C(1000000000) + (int64_t) ts->tv_nsec;
}
92 | if (r[0] == -1) { 93 | perror("clock_gettime CLOCK_REALTIME 1"); 94 | return 1; 95 | } 96 | if (r[1] == -1) { 97 | perror("clock_gettime ptp"); 98 | return 1; 99 | } 100 | if (r[2] == -1) { 101 | perror("clock_gettime CLOCK_REALTIME 2"); 102 | return 1; 103 | } 104 | int64_t delay = ts2ns_tai(ts + 2) - ts2ns_tai(ts); 105 | int64_t off = (ts2ns_tai(ts) + ts2ns_tai(ts + 2)) / 2 - ts2ns(ts + 1); 106 | printf("clock_gettime no %u: %" PRId64 " ns, delay: %" PRId64 " ns\n", 107 | i+1, off, delay); 108 | } 109 | return 0; 110 | } 111 | 112 | static int read_ptp_offset(int fd) 113 | { 114 | struct ptp_sys_offset pso = { .n_samples = 5}; 115 | uint64_t b = fenced_rdtsc(); 116 | int r = ioctl(fd, PTP_SYS_OFFSET, &pso); 117 | uint64_t e = fenced_rdtscp(); 118 | if (r) { 119 | perror("PTP_SYS_OFFSET"); 120 | return 1; 121 | } 122 | uint64_t sc_delay = tsc2ns(e - b); 123 | unsigned k = 1; 124 | for (unsigned i = 0; i < pso.n_samples * 2; i+=2, ++k) { 125 | int64_t delay = pct2ns_tai(pso.ts + i+2) - pct2ns_tai(pso.ts + i); 126 | int64_t off = (pct2ns_tai(pso.ts + i) + pct2ns_tai(pso.ts + i+2)) / 2 - pct2ns(pso.ts + i+1); 127 | printf("PTP_SYS_OFFSET no %u: %" PRId64 " ns, delay: %" PRId64 " ns, syscall: %" PRIu64 " ns\n", 128 | k, off, delay, sc_delay); 129 | } 130 | return 0; 131 | } 132 | 133 | static int read_ptp_offset_extended(int fd) 134 | { 135 | struct ptp_sys_offset_extended psoe = { .n_samples = 5}; 136 | uint64_t b = fenced_rdtsc(); 137 | int r = ioctl(fd, PTP_SYS_OFFSET_EXTENDED, &psoe); 138 | uint64_t e = fenced_rdtscp(); 139 | if (r) { 140 | perror("PTP_SYS_OFFSET_EXTENDED"); 141 | return 1; 142 | } 143 | uint64_t sc_delay = tsc2ns(e - b); 144 | for (unsigned i = 0; i < psoe.n_samples; ++i) { 145 | int64_t delay = pct2ns_tai(&psoe.ts[i][2]) - pct2ns_tai(&psoe.ts[i][0]); 146 | int64_t off = (pct2ns_tai(&psoe.ts[i][0]) + pct2ns_tai(&psoe.ts[i][2])) / 2 147 | - pct2ns(&psoe.ts[i][1]); 148 | printf("PTP_SYS_OFFSET_EXTENDED no %u: %" PRId64 " ns, delay: %" PRId64 " 
ns, sycall: %" PRIu64 " ns\n", 149 | i+1, off, delay, sc_delay); 150 | } 151 | return 0; 152 | } 153 | 154 | static int read_ptp_offset_precise(int fd) 155 | { 156 | struct ptp_sys_offset_precise psop = { 0 }; 157 | uint64_t b = fenced_rdtsc(); 158 | int r = ioctl(fd, PTP_SYS_OFFSET_PRECISE, &psop); 159 | uint64_t e = fenced_rdtscp(); 160 | if (r) { 161 | perror("PTP_SYS_OFFSET_PRECISE"); 162 | return 1; 163 | } 164 | uint64_t sc_delay = tsc2ns(e - b); 165 | int64_t off = pct2ns_tai(&psop.sys_realtime) - pct2ns(&psop.device); 166 | printf("PTP_SYS_OFFSET_PRECISE: %" PRId64 " ns, delay: 0 ns, syscall: %" PRIu64 " ns\n", 167 | off, sc_delay); 168 | return 0; 169 | } 170 | 171 | 172 | static int mk_if_fd() 173 | { 174 | int fd = socket(AF_INET, SOCK_DGRAM, 0); 175 | if (fd == -1) 176 | perror("creating if fd"); 177 | return fd; 178 | } 179 | 180 | 181 | static int get_ptp_dev(int fd, const char *if_name, const char **dev, bool *is_sfc) 182 | { 183 | struct ethtool_ts_info tsi = { 184 | .cmd = ETHTOOL_GET_TS_INFO, 185 | .phc_index = 23 186 | }; 187 | 188 | struct ifreq ifr = { 189 | .ifr_data = (void*) &tsi 190 | }; 191 | strcpy(ifr.ifr_name, if_name); 192 | 193 | int r = ioctl(fd, SIOCETHTOOL, &ifr); 194 | if (r == -1) { 195 | perror("ioctl SIOCETHTOOL ETHTOOL_GET_TS_INFO"); 196 | return -1; 197 | } 198 | 199 | if (tsi.phc_index == -1) { 200 | fprintf(stderr, "%s has no PTP hardware clock device\n", if_name); 201 | return -1; 202 | } 203 | char *s = 0; 204 | r = asprintf(&s, "/dev/ptp%d", tsi.phc_index); 205 | if (r == -1) { 206 | perror("asprintf"); 207 | return -1; 208 | } 209 | *dev = s; 210 | 211 | struct ethtool_drvinfo di = { 212 | .cmd = ETHTOOL_GDRVINFO 213 | }; 214 | ifr.ifr_data = (void*) &di; 215 | 216 | r = ioctl(fd, SIOCETHTOOL, &ifr); 217 | if (r == -1) { 218 | perror("ioctl SIOCETHTOOL ETHTOOL_GDRVINFO"); 219 | return 1; 220 | } 221 | 222 | if (!strcmp(di.driver, "sfc")) 223 | *is_sfc = true; 224 | 225 | return 0; 226 | } 227 | 228 | 229 | struct sfc_ts 
// Time stamp as used by the Solarflare private ioctl request below.
struct sfc_ts {
    int64_t sec;
    int32_t nsec;
};

// Convert a Solarflare time stamp into nanoseconds.
//
// The scale factor is a signed 64 bit constant: with the former unsigned
// long constant the seconds were converted to unsigned before the
// multiplication, i.e. negative values only came out right by virtue of
// the implementation-defined conversion back to int64_t.
static int64_t sfcts2ns(const struct sfc_ts *ts)
{
    return ts->sec * INT64_C(1000000000) + (int64_t) ts->nsec;
}
ns, syscall: %" PRIu64 " ns\n", 271 | off, sc_delay); 272 | 273 | 274 | return 0; 275 | } 276 | 277 | int main(int argc, char **argv) 278 | { 279 | if (argc < 2) { 280 | fprintf(stderr, "call: %s /dev/ptpX|ifname\n", argv[0]); 281 | return 1; 282 | } 283 | 284 | #ifndef PCO_READ_PERF 285 | int r = get_tsc_khz(&tsc_khz); 286 | if (r) { 287 | return 1; 288 | } 289 | clocks_calc_mult_shift(&tsc_mult, &tsc_shift, 290 | tsc_khz, 1000000l, 0); 291 | #else 292 | int r = get_tsc_perf(&tsc_mult, &tsc_shift); 293 | if (r == -1) 294 | return 1; 295 | #endif 296 | 297 | 298 | bool is_sfc = false; 299 | const char *if_name = 0; 300 | int if_fd = -1; 301 | const char *dev = argv[1]; 302 | 303 | 304 | if (*dev != '/') { 305 | if_name = dev; 306 | if_fd = mk_if_fd(); 307 | if (if_fd == -1) 308 | return 1; 309 | int r = get_ptp_dev(if_fd, if_name, &dev, &is_sfc); 310 | if (r == -1) 311 | return 1; 312 | } 313 | 314 | int fd = open(dev, O_RDWR); 315 | if (fd == -1) { 316 | perror("open PTP device"); 317 | return 1; 318 | } 319 | 320 | printf("## Testing clock_gettime\n"); 321 | read_clock_offset(fd); 322 | 323 | printf("## Testing PTP_SYS_OFFSET ioctl (%#lx)\n", PTP_SYS_OFFSET); 324 | read_ptp_offset(fd); 325 | printf("## Testing PTP_SYS_OFFSET_EXTENDED ioctl (%#lx)\n", PTP_SYS_OFFSET_EXTENDED); 326 | read_ptp_offset_extended(fd); 327 | printf("## Testing PTP_SYS_OFFSET_PRECISE ioctl (%#lx)\n", PTP_SYS_OFFSET_PRECISE); 328 | read_ptp_offset_precise(fd); 329 | 330 | if (is_sfc) { 331 | printf("## Testing Solarflare SIOCEFX / EFX_TS_SYNC ioctl (%#lx / %#" PRIx16 ")\n", SIOCEFX, EFX_TS_SYNC); 332 | read_sfc_offset(if_fd, if_name); 333 | } 334 | 335 | if (if_fd != -1) 336 | close(if_fd); 337 | close(fd); 338 | 339 | return 0; 340 | } 341 | -------------------------------------------------------------------------------- /tsc.h: -------------------------------------------------------------------------------- 1 | // 2019, Georg Sauthoff 2 | // 3 | // SPDX-License-Identifier: 
// Read Time-Stamp Counter
//
// 'If software requires RDTSC to be executed only after all previous
// instructions have executed and all previous loads and stores are
// globally visible, it can execute the sequence MFENCE;LFENCE
// immediately before RDTSC.'
// https://www.felixcloutier.com/x86/rdtsc
//
// Executing CPUID before RDTSC has a similar effect but clobbers more
// registers (EAX, EBX, ECX, EDX),
// cf. https://www.intel.de/content/dam/www/public/us/en/documents/white-papers/ia-32-ia-64-benchmark-code-execution-paper.pdf
//
// The instructions have no explicit operands, i.e. they read the same in
// AT&T and Intel syntax. Thus, there is no need to switch the assembler
// dialect (which breaks when compiling with -masm=intel) and combining the
// two halves is left to the compiler. __asm__ is used instead of asm
// because the latter isn't available in strict ISO C mode (e.g. -std=c11).
extern __inline uint64_t __attribute__((__gnu_inline__, __always_inline__, __artificial__))
fenced_rdtsc(void)
{
    uint32_t lo, hi;
    __asm__ volatile (
            "mfence \n\t"
            "lfence \n\t"
            "rdtsc  \n\t"        // counter into EDX:EAX
            : "=a" (lo),         // output operands,
              "=d" (hi)          // i.e. EAX is mapped to lo, EDX to hi
            :                    // input operands
            );
    return ((uint64_t) hi << 32) | lo;
}
// Read Time-Stamp Counter and Processor ID
// 'The RDTSCP instruction is not a serializing instruction, but it does wait
// until all previous instructions have executed and all previous loads are
// globally visible.'
// https://www.felixcloutier.com/x86/rdtscp
//
// 'If software requires RDTSCP to be executed prior to execution of
// any subsequent instruction (including any memory accesses), it can
// execute LFENCE immediately after RDTSCP.'
// Alternatively, one could execute CPUID, which clobbers more registers,
// cf. https://www.intel.de/content/dam/www/public/us/en/documents/white-papers/ia-32-ia-64-benchmark-code-execution-paper.pdf
//
// The instructions have no explicit operands, i.e. they read the same in
// AT&T and Intel syntax. Thus, there is no need to switch the assembler
// dialect (which breaks when compiling with -masm=intel) and combining the
// two halves is left to the compiler. __asm__ is used instead of asm
// because the latter isn't available in strict ISO C mode (e.g. -std=c11).
extern __inline uint64_t __attribute__((__gnu_inline__, __always_inline__, __artificial__))
fenced_rdtscp(void)
{
    uint32_t lo, hi;
    __asm__ volatile (
            "rdtscp \n\t"        // counter into EDX:EAX, id into ECX
            "lfence \n\t"        // better than CPUID
            : "=a" (lo),         // output operands,
              "=d" (hi)          // i.e. EAX is mapped to lo, EDX to hi
            :                    // input operands
            : "rcx");            // additional clobbers
    return ((uint64_t) hi << 32) | lo;
}
21 | readahead=>4096 22 | 23 | [sysctl] 24 | # ktune sysctl settings for rhel6 servers, maximizing i/o throughput 25 | # 26 | # Minimal preemption granularity for CPU-bound tasks: 27 | # (default: 1 msec# (1 + ilog(ncpus)), units: nanoseconds) 28 | kernel.sched_min_granularity_ns = 10000000 29 | 30 | # SCHED_OTHER wake-up granularity. 31 | # (default: 1 msec# (1 + ilog(ncpus)), units: nanoseconds) 32 | # 33 | # This option delays the preemption effects of decoupled workloads 34 | # and reduces their over-scheduling. Synchronous workloads will still 35 | # have immediate wakeup/sleep latencies. 36 | kernel.sched_wakeup_granularity_ns = 15000000 37 | 38 | # If a workload mostly uses anonymous memory and it hits this limit, the entire 39 | # working set is buffered for I/O, and any more write buffering would require 40 | # swapping, so it's time to throttle writes until I/O can catch up. Workloads 41 | # that mostly use file mappings may be able to use even higher values. 42 | # 43 | # The generator of dirty data starts writeback at this percentage (system default 44 | # is 20%) 45 | vm.dirty_ratio = 40 46 | 47 | # Start background writeback (via writeback threads) at this percentage (system 48 | # default is 10%) 49 | vm.dirty_background_ratio = 10 50 | 51 | # PID allocation wrap value. When the kernel's next PID value 52 | # reaches this value, it wraps back to a minimum PID value. 53 | # PIDs of value pid_max or larger are not allocated. 54 | # 55 | # A suggested value for pid_max is 1024 * <# of cpu cores/threads in system> 56 | # e.g., a box with 32 cpus, the default of 32768 is reasonable, for 64 cpus, 57 | # 65536, for 4096 cpus, 4194304 (which is the upper limit possible). 58 | #kernel.pid_max = 65536 59 | 60 | # The swappiness parameter controls the tendency of the kernel to move 61 | # processes out of physical memory and onto the swap disk. 
62 | # 0 tells the kernel to avoid swapping processes out of physical memory 63 | # for as long as possible 64 | # 100 tells the kernel to aggressively swap processes out of physical memory 65 | # and move them to swap cache 66 | vm.swappiness=10 67 | 68 | # nohz=on - just to be explicit, it is already the default 69 | # rcu_nocbs= - implied by nohz_full= 70 | # tsc=reliable - avoid timer interruptions where the TSCs of the different cores are compared 71 | # cf. clocksource_watchdog calls in ftrace traces 72 | [bootloader] 73 | cmdline=isolcpus=${isolated_cores} nohz=on rcu_nocbs=${isolated_cores} rcu_nocb_poll nowatchdog mce=ignore_ce acpi_irq_nobalance pcie_aspm=off tsc=reliable 74 | 75 | -------------------------------------------------------------------------------- /tuned/gs-isol-cpus-half-hz/vars.conf: -------------------------------------------------------------------------------- 1 | isolated_cores=5-7 2 | -------------------------------------------------------------------------------- /tuned/gs-isol-cpus-hz/tuned.conf: -------------------------------------------------------------------------------- 1 | # 2 | # tuned configuration 3 | # 4 | 5 | [main] 6 | # based on /usr/lib/tuned/throughput-performance/tuned.conf 7 | summary=Isolate CPUs as much as possible, i.e. as adaptive ticks CPUs 8 | 9 | [variables] 10 | include=${i:PROFILE_DIR}/vars.conf 11 | 12 | [cpu] 13 | governor=performance 14 | energy_perf_bias=performance 15 | min_perf_pct=100 16 | 17 | [disk] 18 | # The default unit for readahead is KiB. This can be adjusted to sectors 19 | # by specifying the relevant suffix, eg. (readahead => 8192 s). There must 20 | # be at least one space between the number and suffix (if suffix is specified). 
21 | readahead=>4096 22 | 23 | [sysctl] 24 | # ktune sysctl settings for rhel6 servers, maximizing i/o throughput 25 | # 26 | # Minimal preemption granularity for CPU-bound tasks: 27 | # (default: 1 msec# (1 + ilog(ncpus)), units: nanoseconds) 28 | kernel.sched_min_granularity_ns = 10000000 29 | 30 | # SCHED_OTHER wake-up granularity. 31 | # (default: 1 msec# (1 + ilog(ncpus)), units: nanoseconds) 32 | # 33 | # This option delays the preemption effects of decoupled workloads 34 | # and reduces their over-scheduling. Synchronous workloads will still 35 | # have immediate wakeup/sleep latencies. 36 | kernel.sched_wakeup_granularity_ns = 15000000 37 | 38 | # If a workload mostly uses anonymous memory and it hits this limit, the entire 39 | # working set is buffered for I/O, and any more write buffering would require 40 | # swapping, so it's time to throttle writes until I/O can catch up. Workloads 41 | # that mostly use file mappings may be able to use even higher values. 42 | # 43 | # The generator of dirty data starts writeback at this percentage (system default 44 | # is 20%) 45 | vm.dirty_ratio = 40 46 | 47 | # Start background writeback (via writeback threads) at this percentage (system 48 | # default is 10%) 49 | vm.dirty_background_ratio = 10 50 | 51 | # PID allocation wrap value. When the kernel's next PID value 52 | # reaches this value, it wraps back to a minimum PID value. 53 | # PIDs of value pid_max or larger are not allocated. 54 | # 55 | # A suggested value for pid_max is 1024 * <# of cpu cores/threads in system> 56 | # e.g., a box with 32 cpus, the default of 32768 is reasonable, for 64 cpus, 57 | # 65536, for 4096 cpus, 4194304 (which is the upper limit possible). 58 | #kernel.pid_max = 65536 59 | 60 | # The swappiness parameter controls the tendency of the kernel to move 61 | # processes out of physical memory and onto the swap disk. 
62 | # 0 tells the kernel to avoid swapping processes out of physical memory 63 | # for as long as possible 64 | # 100 tells the kernel to aggressively swap processes out of physical memory 65 | # and move them to swap cache 66 | vm.swappiness=10 67 | 68 | # nohz=off - old-school scheduler behavior, i.e. disable dyntick-idle mode 69 | # rcu_nocbs= - implied by nohz_full= 70 | # tsc=reliable - avoid timer interruptions where the TSCs of the different cores are compared 71 | # cf. clocksource_watchdog calls in ftrace traces 72 | [bootloader] 73 | cmdline=isolcpus=${isolated_cores} nohz=off rcu_nocbs=${isolated_cores} rcu_nocb_poll nowatchdog mce=ignore_ce acpi_irq_nobalance pcie_aspm=off tsc=reliable 74 | 75 | -------------------------------------------------------------------------------- /tuned/gs-isol-cpus-hz/vars.conf: -------------------------------------------------------------------------------- 1 | isolated_cores=5-7 2 | -------------------------------------------------------------------------------- /tuned/gs-isol-cpus/tuned.conf: -------------------------------------------------------------------------------- 1 | # 2 | # tuned configuration 3 | # 4 | 5 | [main] 6 | # based on /usr/lib/tuned/throughput-performance/tuned.conf 7 | summary=Isolate CPUs as much as possible, i.e. as adaptive ticks CPUs 8 | 9 | [variables] 10 | include=${i:PROFILE_DIR}/vars.conf 11 | 12 | [cpu] 13 | governor=performance 14 | energy_perf_bias=performance 15 | min_perf_pct=100 16 | 17 | [disk] 18 | # The default unit for readahead is KiB. This can be adjusted to sectors 19 | # by specifying the relevant suffix, eg. (readahead => 8192 s). There must 20 | # be at least one space between the number and suffix (if suffix is specified). 
21 | readahead=>4096 22 | 23 | [sysctl] 24 | # ktune sysctl settings for rhel6 servers, maximizing i/o throughput 25 | # 26 | # Minimal preemption granularity for CPU-bound tasks: 27 | # (default: 1 msec# (1 + ilog(ncpus)), units: nanoseconds) 28 | kernel.sched_min_granularity_ns = 10000000 29 | 30 | # SCHED_OTHER wake-up granularity. 31 | # (default: 1 msec# (1 + ilog(ncpus)), units: nanoseconds) 32 | # 33 | # This option delays the preemption effects of decoupled workloads 34 | # and reduces their over-scheduling. Synchronous workloads will still 35 | # have immediate wakeup/sleep latencies. 36 | kernel.sched_wakeup_granularity_ns = 15000000 37 | 38 | # If a workload mostly uses anonymous memory and it hits this limit, the entire 39 | # working set is buffered for I/O, and any more write buffering would require 40 | # swapping, so it's time to throttle writes until I/O can catch up. Workloads 41 | # that mostly use file mappings may be able to use even higher values. 42 | # 43 | # The generator of dirty data starts writeback at this percentage (system default 44 | # is 20%) 45 | vm.dirty_ratio = 40 46 | 47 | # Start background writeback (via writeback threads) at this percentage (system 48 | # default is 10%) 49 | vm.dirty_background_ratio = 10 50 | 51 | # PID allocation wrap value. When the kernel's next PID value 52 | # reaches this value, it wraps back to a minimum PID value. 53 | # PIDs of value pid_max or larger are not allocated. 54 | # 55 | # A suggested value for pid_max is 1024 * <# of cpu cores/threads in system> 56 | # e.g., a box with 32 cpus, the default of 32768 is reasonable, for 64 cpus, 57 | # 65536, for 4096 cpus, 4194304 (which is the upper limit possible). 58 | #kernel.pid_max = 65536 59 | 60 | # The swappiness parameter controls the tendency of the kernel to move 61 | # processes out of physical memory and onto the swap disk. 
62 | # 0 tells the kernel to avoid swapping processes out of physical memory 63 | # for as long as possible 64 | # 100 tells the kernel to aggressively swap processes out of physical memory 65 | # and move them to swap cache 66 | vm.swappiness=10 67 | 68 | # cf. https://unix.stackexchange.com/a/539266/1131 69 | # nohz=on - just to be explicit, it is already the default 70 | # rcu_nocbs= - implied by nohz_full= 71 | # tsc=reliable - avoid timer interruptions where the TSCs of the different cores are compared 72 | # cf. clocksource_watchdog calls in ftrace traces 73 | [bootloader] 74 | cmdline=isolcpus=${isolated_cores} nohz=on nohz_full=${isolated_cores} rcu_nocbs=${isolated_cores} rcu_nocb_poll nowatchdog mce=ignore_ce acpi_irq_nobalance pcie_aspm=off tsc=reliable 75 | 76 | -------------------------------------------------------------------------------- /tuned/gs-isol-cpus/vars.conf: -------------------------------------------------------------------------------- 1 | isolated_cores=5-7 2 | -------------------------------------------------------------------------------- /tuned/gs-latency/tuned.conf: -------------------------------------------------------------------------------- 1 | # 2 | # tuned configuration 3 | # 4 | 5 | [main] 6 | # based on /usr/lib/tuned/latency-performance/tuned.conf 7 | summary=Aggressive latency settings 8 | 9 | [variables] 10 | include=/etc/tuned/gs-isol-cpus/vars.conf 11 | not_isolated_cpumask = ${f:cpulist2hex_invert:${isolated_cores}} 12 | 13 | [cpu] 14 | #force_latency=1 15 | # Should be equivalent to idle=poll processor.max_cstate=0 intel_idle.max_cstate=0 16 | force_latency=0 17 | governor=performance 18 | energy_perf_bias=performance 19 | min_perf_pct=100 20 | 21 | [sysctl] 22 | # ktune sysctl settings for rhel6 servers, maximizing i/o throughput 23 | # 24 | # Minimal preemption granularity for CPU-bound tasks: 25 | # (default: 1 msec# (1 + ilog(ncpus)), units: nanoseconds) 26 | kernel.sched_min_granularity_ns=10000000 27 | 28 | # 
If a workload mostly uses anonymous memory and it hits this limit, the entire 29 | # working set is buffered for I/O, and any more write buffering would require 30 | # swapping, so it's time to throttle writes until I/O can catch up. Workloads 31 | # that mostly use file mappings may be able to use even higher values. 32 | # 33 | # The generator of dirty data starts writeback at this percentage (system default 34 | # is 20%) 35 | vm.dirty_ratio=10 36 | 37 | # Start background writeback (via writeback threads) at this percentage (system 38 | # default is 10%) 39 | vm.dirty_background_ratio=3 40 | 41 | # The swappiness parameter controls the tendency of the kernel to move 42 | # processes out of physical memory and onto the swap disk. 43 | # 0 tells the kernel to avoid swapping processes out of physical memory 44 | # for as long as possible 45 | # 100 tells the kernel to aggressively swap processes out of physical memory 46 | # and move them to swap cache 47 | vm.swappiness=10 48 | 49 | # The total time the scheduler will consider a migrated process 50 | # "cache hot" and thus less likely to be re-migrated 51 | # (system default is 500000, i.e. 
0.5 ms) 52 | kernel.sched_migration_cost_ns=5000000 53 | 54 | # ^^^ above sysctl params from /usr/lib/tuned/latency-performance/tuned.conf 55 | vm.stat_interval=60 56 | 57 | [sysfs] 58 | /sys/bus/workqueue/devices/writeback/cpumask = ${not_isolated_cpumask} 59 | /sys/devices/virtual/workqueue/cpumask = ${not_isolated_cpumask} 60 | # should be equivalent to mce=ignore_ce 61 | /sys/devices/system/machinecheck/machinecheck*/ignore_ce = 1 62 | 63 | 64 | -------------------------------------------------------------------------------- /util.c: -------------------------------------------------------------------------------- 1 | 2 | // 2019, Georg Sauthoff 3 | // 4 | // SPDX-License-Identifier: GPL-3.0-or-later 5 | 6 | #define _GNU_SOURCE 7 | 8 | #include "util.h" 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | // perf_event_open() etc. 22 | #include 23 | #include 24 | #include 25 | 26 | void perror_e(int r, const char *msg) 27 | { 28 | char buf[1024]; 29 | fprintf(stderr, "%s: %s\n", msg, strerror_r(r, buf, sizeof buf)); 30 | } 31 | 32 | static bool is_sorted(const uint32_t *xs, size_t n) 33 | { 34 | if (!n) 35 | return true; 36 | uint32_t a = xs[0]; 37 | for (size_t i = 1; i < n; ++i) { 38 | if (a > xs[i]) 39 | return false; 40 | a = xs[i]; 41 | } 42 | return true; 43 | } 44 | 45 | uint32_t percentile_u32(const uint32_t *x, size_t n, size_t a, size_t b) 46 | { 47 | assert(is_sorted(x, n)); 48 | 49 | if (!n) 50 | return 0; 51 | size_t i = n * a / b; 52 | assert(i < n); 53 | if (n % 2 || !i) { 54 | return x[i]; 55 | } else { 56 | assert(i); 57 | return (x[i] + x[i-1])/2; 58 | } 59 | } 60 | 61 | // median absolute deviation 62 | // a measure of dispersion (like the standard deviation) 63 | uint32_t mad_u32(const uint32_t *x, uint32_t *y, size_t n) 64 | { 65 | if (!n) 66 | return 0; 67 | uint32_t median = percentile_u32(x, n, 1, 2); 68 | for (size_t i = 0; i < n; ++i) { 
69 | y[i] = labs((long)x[i] - (long)median); 70 | } 71 | qsort(y, n, sizeof y[0], cmp_u32); 72 | uint32_t mad = percentile_u32(y, n, 1, 2); 73 | return mad; 74 | } 75 | 76 | // This function is copied from 77 | // https://elixir.bootlin.com/linux/v5.2.12/source/kernel/time/clocksource.c#L21 78 | // File license: GPL-2.0+ 79 | // slightly modified 80 | /** 81 | * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks 82 | * @mult: pointer to mult variable 83 | * @shift: pointer to shift variable 84 | * @from: frequency to convert from 85 | * @to: frequency to convert to 86 | * @maxsec: guaranteed runtime conversion range in seconds 87 | * 88 | * The function evaluates the shift/mult pair for the scaled math 89 | * operations of clocksources and clockevents. 90 | * 91 | * @to and @from are frequency values in HZ. For clock sources @to is 92 | * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock 93 | * event @to is the counter frequency and @from is NSEC_PER_SEC. 94 | * 95 | * The @maxsec conversion range argument controls the time frame in 96 | * seconds which must be covered by the runtime conversion with the 97 | * calculated mult and shift factors. This guarantees that no 64bit 98 | * overflow happens when the input value of the conversion is 99 | * multiplied with the calculated mult factor. Larger ranges may 100 | * reduce the conversion accuracy by chosing smaller mult and shift 101 | * factors. 
102 | */ 103 | void clocks_calc_mult_shift( 104 | uint32_t *mult, uint32_t *shift, uint32_t from, uint32_t to, 105 | uint32_t maxsec) 106 | { 107 | uint64_t tmp; 108 | uint32_t sft, sftacc= 32; 109 | 110 | /* 111 | * Calculate the shift factor which is limiting the conversion 112 | * range: 113 | */ 114 | tmp = ((uint64_t)maxsec * from) >> 32; 115 | while (tmp) { 116 | tmp >>=1; 117 | sftacc--; 118 | } 119 | 120 | /* 121 | * Find the conversion shift/mult pair which has the best 122 | * accuracy and fits the maxsec conversion range: 123 | */ 124 | for (sft = 32; sft > 0; sft--) { 125 | tmp = (uint64_t) to << sft; 126 | tmp += from / 2; 127 | // do_div(tmp, from); 128 | tmp = tmp / (uint64_t) from; 129 | 130 | if ((tmp >> sftacc) == 0) 131 | break; 132 | } 133 | *mult = tmp; 134 | *shift = sft; 135 | } 136 | 137 | 138 | 139 | // as of Kernel 5.2.7 /sys/devices/system/cpu/cpu0/tsc_freq_khz 140 | // isn't provided by the mainline kernel 141 | // see https://github.com/trailofbits/ 142 | // or even better https://github.com/trailofbits/tsc_freq_khz/pull/1 143 | // for a simple kernel module that provides this file 144 | static int get_tsc_khz_proc(uint32_t *tsc_khz) 145 | { 146 | int fd = open("/sys/devices/system/cpu/cpu0/tsc_freq_khz", O_RDONLY); 147 | if (fd == -1) { 148 | if (errno == ENOENT) 149 | return 1; 150 | perror("opening /sys/devices/system/cpu/cpu0/tsc_freq_khz"); 151 | return -1; 152 | } 153 | char buf[16]; 154 | ssize_t r = read(fd, buf, sizeof buf - 1); 155 | if (r == -1) { 156 | perror("reading /sys/devices/system/cpu/cpu0/tsc_freq_khz"); 157 | close(fd); 158 | return -1; 159 | } 160 | buf[r] = 0; 161 | if (r && buf[r-1] == '\n') 162 | buf[r-1] = 0; 163 | *tsc_khz = atoi(buf); 164 | int t = close(fd); 165 | if (t == -1) { 166 | perror("closing /sys/devices/system/cpu/cpu0/tsc_freq_khz"); 167 | return -1; 168 | } 169 | return 0; 170 | } 171 | 172 | static int get_tsc_khz_cmd(const char *cmd, uint32_t *tsc_khz) 173 | { 174 | FILE *f = popen(cmd, "re"); 
175 | if (!f) { 176 | perror("reading TSC khz from journalctl failed"); 177 | return 1; 178 | } 179 | char *line = 0; 180 | size_t n = 0; 181 | ssize_t l = getline(&line, &n, f); 182 | if (l == -1) { 183 | if (!feof(f)) { 184 | perror("journal getline"); 185 | pclose(f); 186 | return -1; 187 | } 188 | } 189 | if (l > 15 + 7) { 190 | fprintf(stderr, "buffer for TSC khz from journal too small\n"); 191 | return -1; 192 | } 193 | if (l < 11) 194 | return 0; 195 | char buf[16]; 196 | char *t = mempcpy(buf, line+1, l-1-8-1); 197 | t = mempcpy(t, line+(l-7-1), 3); 198 | *t = 0; 199 | *tsc_khz = atoi(buf); 200 | int r = pclose(f); 201 | if (r == -1) { 202 | perror("pclose journal"); 203 | return -1; 204 | } 205 | return 0; 206 | } 207 | 208 | static int get_tsc_khz_journal(uint32_t *tsc_khz) 209 | { 210 | 211 | const char cmd[] = "journalctl -k 2>/dev/null | grep 'kernel: tsc:' -i " 212 | "| cut -d' ' -f5- | grep -o ' [0-9]\\+\\.[0-9]\\{3\\} MHz' " 213 | "| tail -n 1 "; 214 | return get_tsc_khz_cmd(cmd, tsc_khz); 215 | } 216 | 217 | // fall-back to dmesg on systems without journald or ones 218 | // where the user doesn't have enough permissions for journalctl --boot. 
219 | // pitfall: the message might be already rotated out of the dmesg buffer, 220 | // on a long running system 221 | static int get_tsc_khz_dmesg(uint32_t *tsc_khz) 222 | { 223 | const char cmd[] = "dmesg | grep '\\] tsc:' -i" 224 | "| cut -d' ' -f5- | grep -o ' [0-9]\\+\\.[0-9]\\{3\\} MHz' " 225 | "| tail -n 1 "; 226 | return get_tsc_khz_cmd(cmd, tsc_khz); 227 | } 228 | 229 | // see also https://stackoverflow.com/a/57835630/427158 for 230 | // some ways to get the tick rate of the TSC 231 | int get_tsc_khz(uint32_t *tsc_khz) 232 | { 233 | *tsc_khz = 0; 234 | int r = get_tsc_khz_proc(tsc_khz); 235 | if (r < 0) 236 | return r; 237 | if (!*tsc_khz) { 238 | int r = get_tsc_khz_journal(tsc_khz); 239 | if (r < 0) 240 | return r; 241 | } 242 | if (!*tsc_khz) { 243 | int r = get_tsc_khz_dmesg(tsc_khz); 244 | if (r < 0) 245 | return r; 246 | } 247 | if (!*tsc_khz) { 248 | fprintf(stderr, "Couldn't determine TSC rate\n"); 249 | return -1; 250 | } 251 | return 0; 252 | } 253 | 254 | 255 | static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, 256 | int cpu, int group_fd, unsigned long flags) 257 | { 258 | return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags); 259 | } 260 | 261 | // see also https://stackoverflow.com/a/57835630/427158 262 | // 263 | // Unfortunately, the kernel decreases precision of mult and shift 264 | // due to backwards compatibility: 265 | // 266 | // https://elixir.bootlin.com/linux/v5.19.17/source/arch/x86/kernel/tsc.c#L148 267 | // 268 | // Thus, for short durations, calling clocks_calc_mult_shift() with the true 269 | // TSC rate in user space is more precise. 
270 | int get_tsc_perf(uint32_t *mult, uint32_t *shift) 271 | { 272 | struct perf_event_attr pe = { 273 | .type = PERF_TYPE_HARDWARE, 274 | .size = sizeof(struct perf_event_attr), 275 | .config = PERF_COUNT_HW_INSTRUCTIONS, 276 | .disabled = 1, 277 | .exclude_kernel = 1, 278 | .exclude_hv = 1 279 | }; 280 | int fd = perf_event_open(&pe, 0, -1, -1, 0); 281 | if (fd == -1) { 282 | perror("perf_event_open failed"); 283 | return -1; 284 | } 285 | void *addr = mmap(NULL, 4*1024, PROT_READ, MAP_SHARED, fd, 0); 286 | if (!addr) { 287 | perror("mmap perf page failed"); 288 | return -1; 289 | } 290 | struct perf_event_mmap_page *pc = addr; 291 | if (pc->cap_user_time != 1) { 292 | fprintf(stderr, "Perf system doesn't support user time\n"); 293 | return -1; 294 | } 295 | *mult = pc->time_mult; 296 | *shift = pc->time_shift; 297 | int r = munmap(addr, 4*1024); 298 | if (r == -1) { 299 | perror("munmap perf page"); 300 | return -1; 301 | } 302 | close(fd); 303 | return 0; 304 | } 305 | 306 | -------------------------------------------------------------------------------- /util.h: -------------------------------------------------------------------------------- 1 | 2 | // 2019, Georg Sauthoff 3 | // 4 | // SPDX-License-Identifier: GPL-3.0-or-later 5 | 6 | #ifndef OSJITTER_UTIL_H 7 | #define OSJITTER_UTIL_H 8 | 9 | #include 10 | #include 11 | 12 | static inline int cmp_u32(const void *a, const void *b) 13 | { 14 | const uint32_t *x = a; 15 | const uint32_t *y = b; 16 | 17 | if (*x < *y) 18 | return -1; 19 | if (*x > *y) 20 | return 1; 21 | return 0; 22 | } 23 | 24 | // Linux Kernel has a function that is named the same 25 | static inline uint64_t mul_u64_u32_shr(uint64_t cyc, uint32_t mult, uint32_t shift) 26 | { 27 | __uint128_t x = cyc; 28 | x *= mult; 29 | x >>= shift; 30 | return x; 31 | } 32 | 33 | void perror_e(int r, const char *msg); 34 | 35 | uint32_t percentile_u32(const uint32_t *x, size_t n, size_t a, size_t b); 36 | uint32_t mad_u32(const uint32_t *x, uint32_t *y, 
size_t n); 37 | 38 | int get_tsc_khz(uint32_t *tsc_khz); 39 | 40 | void clocks_calc_mult_shift( 41 | uint32_t *mult, uint32_t *shift, uint32_t from, uint32_t to, 42 | uint32_t maxsec); 43 | 44 | int get_tsc_perf(uint32_t *mult, uint32_t *shift); 45 | 46 | #endif 47 | --------------------------------------------------------------------------------