├── CONTRIBUTING.md ├── DOCUMENTATION.old.md ├── LICENSE ├── README.md ├── config.txt ├── images └── bochs.png ├── instrumentation ├── common.cc ├── common.h ├── events.cc ├── events.h ├── instrument.cc ├── instrument.h ├── invoke.cc ├── invoke.h ├── logging.proto ├── os_freebsd.cc ├── os_freebsd.h ├── os_linux.cc ├── os_linux.h ├── os_openbsd.cc ├── os_openbsd.h ├── os_windows.cc ├── os_windows.h ├── symbols.cc └── symbols.h ├── third_party ├── LICENSE └── instrumentation │ ├── Makefile.in │ ├── mem_interface.cc │ └── mem_interface.h └── tools ├── Makefile ├── common.cc ├── common.h ├── count_callstack_depth.cc ├── count_excp_handlers.cc ├── doubleread.cc ├── linux_symbolize.py ├── logging.proto ├── no_cidll.cc ├── print.cc ├── separate.cc ├── stats.cc ├── unhandled_access.cc ├── win32_symbolize.cc └── win32_symbolize.py /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution; 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to <https://cla.developers.google.com/> to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 
24 | 25 | ## Community Guidelines 26 | 27 | This project follows [Google's Open Source Community 28 | Guidelines](https://opensource.google.com/conduct/). -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Bochspwn 2 | 3 | Bochspwn is a system-wide instrumentation project designed to log memory accesses performed by operating system kernels and examine them in search of patterns indicating the presence of certain bugs, such as "double fetches". Information about memory references is obtained by running the guest operating systems within the [Bochs IA-32 emulator](http://bochs.sourceforge.net/) with the custom instrumentation component compiled in. 
It was written in 2013, and was used to discover over 50 race conditions in the Windows kernel, fixed across numerous security bulletins ([MS13-016](https://docs.microsoft.com/en-us/security-updates/SecurityBulletins/2013/ms13-016), [MS13-017](https://docs.microsoft.com/en-us/security-updates/SecurityBulletins/2013/ms13-017), [MS13-031](https://docs.microsoft.com/en-us/security-updates/SecurityBulletins/2013/ms13-031), [MS13-036](https://docs.microsoft.com/en-us/security-updates/SecurityBulletins/2013/ms13-036)). For further information, see [Read more](#read-more). 4 | 5 | ## Support status 6 | 7 | The toolset is not actively maintained, and its source code is released "as is", mostly for reference purposes. It was originally released as *kfetch-toolkit* in 2013 after the Black Hat USA talk, together with comprehensive documentation at [DOCUMENTATION.old.md](DOCUMENTATION.old.md) (now partially obsolete). In 2017, we revised the source code of the project and implemented several new features: 8 | 9 | 1. Information about the address space layout of kernel drivers is stored in a separate file (`modules.bin` by default), and each driver is referenced by its index in the main log file. This was done to save disk space, by preventing the redundant information (image names and base addresses) from being needlessly saved for every stack trace item in the log. 10 | 2. Information about the presence of an active exception handler in each stack frame was added to the access log protocol buffer, allowing us to detect a number of local Windows DoS vulnerabilities (see examples [1](https://j00ru.vexillium.org/2017/02/windows-kernel-local-denial-of-service-1/), [2](https://j00ru.vexillium.org/2017/02/windows-kernel-local-denial-of-service-2/), [3](https://j00ru.vexillium.org/2017/03/windows-kernel-local-denial-of-service-3/), [4](https://j00ru.vexillium.org/2017/04/windows-kernel-local-denial-of-service-4/)). 11 | 3. 
Information about the value of [`PreviousMode`](https://docs.microsoft.com/en-us/windows-hardware/drivers/kernel/previousmode) at the time of the memory access in Windows was added to the protocol buffer. 12 | 4. The "online" double-fetch detection mode was removed from the code, as it was deemed too slow to be practically useful. 13 | 5. Some symbolization-related and other minor bugs were fixed in the code. 14 | 15 | The instrumentation was also ported to Bochs version 2.6.9, the latest one at the time of this writing. 16 | 17 | ## Building and usage 18 | 19 | For general instructions, see [DOCUMENTATION.old.md](DOCUMENTATION.old.md). 20 | 21 | You may wish to use more recent versions of the referenced software (e.g. Bochs 2.6.9, libprotobuf 3.4.1 etc.), and update the Bochspwn configuration file to account for the 2017 changes. When in doubt, please refer to the source code or [contact us](mailto:mjurczyk@google.com) with any questions. 22 | 23 | ## Example report 24 | 25 | ``` 26 | ------------------------------ found double-read of address 0x00000000001ef766 27 | Read no. 1: 28 | [pid/tid/ct: 000000fc/00000100/01d27c3a91e567e6] { smss.exe} 0000001e, 00000042: READ of 1ef764 (5 * 4 bytes), pc = 82a75263 [ rep movsd dword ptr es:[edi], dword ptr ds:[esi] ] 29 | [previous mode: 1] 30 | #0 0x82a75263 ((0026a263) ntoskrnl!SeCaptureSecurityDescriptor+00000067) <===== SEH enabled (#0) 31 | #1 0x82a36a23 ((0022ba23) ntoskrnl!ObpCaptureObjectCreateInformation+000000c2) <===== SEH enabled (#0) 32 | #2 0x82a45de2 ((0023ade2) ntoskrnl!ObOpenObjectByName+0000009b) 33 | #3 0x82a3c7db ((002317db) ntoskrnl!IopCreateFile+00000673) <===== SEH disabled 34 | #4 0x82a60402 ((00255402) ntoskrnl!NtCreateFile+00000034) 35 | #5 0x82848db6 ((0003ddb6) ntoskrnl!KiSystemServicePostCall+00000000) 36 | 37 | Read no. 
2: 38 | [pid/tid/ct: 000000fc/00000100/01d27c3a91e567e6] { smss.exe} 0000001e, 00000042: READ of 1ef766 (1 * 2 bytes), pc = 82a752ad [ movzx edx, word ptr ds:[eax+2] ] 39 | [previous mode: 1] 40 | #0 0x82a752ad ((0026a2ad) ntoskrnl!SeCaptureSecurityDescriptor+000000b1) <===== SEH enabled (#1) 41 | #1 0x82a36a23 ((0022ba23) ntoskrnl!ObpCaptureObjectCreateInformation+000000c2) <===== SEH enabled (#0) 42 | #2 0x82a45de2 ((0023ade2) ntoskrnl!ObOpenObjectByName+0000009b) 43 | #3 0x82a3c7db ((002317db) ntoskrnl!IopCreateFile+00000673) <===== SEH disabled 44 | #4 0x82a60402 ((00255402) ntoskrnl!NtCreateFile+00000034) 45 | #5 0x82848db6 ((0003ddb6) ntoskrnl!KiSystemServicePostCall+00000000) 46 | ``` 47 | 48 | ## Read more 49 | 50 | * Whitepaper - [Identifying and Exploiting Windows Kernel Race Conditions via Memory Access Patterns](https://j00ru.vexillium.org/papers/2013/bochspwn.pdf) 51 | * SyScan 2013 slides - [Bochspwn: Exploiting Kernel Race Conditions Found via Memory Access Patterns](https://j00ru.vexillium.org/slides/2013/syscan.pdf) 52 | * Black Hat USA 2013 slides - [Bochspwn: Identifying 0-days via System-wide Memory Access Pattern Analysis](https://j00ru.vexillium.org/slides/2013/bhusa.pdf) 53 | * Black Hat USA 2013 video - [Bochspwn: Identifying 0-days via System-wide Memory Access Pattern Analysis on YouTube](https://www.youtube.com/watch?v=ypV0kpi4cd8) 54 | * Blog post - [Kernel double-fetch race condition exploitation on x86 - further thoughts](https://j00ru.vexillium.org/2013/06/kernel-double-fetch-race-condition-exploitation-on-x86-further-thoughts/) 55 | 56 | ## Bochspwn Reloaded 57 | 58 | In 2017, we implemented a new type of full-system instrumentation on top of the Bochs emulator, named *Bochspwn Reloaded*. The instrumentation performs taint tracking of the guest kernel address space, and detects the disclosure of uninitialized kernel stack/heap memory to user-mode. 
It helped us identify over [70](https://bugs.chromium.org/p/project-zero/issues/list?can=1&q=finder:mjurczyk%20product:kernel%20opened%3E2017-02-23%20opened%3C2018-1-23%20%22uninitialized%20%22memory%20disclosure%22&colspec=ID%20Status%20Restrict%20Reported%20Vendor%20Product%20Finder%20Summary&cells=ids) bugs in the Windows kernel, and more than [10](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/log/?qt=author&q=mjurczyk@google.com) lesser bugs in Linux in 2017 and early 2018. 59 | 60 | The tool was discussed at the [REcon Montreal](https://j00ru.vexillium.org/talks/recon-bochspwn-reloaded-detecting-kernel-memory-disclosure/), [Black Hat USA](https://j00ru.vexillium.org/talks/blackhat-usa-bochspwn-reloaded-detecting-kernel-memory-disclosure/), and [INFILTRATE](https://j00ru.vexillium.org/talks/infiltrate-bochspwn-revolutions-further-advancements-in-detecting-kernel-infoleaks/) conferences, as well as in the [ 61 | Detecting Kernel Memory Disclosure with x86 Emulation and Taint Tracking](http://j00ru.vexillium.org/papers/2018/bochspwn_reloaded.pdf) whitepaper. It is also an open-source project, and its source code can be found in the [bochspwn-reloaded](https://github.com/google/bochspwn-reloaded) repository. 62 | 63 | ## Disclaimer 64 | 65 | This is not an official Google product. 
66 | -------------------------------------------------------------------------------- /config.txt: -------------------------------------------------------------------------------- 1 | [general] 2 | trace_log_path = memlog.bin 3 | modules_list_path = modules.bin 4 | 5 | os = windows 6 | bitness = 32 7 | version = win10_32 8 | 9 | min_read_size = 1 10 | max_read_size = 16 11 | min_write_size = 1 12 | max_write_size = 16 13 | 14 | callstack_length = 48 15 | write_as_text = 0 16 | 17 | symbolize = 0 18 | symbol_path = 19 | 20 | [win7_32] 21 | kprcb = 0x120 22 | current_thread = 0x04 23 | tcb = 0x0 24 | process = 0x150 25 | client_id = 0x22c 26 | process_id = 0 27 | thread_id = 4 28 | create_time = 0x200 29 | image_filename = 0x16c 30 | kdversionblock = 0x34 31 | psloadedmodulelist = 0x18 32 | loadorder_flink = 0x0 33 | basedllname = 0x2c 34 | baseaddress = 0x18 35 | sizeofimage = 0x20 36 | us_len = 0x0 37 | us_buffer = 0x4 38 | teb_cid = 0x20 39 | irql = 0x24 40 | previous_mode = 0x13a 41 | exception_list = 0x0 42 | next_exception = 0x0 43 | try_level = 0xc 44 | 45 | [win10_32] 46 | kprcb = 0x120 47 | current_thread = 0x04 48 | tcb = 0x0 49 | process = 0x150 50 | client_id = 0x374 51 | process_id = 0 52 | thread_id = 4 53 | create_time = 0x348 54 | image_filename = 0x174 55 | kdversionblock = 0x34 56 | psloadedmodulelist = 0x18 57 | loadorder_flink = 0x0 58 | basedllname = 0x2c 59 | baseaddress = 0x18 60 | sizeofimage = 0x20 61 | us_len = 0x0 62 | us_buffer = 0x4 63 | teb_cid = 0x20 64 | irql = 0x24 65 | previous_mode = 0x15a 66 | exception_list = 0x0 67 | next_exception = 0x0 68 | try_level = 0xc 69 | 70 | [obsd-5.3-amd64] 71 | cpu_info_ci_curproc = 0x288 72 | proc_p_addr = 0x228 73 | proc_p_pid = 0x5c 74 | proc_p_comm = 0x1da 75 | comm_size = 17 76 | proc_p_p = 0x20 77 | process_ps_pgrp = 0x1b8 78 | pgrp_id = 0x20 79 | kernel_start = 0xffffffff801001e0 80 | kernel_end = 0xffffffff80678c0c 81 | copyin = 0xffffffff80453ba0 82 | copyin_end = 0xffffffff80453c08 83 | 
copyinstr = 0xffffffff80453cc0 84 | copyinstr_end = 0xffffffff80453d34 85 | 86 | [fbsd-9.1-x86] 87 | thread_td_tid = 0x44 88 | thread_td_proc = 0x4 89 | proc_p_pid = 0x64 90 | proc_p_comm = 0x20c 91 | proc_p_comm_size = 20 92 | modules = 0xc117c4f4 93 | tqh_first = 0 94 | tqh_last = 4 95 | module_name = 0x1c 96 | max_module_name = 0x100 97 | module_file = 0x10 98 | linker_file_address = 0x24 99 | linker_file_size = 0x28 100 | kernel_start = 0xc04000f4 101 | kernel_end = 0xc114446c 102 | copyin = 0xc0e22eb0 103 | copyin_end = 0xc0e22f19 104 | copyinstr = 0xc0e230a0 105 | copyinstr_end = 0xc0e2311a 106 | fuword32 = 0xc0e22f5c 107 | fuword32_end = 0xc0e22f86 108 | fuword16 = 0xc0e22f90 109 | fuword16_end = 0xc0e22fbb 110 | fubyte = 0xc0e22fbc 111 | fubyte_end = 0xc0e22ff9 112 | 113 | [fbsd-9.1-amd64] 114 | thread_td_tid = 0x88 115 | thread_td_proc = 0x8 116 | proc_p_pid = 0xb0 117 | proc_p_comm = 0x338 118 | proc_p_comm_size = 20 119 | modules = 0xffffffff812243e0 120 | tqh_first = 0 121 | tqh_last = 8 122 | module_name = 0x30 123 | max_module_name = 0x100 124 | module_file = 0x20 125 | linker_file_address = 0x40 126 | linker_file_size = 0x48 127 | kernel_start = 0xffffffff80200190 128 | kernel_end = 0xffffffff812f90a0 129 | copyin = 0xffffffff80bd5e40 130 | copyin_end = 0xffffffff80bd6085 131 | copyinstr = 0xffffffff80bd6190 132 | copyinstr_end = 0xffffffff80bd621c 133 | 134 | [ubuntu_server_32_3.8.0-23-generic] 135 | thread_size = 0x2000 136 | thread_info_task = 0 137 | task_struct_pid = 0x208 138 | task_struct_tgid = 0x20c 139 | task_struct_comm = 0x2e4 140 | task_comm_len = 16 141 | modules = 0xc187c758 142 | module_list = 0x4 143 | module_name = 0xc 144 | module_core = 0xdc 145 | module_core_size = 0xe4 146 | module_name_len = 56 147 | kernel_start = 0xc1000000 148 | kernel_end = 0xc1a76000 149 | 150 | [ubuntu_server_64_3.8.0-23-generic] 151 | thread_size = 0x2000 152 | thread_info_task = 0 153 | task_struct_pid = 0x2d4 154 | task_struct_tgid = 0x2d8 155 | 
task_struct_comm = 0x480 156 | task_comm_len = 16 157 | modules = 0xffffffff81c3b7d0 158 | module_list = 0x8 159 | module_name = 0x18 160 | module_core = 0x160 161 | module_core_size = 0x16c 162 | module_name_len = 56 163 | kernel_start = 0xffffffff81000000 164 | kernel_end = 0xffffffff82346000 165 | -------------------------------------------------------------------------------- /images/bochs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/googleprojectzero/bochspwn/f8c238ae19cd22fdebb9ba5aa5ee034829b4512e/images/bochs.png -------------------------------------------------------------------------------- /instrumentation/common.cc: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | // Gynvael Coldwind (gynvael@google.com) 5 | // 6 | // Copyright 2013-2018 Google LLC 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 19 | // 20 | 21 | #include "common.h" 22 | 23 | #include 24 | #include 25 | #include 26 | 27 | #include "logging.pb.h" 28 | #include "symbols.h" 29 | 30 | // See instrumentation.h for globals' documentation. 
31 | namespace globals { 32 | bochspwn_config config; 33 | std::vector modules; 34 | std::map thread_states; 35 | 36 | log_data_st last_ld; 37 | bool last_ld_present; 38 | 39 | bool has_instr_before_execution_handler; 40 | } // namespace globals 41 | 42 | // Given a kernel-mode virtual address, returns an index of the corresponding 43 | // module descriptor in globals::modules, or -1, if it's not found. Assuming 44 | // that every executed address belongs to a valid PE address at any given time, 45 | // not finding an address should be interpreted as a signal to update the current 46 | // module database. 47 | int find_module(bx_address item) { 48 | unsigned int sz = globals::modules.size(); 49 | for (unsigned int i = 0; i < sz; i++) { 50 | if (globals::modules[i]->module_base <= item && 51 | globals::modules[i]->module_base + globals::modules[i]->module_size > item) { 52 | return i; 53 | } 54 | } 55 | 56 | return -1; 57 | } 58 | 59 | // Given a kernel driver name, returns an index of the corresponding module 60 | // descriptor in globals::modules, or -1, if it's not found. 
61 | int find_module_by_name(const std::string& module) { 62 | unsigned int sz = globals::modules.size(); 63 | for (unsigned int i = 0; i < sz; i++) { 64 | if (!strcmp(globals::modules[i]->module_name, module.c_str())) { 65 | return i; 66 | } 67 | } 68 | 69 | return -1; 70 | } 71 | 72 | std::string LogDataAsText(const log_data_st& ld) { 73 | char buffer[256]; 74 | std::string ret; 75 | 76 | snprintf(buffer, sizeof(buffer), 77 | "[pid/tid/ct: %.8x/%.8x/%.8x%.8x] {%16s} %.8x, %.8x: %s of %llx " 78 | "(%u * %u bytes), pc = %llx [ %40s ]\n", 79 | ld.process_id(), ld.thread_id(), 80 | (unsigned)(ld.create_time() >> 32), 81 | (unsigned)(ld.create_time()), 82 | ld.image_file_name().c_str(), 83 | (unsigned)ld.syscall_count(), 84 | (unsigned)ld.syscall_id(), 85 | translate_mem_access(ld.access_type()), 86 | ld.lin(), 87 | (unsigned)ld.repeated(), 88 | (unsigned)ld.len(), 89 | ld.pc(), 90 | ld.pc_disasm().c_str()); 91 | ret = buffer; 92 | 93 | if (ld.has_previous_mode()) { 94 | snprintf(buffer, sizeof(buffer), "[previous mode: %d]\n", ld.previous_mode()); 95 | ret += buffer; 96 | } 97 | 98 | for (int i = 0; i < ld.stack_trace_size(); i++) { 99 | int module_idx = ld.stack_trace(i).module_idx(); 100 | 101 | if (module_idx != -1) { 102 | if (globals::config.symbolize) { 103 | snprintf(buffer, sizeof(buffer), " #%u 0x%llx (%s)", i, 104 | (globals::modules[module_idx]->module_base + ld.stack_trace(i).relative_pc()), 105 | symbols::symbolize(globals::modules[module_idx]->module_name, 106 | ld.stack_trace(i).relative_pc()).c_str()); 107 | } else { 108 | snprintf(buffer, sizeof(buffer), " #%u 0x%llx (%s+%.8x)", i, 109 | (globals::modules[module_idx]->module_base + ld.stack_trace(i).relative_pc()), 110 | globals::modules[module_idx]->module_name, 111 | (unsigned)ld.stack_trace(i).relative_pc()); 112 | } 113 | } else { 114 | snprintf(buffer, sizeof(buffer), " #%u 0x%llx (???"")", 115 | i, (ld.stack_trace(i).relative_pc())); 116 | } 117 | ret += buffer; 118 | 119 | if 
(ld.stack_trace(i).has_try_level()) { 120 | uint32_t try_level = ld.stack_trace(i).try_level(); 121 | if (try_level == 0xFFFFFFFE) { 122 | snprintf(buffer, sizeof(buffer), " <===== SEH disabled"); 123 | } else { 124 | snprintf(buffer, sizeof(buffer), " <===== SEH enabled (#%u)", try_level); 125 | } 126 | ret += buffer; 127 | } 128 | 129 | ret += "\n"; 130 | } 131 | 132 | return ret; 133 | } 134 | 135 | const char *translate_mem_access(log_data_st::mem_access_type type) { 136 | switch (type) { 137 | case log_data_st::MEM_READ: return "READ"; 138 | case log_data_st::MEM_WRITE: return "WRITE"; 139 | case log_data_st::MEM_EXEC: return "EXEC"; 140 | case log_data_st::MEM_RW: return "R/W"; 141 | } 142 | return "INVALID"; 143 | } 144 | 145 | -------------------------------------------------------------------------------- /instrumentation/common.h: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | // Gynvael Coldwind (gynvael@google.com) 5 | // 6 | // Copyright 2013-2018 Google LLC 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 
//

#ifndef BOCHSPWN_COMMON_H_
#define BOCHSPWN_COMMON_H_

// NOTE(review): the names of the five system headers below are missing in
// this copy of the file (they appear to have been stripped together with the
// angle brackets) — restore them from the upstream repository.
#include
#include
#include
#include
#include

#include "logging.pb.h"

// ------------------------------------------------------------------
// Constants.
// ------------------------------------------------------------------

// Name of the environment variable related to the configuration file
// (presumably it holds the file's path — confirm at the point of use).
const char kConfFileEnvVariable[] = "BOCHSPWN_CONF";

// ------------------------------------------------------------------
// Internal enumerations and structures.
// ------------------------------------------------------------------

// Generic settings read from .ini configuration file.
//
// All char* members point to heap memory (the defaults are created with
// strdup()) and are released with free() in the destructor; the two FILE
// handles are closed there as well if they are non-NULL.
//
// NOTE(review): the structure declares a destructor but no copy constructor
// or assignment operator, so copying an instance would leave two objects
// freeing/closing the same resources. Keep it non-copied.
struct bochspwn_config {
  // Path to the trace log output file.
  char *trace_log_path;

  // Path to the modules output file.
  char *modules_list_path;

  // Handle to the trace log output file.
  FILE *trace_file;

  // Handle to the modules output file.
  FILE *modules_file;

  // Guest operating system name. Currently allowed: {"windows", "linux",
  // "freebsd"}
  // NOTE(review): the source tree also contains os_openbsd.cc/.h — confirm
  // whether "openbsd" should be listed here too.
  char *system;

  // Guest operating system version, used as the name for system-specific
  // .ini configuration section.
  char *os_version;

  // Guest operating system bitness. Allowed values: {32, 64}
  uint32_t bitness;

  // Minimum and maximum lengths of single memory reads.
  uint32_t min_read_size, max_read_size;

  // Minimum and maximum lengths of single memory writes.
  uint32_t min_write_size, max_write_size;

  // Maximum number of stack frames stored in a single memory access
  // descriptor.
  uint32_t callstack_length;

  // If non-zero, indicates that output logs should be printed in plain
  // text instead of binary blobs. Useful for debugging.
  uint32_t write_as_text;

  // If non-zero, indicates that stack traces in the logs should be
  // symbolized using provided .pdb files.
  uint32_t symbolize;

  // Specifies path to directory containing .pdb files for the kernel
  // modules of the guest system. Valid only if globals::config.symbolize
  // is non-zero.
  char *symbol_path;

  // Initialize fields with typical values for safety.
  bochspwn_config() : trace_log_path(strdup("memlog.bin")), modules_list_path(strdup("modules.bin")),
                      trace_file(NULL), modules_file(NULL), system(strdup("windows")),
                      os_version(strdup("win7_32")), bitness(32), min_read_size(1), max_read_size(16),
                      min_write_size(1), max_write_size(16), callstack_length(64), write_as_text(0),
                      symbolize(0), symbol_path(NULL) {}

  // Releases the owned strings and closes whichever output files are open.
  ~bochspwn_config() {
    free(trace_log_path);
    free(modules_list_path);
    free(system);
    free(os_version);
    free(symbol_path);  // May still be NULL; free(NULL) is a no-op.

    if (trace_file != NULL) {
      fclose(trace_file);
    }
    if (modules_file != NULL) {
      fclose(modules_file);
    }
  }
};

// Included here to mitigate the header hell.
#include "bochs.h"
#include "cpu/cpu.h"
#include "mem_interface.h"

// Stack-trace descriptor, contains a full list of absolute virtual
// function call addresses.
struct stack_trace {
  // NOTE(review): the element type of this vector is missing in this copy of
  // the file (likely stripped with the angle brackets) — restore it from the
  // upstream repository.
  std::vector trace;

  // Both comparisons simply delegate to the underlying vector.
  bool operator< (const stack_trace& a) const {
    return (trace < a.trace);
  }
  bool operator != (const stack_trace& a) const {
    return (trace != a.trace);
  }
};

// Unique thread identifier.
131 | struct client_id { 132 | uint64_t process_id; 133 | uint64_t thread_id; 134 | 135 | client_id() : process_id(0), thread_id(0) {} 136 | client_id(uint64_t pid, uint64_t tid) : process_id(pid), thread_id(tid) {} 137 | 138 | // The operator is required by C++ STL structures such as map<> to 139 | // deterministically and accurately compare structures used as keys. 140 | // The specified order is not relevant here. 141 | bool operator< (const client_id& a) const { 142 | if (process_id == a.process_id) { 143 | return (thread_id < a.thread_id); 144 | } 145 | return (process_id < a.process_id); 146 | } 147 | }; 148 | 149 | // Per-thread information describing system call activity: number of 150 | // services invoked by the point in time the structure is saved and the 151 | // last-seen syscall id. 152 | struct thread_info { 153 | uint32_t syscall_count; 154 | uint16_t last_syscall_id; 155 | 156 | // Only used by Linux and FreeBSD - this is the return address 157 | // for a couple of user-memory-to-kernel-memory copying functions. 158 | uint64_t last_ret_addr; 159 | 160 | thread_info() : syscall_count(0), last_syscall_id(0) {} 161 | }; 162 | 163 | // Information about a known kernel module currently loaded in the 164 | // operating system. 165 | struct module_info { 166 | uint64_t module_base; 167 | uint64_t module_size; 168 | char *module_name; 169 | 170 | module_info() : module_base(0), module_size(0), module_name(NULL) {} 171 | module_info(bx_address b, bx_address s, const char *n) : 172 | module_base(b), module_size(s), module_name(strdup(n)) {} 173 | ~module_info() { if (module_name) free(module_name); } 174 | }; 175 | 176 | // ------------------------------------------------------------------ 177 | // Global helper functions. 178 | // ------------------------------------------------------------------ 179 | 180 | // Find kernel module descriptor by address or name. 
181 | int find_module(bx_address item); 182 | int find_module_by_name(const std::string& module); 183 | 184 | // Print out log record as nicely formatted text. 185 | std::string LogDataAsText(const log_data_st& ld); 186 | 187 | // Translate memory access type enum into textual representation. 188 | const char *translate_mem_access(log_data_st::mem_access_type type); 189 | 190 | // ------------------------------------------------------------------ 191 | // Global helper macros. 192 | // ------------------------------------------------------------------ 193 | #define READ_INI_STRING(file, section, name, buf, size) \ 194 | if (!GetPrivateProfileStringA((section), (name), NULL, (buf), (size), (file))) {\ 195 | fprintf(stderr, "Unable to read the %s/%s string from configuration file.\n", \ 196 | (section), (name));\ 197 | return false;\ 198 | } 199 | 200 | #define READ_INI_INT(file, section, name, buf, size, dest) \ 201 | READ_INI_STRING((file), (section), (name), (buf), (size))\ 202 | if (!sscanf(buf, "%i", (dest))) {\ 203 | fprintf(stderr, "Unable to parse the %s/%s value as integer.\n", \ 204 | (section), (name));\ 205 | return false;\ 206 | } 207 | 208 | #define READ_INI_ULL(file, section, name, buf, size, dest) \ 209 | READ_INI_STRING((file), (section), (name), (buf), (size))\ 210 | if (!sscanf(buf, "%llx", (dest))) {\ 211 | fprintf(stderr, "Unable to parse the %s/%s value as integer.\n", \ 212 | (section), (name));\ 213 | return false;\ 214 | } 215 | 216 | // ------------------------------------------------------------------ 217 | // Global objects. 218 | // ------------------------------------------------------------------ 219 | namespace globals { 220 | 221 | // Generic configuration. 222 | extern bochspwn_config config; 223 | 224 | // Global information about all currently known kernel modules. Updated 225 | // lazily, only when an unknown driver is encountered. 
226 | extern std::vector modules; 227 | 228 | // Thread descriptors including syscall stats / pending memory references. 229 | extern std::map thread_states; 230 | 231 | // Last known memory access descriptor. 232 | extern log_data_st last_ld; 233 | extern bool last_ld_present; 234 | 235 | // If set to true by a system-module, an additional callback gets invoked 236 | // before an instruction is executed. 237 | extern bool has_instr_before_execution_handler; 238 | 239 | } // namespace globals 240 | 241 | #endif // BOCHSPWN_COMMON_H_ 242 | 243 | -------------------------------------------------------------------------------- /instrumentation/events.cc: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | // Gynvael Coldwind (gynvael@google.com) 5 | // 6 | // Copyright 2013-2018 Google LLC 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 
19 | // 20 | 21 | #include "events.h" 22 | 23 | #include "bochs.h" 24 | #include "cpu/cpu.h" 25 | 26 | #include "common.h" 27 | #include "invoke.h" 28 | #include "logging.pb.h" 29 | 30 | namespace events { 31 | 32 | bool event_new_syscall(BX_CPU_C *pcpu, client_id *cid) { 33 | thread_info& thread = globals::thread_states[*cid]; 34 | 35 | thread.syscall_count++; 36 | thread.last_syscall_id = pcpu->gen_reg[BX_16BIT_REG_AX].word.rx; 37 | return true; 38 | } 39 | 40 | bool event_new_module(module_info *mi) { 41 | // Save the new module in the internal list. 42 | globals::modules.push_back(mi); 43 | 44 | // Save information about the new module to an output file. 45 | module_st new_module; 46 | 47 | new_module.set_name(mi->module_name); 48 | new_module.set_base_addr(mi->module_base); 49 | new_module.set_size(mi->module_size); 50 | 51 | std::string data; 52 | uint32_t data_size; 53 | 54 | if (!new_module.SerializeToString(&data)) { 55 | fprintf(stderr, "Unable to serialize protocol buffer to string.\n"); 56 | abort(); 57 | } 58 | data_size = data.size(); 59 | 60 | FILE *f = globals::config.modules_file; 61 | if (fwrite(&data_size, sizeof(uint32_t), 1, f) != 1 || 62 | fwrite(data.c_str(), 1, data.size(), f) != data.size()) { 63 | fprintf(stderr, "Unable to write serialized protobuf to file.\n"); 64 | abort(); 65 | } 66 | 67 | return true; 68 | } 69 | 70 | bool event_process_log() { 71 | FILE *f = globals::config.trace_file; 72 | 73 | if (globals::config.write_as_text) { 74 | fprintf(f, "%s\n", LogDataAsText(globals::last_ld).c_str()); 75 | } else { 76 | std::string data; 77 | uint32_t data_size; 78 | 79 | if (!globals::last_ld.SerializeToString(&data)) { 80 | fprintf(stderr, "Unable to serialize protocol buffer to string.\n"); 81 | abort(); 82 | } 83 | data_size = data.size(); 84 | 85 | if (fwrite(&data_size, sizeof(uint32_t), 1, f) != 1 || 86 | fwrite(data.c_str(), 1, data.size(), f) != data.size()) { 87 | fprintf(stderr, "Unable to write serialized protobuf to 
file.\n"); 88 | abort(); 89 | } 90 | } 91 | 92 | return true; 93 | } 94 | 95 | } // namespace events 96 | 97 | -------------------------------------------------------------------------------- /instrumentation/events.h: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | // Gynvael Coldwind (gynvael@google.com) 5 | // 6 | // Copyright 2013-2018 Google LLC 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 19 | // 20 | 21 | #ifndef BOCHSPWN_EVENTS_H_ 22 | #define BOCHSPWN_EVENTS_H_ 23 | 24 | #include 25 | 26 | #include "bochs.h" 27 | #include "cpu/cpu.h" 28 | 29 | #include "common.h" 30 | 31 | namespace events { 32 | 33 | // New system call was invoked in the guest operating system. 34 | bool event_new_syscall(BX_CPU_C *, client_id *); 35 | 36 | // A new module was detected in the guest operating system. 37 | bool event_new_module(module_info *mi); 38 | 39 | // A new memory access log should be processed. 
40 | bool event_process_log(); 41 | 42 | } // namespace events 43 | 44 | #endif // BOCHSPWN_EVENTS_H_ 45 | 46 | -------------------------------------------------------------------------------- /instrumentation/instrument.cc: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | // Gynvael Coldwind (gynvael@google.com) 5 | // 6 | // Copyright 2013-2018 Google LLC 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 19 | // 20 | 21 | #include 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #include "bochs.h" 30 | #include "cpu/cpu.h" 31 | #include "disasm/disasm.h" 32 | 33 | #include "common.h" 34 | #include "events.h" 35 | #include "instrument.h" 36 | #include "invoke.h" 37 | #include "logging.pb.h" 38 | #include "symbols.h" 39 | 40 | // ------------------------------------------------------------------ 41 | // Helper declarations.
42 | // ------------------------------------------------------------------ 43 | static bool init_basic_config(const char *path, bochspwn_config *config); 44 | static void process_mem_access(BX_CPU_C *pcpu, bx_address lin, unsigned len, bx_address pc, 45 | log_data_st::mem_access_type access_type, char *disasm); 46 | static void destroy_globals(); 47 | 48 | // ------------------------------------------------------------------ 49 | // Instrumentation implementation. 50 | // ------------------------------------------------------------------ 51 | 52 | // Callback invoked on Bochs CPU initialization. 53 | void bx_instr_initialize(unsigned cpu) { 54 | char *conf_path = NULL; 55 | 56 | // Initialize symbols subsystem. 57 | symbols::initialize(); 58 | 59 | // Obtain configuration file path. 60 | if (conf_path = getenv(kConfFileEnvVariable), !conf_path) { 61 | fprintf(stderr, "Configuration file not specified in \"%s\"\n", 62 | kConfFileEnvVariable); 63 | abort(); 64 | } 65 | 66 | // Read basic configuration from .ini file. 67 | if (!init_basic_config(conf_path, &globals::config)) { 68 | fprintf(stderr, "Initialization with config file \"%s\" failed\n", conf_path); 69 | abort(); 70 | } 71 | 72 | // Initialize output trace log file handle for the first time. 73 | globals::config.trace_file = fopen(globals::config.trace_log_path, "wb"); 74 | if (!globals::config.trace_file) { 75 | fprintf(stderr, "Unable to open the \"%s\" trace log file\n", globals::config.trace_log_path); 76 | abort(); 77 | } 78 | // Set internal buffer size to 32kB for performance reasons. 79 | setvbuf(globals::config.trace_file, NULL, _IOFBF, 32 * 1024); 80 | 81 | // Initialize modules output file handle for the first time. 
82 | globals::config.modules_file = fopen(globals::config.modules_list_path, "wb"); 83 | if (!globals::config.modules_file) { 84 | fprintf(stderr, "Unable to open the \"%s\" modules log file\n", globals::config.modules_list_path); 85 | abort(); 86 | } 87 | // Disable buffering for the file. 88 | setbuf(globals::config.modules_file, NULL); 89 | 90 | // Allow the guest-specific part to initialize (read internal offsets etc). 91 | if (!invoke_system_handler(BX_OS_EVENT_INIT, conf_path, NULL)) { 92 | fprintf(stderr, "Guest-specific initialization with file \"%s\" failed\n", conf_path); 93 | abort(); 94 | } 95 | } 96 | 97 | // Callback invoked on destroying a Bochs CPU object. 98 | void bx_instr_exit(unsigned cpu) { 99 | // Free the symbols subsystem. 100 | symbols::destroy(); 101 | 102 | // Free allocations in global structures. 103 | destroy_globals(); 104 | } 105 | 106 | // Callback called on attempt to access linear memory. 107 | // 108 | // Note: the BX_INSTR_LIN_ACCESS instrumentation doesn't work when 109 | // repeat-speedups feature is enabled. Always remember to set 110 | // BX_SUPPORT_REPEAT_SPEEDUPS to 0 in config.h, otherwise Bochspwn might 111 | // not work correctly. 112 | void bx_instr_lin_access(unsigned cpu, bx_address lin, bx_address phy, 113 | unsigned len, unsigned memtype, unsigned rw) { 114 | BX_CPU_C *pcpu = BX_CPU(cpu); 115 | 116 | // Not going to use physical memory address. 117 | (void)phy; 118 | 119 | // Read-write instructions are currently not interesting. 120 | if (rw == BX_RW) 121 | return; 122 | 123 | // Is the CPU in protected or long mode? 124 | unsigned mode = 0; 125 | 126 | // Note: DO NOT change order of these ifs. long64_mode must be called 127 | // before protected_mode, since it will also return "true" on protected_mode 128 | // query (well, long mode is technically protected mode). 
129 | if (pcpu->long64_mode()) { 130 | #if BX_SUPPORT_X86_64 131 | mode = 64; 132 | #else 133 | return; 134 | #endif // BX_SUPPORT_X86_64 135 | } else if (pcpu->protected_mode()) { 136 | // This is either protected 32-bit mode or 32-bit compat. long mode. 137 | mode = 32; 138 | } else { 139 | // Nothing interesting. 140 | // TODO(gynvael): Well actually there is the smm_mode(), which 141 | // might be a little interesting, even if it's just the bochs BIOS 142 | // SMM code. 143 | return; 144 | } 145 | 146 | // Is pc in kernel memory area? 147 | // Is lin in user memory area? 148 | bx_address pc = pcpu->prev_rip; 149 | if (!invoke_system_handler(BX_OS_EVENT_CHECK_KERNEL_ADDR, &pc, NULL) || 150 | !invoke_system_handler(BX_OS_EVENT_CHECK_USER_ADDR, &lin, NULL)) { 151 | return; /* pc not in ring-0 or lin not in ring-3 */ 152 | } 153 | 154 | // Check if the access meets specified operand length criteria. 155 | if (rw == BX_READ) { 156 | if (len < globals::config.min_read_size || len > globals::config.max_read_size) { 157 | return; 158 | } 159 | } else { 160 | if (len < globals::config.min_write_size || len > globals::config.max_write_size) { 161 | return; 162 | } 163 | } 164 | 165 | // Save basic information about the access. 166 | log_data_st::mem_access_type access_type; 167 | switch (rw) { 168 | case BX_READ: 169 | access_type = log_data_st::MEM_READ; 170 | break; 171 | case BX_WRITE: 172 | access_type = log_data_st::MEM_WRITE; 173 | break; 174 | case BX_EXECUTE: 175 | access_type = log_data_st::MEM_EXEC; 176 | break; 177 | case BX_RW: 178 | access_type = log_data_st::MEM_RW; 179 | break; 180 | default: abort(); 181 | } 182 | 183 | // Disassemble current instruction. 
184 | static Bit8u ibuf[32] = {0}; 185 | static char pc_disasm[64]; 186 | if (read_lin_mem(pcpu, pc, sizeof(ibuf), ibuf)) { 187 | disassembler bx_disassemble; 188 | bx_disassemble.disasm(mode == 32, mode == 64, 0, pc, ibuf, pc_disasm); 189 | } 190 | 191 | // With basic information filled in, process the access further. 192 | process_mem_access(pcpu, lin, len, pc, access_type, pc_disasm); 193 | } 194 | 195 | // Callback invoked before execution of each instruction takes place. 196 | // Used to intercept system call invocations. 197 | void bx_instr_before_execution(unsigned cpu, bxInstruction_c *i) { 198 | static client_id thread; 199 | BX_CPU_C *pcpu = BX_CPU(cpu); 200 | unsigned opcode; 201 | 202 | // We're not interested in instructions executed in real mode. 203 | if (!pcpu->protected_mode() && !pcpu->long64_mode()) { 204 | return; 205 | } 206 | 207 | // If the system needs an additional invokement from here, call it now. 208 | if (globals::has_instr_before_execution_handler) { 209 | invoke_system_handler(BX_OS_EVENT_INSTR_BEFORE_EXECUTION, pcpu, i); 210 | } 211 | 212 | // Any system-call invoking instruction is interesting - this 213 | // is mostly due to 64-bit Linux which allows various ways 214 | // to be used for system-call invocation. 215 | // Note: We're not checking for int1, int3 nor into instructions. 216 | opcode = i->getIaOpcode(); 217 | if (opcode != BX_IA_SYSCALL && opcode != BX_IA_SYSENTER && opcode != BX_IA_INT_Ib) { 218 | return; 219 | } 220 | 221 | // The only two allowed interrupts are int 0x2e and int 0x80, which are legacy 222 | // ways to invoke system calls on Windows and linux, respectively. 223 | if (opcode == BX_IA_INT_Ib && i->Ib() != 0x2e && i->Ib() != 0x80) { 224 | return; 225 | } 226 | 227 | // Obtain information about the current process/thread IDs. 228 | if (!invoke_system_handler(BX_OS_EVENT_FILL_CID, pcpu, &thread)) { 229 | return; 230 | } 231 | 232 | // Process information about a new syscall depending on the current mode. 
233 | if (!events::event_new_syscall(pcpu, &thread)) { 234 | return; 235 | } 236 | } 237 | 238 | // ------------------------------------------------------------------ 239 | // Helper functions' implementation. 240 | // ------------------------------------------------------------------ 241 | 242 | static bool init_basic_config(const char *config_path, bochspwn_config *config) { 243 | static char buffer[256]; 244 | 245 | // Trace output file path. 246 | READ_INI_STRING(config_path, "general", "trace_log_path", buffer, sizeof(buffer)); 247 | config->trace_log_path = strdup(buffer); 248 | 249 | // Modules list file path. 250 | READ_INI_STRING(config_path, "general", "modules_list_path", buffer, sizeof(buffer)); 251 | config->modules_list_path = strdup(buffer); 252 | 253 | // Operating system. 254 | READ_INI_STRING(config_path, "general", "os", buffer, sizeof(buffer)); 255 | 256 | bool found = false; 257 | for (unsigned int i = 0; kSupportedSystems[i] != NULL; i++) { 258 | if (!strcmp(buffer, kSupportedSystems[i])) { 259 | config->system = strdup(buffer); 260 | found = true; 261 | break; 262 | } 263 | } 264 | if (!found) { 265 | fprintf(stderr, "Unsupported system \"%s\"\n", buffer); 266 | return false; 267 | } 268 | 269 | // Bitness. 270 | READ_INI_INT(config_path, "general", "bitness", buffer, sizeof(buffer), &config->bitness); 271 | if (config->bitness != 32 && config->bitness != 64) { 272 | fprintf(stderr, "Only 32 and 64 bitness allowed\n"); 273 | return false; 274 | } 275 | 276 | // System version. 277 | READ_INI_STRING(config_path, "general", "version", buffer, sizeof(buffer)); 278 | config->os_version = strdup(buffer); 279 | 280 | // Minimum and maximum length of read and write operations. 
281 | READ_INI_INT(config_path, "general", "min_read_size", buffer, sizeof(buffer), 282 | &config->min_read_size); 283 | READ_INI_INT(config_path, "general", "max_read_size", buffer, sizeof(buffer), 284 | &config->max_read_size); 285 | READ_INI_INT(config_path, "general", "min_write_size", buffer, sizeof(buffer), 286 | &config->min_write_size); 287 | READ_INI_INT(config_path, "general", "max_write_size", buffer, sizeof(buffer), 288 | &config->max_write_size); 289 | 290 | // Maximum length of callstack. 291 | READ_INI_INT(config_path, "general", "callstack_length", buffer, sizeof(buffer), 292 | &config->callstack_length); 293 | 294 | // "Write as text" debugging feature. 295 | READ_INI_INT(config_path, "general", "write_as_text", buffer, sizeof(buffer), 296 | &config->write_as_text); 297 | 298 | // Symbolization settings. 299 | READ_INI_INT(config_path, "general", "symbolize", buffer, sizeof(buffer), 300 | &config->symbolize); 301 | READ_INI_STRING(config_path, "general", "symbol_path", buffer, sizeof(buffer)); 302 | config->symbol_path = strdup(buffer); 303 | 304 | return true; 305 | } 306 | 307 | __attribute__((noinline)) 308 | static void process_mem_access(BX_CPU_C *pcpu, bx_address lin, unsigned len, 309 | bx_address pc, log_data_st::mem_access_type access_type, 310 | char *disasm) { 311 | static unsigned last_repeated = 0; 312 | 313 | // Is this a continuous memory access (e.g. inlined memcpy or memcmp)? 314 | if (globals::last_ld.pc() != pc || 315 | globals::last_ld.len() != len || 316 | globals::last_ld.lin() + globals::last_ld.len() * last_repeated != lin || 317 | globals::last_ld.access_type() != access_type || 318 | !globals::last_ld_present) { 319 | // It's a separate one. Print out last_ld if it was present. 
320 | if (globals::last_ld_present) { 321 | globals::last_ld.set_repeated(last_repeated); 322 | events::event_process_log(); 323 | } 324 | 325 | globals::last_ld.Clear(); 326 | globals::last_ld.set_lin(lin); 327 | globals::last_ld.set_len(len); 328 | globals::last_ld.set_pc(pc); 329 | globals::last_ld.set_access_type(access_type); 330 | globals::last_ld.set_pc_disasm(disasm); 331 | 332 | last_repeated = 1; 333 | globals::last_ld_present = invoke_system_handler(BX_OS_EVENT_FILL_INFO, pcpu, NULL); 334 | } else { 335 | // Continuation. 336 | last_repeated++; 337 | } 338 | } 339 | 340 | static void destroy_globals() { 341 | for (unsigned int i = 0; i < globals::modules.size(); i++) { 342 | delete globals::modules[i]; 343 | } 344 | globals::modules.clear(); 345 | 346 | globals::thread_states.clear(); 347 | 348 | globals::last_ld_present = false; 349 | } 350 | 351 | -------------------------------------------------------------------------------- /instrumentation/instrument.h: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | // Gynvael Coldwind (gynvael@google.com) 5 | // 6 | // Copyright 2013-2018 Google LLC 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 
//

#ifndef BOCHSPWN_INSTRUMENT_H_
#define BOCHSPWN_INSTRUMENT_H_

// NOTE(review): the targets of the two system #include lines below were lost
// in extraction; restore them from the upstream file.
#include
#include

#include "bochs.h"
#include "cpu/cpu.h"

#include "common.h"
#include "logging.pb.h"

// ------------------------------------------------------------------
// Implemented instrumentation.
// ------------------------------------------------------------------

// The four callbacks that carry actual logic. Each BX_INSTR_* macro below
// simply forwards its arguments to the function of the matching name.
void bx_instr_initialize(unsigned cpu);
void bx_instr_exit(unsigned cpu);
void bx_instr_lin_access(unsigned cpu, bx_address lin, bx_address phy,
                         unsigned len, unsigned memtype, unsigned rw);
void bx_instr_before_execution(unsigned cpu, bxInstruction_c *i);

#define BX_INSTR_INITIALIZE(cpu_id) \
  bx_instr_initialize(cpu_id)

#define BX_INSTR_EXIT(cpu_id) \
  bx_instr_exit(cpu_id)

#define BX_INSTR_LIN_ACCESS(cpu_id, lin, phy, len, memtype, rw) \
  bx_instr_lin_access(cpu_id, lin, phy, len, memtype, rw)

#define BX_INSTR_BEFORE_EXECUTION(cpu_id, i) \
  bx_instr_before_execution(cpu_id, i)

// ------------------------------------------------------------------
// Stubs for the rest of macros.
// ------------------------------------------------------------------

// Every macro in this section expands to nothing, so the corresponding
// instrumentation points are compiled out.

/* initialization/deinitialization of the instrumentation */
#define BX_INSTR_INIT_ENV()
#define BX_INSTR_EXIT_ENV()

/* simulation init, shutdown, reset */
#define BX_INSTR_RESET(cpu_id, type)
#define BX_INSTR_HLT(cpu_id)
#define BX_INSTR_MWAIT(cpu_id, addr, len, flags)

/* called from command line debugger */
#define BX_INSTR_DEBUG_PROMPT()
#define BX_INSTR_DEBUG_CMD(cmd)

/* branch resolution */
#define BX_INSTR_CNEAR_BRANCH_TAKEN(cpu_id, branch_eip, new_eip)
#define BX_INSTR_CNEAR_BRANCH_NOT_TAKEN(cpu_id, branch_eip)
#define BX_INSTR_UCNEAR_BRANCH(cpu_id, what, branch_eip, new_eip)
#define BX_INSTR_FAR_BRANCH(cpu_id, what, prev_cs, prev_eip, new_cs, new_eip)

/* decoding completed */
#define BX_INSTR_OPCODE(cpu_id, i, opcode, len, is32, is64)

/* exceptional case and interrupt */
#define BX_INSTR_EXCEPTION(cpu_id, vector, error_code)
#define BX_INSTR_INTERRUPT(cpu_id, vector)
#define BX_INSTR_HWINTERRUPT(cpu_id, vector, cs, eip)

/* TLB/CACHE control instruction executed */
#define BX_INSTR_CLFLUSH(cpu_id, laddr, paddr)
#define BX_INSTR_CACHE_CNTRL(cpu_id, what)
#define BX_INSTR_TLB_CNTRL(cpu_id, what, new_cr3)
#define BX_INSTR_PREFETCH_HINT(cpu_id, what, seg, offset)

/* execution */
#define BX_INSTR_AFTER_EXECUTION(cpu_id, i)
#define BX_INSTR_REPEAT_ITERATION(cpu_id, i)

/* physical memory access */
#define BX_INSTR_PHY_ACCESS(cpu_id, phy, len, memtype, rw)

/* feedback from device units */
#define BX_INSTR_INP(addr, len)
#define BX_INSTR_INP2(addr, len, val)
#define BX_INSTR_OUTP(addr, len, val)

/* wrmsr callback */
#define BX_INSTR_WRMSR(cpu_id, addr, value)

#endif  // BOCHSPWN_INSTRUMENT_H_

-------------------------------------------------------------------------------- /instrumentation/invoke.cc: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | // Gynvael Coldwind (gynvael@google.com) 5 | // 6 | // Copyright 2013-2018 Google LLC 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 
19 | // 20 | 21 | #include "invoke.h" 22 | 23 | #include "common.h" 24 | 25 | bool invoke_system_handler(os_event_type type, void *arg1, void *arg2) { 26 | static const s_event_handler_func *h = NULL; 27 | 28 | if (!h) { 29 | char *system = globals::config.system; 30 | 31 | unsigned int i; 32 | for (i = 0; kSystemEventHandlers[i].system != NULL; i++) { 33 | if (!strcmp(system, kSystemEventHandlers[i].system)) { 34 | break; 35 | } 36 | } 37 | 38 | if (!kSystemEventHandlers[i].system) { 39 | abort(); 40 | } 41 | 42 | h = kSystemEventHandlers[i].handlers; 43 | } 44 | 45 | return h[type](arg1, arg2); 46 | } 47 | 48 | 49 | -------------------------------------------------------------------------------- /instrumentation/invoke.h: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | // Gynvael Coldwind (gynvael@google.com) 5 | // 6 | // Copyright 2013-2018 Google LLC 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 
//

#ifndef BOCHSPWN_INVOKE_H_
#define BOCHSPWN_INVOKE_H_

#include "common.h"
#include "events.h"
#include "os_linux.h"
#include "os_windows.h"
#include "os_freebsd.h"
#include "os_openbsd.h"

// ------------------------------------------------------------------
// Bochspwn system-related definitions.
// ------------------------------------------------------------------

// NULL-terminated list of guest system names. The same four names appear as
// the `system` keys of kSystemEventHandlers below.
const char *const kSupportedSystems[] = {
  "windows",
  "linux",
  "freebsd",
  "openbsd",
  NULL
};

// Identifiers of the per-system events. invoke_system_handler() uses the
// value directly as an index into a handlers[] row, so the enumerator order
// must match the order of the function pointers in kSystemEventHandlers.
// BX_OS_EVENT_MAX is the number of events (the size of a handlers[] row).
enum os_event_type {
  BX_OS_EVENT_INIT = 0,
  BX_OS_EVENT_CHECK_KERNEL_ADDR,
  BX_OS_EVENT_CHECK_USER_ADDR,
  BX_OS_EVENT_FILL_CID,
  BX_OS_EVENT_FILL_INFO,
  BX_OS_EVENT_INSTR_BEFORE_EXECUTION,
  BX_OS_EVENT_MAX
};

// Common signature every per-system handler is cast to: two opaque pointer
// arguments, bool result. The real parameter types differ per event (see the
// os_*.h prototypes), hence the casts in the table below.
typedef bool (*s_event_handler_func)(void *, void *);

// Per-system dispatch table: one row per supported system, with handlers[]
// laid out in os_event_type order. The table ends with a row whose `system`
// is NULL.
const struct tag_kSystemEventHandlers {
  const char *system;
  s_event_handler_func handlers[BX_OS_EVENT_MAX];
} kSystemEventHandlers[] = {
  // No INSTR_BEFORE_EXECUTION handler for windows: the last slot is NULL.
  {"windows",
   {(s_event_handler_func)windows::init,
    (s_event_handler_func)windows::check_kernel_addr,
    (s_event_handler_func)windows::check_user_addr,
    (s_event_handler_func)windows::fill_cid,
    (s_event_handler_func)windows::fill_info,
    (s_event_handler_func)NULL}
  },
  // No INSTR_BEFORE_EXECUTION handler for linux: the last slot is NULL.
  {"linux",
   {(s_event_handler_func)linux::init,
    (s_event_handler_func)linux::check_kernel_addr,
    (s_event_handler_func)linux::check_user_addr,
    (s_event_handler_func)linux::fill_cid,
    (s_event_handler_func)linux::fill_info,
    (s_event_handler_func)NULL}
  },
  {"freebsd",
   {(s_event_handler_func)freebsd::init,
    (s_event_handler_func)freebsd::check_kernel_addr,
    (s_event_handler_func)freebsd::check_user_addr,
    (s_event_handler_func)freebsd::fill_cid,
    (s_event_handler_func)freebsd::fill_info,
    (s_event_handler_func)freebsd::instr_before_execution}
  },
  {"openbsd",
   {(s_event_handler_func)openbsd::init,
    (s_event_handler_func)openbsd::check_kernel_addr,
    (s_event_handler_func)openbsd::check_user_addr,
    (s_event_handler_func)openbsd::fill_cid,
    (s_event_handler_func)openbsd::fill_info,
    (s_event_handler_func)openbsd::instr_before_execution}
  },
  // Terminator row. Only five of the BX_OS_EVENT_MAX (six) slots are spelled
  // out; aggregate initialization zero-fills the remaining one.
  {NULL, {NULL, NULL, NULL, NULL, NULL}}
};

// Calls the handler for `type` from the row selected by the configured
// system, forwarding arg1/arg2; defined in invoke.cc.
bool invoke_system_handler(os_event_type type, void *arg1, void *arg2);

#endif  // BOCHSPWN_INVOKE_H_

-------------------------------------------------------------------------------- /instrumentation/logging.proto: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | // Gynvael Coldwind (gynvael@google.com) 5 | // 6 | // Copyright 2013-2018 Google LLC 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 19 | // 20 | syntax = "proto2"; 21 | 22 | // Descriptor of a single executable module referenced in the stack trace(s) of 23 | // one or more memory access logs. 24 | message module_st { 25 | required string name = 1; 26 | required uint64 base_addr = 2; 27 | required uint64 size = 3; 28 | } 29 | 30 | // Descriptor of a single memory access, saved in the trace log file. 31 | message log_data_st { 32 | // Linear address of the accessed memory.
33 | required uint64 lin = 1; 34 | 35 | // Memory access length (in bytes). This value can be 1/2/4/8, and 36 | // symbolizes the size of a single atomic memory fetch. If a continuous 37 | // memory area has been referenced using fetches of the same size, the 38 | // "repeated" field will contain the number of repetitions. The following 39 | // value: 40 | // len * repeated 41 | // makes the total size of the referenced memory area. 42 | required uint32 len = 2; 43 | 44 | // How many fetches of consecutive memory addresses starting from "lin" 45 | // have been performed one after another. Typically equal to 1. 46 | required uint32 repeated = 3; 47 | 48 | enum mem_access_type { 49 | MEM_READ = 0; 50 | MEM_WRITE = 1; 51 | MEM_EXEC = 2; 52 | MEM_RW = 3; 53 | }; 54 | 55 | // Type of memory access. The Read/Write type is reserved for 56 | // instructions which read from a memory area and immediately write back 57 | // to it (e.g. inc [mem32]). 58 | required mem_access_type access_type = 4; 59 | 60 | // Absolute EIP or RIP of the instruction provoking the memory fetch. 61 | required uint64 pc = 5; 62 | 63 | // Number of syscalls previously invoked by this thread. 64 | required uint32 syscall_count = 6; 65 | 66 | // ID number of the last syscall invoked in the thread. 67 | required uint32 syscall_id = 7; 68 | 69 | // Process filename. 70 | required bytes image_file_name = 8; 71 | // Process ID. 72 | required uint32 process_id = 9; 73 | // Thread ID. 74 | required uint32 thread_id = 10; 75 | // Thread creation time. 76 | required uint64 create_time = 11; 77 | 78 | message callstack_item { 79 | required int32 module_idx = 1; 80 | required uint64 relative_pc = 2; 81 | required uint64 stack_frame = 3; 82 | 83 | // XXX: Windows 32-bit only. 
If the function has a SEH exception record, 84 | // the value contains the TryLevel field of the _EH3_EXCEPTION_REGISTRATION 85 | // structure, which indicates if the specific area of code being executed is 86 | // guarded by a try/except statement or not. A value of 0xFFFFFFFE means the 87 | // exception handler is disabled, while positive values close to 0 identify 88 | // an active try/except block. 89 | optional uint32 try_level = 4; 90 | } 91 | 92 | // A stack trace at the time of the memory access. 93 | // 94 | // Note: modules[stack_trace[0].module_idx].base_addr + stack_trace[0].relative_pc 95 | // should be equal to the "pc" value. 96 | repeated callstack_item stack_trace = 12; 97 | 98 | // Textual representation of the memory-fetching instruction. 99 | required string pc_disasm = 13; 100 | 101 | // XXX: Windows only. The field contains the PreviousMode bit denoting 102 | // whether the system service was called from user or kernel-mode. 103 | optional uint32 previous_mode = 14; 104 | } 105 | 106 | -------------------------------------------------------------------------------- /instrumentation/os_freebsd.cc: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | // Gynvael Coldwind (gynvael@google.com) 5 | // 6 | // Copyright 2013-2018 Google LLC 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 19 | // 20 | 21 | #include "os_freebsd.h" 22 | 23 | #include 24 | #include 25 | 26 | #include "common.h" 27 | #include "events.h" 28 | #include "instrument.h" 29 | #include "logging.pb.h" 30 | 31 | namespace freebsd { 32 | 33 | uint32_t guest_ptr_size; 34 | uint64_t user_space_boundary; 35 | uint64_t kernel_space_boundary; 36 | 37 | uint32_t off_thread_td_pid; 38 | uint32_t off_thread_td_proc; 39 | uint32_t off_proc_p_pid; 40 | uint32_t off_proc_p_comm; 41 | uint32_t conf_proc_p_comm_size; // MAXCOMLEN +1 in sys/param.h 42 | 43 | // TODO(gynvael): Support modules in the future... maybe. 44 | // There area over 400 modules on the list in our test OS and all 45 | // of them (every single one) points to the kernel image. So no 46 | // sense of supporting it atm. 47 | 48 | uint64_t kernel_start; 49 | uint64_t kernel_end; 50 | 51 | uint64_t copyin_addr; 52 | uint64_t copyinstr_addr; 53 | 54 | uint64_t copyin_addr_end; 55 | uint64_t copyinstr_addr_end; 56 | 57 | struct module_summary_st { 58 | uint64_t l_prev, l_next; 59 | uint64_t core_addr; 60 | uint32_t core_size; 61 | char name[MAX_MODULE_NAME_LEN]; 62 | }; 63 | 64 | // Helper routines. 65 | static bool get_kernel_gs_base(BX_CPU_C *pcpu, uint64_t *kernel_gs_base); 66 | static bool get_proc_pid_gid(BX_CPU_C *pcpu, uint64_t kernel_gs_base, 67 | uint64_t *addr_proc_struct, uint32_t *pid, uint32_t *tid); 68 | 69 | bool init(const char *config_path, void *unused) { 70 | char buffer[256]; 71 | 72 | // Read FreeBSD-specific configuration. 
73 | READ_INI_INT(config_path, globals::config.os_version, "thread_td_tid", 74 | buffer, sizeof(buffer), &off_thread_td_pid); 75 | READ_INI_INT(config_path, globals::config.os_version, "thread_td_proc", 76 | buffer, sizeof(buffer), &off_thread_td_proc); 77 | READ_INI_INT(config_path, globals::config.os_version, "proc_p_pid", 78 | buffer, sizeof(buffer), &off_proc_p_pid); 79 | READ_INI_INT(config_path, globals::config.os_version, "proc_p_comm", 80 | buffer, sizeof(buffer), &off_proc_p_comm); 81 | READ_INI_INT(config_path, globals::config.os_version, "proc_p_comm_size", 82 | buffer, sizeof(buffer), &conf_proc_p_comm_size); 83 | 84 | READ_INI_ULL(config_path, globals::config.os_version, "kernel_start", 85 | buffer, sizeof(buffer), &kernel_start); 86 | READ_INI_ULL(config_path, globals::config.os_version, "kernel_end", 87 | buffer, sizeof(buffer), &kernel_end); 88 | 89 | READ_INI_ULL(config_path, globals::config.os_version, "copyin", 90 | buffer, sizeof(buffer), ©in_addr); 91 | READ_INI_ULL(config_path, globals::config.os_version, "copyinstr", 92 | buffer, sizeof(buffer), ©instr_addr); 93 | 94 | READ_INI_ULL(config_path, globals::config.os_version, "copyin_end", 95 | buffer, sizeof(buffer), ©in_addr_end); 96 | READ_INI_ULL(config_path, globals::config.os_version, "copyinstr_end", 97 | buffer, sizeof(buffer), ©instr_addr_end); 98 | 99 | // If addresses of these functions are given, then enable grabbing the RET 100 | // address when they are called. 101 | if (copyin_addr != 0 || copyinstr_addr != 0) { 102 | globals::has_instr_before_execution_handler = true; 103 | } 104 | 105 | // Put the kernel address and size in the special module list. 106 | module_info *mi = new module_info(kernel_start, kernel_end - kernel_start, "kernel"); 107 | events::event_new_module(mi); 108 | 109 | // Check some assumptions. 
110 | if (conf_proc_p_comm_size >= MAX_PROC_COMM_LEN) { 111 | fprintf(stderr, 112 | "error: conf_proc_p_comm_size in config is larger than MAX_PROC_COMM_LEN;\n" 113 | " you can recompile with -DMAX_PROC_COMM_LEN=\n" 114 | " and try again.\n"); 115 | abort(); 116 | } 117 | 118 | // Read the configuration specific to guest bitness. 119 | if (globals::config.bitness == 32) { 120 | guest_ptr_size = 4; 121 | user_space_boundary = 0xbfffffff; 122 | kernel_space_boundary = 0xc0000000; 123 | } else { 124 | guest_ptr_size = 8; 125 | user_space_boundary = 0x0000080000000000LL; 126 | kernel_space_boundary = 0xffff800000000000LL; 127 | } 128 | 129 | return true; 130 | } 131 | 132 | bool check_kernel_addr(uint64_t *addr, void *unused) { 133 | if (guest_ptr_size == 4) { 134 | return (*addr >= 0xbfc00000); 135 | } 136 | 137 | return (*addr >= 0xffff800000000000LL); 138 | } 139 | 140 | bool check_user_addr(uint64_t *addr, void *unused) { 141 | if (guest_ptr_size == 4) { 142 | return (*addr < 0xbfc00000); 143 | } 144 | 145 | return (*addr < 0x0000080000000000LL); 146 | } 147 | 148 | static bool get_kernel_gs_base(BX_CPU_C *pcpu, uint64_t *kernel_gs_base) { 149 | 150 | // Is this 32-bit x86? 151 | if (guest_ptr_size == 4) { 152 | 153 | // Handle 32-bit x86. 154 | 155 | // First try the FS.base - if it's "in kernel mode" then there is nothing 156 | // mode to do. Otherwise read it from GDT (GPRIV_SEL offset 8). 157 | uint64_t base = pcpu->get_segment_base(BX_SEG_REG_FS); 158 | if (check_kernel_addr(&base, NULL)) { 159 | *kernel_gs_base = base; 160 | return true; 161 | } 162 | 163 | // Read the GDT entry. 164 | uint8_t seg_raw[8] = {0}; 165 | if (!read_lin_mem(pcpu, pcpu->gdtr.base + 8, 8, seg_raw)) { 166 | return false; 167 | } 168 | 169 | // Put together the base address. 
170 | base = (uint32_t)seg_raw[2] | 171 | ((uint32_t)seg_raw[3] << 8) | 172 | ((uint32_t)seg_raw[4] << 16) | 173 | ((uint32_t)seg_raw[7] << 24); 174 | 175 | if (!check_kernel_addr(&base, NULL)) { 176 | return false; 177 | } 178 | 179 | *kernel_gs_base = base; 180 | 181 | return true; 182 | } 183 | 184 | // Handle 64-bit x86. 185 | 186 | // This is called before a system call is made either by an int, syscall or 187 | // systenter instruction. So, the GS segment is not yet in kernel mode. 188 | // It's best to read the kernel_gs_base from MSR C0000102H (MSR_KERNELGSbase). 189 | // There might be a case where an int, or sth, is invoked from ring0, so the 190 | // MSR will contain a user-mode address. In such case the GS base itself should 191 | // be read instead. 192 | uint64_t base = pcpu->msr.kernelgsbase; 193 | if (check_kernel_addr(&base, NULL)) { 194 | *kernel_gs_base = base; 195 | return true; 196 | } 197 | 198 | // Maybe the current GS is already "in kernel mode"? 199 | base = pcpu->get_segment_base(BX_SEG_REG_GS); 200 | if (check_kernel_addr(&base, NULL)) { 201 | *kernel_gs_base = base; 202 | return true; 203 | } 204 | 205 | // No luck. 206 | return false; 207 | } 208 | 209 | bool fill_cid(BX_CPU_C *pcpu, client_id *cid) { 210 | // Get kernel GS base which points to PCPU structure. 211 | uint64_t kernel_gs_base; 212 | if (!get_kernel_gs_base(pcpu, &kernel_gs_base)) { 213 | return false; 214 | } 215 | 216 | // Fetch the data. 217 | uint32_t pid, tid; 218 | uint64_t addr_proc_struct; // This is unused later on. 219 | 220 | if (!get_proc_pid_gid(pcpu, kernel_gs_base, &addr_proc_struct, &pid, &tid)) { 221 | return false; 222 | } 223 | 224 | // Fill the structure. 225 | cid->process_id = pid; 226 | cid->thread_id = tid; 227 | 228 | return true; 229 | } 230 | 231 | bool fill_info(BX_CPU_C *pcpu, void *unused) { 232 | bx_address pc = globals::last_ld.pc(); 233 | 234 | // Get kernel GS base which points to PCPU structure. 
235 | uint64_t kernel_gs_base; 236 | if (!get_kernel_gs_base(pcpu, &kernel_gs_base)) { 237 | return false; 238 | } 239 | 240 | // Fetch the data. 241 | uint32_t pid, tid; 242 | uint64_t addr_proc_struct; // This is unused later on. 243 | 244 | if (!get_proc_pid_gid(pcpu, kernel_gs_base, &addr_proc_struct, 245 | &pid, &tid)) { 246 | return false; 247 | } 248 | 249 | globals::last_ld.set_process_id(pid); 250 | globals::last_ld.set_thread_id(tid); 251 | 252 | // Get the image file name. 253 | // Note: The conf_proc_p_comm_size vs MAX_PROC_COMM_LEN is checked in the 254 | // init() function. 255 | char name_buffer[MAX_PROC_COMM_LEN + 1] = {0}; 256 | if (!read_lin_mem(pcpu, addr_proc_struct + off_proc_p_comm, 257 | conf_proc_p_comm_size, name_buffer)) { 258 | return false; 259 | } 260 | globals::last_ld.set_image_file_name(name_buffer); 261 | 262 | // Get the thread create time. 263 | // TODO(gynvael): Check if this is possible. If not, just use proc address. 264 | // Btw, maybe a hash(addr_proc : addr_thread) would be better. 265 | globals::last_ld.set_create_time(addr_proc_struct); 266 | 267 | // Fill in the syscall count. 268 | thread_info& info = globals::thread_states[client_id(pid, tid)]; 269 | globals::last_ld.set_syscall_count(info.syscall_count); 270 | globals::last_ld.set_syscall_id(info.last_syscall_id); 271 | 272 | // Should the last_ret_addr be injected after the IP/top entry in the callstack? 273 | bool inject_last_ret_addr = false; 274 | if ((pc >= copyin_addr && pc < copyin_addr_end) || 275 | (pc >= copyinstr_addr && pc < copyinstr_addr_end)) { 276 | inject_last_ret_addr = true; 277 | } 278 | 279 | // Set the call stack. 
280 | uint64_t ip = pc; 281 | uint64_t bp = pcpu->gen_reg[BX_64BIT_REG_RBP].rrx; 282 | 283 | for (unsigned int i = 0; i < globals::config.callstack_length && 284 | ip >= kernel_space_boundary && 285 | bp >= kernel_space_boundary; i++) { 286 | log_data_st::callstack_item *new_item = globals::last_ld.add_stack_trace(); 287 | 288 | if (ip >= kernel_start && ip <= kernel_end) { 289 | new_item->set_module_idx(0); 290 | new_item->set_relative_pc(ip - kernel_start); 291 | } else { 292 | new_item->set_module_idx(-1); 293 | new_item->set_relative_pc(ip); 294 | } 295 | 296 | // Inject? 297 | if (inject_last_ret_addr) { 298 | ip = info.last_ret_addr; 299 | inject_last_ret_addr = false; 300 | continue; 301 | } 302 | 303 | if (!bp || !read_lin_mem(pcpu, bp + guest_ptr_size, guest_ptr_size, &ip) || 304 | !read_lin_mem(pcpu, bp, guest_ptr_size, &bp)) { 305 | break; 306 | } 307 | } 308 | 309 | return true; 310 | } 311 | 312 | bool instr_before_execution(BX_CPU_C *pcpu, bxInstruction_c *i) { 313 | uint64_t rip = pcpu->prev_rip; 314 | 315 | // In not what we are looking for, just return. 316 | if (rip != copyin_addr && rip != copyinstr_addr) { 317 | return false; 318 | } 319 | 320 | // This is just after the call, so the return address is on the 321 | // top of the stack. 322 | uint64_t sp = pcpu->gen_reg[BX_64BIT_REG_RSP].rrx; 323 | uint64_t ret = 0; 324 | if (!read_lin_mem(pcpu, sp, guest_ptr_size, &ret)) { 325 | return false; 326 | } 327 | 328 | // Get kernel GS base which points to PCPU structure. 329 | uint64_t kernel_gs_base; 330 | if (!get_kernel_gs_base(pcpu, &kernel_gs_base)) { 331 | return false; 332 | } 333 | 334 | // Need to get pid and tid. 335 | uint32_t pid, tid; 336 | uint64_t addr_proc_struct; // This is unused later on. 337 | 338 | if (!get_proc_pid_gid(pcpu, kernel_gs_base, &addr_proc_struct, 339 | &pid, &tid)) { 340 | return false; 341 | } 342 | 343 | // Mark this as the latest jump. 
344 | thread_info& info = globals::thread_states[client_id(pid, tid)]; 345 | info.last_ret_addr = ret; 346 | 347 | return true; 348 | } 349 | 350 | // ------------------------------------------------------------------ 351 | // Helper routines. 352 | // ------------------------------------------------------------------ 353 | static bool get_proc_pid_gid(BX_CPU_C *pcpu, uint64_t kernel_gs_base, uint64_t *addr_proc_struct, 354 | uint32_t *pid, uint32_t *tid) { 355 | // Get thread address. 356 | uint64_t thread_addr; 357 | if (!read_lin_mem(pcpu, kernel_gs_base, 358 | guest_ptr_size, &thread_addr)) { 359 | return false; 360 | } 361 | 362 | if (thread_addr < user_space_boundary) { 363 | return false; 364 | } 365 | 366 | // Get Thread ID. 367 | if (!read_lin_mem(pcpu, thread_addr + off_thread_td_pid, 4, tid)) { 368 | return false; 369 | } 370 | 371 | // Get proc structure address. 372 | if (!read_lin_mem(pcpu, thread_addr + off_thread_td_proc, guest_ptr_size, addr_proc_struct)) { 373 | return false; 374 | } 375 | 376 | if (*addr_proc_struct < user_space_boundary) { 377 | return false; 378 | } 379 | 380 | // Get Process ID. 381 | if (!read_lin_mem(pcpu, *addr_proc_struct + off_proc_p_pid, 4, pid)) { 382 | return false; 383 | } 384 | 385 | return true; 386 | } 387 | 388 | } // namespace freebsd 389 | 390 | -------------------------------------------------------------------------------- /instrumentation/os_freebsd.h: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | // Gynvael Coldwind (gynvael@google.com) 5 | // 6 | // Copyright 2013-2018 Google LLC 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 
10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 19 | // 20 | 21 | #ifndef BOCHSPWN_OS_FREEBSD_H_ 22 | #define BOCHSPWN_OS_FREEBSD_H_ 23 | 24 | #include 25 | 26 | #include "common.h" 27 | 28 | #ifndef MAX_PROC_COMM_LEN 29 | # define MAX_PROC_COMM_LEN 256 30 | #endif 31 | 32 | #ifndef MAX_MODULE_NAME_LEN 33 | # define MAX_MODULE_NAME_LEN 256 34 | #endif 35 | 36 | #ifndef MAX_MODULE_SIZE 37 | // 2MB to be safe, but this is quite excessive anyway. 38 | # define MAX_MODULE_SIZE (2 * 1024 * 1024) 39 | #endif 40 | 41 | namespace freebsd { 42 | 43 | // ------------------------------------------------------------------ 44 | // System events public interface. 
45 | // ------------------------------------------------------------------ 46 | bool init(const char *, void *); 47 | bool check_kernel_addr(uint64_t *, void *); 48 | bool check_user_addr(uint64_t *, void *); 49 | bool fill_cid(BX_CPU_C *, client_id *); 50 | bool fill_info(BX_CPU_C *, void *); 51 | bool instr_before_execution(BX_CPU_C *, bxInstruction_c *); 52 | 53 | } // namespace freebsd 54 | 55 | #endif // BOCHSPWN_OS_FREEBSD_H_ 56 | 57 | -------------------------------------------------------------------------------- /instrumentation/os_linux.cc: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | // Gynvael Coldwind (gynvael@google.com) 5 | // 6 | // Copyright 2013-2018 Google LLC 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 
19 | // 20 | 21 | #include "os_linux.h" 22 | 23 | #include 24 | #include 25 | 26 | #include "common.h" 27 | #include "events.h" 28 | #include "instrument.h" 29 | #include "logging.pb.h" 30 | 31 | namespace linux { 32 | 33 | // TODO(gynvael): move this to os_linux.h 34 | #ifndef MAX_TASK_COMM_LEN 35 | # define MAX_TASK_COMM_LEN 256 36 | #endif 37 | 38 | #ifndef MAX_MODULE_NAME_LEN 39 | # define MAX_MODULE_NAME_LEN 256 40 | #endif 41 | 42 | #ifndef MAX_MODULE_SIZE 43 | // 2MB to be safe, but this is quite excessive anyway. 44 | # define MAX_MODULE_SIZE (2 * 1024 * 1024) 45 | #endif 46 | 47 | uint32_t guest_ptr_size; 48 | uint64_t user_space_boundary; 49 | uint64_t kernel_space_boundary; 50 | 51 | // Read from config. 52 | uint32_t conf_thread_size; // THREAD_SIZE const in TODO(gynvael) 53 | uint32_t off_thread_info_task; 54 | uint32_t off_task_struct_pid; 55 | uint32_t off_task_struct_tgid; 56 | 57 | uint32_t conf_task_comm_len; // TASK_COMM_LEN const in /include/linux/sched.h 58 | uint32_t off_task_struct_comm; 59 | 60 | uint64_t addr_modules; 61 | uint32_t off_module_list; 62 | uint32_t off_module_name; 63 | uint32_t off_module_core; 64 | uint32_t off_module_core_size; 65 | uint32_t conf_module_name_len; // (64 - sizeof(unsigned long)) 66 | 67 | uint64_t kernel_start; 68 | uint64_t kernel_end; 69 | 70 | struct module_summary_st { 71 | uint64_t l_prev, l_next; 72 | uint64_t core_addr; 73 | uint32_t core_size; 74 | char name[MAX_MODULE_NAME_LEN]; 75 | }; 76 | 77 | // Helper routines. 78 | static bool get_task_struct_pid_gid(BX_CPU_C *pcpu, uint64_t rsp, uint64_t *addr_task_struct, 79 | uint32_t *tgid, uint32_t *pid); 80 | static bool fetch_module_info(BX_CPU_C *pcpu, uint64_t module_ptr, module_summary_st *m); 81 | static int update_module_list(BX_CPU_C *pcpu, uint64_t pc); 82 | 83 | bool init(const char *config_path, void *unused) { 84 | char buffer[256]; 85 | 86 | // Read Linux-specific configuration. 
87 | READ_INI_INT(config_path, globals::config.os_version, "thread_size", 88 | buffer, sizeof(buffer), &conf_thread_size); 89 | READ_INI_INT(config_path, globals::config.os_version, "thread_info_task", 90 | buffer, sizeof(buffer), &off_thread_info_task); 91 | READ_INI_INT(config_path, globals::config.os_version, "task_struct_pid", 92 | buffer, sizeof(buffer), &off_task_struct_pid); 93 | READ_INI_INT(config_path, globals::config.os_version, "task_struct_tgid", 94 | buffer, sizeof(buffer), &off_task_struct_tgid); 95 | READ_INI_INT(config_path, globals::config.os_version, "task_struct_comm", 96 | buffer, sizeof(buffer), &off_task_struct_comm); 97 | READ_INI_INT(config_path, globals::config.os_version, "task_comm_len", 98 | buffer, sizeof(buffer), &conf_task_comm_len); 99 | 100 | READ_INI_ULL(config_path, globals::config.os_version, "modules", 101 | buffer, sizeof(buffer), &addr_modules); 102 | READ_INI_INT(config_path, globals::config.os_version, "module_list", 103 | buffer, sizeof(buffer), &off_module_list); 104 | READ_INI_INT(config_path, globals::config.os_version, "module_name", 105 | buffer, sizeof(buffer), &off_module_name); 106 | READ_INI_INT(config_path, globals::config.os_version, "module_core", 107 | buffer, sizeof(buffer), &off_module_core); 108 | READ_INI_INT(config_path, globals::config.os_version, "module_core_size", 109 | buffer, sizeof(buffer), &off_module_core_size); 110 | READ_INI_INT(config_path, globals::config.os_version, "module_name_len", 111 | buffer, sizeof(buffer), &conf_module_name_len); 112 | 113 | READ_INI_ULL(config_path, globals::config.os_version, "kernel_start", 114 | buffer, sizeof(buffer), &kernel_start); 115 | READ_INI_ULL(config_path, globals::config.os_version, "kernel_end", 116 | buffer, sizeof(buffer), &kernel_end); 117 | 118 | // Put the kernel address and size in the special module list. 
119 | module_info *mi = new module_info(kernel_start, kernel_end - kernel_start, "kernel"); 120 | events::event_new_module(mi); 121 | 122 | // Check some assumptions. 123 | if (conf_task_comm_len >= MAX_TASK_COMM_LEN) { 124 | fprintf(stderr, 125 | "error: task_comm_len in config is larger than MAX_TASK_COMM_LEN;\n" 126 | " you can recompile with -DMAX_TASK_COMM_LEN=\n" 127 | " and try again\n"); 128 | abort(); 129 | } 130 | 131 | if (conf_module_name_len >= MAX_MODULE_NAME_LEN) { 132 | fprintf(stderr, 133 | "error: conf_module_name_len in config is larger than MAX_MODULE_NAME_LEN;\n" 134 | " you can recompile with -DMAX_MODULE_NAME_LEN=\n" 135 | " and try again\n"); 136 | abort(); 137 | } 138 | 139 | 140 | // Read the configuration specific to guest bitness. 141 | if (globals::config.bitness == 32) { 142 | guest_ptr_size = 4; 143 | // This depends on the kernel configuration options 144 | // (quote from Linux kernel - x86/Kconfig): 145 | // config PAGE_OFFSET 146 | // hex 147 | // default 0xB0000000 if VMSPLIT_3G_OPT 148 | // default 0x80000000 if VMSPLIT_2G 149 | // default 0x78000000 if VMSPLIT_2G_OPT 150 | // default 0x40000000 if VMSPLIT_1G 151 | // default 0xC0000000 152 | // depends on X86_32 153 | // We assume it's 0xC0000000. 154 | // 155 | // TODO(gynvael): Move this to config and fetch it in init(). 
156 | user_space_boundary = 0xC0000000; 157 | kernel_space_boundary = 0xC0000000; 158 | } else { 159 | guest_ptr_size = 8; 160 | user_space_boundary = 0x0000080000000000LL; 161 | kernel_space_boundary = 0xffff800000000000LL; 162 | } 163 | 164 | return true; 165 | } 166 | 167 | bool check_kernel_addr(uint64_t *addr, void *unused) { 168 | if (guest_ptr_size == 4) { 169 | return (*addr >= 0xC0000000); 170 | } 171 | 172 | return (*addr >= 0xffff800000000000LL); 173 | } 174 | 175 | bool check_user_addr(uint64_t *addr, void *unused) { 176 | if (guest_ptr_size == 4) { 177 | return (*addr < 0xC0000000); 178 | } 179 | 180 | return (*addr < 0x0000080000000000LL); 181 | } 182 | 183 | bool fill_cid(BX_CPU_C *pcpu, client_id *cid) { 184 | // This is called before a system call is made either by an int, syscall or 185 | // systenter instruction. The ESP/RSP is not yet set to kernel mode, so 186 | // the first thing to do is to get the kernel-mode value. This can be done 187 | // in one of two ways: 188 | // (int way) from TSS+4 (either 4 or 8 bytes) 189 | // (sysenter way) from MSR IA32_SYSENTER_ESP 176h 190 | // The second way is the easiest. The problem would be only if Linux would 191 | // never register it. 192 | uint64_t kernel_rsp = 0; 193 | if (!read_lin_mem(pcpu, pcpu->tr.cache.u.segment.base + 4, guest_ptr_size, &kernel_rsp)) { 194 | return false; 195 | } 196 | 197 | // Fetch the data. 198 | uint32_t tgid, pid; 199 | uint64_t addr_task_struct; // This is later unused. 200 | 201 | // Note the kernel_rsp - 1 here - it's needed since the kernel_rsp 202 | // points past the end of the stack, and we later do a mask on it. 203 | // So we need to move the kernel_rsp into the stack itself for this 204 | // to work. 205 | if (!get_task_struct_pid_gid(pcpu, kernel_rsp - 1, &addr_task_struct, &tgid, &pid)) { 206 | return false; 207 | } 208 | 209 | // Fill the struct. 210 | cid->process_id = tgid; // Sic! Process ID is in tgid (thread group id). 211 | cid->thread_id = pid; // Sic! 
Thread ID is in pid. 212 | 213 | return true; 214 | } 215 | 216 | bool fill_info(BX_CPU_C *pcpu, void *unused) { 217 | bx_address pc = globals::last_ld.pc(); 218 | 219 | // Fetch task structure address, and pid and tgid fields. 220 | uint32_t tgid, pid; 221 | uint64_t addr_task_struct; 222 | 223 | if (!get_task_struct_pid_gid(pcpu, pcpu->gen_reg[BX_64BIT_REG_RBP].rrx, 224 | &addr_task_struct, &tgid, &pid)) { 225 | return false; 226 | } 227 | 228 | globals::last_ld.set_process_id(tgid); 229 | globals::last_ld.set_thread_id(pid); 230 | 231 | // Get the image file name. 232 | // Note: The task_comm_len vs MAX_TASK_COMM_LEN is checked in the 233 | // init() function. 234 | char name_buffer[MAX_TASK_COMM_LEN + 1] = {0}; 235 | if (!read_lin_mem(pcpu, addr_task_struct + off_task_struct_comm, 236 | conf_task_comm_len, name_buffer)) { 237 | return false; 238 | } 239 | globals::last_ld.set_image_file_name(name_buffer); 240 | 241 | // Get the thread create time. 242 | // Note: It seems linux kernel doesn't explicitly store the time, 243 | // but the time can be get from /proc/PID creation time - this might 244 | // be a little tricky from CPU level though. Will see. 245 | // Note2: address of task_struct or thread_info is good enough here btw. 246 | globals::last_ld.set_create_time(addr_task_struct); 247 | 248 | // Fill in the syscall cound. 249 | thread_info& info = globals::thread_states[client_id(tgid, pid)]; 250 | globals::last_ld.set_syscall_count(info.syscall_count); 251 | globals::last_ld.set_syscall_id(info.last_syscall_id); 252 | 253 | // Set the call stack. 254 | uint64_t ip = pc; 255 | uint64_t bp = pcpu->gen_reg[BX_64BIT_REG_RBP].rrx; 256 | int mod_idx = -1; 257 | module_info *mi = NULL; 258 | 259 | for (unsigned int i = 0; i < globals::config.callstack_length && 260 | ip >= kernel_space_boundary && 261 | bp >= kernel_space_boundary; i++) { 262 | // Optimization: check last module first. 
263 | if (!mi || mi->module_base > ip || mi->module_base + mi->module_size <= ip) { 264 | mod_idx = find_module(ip); 265 | if (mod_idx == -1) { 266 | mod_idx = update_module_list(pcpu, ip); 267 | } 268 | 269 | if (mod_idx != -1) { 270 | mi = globals::modules[mod_idx]; 271 | } else { 272 | mi = NULL; 273 | } 274 | } 275 | 276 | log_data_st::callstack_item *new_item = globals::last_ld.add_stack_trace(); 277 | 278 | new_item->set_module_idx(mod_idx); 279 | if (mi) { 280 | new_item->set_relative_pc(ip - mi->module_base); 281 | } else { 282 | new_item->set_relative_pc(ip); 283 | } 284 | 285 | if (!bp || !read_lin_mem(pcpu, bp + guest_ptr_size, guest_ptr_size, &ip) || 286 | !read_lin_mem(pcpu, bp, guest_ptr_size, &bp)) { 287 | break; 288 | } 289 | } 290 | 291 | return true; 292 | } 293 | 294 | // ------------------------------------------------------------------ 295 | // Helper routines. 296 | // ------------------------------------------------------------------ 297 | 298 | // Traverse the kernel module list to get the information about the 299 | // driver that the "pc" is in. 300 | static int update_module_list(BX_CPU_C *pcpu, uint64_t pc) { 301 | // Get the address of the beginning of the list. 302 | uint64_t modules_start; 303 | if (!read_lin_mem(pcpu, addr_modules, guest_ptr_size, &modules_start)) { 304 | // It may be not yet loaded. 305 | return -1; 306 | } 307 | 308 | // Traverse the list. 309 | uint64_t pm = modules_start; 310 | for (;;) { 311 | // Fetch the module info. 312 | module_summary_st m; 313 | bool ret = fetch_module_info(pcpu, pm, &m); 314 | if (!ret) { 315 | break; 316 | } 317 | 318 | // Is this it? 319 | if (pc >= m.core_addr && pc < m.core_addr + m.core_size) { 320 | // Yes. We found it! 321 | module_info *mi = new module_info(m.core_addr, m.core_size, m.name); 322 | events::event_new_module(mi); 323 | return globals::modules.size() - 1; 324 | } 325 | 326 | // Iteratre. 327 | // TODO(gynvael): Check the actual terminator. 
328 | pm = m.l_next; 329 | if (pm == addr_modules || pm == 0 || pm == modules_start) { 330 | break; 331 | } 332 | } 333 | 334 | // Not found. 335 | return -1; 336 | } 337 | 338 | // Note: This expects module_ptr to be passed without offset correction. 339 | // The correction will be made in this function. 340 | static bool fetch_module_info(BX_CPU_C *pcpu, uint64_t module_ptr, module_summary_st *m) { 341 | 342 | // Correct offset. 343 | module_ptr -= off_module_list; 344 | if (module_ptr < kernel_space_boundary) { 345 | return false; 346 | } 347 | 348 | // Clear the summary. 349 | memset(m, 0, sizeof(module_summary_st)); 350 | 351 | // Try to fetch name. 352 | if (!read_lin_mem(pcpu, module_ptr + off_module_name, 353 | conf_module_name_len, m->name)) { 354 | return false; 355 | } 356 | 357 | // Fetch list pointers in one read. 358 | unsigned char temp_buffer[2 * 8]; // At most: two 64-bit pointers. 359 | if (!read_lin_mem(pcpu, module_ptr + off_module_list, 360 | 2 * guest_ptr_size, temp_buffer)) { 361 | return false; 362 | } 363 | 364 | if (guest_ptr_size == 4) { 365 | m->l_next = *(uint32_t*)(temp_buffer + 0); 366 | m->l_prev = *(uint32_t*)(temp_buffer + 4); 367 | } else { 368 | m->l_next = *(uint64_t*)(temp_buffer + 0); 369 | m->l_prev = *(uint64_t*)(temp_buffer + 8); 370 | } 371 | 372 | // Check sanity of these pointers. If they are not sane, something's wrong. 373 | if (m->l_next < kernel_space_boundary || 374 | m->l_prev < kernel_space_boundary) { 375 | return false; 376 | } 377 | 378 | // Get module address and size in the kernel memory space. 379 | if (!read_lin_mem(pcpu, module_ptr + off_module_core, guest_ptr_size, &m->core_addr) || 380 | !read_lin_mem(pcpu, module_ptr + off_module_core_size, 4, &m->core_size)) { 381 | return false; 382 | } 383 | 384 | // Check sanity of both core address and size. 
385 | if (m->core_addr < kernel_space_boundary || 386 | m->core_size > MAX_MODULE_SIZE) { 387 | return false; 388 | } 389 | 390 | return true; 391 | } 392 | 393 | static bool get_task_struct_pid_gid(BX_CPU_C *pcpu, uint64_t rsp, uint64_t *addr_task_struct, 394 | uint32_t *tgid, uint32_t *pid) { 395 | // Dervie the thread_info address from the kernel stack address. 396 | // It is found at the beginning of the stack area. 397 | // Note: this is the exact method this used in the kernel. 398 | uint64_t addr_thread_info = rsp; 399 | addr_thread_info &= ~((uint64_t)conf_thread_size - 1); 400 | 401 | // The thread_info structure should be in kernel memory. 402 | if (addr_thread_info < user_space_boundary) { 403 | return false; 404 | } 405 | 406 | // Get the task_struct address. 407 | if (!read_lin_mem(pcpu, addr_thread_info + off_thread_info_task, 408 | guest_ptr_size, addr_task_struct)) { 409 | return false; 410 | } 411 | 412 | // Check the address. 413 | if (*addr_task_struct < user_space_boundary) { 414 | return false; 415 | } 416 | 417 | // Fetch the process ID (tgid) and thread ID (pid; sic). 418 | // Note: We're assuming sizeof(pid_t) is always 4 bytes - this might 419 | // not be futureproof. 
420 | if (!read_lin_mem(pcpu, *addr_task_struct + off_task_struct_tgid, 4, tgid) || 421 | !read_lin_mem(pcpu, *addr_task_struct + off_task_struct_pid, 4, pid)) { 422 | return false; 423 | } 424 | 425 | return true; 426 | } 427 | 428 | } // namespace linux 429 | 430 | -------------------------------------------------------------------------------- /instrumentation/os_linux.h: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | // Gynvael Coldwind (gynvael@google.com) 5 | // 6 | // Copyright 2013-2018 Google LLC 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 19 | // 20 | 21 | #ifndef BOCHSPWN_OS_LINUX_H_ 22 | #define BOCHSPWN_OS_LINUX_H_ 23 | 24 | #include 25 | 26 | #include "common.h" 27 | 28 | namespace linux { 29 | 30 | // ------------------------------------------------------------------ 31 | // System events public interface. 
32 | // ------------------------------------------------------------------ 33 | bool init(const char *, void *); 34 | bool check_kernel_addr(uint64_t *, void *); 35 | bool check_user_addr(uint64_t *, void *); 36 | bool fill_cid(BX_CPU_C *, client_id *); 37 | bool fill_info(BX_CPU_C *, void *); 38 | 39 | } // namespace linux 40 | 41 | #endif // BOCHSPWN_OS_LINUX_H_ 42 | 43 | -------------------------------------------------------------------------------- /instrumentation/os_openbsd.cc: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | // Gynvael Coldwind (gynvael@google.com) 5 | // 6 | // Copyright 2013-2018 Google LLC 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 
19 | // 20 | 21 | #include "os_openbsd.h" 22 | 23 | #include 24 | #include 25 | 26 | #include "common.h" 27 | #include "events.h" 28 | #include "instrument.h" 29 | #include "logging.pb.h" 30 | 31 | namespace openbsd { 32 | 33 | uint32_t guest_ptr_size; 34 | uint64_t user_space_boundary; 35 | uint64_t kernel_space_boundary; 36 | 37 | uint32_t off_cpu_info_ci_curproc; 38 | uint32_t off_proc_p_addr; 39 | uint32_t off_proc_p_pid; 40 | uint32_t off_proc_p_comm; 41 | uint32_t conf_comm_size; // MAXCOMLEN +1 (17 usually) 42 | uint32_t off_proc_p_p; 43 | uint32_t off_process_ps_pgrp; 44 | uint32_t off_pgrp_id; 45 | 46 | // Supposedly there are no external kernel modules in OpenBSD. 47 | uint64_t kernel_start; 48 | uint64_t kernel_end; 49 | 50 | uint64_t copyin_addr; 51 | uint64_t copyinstr_addr; 52 | 53 | uint64_t copyin_addr_end; 54 | uint64_t copyinstr_addr_end; 55 | 56 | struct module_summary_st { 57 | uint64_t l_prev, l_next; 58 | uint64_t core_addr; 59 | uint32_t core_size; 60 | char name[256]; 61 | }; 62 | 63 | // Helper routines. 64 | static bool get_kernel_gs_base(BX_CPU_C *pcpu, uint64_t *kernel_gs_base); 65 | static bool get_proc_pgid_pid(BX_CPU_C *pcpu, uint64_t kernel_gs_base, 66 | uint64_t *addr_proc_struct, uint32_t *pgid, uint32_t *pid); 67 | 68 | bool init(const char *config_path, void *unused) { 69 | char buffer[256]; 70 | 71 | // Read OpenBSD-specific configuration. 
72 | READ_INI_INT(config_path, globals::config.os_version, "cpu_info_ci_curproc", 73 | buffer, sizeof(buffer), &off_cpu_info_ci_curproc); 74 | READ_INI_INT(config_path, globals::config.os_version, "proc_p_addr", 75 | buffer, sizeof(buffer), &off_proc_p_addr); 76 | READ_INI_INT(config_path, globals::config.os_version, "proc_p_pid", 77 | buffer, sizeof(buffer), &off_proc_p_pid); 78 | READ_INI_INT(config_path, globals::config.os_version, "proc_p_comm", 79 | buffer, sizeof(buffer), &off_proc_p_comm); 80 | READ_INI_INT(config_path, globals::config.os_version, "comm_size", 81 | buffer, sizeof(buffer), &conf_comm_size); 82 | 83 | READ_INI_INT(config_path, globals::config.os_version, "proc_p_p", 84 | buffer, sizeof(buffer), &off_proc_p_p); 85 | READ_INI_INT(config_path, globals::config.os_version, "process_ps_pgrp", 86 | buffer, sizeof(buffer), &off_process_ps_pgrp); 87 | READ_INI_INT(config_path, globals::config.os_version, "pgrp_id", 88 | buffer, sizeof(buffer), &off_pgrp_id); 89 | 90 | READ_INI_ULL(config_path, globals::config.os_version, "kernel_start", 91 | buffer, sizeof(buffer), &kernel_start); 92 | READ_INI_ULL(config_path, globals::config.os_version, "kernel_end", 93 | buffer, sizeof(buffer), &kernel_end); 94 | 95 | READ_INI_ULL(config_path, globals::config.os_version, "copyin", 96 | buffer, sizeof(buffer), ©in_addr); 97 | READ_INI_ULL(config_path, globals::config.os_version, "copyinstr", 98 | buffer, sizeof(buffer), ©instr_addr); 99 | 100 | READ_INI_ULL(config_path, globals::config.os_version, "copyin_end", 101 | buffer, sizeof(buffer), ©in_addr_end); 102 | READ_INI_ULL(config_path, globals::config.os_version, "copyinstr_end", 103 | buffer, sizeof(buffer), ©instr_addr_end); 104 | 105 | // If addresses of these functions are given, then enable grabbing the RET 106 | // address when they are called. 
107 | if (copyin_addr != 0 || copyinstr_addr != 0) { 108 | globals::has_instr_before_execution_handler = true; 109 | } 110 | 111 | // Put the kernel address and size in the special module list. 112 | module_info *mi = new module_info(kernel_start, kernel_end - kernel_start, "kernel"); 113 | events::event_new_module(mi); 114 | 115 | // Check some assumptions. 116 | if (conf_comm_size >= MAX_PROC_COMM_LEN) { 117 | fprintf(stderr, 118 | "error: conf_proc_p_comm_size in config is larger than MAX_PROC_COMM_LEN;\n" 119 | " you can recompile with -DMAX_PROC_COMM_LEN=\n" 120 | " and try again.\n"); 121 | abort(); 122 | } 123 | 124 | // Read the configuration specific to guest bitness. 125 | if (globals::config.bitness == 32) { 126 | /*guest_ptr_size = 4; 127 | user_space_boundary = 0xC0000000; 128 | kernel_space_boundary = 0xC0000000;*/ 129 | fprintf(stderr, "error: 32-bit OpenBSD support is not supported.\n"); 130 | abort(); 131 | } else { 132 | guest_ptr_size = 8; 133 | user_space_boundary = 0x0000080000000000LL; 134 | kernel_space_boundary = 0xffff800000000000LL; 135 | } 136 | 137 | return true; 138 | } 139 | 140 | bool check_kernel_addr(uint64_t *addr, void *unused) { 141 | if (guest_ptr_size == 4) { 142 | return (*addr >= 0xC0000000); 143 | } 144 | 145 | return (*addr >= 0xffff800000000000LL); 146 | } 147 | 148 | bool check_user_addr(uint64_t *addr, void *unused) { 149 | if (guest_ptr_size == 4) { 150 | return (*addr < 0xC0000000); 151 | } 152 | 153 | return (*addr < 0x0000080000000000LL); 154 | } 155 | 156 | bool fill_cid(BX_CPU_C *pcpu, client_id *cid) { 157 | // Get kernel GS base which points to PCPU structure. 158 | uint64_t kernel_gs_base; 159 | if (!get_kernel_gs_base(pcpu, &kernel_gs_base)) { 160 | return false; 161 | } 162 | 163 | // Fetch the data. 164 | uint32_t pgid, pid; // Process group ID, process (thread) ID. 165 | uint64_t addr_proc_struct; // This is unused later on. 
166 | 167 | if (!get_proc_pgid_pid(pcpu, kernel_gs_base, &addr_proc_struct, &pgid, &pid)) { 168 | return false; 169 | } 170 | 171 | // Fill the struct. 172 | cid->process_id = pgid; 173 | cid->thread_id = pid; 174 | 175 | return true; 176 | } 177 | // Populates globals::last_ld with the process/thread IDs, image name, syscall counters and kernel call stack for the current access; returns false if the required guest data cannot be read. 178 | bool fill_info(BX_CPU_C *pcpu, void *unused) { 179 | bx_address pc = globals::last_ld.pc(); 180 | 181 | // Get kernel GS base which points to PCPU structure. 182 | uint64_t kernel_gs_base; 183 | if (!get_kernel_gs_base(pcpu, &kernel_gs_base)) { 184 | return false; 185 | } 186 | 187 | // Fetch the data. 188 | uint32_t pgid, pid; 189 | uint64_t addr_proc_struct; 190 | 191 | if (!get_proc_pgid_pid(pcpu, kernel_gs_base, &addr_proc_struct, &pgid, &pid)) { 192 | return false; 193 | } 194 | 195 | globals::last_ld.set_process_id(pgid); 196 | globals::last_ld.set_thread_id(pid); 197 | 198 | // Get the image file name. 199 | // Note: The conf_proc_p_comm_size vs MAX_PROC_COMM_LEN is checked in the 200 | // init() function. 201 | char name_buffer[MAX_PROC_COMM_LEN + 1] = {0}; 202 | if (!read_lin_mem(pcpu, addr_proc_struct + off_proc_p_comm, conf_comm_size, name_buffer)) { 203 | return false; 204 | } 205 | globals::last_ld.set_image_file_name(name_buffer); 206 | 207 | // Get the thread create time. 208 | // TODO(gynvael): Check if this is possible. If not, just use proc address. 209 | // Btw, maybe a hash(addr_proc : addr_thread) would be better. 210 | globals::last_ld.set_create_time(addr_proc_struct); 211 | 212 | // Fill in the syscall count. 213 | thread_info& info = globals::thread_states[client_id(pgid, pid)]; 214 | globals::last_ld.set_syscall_count(info.syscall_count); 215 | globals::last_ld.set_syscall_id(info.last_syscall_id); 216 | 217 | // Should the last_ret_addr be injected after the IP/top entry in the callstack? 
218 | bool inject_last_ret_addr = false; 219 | if ((pc >= copyin_addr && pc < copyin_addr_end) || 220 | (pc >= copyinstr_addr && pc < copyinstr_addr_end)) { 221 | inject_last_ret_addr = true; 222 | } 223 | 224 | // Set the call stack. 225 | uint64_t ip = pc; 226 | uint64_t bp = pcpu->gen_reg[BX_64BIT_REG_RBP].rrx; 227 | 228 | for (unsigned int i = 0; i < globals::config.callstack_length && 229 | ip >= kernel_space_boundary && 230 | bp >= kernel_space_boundary; i++) { 231 | log_data_st::callstack_item *new_item = globals::last_ld.add_stack_trace(); 232 | // kernel_end is exclusive: the kernel module is registered with size kernel_end - kernel_start. 233 | if (ip >= kernel_start && ip < kernel_end) { 234 | new_item->set_module_idx(0); 235 | new_item->set_relative_pc(ip - kernel_start); 236 | } else { 237 | new_item->set_module_idx(-1); 238 | new_item->set_relative_pc(ip); 239 | } 240 | 241 | // Inject? 242 | if (inject_last_ret_addr) { 243 | ip = info.last_ret_addr; 244 | inject_last_ret_addr = false; 245 | continue; 246 | } 247 | 248 | if (!bp || !read_lin_mem(pcpu, bp + guest_ptr_size, guest_ptr_size, &ip) || 249 | !read_lin_mem(pcpu, bp, guest_ptr_size, &bp)) { 250 | break; 251 | } 252 | } 253 | 254 | return true; 255 | } 256 | 257 | bool instr_before_execution(BX_CPU_C *pcpu, bxInstruction_c *i) { 258 | uint64_t rip = pcpu->prev_rip; 259 | 260 | // If not what we are looking for, just return. 261 | if (rip != copyin_addr && rip != copyinstr_addr) { 262 | return false; 263 | } 264 | 265 | // This is just after the call, so the return address is on the 266 | // top of the stack. 267 | uint64_t sp = pcpu->gen_reg[BX_64BIT_REG_RSP].rrx; 268 | uint64_t ret = 0; 269 | if (!read_lin_mem(pcpu, sp, guest_ptr_size, &ret)) { 270 | return false; 271 | } 272 | 273 | // Get kernel GS base which points to PCPU structure. 274 | uint64_t kernel_gs_base; 275 | if (!get_kernel_gs_base(pcpu, &kernel_gs_base)) { 276 | return false; 277 | } 278 | 279 | // Need to get pgid and pid. 
280 | uint32_t pgid, pid; 281 | uint64_t addr_proc_struct; // This is unused later on. 
282 | 283 | if (!get_proc_pgid_pid(pcpu, kernel_gs_base, &addr_proc_struct, 284 | &pgid, &pid)) { 285 | return false; 286 | } 287 | 288 | // Mark this as the latest jump. 289 | thread_info& info = globals::thread_states[client_id(pgid, pid)]; 290 | info.last_ret_addr = ret; 291 | 292 | return true; 293 | } 294 | 295 | // ------------------------------------------------------------------ 296 | // Helper routines. 297 | // ------------------------------------------------------------------ 298 | static bool get_kernel_gs_base(BX_CPU_C *pcpu, uint64_t *kernel_gs_base) { 299 | // This is called before a system call is made either by an int, syscall or 300 | // sysenter instruction. So, the GS segment is not yet in kernel mode. 301 | // It's best to read the kernel_gs_base from MSR C0000102H (MSR_KERNELGSbase). 302 | // There might be a case where an int, or sth, is invoked from ring0, so the 303 | // MSR will contain a user-mode address. In such case the GS base itself should 304 | // be read instead. 305 | uint64_t base = pcpu->msr.kernelgsbase; 306 | if (check_kernel_addr(&base, NULL)) { 307 | *kernel_gs_base = base; 308 | return true; 309 | } 310 | 311 | // Maybe the current GS is already "in kernel mode"? 312 | base = pcpu->get_segment_base(BX_SEG_REG_GS); 313 | if (check_kernel_addr(&base, NULL)) { 314 | *kernel_gs_base = base; 315 | return true; 316 | } 317 | 318 | // No luck. 319 | return false; 320 | } 321 | // Reads the current proc address from cpu_info (at kernel_gs_base) and stores it in *addr_proc_struct, then reads 4 bytes at off_proc_p_pid into *pgid and 4 bytes at off_proc_p_addr into *pid. Returns false if any read fails or the proc address is below user_space_boundary. 322 | static bool get_proc_pgid_pid(BX_CPU_C *pcpu, uint64_t kernel_gs_base, 323 | uint64_t *addr_proc_struct, uint32_t *pgid, uint32_t *pid) { 324 | // Get proc address from cpu_info. 
325 | uint64_t proc_addr; 326 | if (!read_lin_mem(pcpu, kernel_gs_base + off_cpu_info_ci_curproc, guest_ptr_size, &proc_addr)) { 327 | return false; 328 | } 329 | 330 | if (proc_addr < user_space_boundary) { 331 | return false; 332 | } 333 | 334 | // Get Process ID. 
335 | if (!read_lin_mem(pcpu, proc_addr + off_proc_p_pid, 4, pgid)) { 336 | return false; 337 | } 338 | 339 | *addr_proc_struct = proc_addr; 340 | // `pid` is already the caller's output pointer, so it is passed as-is (not its address). 341 | // Get pid (or actually p_addr, which should be enough). 342 | if (!read_lin_mem(pcpu, proc_addr + off_proc_p_addr, 4, pid)) { 343 | return false; 344 | } 345 | 346 | return true; 347 | } 348 | 349 | } // namespace openbsd 350 | 351 | -------------------------------------------------------------------------------- /instrumentation/os_openbsd.h: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | // Gynvael Coldwind (gynvael@google.com) 5 | // 6 | // Copyright 2013-2018 Google LLC 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 19 | // 20 | 21 | #ifndef BOCHSPWN_OS_OPENBSD_H_ 22 | #define BOCHSPWN_OS_OPENBSD_H_ 23 | 24 | #include 25 | 26 | #include "common.h" 27 | 28 | #ifndef MAX_PROC_COMM_LEN 29 | # define MAX_PROC_COMM_LEN 256 30 | #endif 31 | 32 | namespace openbsd { 33 | 34 | // ------------------------------------------------------------------ 35 | // System events public interface. 
36 | // ------------------------------------------------------------------ 37 | bool init(const char *, void *); 38 | bool check_kernel_addr(uint64_t *, void *); 39 | bool check_user_addr(uint64_t *, void *); 40 | bool fill_cid(BX_CPU_C *, client_id *); 41 | bool fill_info(BX_CPU_C *, void *); 42 | bool instr_before_execution(BX_CPU_C *, bxInstruction_c *); 43 | 44 | } // namespace openbsd 45 | 46 | #endif // BOCHSPWN_OS_OPENBSD_H_ 47 | 48 | -------------------------------------------------------------------------------- /instrumentation/os_windows.cc: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | // Gynvael Coldwind (gynvael@google.com) 5 | // 6 | // Copyright 2013-2018 Google LLC 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 19 | // 20 | 21 | #include "os_windows.h" 22 | 23 | #include 24 | #include 25 | 26 | #include "common.h" 27 | #include "events.h" 28 | #include "instrument.h" 29 | #include "logging.pb.h" 30 | 31 | // ------------------------------------------------------------------ 32 | // Configuration data, for detailed information see os_windows.h. 
33 | // ------------------------------------------------------------------ 34 | namespace windows { 35 | 36 | uint32_t guest_ptr_size; 37 | uint64_t user_space_boundary; 38 | 39 | uint32_t off_kprcb; 40 | uint32_t off_current_thread; 41 | uint32_t off_tcb; 42 | uint32_t off_process; 43 | uint32_t off_client_id; 44 | uint32_t off_process_id; 45 | uint32_t off_thread_id; 46 | uint32_t off_create_time; 47 | uint32_t off_image_filename; 48 | uint32_t off_loadorder_flink; 49 | uint32_t off_basedllname; 50 | uint32_t off_baseaddress; 51 | uint32_t off_sizeofimage; 52 | uint32_t off_us_len; 53 | uint32_t off_us_buffer; 54 | uint32_t off_teb_cid; 55 | uint64_t off_psloadedmodulelist; 56 | uint32_t off_irql; 57 | uint32_t off_kdversionblock; 58 | uint32_t off_64bit_teb; 59 | uint32_t off_previous_mode; 60 | uint32_t off_exception_list; 61 | uint32_t off_next_exception; 62 | uint32_t off_try_level; 63 | 64 | // ------------------------------------------------------------------ 65 | // Public Windows-specific interface. 66 | // ------------------------------------------------------------------ 67 | bool init(const char *config_path, void *unused) { 68 | char buffer[256]; 69 | 70 | // Read generic Windows-specific configuration. 
71 | READ_INI_INT(config_path, globals::config.os_version, "kprcb", 72 | buffer, sizeof(buffer), &off_kprcb); 73 | READ_INI_INT(config_path, globals::config.os_version, "current_thread", 74 | buffer, sizeof(buffer), &off_current_thread); 75 | READ_INI_INT(config_path, globals::config.os_version, "tcb", 76 | buffer, sizeof(buffer), &off_tcb); 77 | READ_INI_INT(config_path, globals::config.os_version, "process", 78 | buffer, sizeof(buffer), &off_process); 79 | READ_INI_INT(config_path, globals::config.os_version, "client_id", 80 | buffer, sizeof(buffer), &off_client_id); 81 | READ_INI_INT(config_path, globals::config.os_version, "process_id", 82 | buffer, sizeof(buffer), &off_process_id); 83 | READ_INI_INT(config_path, globals::config.os_version, "thread_id", 84 | buffer, sizeof(buffer), &off_thread_id); 85 | READ_INI_INT(config_path, globals::config.os_version, "create_time", 86 | buffer, sizeof(buffer), &off_create_time); 87 | READ_INI_INT(config_path, globals::config.os_version, "image_filename", 88 | buffer, sizeof(buffer), &off_image_filename); 89 | READ_INI_INT(config_path, globals::config.os_version, "loadorder_flink", 90 | buffer, sizeof(buffer), &off_loadorder_flink); 91 | READ_INI_INT(config_path, globals::config.os_version, "basedllname", 92 | buffer, sizeof(buffer), &off_basedllname); 93 | READ_INI_INT(config_path, globals::config.os_version, "baseaddress", 94 | buffer, sizeof(buffer), &off_baseaddress); 95 | READ_INI_INT(config_path, globals::config.os_version, "sizeofimage", 96 | buffer, sizeof(buffer), &off_sizeofimage); 97 | READ_INI_INT(config_path, globals::config.os_version, "us_len", 98 | buffer, sizeof(buffer), &off_us_len); 99 | READ_INI_INT(config_path, globals::config.os_version, "us_buffer", 100 | buffer, sizeof(buffer), &off_us_buffer); 101 | READ_INI_INT(config_path, globals::config.os_version, "teb_cid", 102 | buffer, sizeof(buffer), &off_teb_cid); 103 | READ_INI_ULL(config_path, globals::config.os_version, "psloadedmodulelist", 104 | 
buffer, sizeof(buffer), &off_psloadedmodulelist); 105 | READ_INI_INT(config_path, globals::config.os_version, "irql", 106 | buffer, sizeof(buffer), &off_irql); 107 | READ_INI_INT(config_path, globals::config.os_version, "previous_mode", 108 | buffer, sizeof(buffer), &off_previous_mode); 109 | READ_INI_INT(config_path, globals::config.os_version, "exception_list", 110 | buffer, sizeof(buffer), &off_exception_list); 111 | READ_INI_INT(config_path, globals::config.os_version, "next_exception", 112 | buffer, sizeof(buffer), &off_next_exception); 113 | READ_INI_INT(config_path, globals::config.os_version, "try_level", 114 | buffer, sizeof(buffer), &off_try_level); 115 | 116 | // Read configuration specific to guest bitness. 117 | if (globals::config.bitness == 32) { 118 | guest_ptr_size = 4; 119 | user_space_boundary = 0x7ffff000; 120 | 121 | READ_INI_INT(config_path, globals::config.os_version, "kdversionblock", 122 | buffer, sizeof(buffer), &off_kdversionblock); 123 | } else { 124 | guest_ptr_size = 8; 125 | user_space_boundary = 0x000007fffffff000LL; 126 | 127 | READ_INI_INT(config_path, globals::config.os_version, "64bit_teb", 128 | buffer, sizeof(buffer), &off_64bit_teb); 129 | } 130 | 131 | return true; 132 | } 133 | 134 | bool check_kernel_addr(uint64_t *addr, void *unused) { 135 | if (guest_ptr_size == 4) { 136 | return (*addr >= 0x80000000); 137 | } 138 | 139 | return (*addr >= 0xfffff80000000000LL); 140 | } 141 | 142 | bool check_user_addr(uint64_t *addr, void *unused) { 143 | if (guest_ptr_size == 4) { 144 | return (*addr < 0x7e000000); 145 | } 146 | 147 | return (*addr < 0x000007ff00000000LL); 148 | } 149 | 150 | bool fill_cid(BX_CPU_C *pcpu, client_id *cid) { 151 | uint64_t addr_teb = 0; 152 | 153 | // Obtain Thread Environment Block address 154 | if (guest_ptr_size == 4) { 155 | addr_teb = pcpu->get_segment_base(BX_SEG_REG_FS); 156 | } else { 157 | addr_teb = pcpu->get_segment_base(BX_SEG_REG_GS); 158 | if (!read_lin_mem(pcpu, addr_teb + off_64bit_teb, 
guest_ptr_size, &addr_teb)) { 159 | return false; 160 | } 161 | } 162 | 163 | if (addr_teb >= user_space_boundary) { 164 | return false; 165 | } 166 | 167 | // Read thread-specific TID/PID. 168 | uint64_t addr_clientid = addr_teb + off_teb_cid; 169 | if (!read_lin_mem(pcpu, addr_clientid + off_process_id, guest_ptr_size, &cid->process_id) || 170 | !read_lin_mem(pcpu, addr_clientid + off_thread_id, guest_ptr_size, &cid->thread_id)) { 171 | return false; 172 | } 173 | 174 | return true; 175 | } 176 | // Collects process/thread IDs, image name, thread create time, call stack, PreviousMode and (32-bit guests only) SEH TryLevel values into globals::last_ld. Returns false when the access is filtered out (APC_MODE IRQL, PID 0 or 4) or guest memory cannot be read. 177 | bool fill_info(BX_CPU_C *pcpu, void *unused) { 178 | bx_address pc = globals::last_ld.pc(); 179 | 180 | // Get PCR address. 181 | uint64_t addr_kpcr = 0; 182 | if (globals::config.bitness == 32) { 183 | addr_kpcr = pcpu->get_segment_base(BX_SEG_REG_FS); 184 | } else { 185 | addr_kpcr = pcpu->get_segment_base(BX_SEG_REG_GS); 186 | } 187 | 188 | if (addr_kpcr < user_space_boundary) { 189 | return false; 190 | 191 | } 192 | 193 | // Verify that current IRQL is not APC_MODE, as apparently there are lots 194 | // of false positives in kernel-mode APC callbacks referencing user-mode 195 | // memory. 196 | uint8_t irql; 197 | if (!read_lin_mem(pcpu, addr_kpcr + off_irql, 1, &irql) || irql == APC_MODE) { 198 | return false; 199 | } 200 | 201 | uint64_t addr_kprcb = addr_kpcr + off_kprcb; 202 | uint64_t addr_ethread = 0; 203 | if (!read_lin_mem(pcpu, addr_kprcb + off_current_thread, guest_ptr_size, &addr_ethread)) { 204 | return false; 205 | } 206 | 207 | uint64_t addr_clientid = addr_ethread + off_client_id; 208 | uint64_t pid = 0, tid = 0; 209 | read_lin_mem(pcpu, addr_clientid + off_process_id, guest_ptr_size, &pid); 210 | read_lin_mem(pcpu, addr_clientid + off_thread_id, guest_ptr_size, &tid); 211 | 212 | // We are not interested in the "System" process. 
213 | if (pid == 0 || pid == 4) { 214 | return false; 215 | } 216 | 217 | globals::last_ld.set_process_id(pid); 218 | globals::last_ld.set_thread_id(tid); 219 | 220 | uint64_t addr_eprocess = 0; 221 | if (!read_lin_mem(pcpu, addr_ethread + off_tcb + off_process, guest_ptr_size, &addr_eprocess)) { 222 | return false; 223 | } 224 | 225 | static char image_file_name[16]; 226 | if (!read_lin_mem(pcpu, addr_eprocess + off_image_filename, 15, image_file_name)) { 227 | return false; 228 | } 229 | globals::last_ld.set_image_file_name(image_file_name); 230 | 231 | uint64_t create_time; 232 | if (!read_lin_mem(pcpu, addr_ethread + off_create_time, 8, &create_time)) { 233 | return false; 234 | } 235 | globals::last_ld.set_create_time(create_time); 236 | 237 | // Read the stack trace. 238 | uint64_t ip = pc; 239 | uint64_t bp = pcpu->gen_reg[BX_64BIT_REG_RBP].rrx; 240 | if (globals::config.bitness == 32) { 241 | int mod_idx = -1; 242 | module_info *mi = NULL; 243 | 244 | for (unsigned int i = 0; i < globals::config.callstack_length && 245 | ip >= user_space_boundary && 246 | bp >= user_space_boundary; i++) { 247 | // Optimization: check last module first. 
248 | if (!mi || mi->module_base > ip || mi->module_base + mi->module_size <= ip) { 249 | mod_idx = find_module(ip); 250 | if (mod_idx == -1) { 251 | mod_idx = update_module_list(pcpu, ip); 252 | } 253 | 254 | if (mod_idx != -1) { 255 | mi = globals::modules[mod_idx]; 256 | } else { 257 | mi = NULL; 258 | } 259 | } 260 | 261 | log_data_st::callstack_item *new_item = globals::last_ld.add_stack_trace(); 262 | 263 | new_item->set_module_idx(mod_idx); 264 | if (mi) { 265 | new_item->set_relative_pc(ip - mi->module_base); 266 | } else { // No owning module: record this frame's own absolute address, not the top-of-stack pc. 267 | new_item->set_relative_pc(ip); 268 | } 269 | new_item->set_stack_frame(bp); 270 | 271 | if (!bp || !read_lin_mem(pcpu, bp + guest_ptr_size, guest_ptr_size, &ip) || 272 | !read_lin_mem(pcpu, bp, guest_ptr_size, &bp)) { 273 | break; 274 | } 275 | } 276 | } else { 277 | int mod_idx = find_module(ip); 278 | if (mod_idx == -1) { 279 | mod_idx = update_module_list(pcpu, ip); 280 | } 281 | 282 | module_info *mi; 283 | if (mod_idx != -1) { 284 | mi = globals::modules[mod_idx]; 285 | } else { 286 | mi = NULL; 287 | } 288 | 289 | log_data_st::callstack_item *new_item = globals::last_ld.add_stack_trace(); 290 | 291 | new_item->set_module_idx(mod_idx); 292 | if (mi) { 293 | new_item->set_relative_pc(ip - mi->module_base); 294 | } else { 295 | new_item->set_relative_pc(ip); 296 | } 297 | new_item->set_stack_frame(bp); 298 | } 299 | 300 | // Read the PreviousMode byte from KTHREAD (ETHREAD). 301 | uint8_t previous_mode = 0; 302 | if (!read_lin_mem(pcpu, addr_ethread + off_previous_mode, 1, &previous_mode)) { 303 | return false; 304 | } 305 | globals::last_ld.set_previous_mode(previous_mode); 306 | 307 | // Read all TryLevel values residing in the chain of SEH exception handler 308 | // records. This only works on 32-bit builds of Windows, as exception handling 309 | // is designed completely differently on 64-bit platforms. 
310 | if (globals::config.bitness == 32) { 311 | uint32_t addr_exception_list = 0; 312 | if (!read_lin_mem(pcpu, addr_kpcr + off_exception_list, sizeof(uint32_t), &addr_exception_list)) { 313 | return false; 314 | } 315 | 316 | const int kMaxTryLevels = 16; 317 | for (int i = 0, callstack_idx = 0; 318 | i < kMaxTryLevels && callstack_idx < globals::last_ld.stack_trace_size(); 319 | i++) { 320 | uint32_t addr_next_exception = 0; 321 | if (!read_lin_mem(pcpu, addr_exception_list + off_next_exception, 322 | sizeof(uint32_t), &addr_next_exception)) { 323 | break; 324 | } 325 | 326 | uint32_t try_level = 0; 327 | if (!read_lin_mem(pcpu, addr_exception_list + off_try_level, sizeof(uint32_t), &try_level)) { 328 | break; 329 | } 330 | 331 | // Find the stack frame corresponding to the SEH record. 332 | while (callstack_idx < globals::last_ld.stack_trace_size() && 333 | globals::last_ld.stack_trace(callstack_idx).stack_frame() < addr_exception_list) { 334 | callstack_idx++; 335 | } 336 | 337 | // Save the TryLevel value, if the right stack frame was found. 338 | if (callstack_idx < globals::last_ld.stack_trace_size()) { 339 | globals::last_ld.mutable_stack_trace(callstack_idx)->set_try_level(try_level); 340 | } else { 341 | break; 342 | } 343 | 344 | // Detect the end of SEH chain. 345 | if (addr_next_exception == 0xFFFFFFFF) { 346 | break; 347 | } 348 | 349 | addr_exception_list = addr_next_exception; 350 | } 351 | } 352 | 353 | // Fill in the syscall count. 354 | thread_info& info = globals::thread_states[client_id(pid, tid)]; 355 | globals::last_ld.set_syscall_count(info.syscall_count); 356 | globals::last_ld.set_syscall_id(info.last_syscall_id); 357 | 358 | return true; 359 | } 360 | 361 | // ------------------------------------------------------------------ 362 | // Helper routines. 
363 | // ------------------------------------------------------------------ 364 | 365 | // Traverse the PsLoadedModuleList linked list of drivers in search of 366 | // one that contains the "pc" address. 367 | int update_module_list(BX_CPU_C *pcpu, bx_address pc) { 368 | uint64_t addr_module = 0; 369 | 370 | if (globals::config.bitness == 32) { 371 | uint64_t addr_kpcr = pcpu->get_segment_base(BX_SEG_REG_FS); 372 | if (addr_kpcr < user_space_boundary) { 373 | return -1; 374 | } 375 | 376 | uint64_t addr_dbg_block = 0; 377 | if (!read_lin_mem(pcpu, addr_kpcr + off_kdversionblock, guest_ptr_size, 378 | &addr_dbg_block) || addr_dbg_block < user_space_boundary) { 379 | return -1; 380 | } 381 | 382 | if (!read_lin_mem(pcpu, addr_dbg_block + off_psloadedmodulelist, guest_ptr_size, 383 | &addr_module) || addr_module < user_space_boundary) { 384 | return -1; 385 | } 386 | } else { 387 | uint64_t addr_kpcr = pcpu->get_segment_base(BX_SEG_REG_GS); 388 | if (addr_kpcr < user_space_boundary) { 389 | return -1; 390 | } 391 | 392 | if (!read_lin_mem(pcpu, addr_kpcr + off_psloadedmodulelist, guest_ptr_size, 393 | &addr_module)) { 394 | return -1; 395 | } 396 | } 397 | 398 | // Iterate through driver information found in the kernel memory. 399 | uint64_t addr_module_start = addr_module; 400 | for (;;) { 401 | // Grab the base and image size. 402 | uint64_t base = 0; 403 | uint32_t imagesize = 0; 404 | if (!read_lin_mem(pcpu, addr_module + off_baseaddress, guest_ptr_size, &base) || 405 | !read_lin_mem(pcpu, addr_module + off_sizeofimage, sizeof(uint32_t), &imagesize)) { 406 | return -1; 407 | } 408 | 409 | // If "pc" belongs to the executable, read image name and insert a 410 | // descriptor in global database. 
411 | if (imagesize != 0 && pc >= base && pc < base + imagesize) { 412 | uint16_t unicode_length = 0; 413 | uint64_t unicode_buffer = 0; 414 | 415 | if (!read_lin_mem(pcpu, addr_module + off_basedllname + off_us_len, 416 | sizeof(uint16_t), &unicode_length)) { 417 | return -1; 418 | } 419 | 420 | if (!read_lin_mem(pcpu, addr_module + off_basedllname + off_us_buffer, 421 | guest_ptr_size, &unicode_buffer)) { 422 | return -1; 423 | } 424 | 425 | if (unicode_length == 0 || unicode_buffer == 0) { 426 | return -1; 427 | } 428 | 429 | static uint16_t unicode_name[130] = {0}; 430 | unsigned to_fetch = unicode_length; 431 | if (to_fetch > 254) { 432 | to_fetch = 254; 433 | } 434 | 435 | if (!read_lin_mem(pcpu, unicode_buffer, to_fetch, &unicode_name)) { 436 | return -1; 437 | } 438 | 439 | size_t half_fetch = to_fetch / 2; // to_fetch in unicode characters. 440 | static char module_name[16]; 441 | for (unsigned i = 0; i < half_fetch && i < sizeof(module_name) - 1; i++) { 442 | module_name[i] = unicode_name[i]; 443 | } 444 | module_name[std::min(half_fetch, sizeof(module_name) - 1)] = '\0'; 445 | 446 | // Add to cache for future reference. 
447 | module_info *mi = new module_info(base, imagesize, module_name); 448 | events::event_new_module(mi); 449 | 450 | return globals::modules.size() - 1; 451 | } 452 | 453 | if (!read_lin_mem(pcpu, addr_module + off_loadorder_flink, guest_ptr_size, &addr_module) || 454 | addr_module < user_space_boundary || 455 | addr_module - off_loadorder_flink == addr_module_start) { 456 | return -1; 457 | } 458 | 459 | addr_module -= off_loadorder_flink; 460 | } 461 | 462 | return -1; 463 | } 464 | 465 | } // namespace windows 466 | 467 | -------------------------------------------------------------------------------- /instrumentation/os_windows.h: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | // Gynvael Coldwind (gynvael@google.com) 5 | // 6 | // Copyright 2013-2018 Google LLC 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 19 | // 20 | 21 | #ifndef BOCHSPWN_OS_WINDOWS_H_ 22 | #define BOCHSPWN_OS_WINDOWS_H_ 23 | 24 | #include 25 | 26 | #include "common.h" 27 | 28 | namespace windows { 29 | 30 | // ------------------------------------------------------------------ 31 | // Constants. 
32 | // ------------------------------------------------------------------ 33 | #define APC_MODE 1 34 | 35 | // ------------------------------------------------------------------ 36 | // System events public interface. 37 | // ------------------------------------------------------------------ 38 | bool init(const char *, void *); 39 | bool check_kernel_addr(uint64_t *, void *); 40 | bool check_user_addr(uint64_t *, void *); 41 | bool fill_cid(BX_CPU_C *, client_id *); 42 | bool fill_info(BX_CPU_C *, void *); 43 | 44 | // ------------------------------------------------------------------ 45 | // Helper routines. 46 | // ------------------------------------------------------------------ 47 | int update_module_list(BX_CPU_C *pcpu, bx_address pc); 48 | 49 | // ------------------------------------------------------------------ 50 | // Windows-specific offsets and information. 51 | // ------------------------------------------------------------------ 52 | extern uint32_t guest_ptr_size; // initialized based on bitness 53 | extern uint64_t user_space_boundary; // initialized based on bitness 54 | 55 | extern uint32_t off_kprcb; // in KPCR 56 | extern uint32_t off_current_thread; // in KPRCB 57 | extern uint32_t off_tcb; // in ETHREAD 58 | extern uint32_t off_process; // in TCB 59 | extern uint32_t off_client_id; // in ETHREAD 60 | extern uint32_t off_process_id; // in CLIENT_ID 61 | extern uint32_t off_thread_id; // in CLIENT_ID 62 | extern uint32_t off_create_time; // in ETHREAD 63 | extern uint32_t off_image_filename; // in EPROCESS 64 | extern uint32_t off_loadorder_flink; // in LDR_MODULE 65 | extern uint32_t off_basedllname; // in LDR_MODULE 66 | extern uint32_t off_baseaddress; // in LDR_MODULE 67 | extern uint32_t off_sizeofimage; // in LDR_MODULE 68 | extern uint32_t off_us_len; // in UNICODE_STRING 69 | extern uint32_t off_us_buffer; // in UNICODE_STRING 70 | extern uint32_t off_teb_cid; // in TEB 71 | extern uint32_t off_irql; // in KPCR 72 | // Note: this value 
has a different meaning between X86 and X64 73 | // architectures. 74 | // 75 | // On 32-bit Windows, it is the offset of the PsLoadedModuleList field against 76 | // the base of the DBGKD_GET_VERSION64 structure. 77 | // On 64-bit Windows, it is the offset of the global PsLoadedModuleList 78 | // symbol relative to the nt image base address. 79 | extern uint64_t off_psloadedmodulelist; 80 | 81 | // 32-bit only 82 | extern unsigned int off_kdversionblock; // in KPCR 83 | 84 | // 64-bit only 85 | extern unsigned int off_64bit_teb; // gs:[off_64bit_teb] == TEB 86 | 87 | } // namespace windows 88 | 89 | #endif // BOCHSPWN_OS_WINDOWS_H_ 90 | 91 | -------------------------------------------------------------------------------- /instrumentation/symbols.cc: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | // Gynvael Coldwind (gynvael@google.com) 5 | // 6 | // Copyright 2013-2018 Google LLC 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 
19 | // 20 | 21 | #include "symbols.h" 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include "DbgHelp.h" 29 | 30 | #include "common.h" 31 |
namespace symbols {

// Cache of modules seen so far, keyed by module name. An entry whose
// pdb_base is zero records a module for which symbols could not be loaded.
std::map<std::string, driver_sym *> known_modules;

namespace {

// Formats the unsymbolized fallback representation, "module+offset".
std::string raw_location(const std::string& module, uint32_t offset) {
  char text[256];
  snprintf(text, sizeof(text), "%s+%x", module.c_str(), offset);
  return std::string(text);
}

}  // namespace

// Translates (module, offset) into "module!symbol+displacement" using
// DbgHelp. Falls back to "module+offset" when the module parameters are
// unknown, when its .pdb cannot be loaded, or when no symbol covers the
// address. Load failures are remembered so they are reported only once.
std::string symbolize(const std::string& module, uint32_t offset) {
  static char pdb_path[256];
  uint64_t module_base;

  std::map<std::string, driver_sym *>::iterator entry = known_modules.find(module);
  if (entry != known_modules.end()) {
    // Seen before: either loading failed back then, or the base is cached.
    if (!entry->second->pdb_base) {
      return raw_location(module, offset);
    }
    module_base = entry->second->module_base;
  } else {
    // First encounter: construct a full path of the corresponding .pdb file
    // and try to load it.
    snprintf(pdb_path, sizeof(pdb_path), "%s\\%s.pdb", globals::config.symbol_path,
             strip_ext(module).c_str());

    uint32_t file_size;
    if (!get_file_params(module, &module_base, &file_size)) {
      fprintf(stderr, "Unable to find \"%s\" debug file\n", pdb_path);
      known_modules[module] = new driver_sym(0, 0);
      return raw_location(module, offset);
    }

    const uint64_t pdb_base =
        SymLoadModule64(GetCurrentProcess(), NULL, pdb_path, NULL, module_base, file_size);
    if (!pdb_base) {
      fprintf(stderr, "SymLoadModule64 failed, %lu\n", GetLastError());
      known_modules[module] = new driver_sym(0, 0);
      return raw_location(module, offset);
    }

    known_modules[module] = new driver_sym(pdb_base, module_base);
  }

  symbol_info_package sip;
  uint64_t displacement = 0;
  if (!SymFromAddr(GetCurrentProcess(), module_base + offset, &displacement, &sip.si)) {
    return raw_location(module, offset);
  }

  char text[256];
  snprintf(text, sizeof(text), "%s!%s+%.8llx", module.c_str(), sip.si.Name, displacement);
  return std::string(text);
}

// Enables DbgHelp debug output and initializes the symbol handler for the
// current process; aborts the program if the initialization fails.
void initialize() {
  SymSetOptions(SymGetOptions() | SYMOPT_DEBUG);

  if (SymInitialize(GetCurrentProcess(), NULL, FALSE)) {
    return;
  }

  fprintf(stderr, "SymInitialize() failed, %lu. Consider setting \"symbolize=0\" "
                  "in your configuration file.\n", GetLastError());
  abort();
}

// Unloads every cached module from DbgHelp and releases the cache entries.
void destroy() {
  std::map<std::string, driver_sym *>::iterator cur = known_modules.begin();
  while (cur != known_modules.end()) {
    driver_sym *sym = cur->second;
    SymUnloadModule64(GetCurrentProcess(), sym->pdb_base);
    delete sym;
    ++cur;
  }

  known_modules.clear();
}

// Returns file_name without its last ".extension" part; the name is
// returned unchanged when it contains no dot.
const std::string strip_ext(const std::string file_name) {
  const size_t dot = file_name.rfind('.');
  if (dot != std::string::npos) {
    return file_name.substr(0, dot);
  }

  return file_name;
}

// Looks the module up by name in globals::modules and stores its base
// address and size in the output arguments. Returns false if the module is
// not known.
bool get_file_params(const std::string& module, uint64_t *base_address, uint32_t *file_size) {
  const int idx = find_module_by_name(module);
  if (idx != -1) {
    const module_info *mi = globals::modules[idx];
    *base_address = mi->module_base;
    *file_size = mi->module_size;
    return true;
  }

  return false;
}

}  // namespace symbols
131 | 132 | -------------------------------------------------------------------------------- /instrumentation/symbols.h: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | // Gynvael Coldwind (gynvael@google.com) 5 | // 6 | // Copyright 2013-2018 Google LLC 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License.
10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 19 | // 20 | 21 | #ifndef BOCHSPWN_SYMBOLS_H_ 22 | #define BOCHSPWN_SYMBOLS_H_ 23 | 24 | #include 25 | #include "DbgHelp.h" 26 | 27 | #include 28 | #include 29 | #include 30 | 31 | #include "common.h" 32 | 33 | namespace symbols { 34 | 35 | // ------------------------------------------------------------------ 36 | // Structures. 37 | // ------------------------------------------------------------------ 38 | struct driver_sym { // Per-module symbol state cached in known_modules. 39 | uint64_t pdb_base; // Value returned by SymLoadModule64; 0 when symbols could not be loaded. 40 | uint64_t module_base; // Module base address that symbolize() adds offsets to. 41 | 42 | driver_sym(uint64_t pbase, uint64_t mbase) : pdb_base(pbase), module_base(mbase) {} 43 | }; 44 | 45 | struct symbol_info_package : public SYMBOL_INFO_PACKAGE { // Pre-initialized buffer for SymFromAddr(). 46 | symbol_info_package() { 47 | si.SizeOfStruct = sizeof(SYMBOL_INFO); // DbgHelp expects the caller to fill this in before the lookup. 48 | si.MaxNameLen = sizeof(name); // Size of the name buffer inherited from SYMBOL_INFO_PACKAGE. 49 | } 50 | }; 51 | 52 | // ------------------------------------------------------------------ 53 | // Public interface. 54 | // ------------------------------------------------------------------ 55 | std::string symbolize(const std::string& module, uint32_t offset); 56 | 57 | // ------------------------------------------------------------------ 58 | // Helper functions. 59 | // ------------------------------------------------------------------ 60 | void initialize(); 61 | void destroy(); 62 | 63 | const std::string strip_ext(const std::string file_name); 64 | 65 | bool get_file_params(const std::string& module, uint64_t *base_address, uint32_t *file_size); 66 | 67 | // ------------------------------------------------------------------ 68 | // Globals.
69 | // ------------------------------------------------------------------ 70 | extern std::map known_modules; 71 | 72 | } // namespace symbols 73 | 74 | #endif // BOCHSPWN_SYMBOLS_H_ 75 | 76 | -------------------------------------------------------------------------------- /third_party/instrumentation/Makefile.in: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2001 The Bochs Project 2 | # 3 | # Modified by Google LLC 4 | # Mateusz Jurczyk (mjurczyk@google.com) 5 | # and Gynvael Coldwind (gynvael@google.com) 6 | # 7 | # This library is free software; you can redistribute it and/or 8 | # modify it under the terms of the GNU Lesser General Public 9 | # License as published by the Free Software Foundation; either 10 | # version 2 of the License, or (at your option) any later version. 11 | # 12 | # This library is distributed in the hope that it will be useful, 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 | # Lesser General Public License for more details. 16 | # 17 | # You should have received a copy of the GNU Lesser General Public 18 | # License along with this library; if not, write to the Free Software 19 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 20 | # 21 | # NOTE: This is a standard Bochs instrument/stubs/Makefile.in. We 22 | # added a couple .o files so it works OK with Bochspwn. 23 | # -- Bochspwn authors 24 | 25 | 26 | @SUFFIX_LINE@ 27 | 28 | srcdir = @srcdir@ 29 | VPATH = @srcdir@ 30 | 31 | SHELL = @SHELL@ 32 | 33 | @SET_MAKE@ 34 | 35 | CC = @CC@ 36 | CFLAGS = -L. -L/usr/local/lib -Wall -Wno-pointer-arith @CFLAGS@ 37 | CXX = @CXX@ 38 | CXXFLAGS = -L. 
-L/usr/local/lib -Wall -Wno-pointer-arith @CXXFLAGS@ 39 | 40 | LDFLAGS = @LDFLAGS@ 41 | LIBS = -lprotobuf -ldbghelp @LIBS@ 42 | RANLIB = @RANLIB@ 43 | 44 | 45 | # =========================================================== 46 | # end of configurable options 47 | # =========================================================== 48 | 49 | 50 | BX_OBJS = \ 51 | instrument.o \ 52 | common.o \ 53 | invoke.o \ 54 | logging.pb.o \ 55 | events.o \ 56 | os_linux.o \ 57 | os_windows.o \ 58 | os_freebsd.o \ 59 | os_openbsd.o \ 60 | symbols.o \ 61 | mem_interface.o 62 | 63 | 64 | BX_INCLUDES = 65 | 66 | BX_INCDIRS = -I../.. -I$(srcdir)/../.. -I. -I$(srcdir)/. 67 | 68 | .@CPP_SUFFIX@.o: 69 | $(CXX) -c $(CXXFLAGS) $(BX_INCDIRS) @CXXFP@$< @OFP@$@ 70 | 71 | 72 | .c.o: 73 | $(CC) -c $(CFLAGS) $(BX_INCDIRS) @CFP@$< @OFP@$@ 74 | 75 | 76 | 77 | libinstrument.a: $(BX_OBJS) 78 | @RMCOMMAND@ libinstrument.a 79 | @MAKELIB@ $(BX_OBJS) 80 | $(RANLIB) libinstrument.a 81 | 82 | $(BX_OBJS): $(BX_INCLUDES) 83 | 84 | 85 | clean: 86 | @RMCOMMAND@ *.o 87 | @RMCOMMAND@ *.a 88 | 89 | dist-clean: clean 90 | @RMCOMMAND@ Makefile 91 | -------------------------------------------------------------------------------- /third_party/instrumentation/mem_interface.cc: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Copyright (C) 2001-2013 The Bochs Project 4 | // 5 | // Modified by Google LLC 6 | // Mateusz Jurczyk (mjurczyk@google.com) 7 | // and Gynvael Coldwind (gynvael@google.com) 8 | // 9 | // This library is free software; you can redistribute it and/or 10 | // modify it under the terms of the GNU Lesser General Public 11 | // License as published by the Free Software Foundation; either 12 | // version 2 of the License, or (at your option) any later version. 
13 | // 14 | // This library is distributed in the hope that it will be useful, 15 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 | // Lesser General Public License for more details. 18 | // 19 | // You should have received a copy of the GNU Lesser General Public 20 | // License along with this library; if not, write to the Free Software 21 | // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 22 | // 23 | // Modification note: 24 | // The code below is a modified version of bx_dbg_read_linear from Bochs 25 | // file \bx_debug\dbg_main.cc. 26 | // The interface was slightly modified by us. 27 | // 28 | ///////////////////////////////////////////////////////////////////////// 29 | 30 | #include "mem_interface.h" 31 |
// Function reads data from specified virtual memory address. Returns false
// on failure.
//
// Parameters:
//   pcpu  - virtual CPU whose address translation is used.
//   laddr - linear (virtual) address of the first byte to read.
//   len   - number of bytes to read.
//   buf   - destination buffer, at least "len" bytes long.
//
// The read is split at 4 KiB page boundaries because each page is translated
// to a physical address separately. If a later page cannot be translated or
// fetched, false is returned and the bytes copied for earlier pages remain
// in "buf".
bool read_lin_mem(BX_CPU_C *pcpu, bx_address laddr, unsigned len, void *buf) {
  // Typed cursor into the destination: advancing a void* is not valid ISO
  // C++ and only compiles as a compiler extension.
  Bit8u *dst = static_cast<Bit8u *>(buf);

  for (;;) {
    const unsigned remains_in_page = 0x1000 - PAGE_OFFSET(laddr);
    const unsigned read_len = (remains_in_page < len) ? remains_in_page : len;

    bx_phy_address paddr;
    if (!pcpu->dbg_xlate_linear2phy(laddr, &paddr)) {
      return false;
    }

    if (!BX_MEM(0)->dbg_fetch_mem(pcpu, paddr, read_len, dst)) {
      return false;
    }

    // The request ended inside the current page: everything has been read.
    if (remains_in_page >= len) {
      return true;
    }

    // Access spans multiple pages: continue with the next one.
    laddr += read_len;
    len -= read_len;
    dst += read_len;
  }
}
63 | 64 | -------------------------------------------------------------------------------- /third_party/instrumentation/mem_interface.h: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Copyright (C) 2001-2013 The Bochs Project 4 | // 5 | // Modified by Google LLC 6 | // Mateusz Jurczyk (mjurczyk@google.com) 7 | // and Gynvael Coldwind (gynvael@google.com) 8 | // 9 | // This library is free software; you can redistribute it and/or 10 | // modify it under the terms of the GNU Lesser General Public 11 | // License as published by the Free Software Foundation; either 12 | // version 2 of the License, or (at your option) any later version. 13 | // 14 | // This library is distributed in the hope that it will be useful, 15 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 | // Lesser General Public License for more details. 18 | // 19 | // You should have received a copy of the GNU Lesser General Public 20 | // License along with this library; if not, write to the Free Software 21 | // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 22 | // 23 | // Modification note: 24 | // This is basically a very narrow version of Bochs' /bx_debug/debug.h. 25 | // It's cut down to only one function.
26 | // 27 | ///////////////////////////////////////////////////////////////////////// 28 | 29 | #ifndef BOCHSPWN_MEM_INTERFACE_H_ 30 | #define BOCHSPWN_MEM_INTERFACE_H_ 31 | 32 | #include "bochs.h" 33 | #include "cpu/cpu.h" 34 | 35 | // Read linear memory. 36 | bool read_lin_mem(BX_CPU_C *pcpu, bx_address laddr, unsigned len, void *buf); 37 | 38 | #endif // BOCHSPWN_MEM_INTERFACE_H_ 39 | 40 | -------------------------------------------------------------------------------- /tools/Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # Authors: Mateusz Jurczyk (mjurczyk@google.com) 3 | # Gynvael Coldwind (gynvael@google.com) 4 | # 5 | # Copyright 2013-2018 Google LLC 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | CXX = g++ 21 | CXXFLAGS = -Wall -Wextra -O3 -Wno-format 22 | LDFLAGS = logging.pb.o common.o -lprotobuf 23 | PROTOC = protoc 24 | 25 | all: print stats doubleread separate unhandled_access count_callstack_depth count_excp_handlers no_cidll win32_symbolize 26 | 27 | common.o: common.cc common.h 28 | ${CXX} ${CXXFLAGS} -c $< -o $@ 29 | 30 | logging.pb.h logging.pb.cc: logging.proto 31 | ${PROTOC} logging.proto --cpp_out=. 
32 | 33 | logging.pb.o: logging.pb.cc logging.pb.h 34 | ${CXX} ${CXXFLAGS} -c logging.pb.cc -o $@ 35 | 36 | print: print.cc logging.pb.o common.o 37 | ${CXX} ${CXXFLAGS} $< -o $@ ${LDFLAGS} 38 | 39 | stats: stats.cc logging.pb.o common.o 40 | ${CXX} ${CXXFLAGS} $< -o $@ ${LDFLAGS} 41 | 42 | doubleread: doubleread.cc logging.pb.o common.o 43 | ${CXX} ${CXXFLAGS} $< -o $@ ${LDFLAGS} 44 | 45 | separate: separate.cc logging.pb.o common.o 46 | ${CXX} ${CXXFLAGS} $< -o $@ ${LDFLAGS} 47 | 48 | unhandled_access: unhandled_access.cc logging.pb.o common.o 49 | ${CXX} ${CXXFLAGS} $< -o $@ ${LDFLAGS} 50 | 51 | count_callstack_depth: count_callstack_depth.cc logging.pb.o common.o 52 | ${CXX} ${CXXFLAGS} $< -o $@ ${LDFLAGS} 53 | 54 | count_excp_handlers: count_excp_handlers.cc logging.pb.o common.o 55 | ${CXX} ${CXXFLAGS} $< -o $@ ${LDFLAGS} 56 | 57 | no_cidll: no_cidll.cc logging.pb.o common.o 58 | ${CXX} ${CXXFLAGS} $< -o $@ ${LDFLAGS} 59 | 60 | win32_symbolize: win32_symbolize.cc 61 | ${CXX} ${CXXFLAGS} $< -o $@ -ldbghelp -lpsapi 62 | 63 | clean: 64 | rm *.o 65 | rm logging.pb.* 66 | -------------------------------------------------------------------------------- /tools/common.cc: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | // Gynvael Coldwind (gynvael@google.com) 5 | // 6 | // Copyright 2013-2018 Google LLC 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 19 | // 20 | 21 | #include "common.h" 22 | 23 | #define __STDC_FORMAT_MACROS 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | #include "logging.pb.h" 32 | 33 | const char *translate_mem_access(log_data_st::mem_access_type type) { 34 | switch (type) { 35 | case log_data_st::MEM_READ: return "READ"; 36 | case log_data_st::MEM_WRITE: return "WRITE"; 37 | case log_data_st::MEM_EXEC: return "EXEC"; 38 | case log_data_st::MEM_RW: return "R/W"; 39 | } 40 | 41 | return "INVALID"; 42 | } 43 | 44 | bool LoadModuleList(const std::string& module_list_path, std::vector *module_list) { 45 | // Arbitrarily chosen maximum length of a module descriptor. 46 | const unsigned int kAssumedMaxLength = 128; 47 | static uint8_t buffer[kAssumedMaxLength]; 48 | 49 | module_list->clear(); 50 | 51 | FILE *f = fopen(module_list_path.c_str(), "rb"); 52 | if (f == NULL) { 53 | return false; 54 | } 55 | 56 | while (!feof(f)) { 57 | uint32_t size; 58 | if (fread(&size, sizeof(uint32_t), 1, f) != 1) { 59 | break; 60 | } 61 | 62 | if (size > kAssumedMaxLength) { 63 | fprintf(stderr, "Malformed protocol buffer of length %u encountered.\n", size); 64 | return false; 65 | } 66 | 67 | if (fread(buffer, sizeof(uint8_t), size, f) != size) { 68 | return false; 69 | } 70 | 71 | std::string protobuf; 72 | protobuf.assign((const char *)buffer, size); 73 | 74 | module_st module; 75 | if (!module.ParseFromString(protobuf)) { 76 | return false; 77 | } 78 | 79 | module_info mod_info = {module.base_addr(), module.size(), module.name()}; 80 | module_list->push_back(mod_info); 81 | } 82 | 83 | fclose(f); 84 | return true; 85 | } 86 | 87 | std::string LogDataAsText(const log_data_st& ld, const std::vector& modules) { 88 | char buffer[256]; 89 | std::string ret; 90 | 91 | snprintf(buffer, sizeof(buffer), 92 | "[pid/tid/ct: %.8x/%.8x/%.8x%.8x] {%16s} %.8x, %.8x: 
%s of %" PRIx64 " " 93 | "(%u * %u bytes), pc = %" PRIx64 " [ %40s ]\n", 94 | ld.process_id(), ld.thread_id(), 95 | (unsigned)(ld.create_time() >> 32), 96 | (unsigned)(ld.create_time()), 97 | ld.image_file_name().c_str(), 98 | (unsigned)ld.syscall_count(), 99 | (unsigned)ld.syscall_id(), 100 | translate_mem_access(ld.access_type()), 101 | ld.lin(), 102 | (unsigned)ld.repeated(), 103 | (unsigned)ld.len(), 104 | ld.pc(), 105 | ld.pc_disasm().c_str()); 106 | ret = buffer; 107 | 108 | if (ld.has_previous_mode()) { 109 | snprintf(buffer, sizeof(buffer), "[previous mode: %d]\n", ld.previous_mode()); 110 | ret += buffer; 111 | } 112 | 113 | for (int i = 0; i < ld.stack_trace_size(); i++) { 114 | int module_idx = ld.stack_trace(i).module_idx(); 115 | if (module_idx == -1) { 116 | snprintf(buffer, sizeof(buffer), " #%i 0x%" PRIx64 " (???"")", 117 | i, ld.stack_trace(i).relative_pc()); 118 | } else { 119 | assert((unsigned)module_idx < modules.size()); 120 | snprintf(buffer, sizeof(buffer), " #%i 0x%" PRIx64 " (%s+%.8x)", i, 121 | (modules[module_idx].base + ld.stack_trace(i).relative_pc()), 122 | modules[module_idx].name.c_str(), 123 | (unsigned)ld.stack_trace(i).relative_pc()); 124 | } 125 | ret += buffer; 126 | 127 | if (ld.stack_trace(i).has_try_level()) { 128 | uint32_t try_level = ld.stack_trace(i).try_level(); 129 | if (try_level == 0xFFFFFFFE) { 130 | snprintf(buffer, sizeof(buffer), " <===== SEH disabled"); 131 | } else { 132 | snprintf(buffer, sizeof(buffer), " <===== SEH enabled (#%u)", try_level); 133 | } 134 | ret += buffer; 135 | } 136 | 137 | ret += "\n"; 138 | } 139 | 140 | return ret; 141 | } 142 | 143 | log_data_st *LoadNextRecord(FILE *f, std::string *out_protobuf, log_data_st *ld) { 144 | // Arbitrarily chosen maximum length of a memory access descriptor. 145 | // Currently, the size is somewhere between 50 and 150. 
146 | const unsigned int kAssumedMaxLength = 1024; 147 | 148 | static uint8_t buffer[kAssumedMaxLength]; 149 | uint32_t size; 150 | 151 | if (fread(&size, sizeof(uint32_t), 1, f) != 1) { 152 | return NULL; 153 | } 154 | 155 | if (size > kAssumedMaxLength) { 156 | fprintf(stderr, "Malformed protocol buffer of length %u encountered.\n", size); 157 | return NULL; 158 | } 159 | 160 | if (fread(buffer, sizeof(uint8_t), size, f) != size) { 161 | return NULL; 162 | } 163 | 164 | std::string protobuf; 165 | protobuf.assign((const char *)buffer, size); 166 | 167 | log_data_st *new_ld; 168 | 169 | if (!ld) { 170 | new_ld = new log_data_st; 171 | } else { 172 | new_ld = ld; 173 | } 174 | 175 | if (!new_ld->ParseFromString(protobuf)) { 176 | fprintf(stderr, "ParseFromString failed\n"); 177 | delete new_ld; 178 | return NULL; 179 | } 180 | 181 | if (out_protobuf != NULL) { 182 | *out_protobuf = protobuf; 183 | } 184 | 185 | return new_ld; 186 | } 187 | -------------------------------------------------------------------------------- /tools/common.h: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | // Gynvael Coldwind (gynvael@google.com) 5 | // 6 | // Copyright 2013-2018 Google LLC 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 
19 | // 20 | 21 | #ifndef BOCHSPWN_COMMON_H_ 22 | #define BOCHSPWN_COMMON_H_ 23 | 24 | #include 25 | #include 26 | #include 27 | 28 | #include "logging.pb.h" 29 | 30 | struct module_info { 31 | uint64_t base; 32 | uint64_t size; 33 | std::string name; 34 | }; 35 | 36 | bool LoadModuleList(const std::string& module_list_path, std::vector *module_list); 37 | std::string LogDataAsText(const log_data_st& ld, const std::vector& modules); 38 | log_data_st *LoadNextRecord(FILE *f, std::string *out_protobuf, log_data_st *ld); 39 | 40 | #endif // BOCHSPWN_COMMON_H_ 41 | -------------------------------------------------------------------------------- /tools/count_callstack_depth.cc: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | // Gynvael Coldwind (gynvael@google.com) 5 | // 6 | // Copyright 2013-2018 Google LLC 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 
19 | // 20 | 21 | #define __STDC_FORMAT_MACROS 22 | #include 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | #include "common.h" 36 | #include "logging.pb.h" 37 | 38 | #ifndef MAX_PATH 39 | # define MAX_PATH 256 40 | #endif 41 | 42 | // Structure characterizes a stack trace by storing a list of absolute 43 | // addresses. Used to uniquely identify code paths. 44 | struct StackTrace { 45 | std::vector trace; 46 | 47 | bool operator< (const StackTrace& a) const { 48 | return (trace < a.trace); 49 | } 50 | bool operator!= (const StackTrace& a) const { 51 | return (trace != a.trace); 52 | } 53 | }; 54 | 55 | namespace globals { 56 | 57 | // A container of unique stack traces encountered by the tool so far. 58 | std::set unique_traces; 59 | 60 | } // namespace globals 61 | 62 | int main(int argc, const char **argv) { 63 | if (argc < 3) { 64 | fprintf(stderr, "Usage: %s /path/to/memory/logs /path/to/modules/list\n", argv[0]); 65 | return EXIT_SUCCESS; 66 | } 67 | 68 | const char *logs_path = argv[1]; 69 | const char *modules_list_path = argv[2]; 70 | 71 | DIR *dirp = opendir(logs_path); 72 | if (!dirp) { 73 | fprintf(stderr, "Unable to open the \"%s\" directory\n", logs_path); 74 | return EXIT_FAILURE; 75 | } 76 | 77 | // Load the module list. 78 | std::vector modules; 79 | if (!LoadModuleList(modules_list_path, &modules)) { 80 | fprintf(stderr, "Unable to load the module list from \"%s\".\n", modules_list_path); 81 | return EXIT_FAILURE; 82 | } 83 | 84 | unsigned int file_count = 1; 85 | uint64_t bytes_processed = 0; 86 | std::map callstack_depths; 87 | 88 | // List all files in the specified directory. 
89 | struct dirent *dp; 90 | while ((dp = readdir(dirp)) != NULL) { 91 | static char buffer[MAX_PATH]; 92 | 93 | if (!strcmp(dp->d_name, ".") || !strcmp(dp->d_name, "..")) { 94 | continue; 95 | } 96 | 97 | snprintf(buffer, MAX_PATH, "%s/%s", logs_path, dp->d_name); 98 | 99 | FILE *f = fopen(buffer, "rb"); 100 | if (!f) { 101 | fprintf(stderr, "Unable to load file \"%s\"\n", buffer); 102 | return EXIT_FAILURE; 103 | } 104 | 105 | fprintf(stderr, "[%.4u] Loaded file \"%s\" (%" PRIu64 " bytes processed)\n", 106 | file_count++, dp->d_name, bytes_processed); 107 | 108 | // Read records until the file is over. 109 | static log_data_st ld; 110 | while (LoadNextRecord(f, NULL, &ld)) { 111 | bytes_processed += ld.ByteSize(); 112 | 113 | if (ld.stack_trace_size() > 0) { 114 | int module_idx = ld.stack_trace(0).module_idx(); 115 | 116 | bool consistent = true; 117 | for (int i = 1; i < ld.stack_trace_size() - 1; i++) { 118 | if (ld.stack_trace(i).module_idx() != module_idx) { 119 | consistent = false; 120 | break; 121 | } 122 | } 123 | 124 | if (consistent) { 125 | StackTrace signature; 126 | 127 | for (int j = 0; j < ld.stack_trace_size(); j++) { 128 | int module_idx = ld.stack_trace(j).module_idx(); 129 | if (module_idx == -1) { 130 | signature.trace.push_back(ld.stack_trace(j).relative_pc()); 131 | } else { 132 | signature.trace.push_back(modules[module_idx].base + 133 | ld.stack_trace(j).relative_pc()); 134 | } 135 | } 136 | 137 | if (globals::unique_traces.find(signature) == globals::unique_traces.end()) { 138 | // Save information about the callstack depth. 139 | callstack_depths[ld.stack_trace_size()]++; 140 | 141 | // Save the signature to avoid duplicates in the future. 
142 | globals::unique_traces.insert(signature); 143 | } 144 | } 145 | } 146 | } 147 | 148 | fclose(f); 149 | } 150 | 151 | printf("--------------------------------------- Depths:\n"); 152 | for (const auto& it : callstack_depths) { 153 | printf("%.2d: %10llu\n", it.first, it.second); 154 | } 155 | 156 | return EXIT_SUCCESS; 157 | } 158 | -------------------------------------------------------------------------------- /tools/count_excp_handlers.cc: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | // Gynvael Coldwind (gynvael@google.com) 5 | // 6 | // Copyright 2013-2018 Google LLC 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 19 | // 20 | 21 | #define __STDC_FORMAT_MACROS 22 | #include 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | #include "common.h" 37 | #include "logging.pb.h" 38 | 39 | #ifndef MAX_PATH 40 | # define MAX_PATH 256 41 | #endif 42 | 43 | // Structure characterizes a stack trace by storing a list of absolute 44 | // addresses. Used to uniquely identify code paths. 
45 | struct StackTrace { 46 | std::vector trace; 47 | 48 | bool operator< (const StackTrace& a) const { 49 | return (trace < a.trace); 50 | } 51 | bool operator!= (const StackTrace& a) const { 52 | return (trace != a.trace); 53 | } 54 | }; 55 | 56 | namespace globals { 57 | 58 | // A container of unique stack traces encountered by the tool so far. 59 | std::set unique_traces; 60 | 61 | } // namespace globals 62 | 63 | int main(int argc, const char **argv) { 64 | if (argc < 3) { 65 | fprintf(stderr, "Usage: %s /path/to/memory/logs /path/to/modules/list\n", argv[0]); 66 | return EXIT_SUCCESS; 67 | } 68 | 69 | const char *logs_path = argv[1]; 70 | const char *modules_list_path = argv[2]; 71 | 72 | DIR *dirp = opendir(logs_path); 73 | if (!dirp) { 74 | fprintf(stderr, "Unable to open the \"%s\" directory\n", logs_path); 75 | return EXIT_FAILURE; 76 | } 77 | 78 | // Load the module list. 79 | std::vector modules; 80 | if (!LoadModuleList(modules_list_path, &modules)) { 81 | fprintf(stderr, "Unable to load the module list from \"%s\".\n", modules_list_path); 82 | return EXIT_FAILURE; 83 | } 84 | 85 | unsigned int file_count = 1; 86 | uint64_t bytes_processed = 0; 87 | std::map try_levels; 88 | 89 | // List all files in the specified directory. 90 | struct dirent *dp; 91 | while ((dp = readdir(dirp)) != NULL) { 92 | static char buffer[MAX_PATH]; 93 | 94 | if (!strcmp(dp->d_name, ".") || !strcmp(dp->d_name, "..")) { 95 | continue; 96 | } 97 | 98 | snprintf(buffer, MAX_PATH, "%s/%s", logs_path, dp->d_name); 99 | 100 | FILE *f = fopen(buffer, "rb"); 101 | if (!f) { 102 | fprintf(stderr, "Unable to load file \"%s\"\n", buffer); 103 | return EXIT_FAILURE; 104 | } 105 | 106 | fprintf(stderr, "[%.4u] Loaded file \"%s\" (%" PRIu64 " bytes processed)\n", 107 | file_count++, dp->d_name, bytes_processed); 108 | 109 | // Read records until the file is over. 
110 | static log_data_st ld; 111 | while (LoadNextRecord(f, NULL, &ld)) { 112 | bytes_processed += ld.ByteSize(); 113 | 114 | StackTrace signature; 115 | 116 | for (int j = 0; j < ld.stack_trace_size(); j++) { 117 | int module_idx = ld.stack_trace(j).module_idx(); 118 | if (module_idx == -1) { 119 | signature.trace.push_back(ld.stack_trace(j).relative_pc()); 120 | } else { 121 | signature.trace.push_back(modules[module_idx].base + 122 | ld.stack_trace(j).relative_pc()); 123 | } 124 | } 125 | 126 | if (globals::unique_traces.find(signature) == globals::unique_traces.end()) { 127 | int first_handler_idx = -1; 128 | for (int i = 0; i < ld.stack_trace_size(); i++) { 129 | if (ld.stack_trace(i).has_try_level() && ld.stack_trace(i).try_level() != 0xFFFFFFFE) { 130 | first_handler_idx = i; 131 | break; 132 | } 133 | } 134 | 135 | try_levels[first_handler_idx]++; 136 | 137 | // Save the signature to avoid duplicates in the future. 138 | globals::unique_traces.insert(signature); 139 | } 140 | } 141 | 142 | fclose(f); 143 | } 144 | 145 | printf("--------------------------------------- Depths:\n"); 146 | for (std::map::iterator it = try_levels.begin(); it != try_levels.end(); it++) { 147 | printf("%.2d: %10llu\n", it->first, it->second); 148 | } 149 | 150 | return EXIT_SUCCESS; 151 | } 152 | 153 | -------------------------------------------------------------------------------- /tools/doubleread.cc: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | // Gynvael Coldwind (gynvael@google.com) 5 | // 6 | // Copyright 2013-2018 Google LLC 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 
10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 19 | // 20 | 21 | #define __STDC_FORMAT_MACROS 22 | #include 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | #include "common.h" 37 | #include "logging.pb.h" 38 | 39 | #ifndef MAX_PATH 40 | # define MAX_PATH 256 41 | #endif 42 | 43 | // Structure characterizes a stack trace by storing a list of absolute 44 | // addresses. Used to uniquely identify code paths. 45 | struct StackTrace { 46 | std::vector trace; 47 | 48 | bool operator< (const StackTrace& a) const { 49 | return (trace < a.trace); 50 | } 51 | bool operator!= (const StackTrace& a) const { 52 | return (trace != a.trace); 53 | } 54 | }; 55 | 56 | namespace globals { 57 | 58 | // The STL structure stores information about double fetch candidates 59 | // already found by examining the logs. 60 | // 61 | // unique_mult_fetches.size() is equal to the number of potential 62 | // vulnerabilities found so far. The structure lives throughout the entire 63 | // lifespan of the tool. 64 | std::set > unique_mult_fetches; 65 | 66 | // The two STL structures store information about all memory accesses 67 | // encountered during the currently processed syscall (meaning that when 68 | // the next syscall in order is found in the logs, both structures are 69 | // wiped). 
70 | // 71 | // For memory_accesses[key] = {acc1, acc2, ...}, "key" is the absolute 72 | // virtual address being referenced, while {acc1, acc2, ...} is a list of 73 | // corresponding memory access descriptors. 74 | // 75 | // If memory_accesses[key].size() > 1 for any "key", it means that the 76 | // "key" address was referenced more than once, and therefore becomes a 77 | // double fetch candidate. 78 | // 79 | // The access_structs list is used for correct memory management: multiple 80 | // keys in memory_accesses can reference the same memory access descriptor, 81 | // so in order to clean up the allocation properly, we need to store a list 82 | // of unique log_data_st structures referenced by memory_accesses. 83 | std::map > memory_accesses; 84 | std::vector access_structs; 85 | 86 | // Output files are written on a per-kernel module basis. To avoid 87 | // opening/closing file handles once for every single double fetch, we 88 | // cache them in the log_file_handles structure. 89 | std::map log_file_handles; 90 | 91 | }; // namespace globals 92 | 93 | // The routine takes information about a multiple fetch, verifies it 94 | // against the currently known database of vulnerabilities and prints out 95 | // its details to an adequate log file, if the provided report is unique. 96 | // 97 | // Parameters: 98 | // * output_path - Contains a path to the output directory, where all 99 | // output log files are stored. 100 | // 101 | // * address - virtual address of the memory being referenced multiple 102 | // times. 103 | // 104 | // * accesses - a list of memory access descriptors, of size greater or 105 | // equal to 2 (otherwise, it wouldn't be a vulnerability). 106 | // 107 | void HandleMultipleFetch(const char *output_path, 108 | const std::vector& modules, 109 | uint64_t address, 110 | const std::vector& accesses) { 111 | // Some memory locations are referenced tens or hundreds of times within 112 | // a single syscall. 
In order to optimize CPU consumption, we assume that 113 | // a maximum of four first stack traces are enough to uniquely 114 | // characterize a multiple-fetch. 115 | const unsigned int kMaxMeaningfulAccesses = 4; 116 | // For the very same reason, we want to limit the number of memory 117 | // references printed out in the output logs. Otherwise, they become 118 | // blown away with records of >100 memory references. 119 | const unsigned int kMaxOutputAccesses = 4; 120 | 121 | // Create a signature of the multi-fetch report. 122 | std::vector signature; 123 | for (unsigned int i = 0; i < accesses.size() && 124 | i < kMaxMeaningfulAccesses; i++) { 125 | StackTrace local_trace; 126 | 127 | for (int j = 0; j < accesses[i]->stack_trace_size(); j++) { 128 | int module_idx = accesses[i]->stack_trace(j).module_idx(); 129 | if (module_idx == -1) { 130 | local_trace.trace.push_back(accesses[i]->stack_trace(j).relative_pc()); 131 | } else { 132 | local_trace.trace.push_back(modules[module_idx].base + 133 | accesses[i]->stack_trace(j).relative_pc()); 134 | } 135 | } 136 | signature.push_back(local_trace); 137 | } 138 | 139 | // See if the signature has already been observed, and if there's any 140 | // stack trace available for the first read. 141 | if (globals::unique_mult_fetches.find(signature) == globals::unique_mult_fetches.end() && 142 | accesses[0]->stack_trace_size() > 0) { 143 | int module_idx = accesses[0]->stack_trace(0).module_idx(); 144 | std::string module_name; 145 | 146 | if (module_idx == -1) { 147 | module_name = "unknown"; 148 | } else { 149 | module_name = modules[module_idx].name; 150 | } 151 | 152 | // If module was encountered for the first time, attempt to open a 153 | // corresponding output file. 
154 | if (!globals::log_file_handles[module_name]) { 155 | char full_path[MAX_PATH]; 156 | FILE *f; 157 | 158 | snprintf(full_path, MAX_PATH, "%s/%s", output_path, module_name.c_str()); 159 | 160 | f = fopen(full_path, "a+"); 161 | if (!f) { 162 | fprintf(stderr, "Unable to open output file \"%s\"\n", full_path); 163 | abort(); 164 | } else { 165 | globals::log_file_handles[module_name] = f; 166 | } 167 | } 168 | 169 | FILE *f = globals::log_file_handles[module_name]; 170 | assert(f != NULL); 171 | 172 | // Print out some detailed information regarding the double read. 173 | fprintf(f, "------------------------------ found double-read of address 0x%.8x%.8x\n", 174 | (unsigned)(address >> 32), (unsigned)(address)); 175 | 176 | for (unsigned int i = 0; i < accesses.size(); ) { 177 | if (i >= kMaxOutputAccesses) { 178 | fprintf(f, "[... %u more reads to follow ...]\n", (unsigned)(accesses.size() - i)); 179 | break; 180 | } 181 | 182 | unsigned int j; 183 | for (j = 1; i + j < signature.size(); j++) { 184 | if (signature[i] != signature[i + j]) break; 185 | } 186 | 187 | fprintf(f, "Read no. %u", i + 1); 188 | if (j > 1) { 189 | fprintf(f, " (X %u):\n", j); 190 | } else { 191 | fprintf(f, ":\n"); 192 | } 193 | fprintf(f, "%s\n", LogDataAsText(*accesses[i], modules).c_str()); 194 | 195 | i += j; 196 | } 197 | fflush(f); 198 | 199 | // Save the multifetch signature to avoid duplicates in the future. 
200 | globals::unique_mult_fetches.insert(signature); 201 | } 202 | } 203 | 204 | int main(int argc, const char **argv) { 205 | if (argc < 4) { 206 | fprintf(stderr, "Usage: %s /path/to/memory/logs /path/to/modules/list /path/to/output/logs\n", argv[0]); 207 | return EXIT_SUCCESS; 208 | } 209 | 210 | const char *logs_path = argv[1]; 211 | const char *modules_list_path = argv[2]; 212 | const char *output_path = argv[3]; 213 | 214 | DIR *dirp = opendir(logs_path); 215 | if (!dirp) { 216 | fprintf(stderr, "Unable to open the \"%s\" directory\n", logs_path); 217 | return EXIT_FAILURE; 218 | } 219 | 220 | // Load the module list. 221 | std::vector modules; 222 | if (!LoadModuleList(modules_list_path, &modules)) { 223 | fprintf(stderr, "Unable to load the module list from \"%s\".\n", modules_list_path); 224 | return EXIT_FAILURE; 225 | } 226 | 227 | unsigned int file_count = 1; 228 | uint32_t cur_syscall_count = (uint32_t)(-1); 229 | uint64_t bytes_processed = 0; 230 | 231 | // List all files in the specified directory. 232 | struct dirent *dp; 233 | while ((dp = readdir(dirp)) != NULL) { 234 | static char buffer[MAX_PATH]; 235 | 236 | if (!strcmp(dp->d_name, ".") || !strcmp(dp->d_name, "..")) { 237 | continue; 238 | } 239 | 240 | snprintf(buffer, MAX_PATH, "%s/%s", logs_path, dp->d_name); 241 | 242 | FILE *f = fopen(buffer, "rb"); 243 | if (!f) { 244 | fprintf(stderr, "Unable to load file \"%s\"\n", buffer); 245 | return EXIT_FAILURE; 246 | } 247 | 248 | fprintf(stderr, "[%.4u] Loaded file \"%s\" (%" PRIu64 " bytes processed)\n", 249 | file_count++, dp->d_name, bytes_processed); 250 | 251 | // Read records until the file is over, or we're out of memory. 
252 | static log_data_st ld; 253 | while (LoadNextRecord(f, NULL, &ld)) { 254 | bytes_processed += ld.ByteSize(); 255 | 256 | if (ld.syscall_count() != cur_syscall_count) { 257 | // We have a new syscall: go through all memory accesses encountered 258 | // during the last one and print out information about double fetches. 259 | for (const auto& it : globals::memory_accesses) { 260 | if (it.second.size() > 1) { 261 | HandleMultipleFetch(output_path, modules, it.first, it.second); 262 | } 263 | } 264 | 265 | // Free all old structures for the last syscall and cleanup information 266 | // about accessed addresses. 267 | for (auto access_struct : globals::access_structs) { 268 | delete access_struct; 269 | } 270 | 271 | globals::access_structs.clear(); 272 | globals::memory_accesses.clear(); 273 | 274 | // Remember the currently handled syscall count. 275 | cur_syscall_count = ld.syscall_count(); 276 | } 277 | 278 | // If syscall_count is 0 or the size of the memory access is 1 byte, ignore the entry. 279 | if (ld.syscall_count() == 0 || ld.len() < 2) { 280 | continue; 281 | } 282 | 283 | // Catch OOM exceptions and handle them gracefully. 284 | try { 285 | if (ld.access_type() == log_data_st::MEM_READ) { 286 | // If it's a READ, save information about it. 287 | log_data_st *new_ld = new log_data_st; 288 | *new_ld = ld; 289 | 290 | globals::access_structs.push_back(new_ld); 291 | 292 | // For atomic accesses, save only the base access address. For longer ones 293 | // (e.g. memcpy-like records), mark each single byte of the region as accessed 294 | // for further analysis. 
295 | if (ld.repeated() == 1) { 296 | globals::memory_accesses[ld.lin()].push_back(new_ld); 297 | } else { 298 | for (uint64_t i = 0; i < ld.len() * ld.repeated(); i++) { 299 | globals::memory_accesses[ld.lin() + i].push_back(new_ld); 300 | } 301 | } 302 | } else if (ld.access_type() == log_data_st::MEM_WRITE) { 303 | // If it's a WRITE, check if it's a part of an inlined 304 | // ProbeForWrite() call. 305 | if (!globals::access_structs.empty()) { 306 | log_data_st *last_ld = globals::access_structs.back(); 307 | 308 | if (last_ld->lin() == ld.lin() && 309 | last_ld->pc() >= ld.pc() - 8 && last_ld->pc() < ld.pc() && 310 | last_ld->repeated() == ld.repeated() && last_ld->repeated() == 1 && 311 | last_ld->len() == ld.len() && 312 | last_ld->access_type() == log_data_st::MEM_READ) { 313 | // All conditions for a ProbeForWrite() are met, remove the 314 | // last READ record. 315 | globals::memory_accesses[ld.lin()].pop_back(); 316 | 317 | delete last_ld; 318 | globals::access_structs.pop_back(); 319 | } 320 | } 321 | } 322 | } catch (std::bad_alloc& ba) { 323 | // Reset the current syscall count, which will cause the overall state to be reset. 324 | cur_syscall_count = (uint32_t)(-1); 325 | } 326 | } 327 | 328 | fclose(f); 329 | 330 | // Go through the list of memory accesses one last time. 331 | for (const auto& it : globals::memory_accesses) { 332 | if (it.second.size() > 1) { 333 | HandleMultipleFetch(output_path, modules, it.first, it.second); 334 | } 335 | } 336 | 337 | // Free all old structures for the last syscall and cleanup information 338 | // about accessed addresses. 
339 | for (auto access_struct : globals::access_structs) { 340 | delete access_struct; 341 | } 342 | 343 | globals::access_structs.clear(); 344 | globals::memory_accesses.clear(); 345 | cur_syscall_count = (uint32_t)(-1); 346 | } 347 | 348 | for (const auto& it : globals::log_file_handles) { 349 | fclose(it.second); 350 | } 351 | globals::log_file_handles.clear(); 352 | 353 | return EXIT_SUCCESS; 354 | } 355 | -------------------------------------------------------------------------------- /tools/linux_symbolize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # 3 | # Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | # Gynvael Coldwind (gynvael@google.com) 5 | # 6 | # Copyright 2013 Google Inc. All Rights Reserved. 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http:#www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | # 20 | 21 | import popen2 22 | import re 23 | import sys 24 | 25 | if len(sys.argv) < 3: 26 | print "usage: linux_symbolize.py " 27 | sys.exit(1) 28 | 29 | # KLine looks like this 30 | # #0 0xffffffff813534bf (kernel+003534bf) 31 | 32 | kerneladdr = 0xffffffff81000000 33 | what_we_need = {} 34 | 35 | f = open(sys.argv[1], "r") 36 | for ln in f: 37 | m = re.match(r" #[0-9].*\(([^+]+)\+([0-9a-fA-F]+)\)", ln) 38 | if not m: 39 | continue 40 | 41 | what_we_need[(m.group(1), int(m.group(2), 16))] = 1 42 | 43 | f.close() 44 | 45 | # Send query. 
46 | (stdout, stdin) = popen2.popen2("addr2line -f -e %s" % sys.argv[2]) 47 | for k in what_we_need: 48 | stdin.write("%x\n" % (k[1] + kerneladdr)) 49 | 50 | stdin.close() 51 | 52 | # Get answer. 53 | for k in what_we_need: 54 | what_we_need[k] = "%24s %s" % ( 55 | stdout.readline().strip(), 56 | stdout.readline().strip() 57 | ) 58 | 59 | f = open(sys.argv[1], "r") 60 | for ln in f: 61 | m = re.match(r" #[0-9].*\(([^+]+)\+([0-9a-fA-F]+)\)", ln) 62 | if not m: 63 | sys.stdout.write(ln) 64 | continue 65 | 66 | k = (m.group(1), int(m.group(2), 16)) 67 | print "%s %s" % (ln.rstrip(), what_we_need[k]) 68 | 69 | f.close() 70 | 71 | -------------------------------------------------------------------------------- /tools/logging.proto: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | // Gynvael Coldwind (gynvael@google.com) 5 | // 6 | // Copyright 2013-2018 Google LLC 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 19 | // 20 | syntax = "proto2"; 21 | 22 | // Descriptor of a single executable module referenced in the stack trace(s) of 23 | // one or more memory access logs. 
message module_st {
  // Module name.
  required string name = 1;
  // Address the module is loaded at; relative PCs of stack frames are
  // offsets from this value.
  required uint64 base_addr = 2;
  required uint64 size = 3;
}

// Descriptor of a single memory access, saved in the trace log file.
message log_data_st {
  // Linear address of the accessed memory.
  required uint64 lin = 1;

  // Memory access length (in bytes). This value can be 1/2/4/8, and
  // symbolizes the size of a single atomic memory fetch. If a continuous
  // memory area has been referenced using fetches of the same size, the
  // "repeated" field will contain the number of repetitions. The following
  // value:
  //   len * repeated
  // makes the total size of the referenced memory area.
  required uint32 len = 2;

  // How many fetches of consecutive memory addresses starting from "lin"
  // have been performed one after another. Typically equal to 1.
  required uint32 repeated = 3;

  enum mem_access_type {
    MEM_READ = 0;
    MEM_WRITE = 1;
    MEM_EXEC = 2;
    MEM_RW = 3;
  };

  // Type of memory access. The Read/Write type is reserved for
  // instructions which read from a memory area and immediately write back
  // to it (e.g. inc [mem32]).
  required mem_access_type access_type = 4;

  // Absolute EIP or RIP of the instruction provoking the memory fetch.
  required uint64 pc = 5;

  // Number of syscalls previously invoked by this thread.
  required uint32 syscall_count = 6;

  // ID number of the last syscall invoked in the thread.
  required uint32 syscall_id = 7;

  // Process filename.
  required bytes image_file_name = 8;
  // Process ID.
  required uint32 process_id = 9;
  // Thread ID.
  required uint32 thread_id = 10;
  // Thread creation time.
  required uint64 create_time = 11;

  // A single frame of the stack trace.
  message callstack_item {
    // Index of the module containing the frame. The analysis tools treat -1
    // as "unknown module".
    required int32 module_idx = 1;
    // Address of the frame relative to the base address of its module (see
    // the note on "stack_trace" below).
    required uint64 relative_pc = 2;
    required uint64 stack_frame = 3;

    // XXX: Windows 32-bit only. If the function has a SEH exception record,
    // the value contains the TryLevel field of the _EH3_EXCEPTION_REGISTRATION
    // structure, which indicates if the specific area of code being executed is
    // guarded by a try/except statement or not. A value of 0xFFFFFFFE means the
    // exception handler is disabled, while positive values close to 0 identify
    // an active try/except block.
    optional uint32 try_level = 4;
  }

  // A stack trace at the time of the memory access.
  //
  // Note: modules[stack_trace[0].module_idx].base_addr + stack_trace[0].relative_pc
  // should be equal to the "pc" value.
  repeated callstack_item stack_trace = 12;

  // Textual representation of the memory-fetching instruction.
  required string pc_disasm = 13;

  // XXX: Windows only. The field contains the PreviousMode bit denoting
  // whether the system service was called from user or kernel-mode.
  optional uint32 previous_mode = 14;
}


--------------------------------------------------------------------------------
/tools/no_cidll.cc:
--------------------------------------------------------------------------------
/////////////////////////////////////////////////////////////////////////
//
// Authors: Mateusz Jurczyk (mjurczyk@google.com)
//          Gynvael Coldwind (gynvael@google.com)
//
// Copyright 2013-2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 19 | // 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #include "common.h" 28 | #include "logging.pb.h" 29 | 30 | int main(int argc, char **argv) { 31 | if (argc < 4) { 32 | fprintf(stderr, "Usage: %s \n", argv[0]); 33 | return EXIT_FAILURE; 34 | } 35 | 36 | std::vector modules; 37 | if (!LoadModuleList(argv[2], &modules)) { 38 | fprintf(stderr, "Unable to load the module list from \"%s\".\n", argv[2]); 39 | return EXIT_FAILURE; 40 | } 41 | 42 | int cidll_module_idx = -1; 43 | for (unsigned int i = 0; i < modules.size(); i++) { 44 | if (modules[i].name == "CI.dll") { 45 | cidll_module_idx = i; 46 | } 47 | } 48 | assert(cidll_module_idx != -1); 49 | 50 | FILE *fi = fopen(argv[1], "rb"); 51 | FILE *fo = fopen(argv[3], "wb+"); 52 | if (!fi || !fo) { 53 | fprintf(stderr, "Unable to open input and/or output file\n"); 54 | return EXIT_FAILURE; 55 | } 56 | 57 | log_data_st ld; 58 | std::string protobuf; 59 | while (LoadNextRecord(fi, &protobuf, &ld)) { 60 | uint32_t size = protobuf.size(); 61 | 62 | bool allowed = true; 63 | for (int i = 0; i < ld.stack_trace_size(); i++) { 64 | if (ld.stack_trace(i).module_idx() == cidll_module_idx) { 65 | allowed = false; 66 | break; 67 | } 68 | } 69 | 70 | if (allowed) { 71 | if (fwrite(&size, sizeof(uint32_t), 1, fo) != 1 || 72 | fwrite(protobuf.data(), sizeof(uint8_t), size, fo) != size) { 73 | fprintf(stderr, "Unable to write protobuf back to output file\n"); 74 | break; 75 | } 76 | } 77 | } 78 | 79 | fclose(fi); 80 | fclose(fo); 81 | 82 | 
return EXIT_SUCCESS; 83 | } 84 | -------------------------------------------------------------------------------- /tools/print.cc: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | // Gynvael Coldwind (gynvael@google.com) 5 | // 6 | // Copyright 2013-2018 Google LLC 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 
19 | // 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #include "common.h" 28 | #include "logging.pb.h" 29 | 30 | int main(int argc, char **argv) { 31 | if (argc < 3) { 32 | fprintf(stderr, "Usage: %s \n", argv[0]); 33 | return EXIT_FAILURE; 34 | } 35 | 36 | std::vector modules; 37 | if (!LoadModuleList(argv[2], &modules)) { 38 | fprintf(stderr, "Unable to load the module list from \"%s\".\n", argv[2]); 39 | return EXIT_FAILURE; 40 | } 41 | 42 | FILE *f = fopen(argv[1], "rb"); 43 | if (!f) { 44 | fprintf(stderr, "Unable to open input file \"%s\".\n", argv[1]); 45 | return EXIT_FAILURE; 46 | } 47 | 48 | log_data_st ld; 49 | while (LoadNextRecord(f, NULL, &ld)) { 50 | printf("%s", LogDataAsText(ld, modules).c_str()); 51 | } 52 | 53 | if (!feof(f)) { 54 | fprintf(stderr, "Failure at offset %lu\n", ftell(f)); 55 | } 56 | 57 | fclose(f); 58 | return EXIT_SUCCESS; 59 | } 60 | -------------------------------------------------------------------------------- /tools/separate.cc: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | // Gynvael Coldwind (gynvael@google.com) 5 | // 6 | // Copyright 2013-2018 Google LLC 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 
19 | // 20 | 21 | #include 22 | 23 | #include 24 | #include 25 | #include 26 | 27 | #include "common.h" 28 | #include "logging.pb.h" 29 | 30 | typedef std::map file_map_t; 31 | 32 | void FinishFileMap(file_map_t *m) { 33 | for (const auto& it : *m) { 34 | fflush(it.second); 35 | fclose(it.second); 36 | } 37 | 38 | m->clear(); 39 | } 40 | 41 | void AddRecordToFile(file_map_t *m, char *file_path, const std::string& protobuf, 42 | bool append = false) { 43 | FILE *f; 44 | 45 | if (m->find(file_path) == m->end()) { 46 | f = fopen(file_path, "ab"); 47 | 48 | if (!f) { 49 | if (append) { 50 | fprintf(stderr, "Unable to create file \"%s\" on second try.\n", 51 | file_path); 52 | exit(EXIT_FAILURE); 53 | } 54 | 55 | fprintf(stderr, "Unable to create file \"%s\" on first try, retrying\n", 56 | file_path); 57 | FinishFileMap(m); 58 | AddRecordToFile(m, file_path, protobuf, true); 59 | return; 60 | } 61 | 62 | if (append) { 63 | fprintf(stderr, "Retry successful.\n"); 64 | } 65 | 66 | (*m)[file_path] = f; 67 | } else { 68 | f = (*m)[file_path]; 69 | } 70 | 71 | uint32_t size = protobuf.size(); 72 | if (fwrite(&size, sizeof(uint32_t), 1, f) != 1 || 73 | fwrite(protobuf.data(), sizeof(uint8_t), size, f) != size) { 74 | fprintf(stderr, "Unable to write record to \"%s\"\n", file_path); 75 | } 76 | } 77 | 78 | int main(int argc, char **argv) { 79 | if (argc != 3) { 80 | fprintf(stderr, "Usage: %s \n", argv[0]); 81 | return EXIT_FAILURE; 82 | } 83 | 84 | const char *log_file_path = argv[1]; 85 | const char *output_dir = argv[2]; 86 | 87 | FILE *f = fopen(log_file_path, "rb"); 88 | if (!f) { 89 | fprintf(stderr, "Unable to open \"%s\"\n", log_file_path); 90 | return EXIT_FAILURE; 91 | } 92 | 93 | file_map_t thread_logs; 94 | log_data_st ld; 95 | std::string protobuf; 96 | char unique_thread_path[256]; 97 | 98 | while (LoadNextRecord(f, &protobuf, &ld)) { 99 | snprintf(unique_thread_path, sizeof(unique_thread_path), 100 | "%s/%.8x%.8x-%.8x-%.8x.bin", 101 | output_dir, 102 | 
(uint32_t)(ld.create_time() >> 32), 103 | (uint32_t)(ld.create_time()), 104 | ld.process_id(), 105 | ld.thread_id()); 106 | 107 | AddRecordToFile(&thread_logs, unique_thread_path, protobuf); 108 | } 109 | 110 | FinishFileMap(&thread_logs); 111 | fclose(f); 112 | 113 | return EXIT_SUCCESS; 114 | } 115 | 116 | -------------------------------------------------------------------------------- /tools/stats.cc: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | // Gynvael Coldwind (gynvael@google.com) 5 | // 6 | // Copyright 2013-2018 Google LLC 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 
19 | // 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | #include "common.h" 31 | #include "logging.pb.h" 32 | 33 | namespace globals { 34 | 35 | uint64_t total_records; 36 | uint64_t total_reads, total_writes; 37 | uint64_t total_memory_read, total_memory_written; 38 | std::map per_exe_reads, per_exe_writes; 39 | std::map per_module_reads, per_module_writes; 40 | 41 | } // namespace globals 42 | 43 | int main(int argc, char **argv) { 44 | if (argc < 3) { 45 | fprintf(stderr, "Usage: %s \n", argv[0]); 46 | return EXIT_FAILURE; 47 | } 48 | 49 | std::vector modules; 50 | if (!LoadModuleList(argv[2], &modules)) { 51 | fprintf(stderr, "Unable to load the module list from \"%s\".\n", argv[2]); 52 | return EXIT_FAILURE; 53 | } 54 | 55 | FILE *f = fopen(argv[1], "rb"); 56 | if (!f) { 57 | fprintf(stderr, "Unable to open input file \"%s\".\n", argv[1]); 58 | return EXIT_FAILURE; 59 | } 60 | 61 | log_data_st ld; 62 | while (LoadNextRecord(f, NULL, &ld)) { 63 | globals::total_records++; 64 | 65 | if (ld.access_type() == log_data_st::MEM_READ) { 66 | globals::total_reads++; 67 | globals::total_memory_read += ld.len() * ld.repeated(); 68 | globals::per_exe_reads[ld.image_file_name()]++; 69 | 70 | std::set seen_modules; 71 | for (int i = 0; i < ld.stack_trace_size(); i++) { 72 | int module_idx = ld.stack_trace(i).module_idx(); 73 | 74 | if (seen_modules.find(module_idx) != seen_modules.end()) { 75 | continue; 76 | } 77 | 78 | seen_modules.insert(module_idx); 79 | 80 | std::string name; 81 | if (module_idx == -1) { 82 | name = "unknown"; 83 | } else { 84 | name = modules[module_idx].name; 85 | } 86 | 87 | globals::per_module_reads[name]++; 88 | } 89 | } else if (ld.access_type() == log_data_st::MEM_WRITE) { 90 | globals::total_writes++; 91 | globals::total_memory_written += ld.len() * ld.repeated(); 92 | globals::per_exe_writes[ld.image_file_name()]++; 93 | 94 | std::set seen_modules; 95 | for (int i = 
0; i < ld.stack_trace_size(); i++) { 96 | int module_idx = ld.stack_trace(i).module_idx(); 97 | 98 | if (seen_modules.find(module_idx) != seen_modules.end()) { 99 | continue; 100 | } 101 | 102 | seen_modules.insert(module_idx); 103 | 104 | std::string name; 105 | if (module_idx == -1) { 106 | name = "unknown"; 107 | } else { 108 | name = modules[module_idx].name; 109 | } 110 | 111 | globals::per_module_writes[name]++; 112 | } 113 | } 114 | } 115 | 116 | printf("Total records: %lld\n", globals::total_records); 117 | printf(" Reads: %lld\n", globals::total_reads); 118 | printf(" Writes: %lld\n", globals::total_writes); 119 | 120 | printf("Total memory read: %lld\n", globals::total_memory_read); 121 | printf("Total memory written: %lld\n", globals::total_memory_written); 122 | 123 | printf("Per executable memory reads:\n"); 124 | for (const auto& it : globals::per_exe_reads) { 125 | printf(" %s: %lld\n", it.first.c_str(), it.second); 126 | } 127 | 128 | printf("Per executable memory writes:\n"); 129 | for (const auto& it : globals::per_exe_writes) { 130 | printf(" %s: %lld\n", it.first.c_str(), it.second); 131 | } 132 | 133 | printf("Per module memory reads:\n"); 134 | for (const auto& it : globals::per_module_reads) { 135 | printf(" %s: %lld\n", it.first.c_str(), it.second); 136 | } 137 | 138 | printf("Per module memory writes:\n"); 139 | for (const auto& it : globals::per_module_writes) { 140 | printf(" %s: %lld\n", it.first.c_str(), it.second); 141 | } 142 | 143 | fclose(f); 144 | return EXIT_SUCCESS; 145 | } 146 | -------------------------------------------------------------------------------- /tools/unhandled_access.cc: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | // Gynvael Coldwind (gynvael@google.com) 5 | // 6 | // Copyright 2013-2018 Google LLC 7 | // 8 | // Licensed under the Apache 
License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 19 | // 20 | 21 | #define __STDC_FORMAT_MACROS 22 | #include 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | #include "common.h" 37 | #include "logging.pb.h" 38 | 39 | #ifndef MAX_PATH 40 | # define MAX_PATH 256 41 | #endif 42 | 43 | // Structure characterizes a stack trace by storing a list of absolute 44 | // addresses. Used to uniquely identify code paths. 45 | struct StackTrace { 46 | std::vector trace; 47 | 48 | bool operator< (const StackTrace& a) const { 49 | return (trace < a.trace); 50 | } 51 | bool operator!= (const StackTrace& a) const { 52 | return (trace != a.trace); 53 | } 54 | }; 55 | 56 | namespace globals { 57 | 58 | // The STL structure stores information about unhandled accesses already found 59 | // by examining the logs. 60 | // 61 | // unique_traces.size() is equal to the number of potential 62 | // vulnerabilities found so far. The structure lives throughout the entire 63 | // lifespan of the tool. 64 | std::set unique_traces; 65 | 66 | // Output files are written on a per-kernel module basis. To avoid 67 | // opening/closing file handles once for every single double fetch, we 68 | // cache them in the log_file_handles structure. 
69 | std::map log_file_handles; 70 | 71 | }; // namespace globals 72 | 73 | // The routine takes information about an unhandled access, verifies it 74 | // against the currently known database of vulnerabilities and prints out 75 | // its details to an adequate log file, if the provided report is unique. 76 | // 77 | // Parameters: 78 | // * output_path - Contains a path to the output directory, where all 79 | // output log files are stored. 80 | // 81 | // * address - virtual address of the memory being referenced multiple 82 | // times. 83 | // 84 | // * accesses - a list of memory access descriptors, of size greater or 85 | // equal to 2 (otherwise, it wouldn't be a vulnerability). 86 | // 87 | void HandleBadAccess(const char *output_path, 88 | const std::vector& modules, 89 | const log_data_st& access) { 90 | // Create a signature of the multi-fetch report. 91 | StackTrace signature; 92 | 93 | for (int j = 0; j < access.stack_trace_size(); j++) { 94 | int module_idx = access.stack_trace(j).module_idx(); 95 | if (module_idx == -1) { 96 | signature.trace.push_back(access.stack_trace(j).relative_pc()); 97 | } else { 98 | signature.trace.push_back(modules[module_idx].base + 99 | access.stack_trace(j).relative_pc()); 100 | } 101 | } 102 | 103 | // See if the signature has already been observed, and if there's any 104 | // stack trace available for the first read. 105 | if (globals::unique_traces.find(signature) == globals::unique_traces.end() && 106 | access.stack_trace_size() > 0) { 107 | int module_idx = access.stack_trace(0).module_idx(); 108 | std::string module_name; 109 | 110 | if (module_idx == -1) { 111 | module_name = "unknown"; 112 | } else { 113 | module_name = modules[module_idx].name; 114 | } 115 | 116 | // If module was encountered for the first time, attempt to open a 117 | // corresponding output file. 
118 | if (!globals::log_file_handles[module_name]) { 119 | char full_path[MAX_PATH]; 120 | FILE *f; 121 | 122 | snprintf(full_path, MAX_PATH, "%s/%s", output_path, module_name.c_str()); 123 | 124 | f = fopen(full_path, "a+"); 125 | if (!f) { 126 | fprintf(stderr, "Unable to open output file \"%s\"\n", full_path); 127 | abort(); 128 | } else { 129 | globals::log_file_handles[module_name] = f; 130 | } 131 | } 132 | 133 | FILE *f = globals::log_file_handles[module_name]; 134 | assert(f != NULL); 135 | 136 | // Print out some detailed information regarding the memory access. 137 | fprintf(f, "------------------------------ found unhandled-access of address %#llx\n\n", access.lin()); 138 | fprintf(f, "%s\n", LogDataAsText(access, modules).c_str()); 139 | fflush(f); 140 | 141 | // Save the signature to avoid duplicates in the future. 142 | globals::unique_traces.insert(signature); 143 | } 144 | } 145 | 146 | int main(int argc, const char **argv) { 147 | if (argc < 4) { 148 | fprintf(stderr, "Usage: %s /path/to/memory/logs /path/to/modules/list /path/to/output/logs\n", argv[0]); 149 | return EXIT_SUCCESS; 150 | } 151 | 152 | const char *logs_path = argv[1]; 153 | const char *modules_list_path = argv[2]; 154 | const char *output_path = argv[3]; 155 | 156 | DIR *dirp = opendir(logs_path); 157 | if (!dirp) { 158 | fprintf(stderr, "Unable to open the \"%s\" directory\n", logs_path); 159 | return EXIT_FAILURE; 160 | } 161 | 162 | // Load the module list. 163 | std::vector modules; 164 | if (!LoadModuleList(modules_list_path, &modules)) { 165 | fprintf(stderr, "Unable to load the module list from \"%s\".\n", modules_list_path); 166 | return EXIT_FAILURE; 167 | } 168 | 169 | unsigned int file_count = 1; 170 | uint64_t bytes_processed = 0; 171 | 172 | // List all files in the specified directory. 
173 | struct dirent *dp; 174 | while ((dp = readdir(dirp)) != NULL) { 175 | static char buffer[MAX_PATH]; 176 | 177 | if (!strcmp(dp->d_name, ".") || !strcmp(dp->d_name, "..")) { 178 | continue; 179 | } 180 | 181 | snprintf(buffer, MAX_PATH, "%s/%s", logs_path, dp->d_name); 182 | 183 | FILE *f = fopen(buffer, "rb"); 184 | if (!f) { 185 | fprintf(stderr, "Unable to load file \"%s\"\n", buffer); 186 | return EXIT_FAILURE; 187 | } 188 | 189 | fprintf(stderr, "[%.4u] Loaded file \"%s\" (%" PRIu64 " bytes processed)\n", 190 | file_count++, dp->d_name, bytes_processed); 191 | 192 | // Read records until the file is over, or we're out of memory. 193 | static log_data_st ld; 194 | while (LoadNextRecord(f, NULL, &ld)) { 195 | bytes_processed += ld.ByteSize(); 196 | 197 | bool handler_active = false; 198 | for (int i = 0; i < ld.stack_trace_size(); i++) { 199 | if (ld.stack_trace(i).has_try_level() && ld.stack_trace(i).try_level() != 0xFFFFFFFE) { 200 | handler_active = true; 201 | break; 202 | } 203 | } 204 | 205 | if (!handler_active) { 206 | HandleBadAccess(output_path, modules, ld); 207 | } 208 | } 209 | 210 | fclose(f); 211 | } 212 | 213 | for (const auto& it : globals::log_file_handles) { 214 | fclose(it.second); 215 | } 216 | globals::log_file_handles.clear(); 217 | 218 | return EXIT_SUCCESS; 219 | } 220 | -------------------------------------------------------------------------------- /tools/win32_symbolize.cc: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////////// 2 | // 3 | // Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | // Gynvael Coldwind (gynvael@google.com) 5 | // 6 | // Copyright 2013-2018 Google LLC 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 
10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 19 | // 20 | 21 | #include 22 | 23 | #include 24 | #include 25 | #include 26 | 27 | #define _NO_CVCONST_H 28 | #include 29 | 30 | struct CSymbolInfoPackage : public SYMBOL_INFO_PACKAGE { 31 | CSymbolInfoPackage() { 32 | si.SizeOfStruct = sizeof(SYMBOL_INFO); 33 | si.MaxNameLen = sizeof(name); 34 | } 35 | }; 36 | 37 | static void usage(const char *program_name) { 38 | fprintf(stderr, "Usage: %s <.pdb file path> \n", program_name); 39 | } 40 | 41 | int main(int argc, char **argv) { 42 | bool ret; 43 | 44 | if (argc < 3) { 45 | usage(argv[0]); 46 | return EXIT_FAILURE; 47 | } 48 | 49 | DWORD64 sym_offset = 0; 50 | if (sscanf(argv[2], "%x", &sym_offset) != 1) { 51 | usage(argv[0]); 52 | return EXIT_FAILURE; 53 | } 54 | 55 | DWORD options = SymGetOptions(); 56 | options |= SYMOPT_DEBUG; 57 | SymSetOptions(options); 58 | 59 | ret = SymInitialize(GetCurrentProcess(), NULL, FALSE); 60 | if (!ret) { 61 | printf("???+%.8x\n", sym_offset); 62 | fprintf(stderr, "SymInitialize() failed, %u\n", GetLastError()); 63 | return EXIT_FAILURE; 64 | } 65 | 66 | do { 67 | // Since we are only loading a single symbol file with DbgHelp, we can "fake" 68 | // both the base address to have a constant, arbitrary value, and the image size 69 | // to be a fixed length that is guaranteed to be large enough to cover every 70 | // possible real image size. 
71 | const PCHAR file_name = argv[1]; 72 | const DWORD64 base_address = 0x10000000; 73 | const DWORD image_size = 0x10000000; 74 | 75 | DWORD64 mod_base = SymLoadModule64(GetCurrentProcess(), NULL, file_name, NULL, base_address, image_size); 76 | if (!mod_base) { 77 | printf("???+%.8x\n", sym_offset); 78 | fprintf(stderr, "SymLoadModule64() failed, %u\n", GetLastError()); 79 | break; 80 | } 81 | 82 | CSymbolInfoPackage sip; 83 | DWORD64 displacement = 0; 84 | 85 | ret = SymFromAddr(GetCurrentProcess(), base_address + sym_offset, &displacement, &sip.si); 86 | if (!ret) { 87 | printf("???+%.8x\n", sym_offset); 88 | fprintf(stderr, "SymFromAddr() failed, %u\n", GetLastError()); 89 | break; 90 | } else { 91 | printf("%s+%.8llx\n", sip.si.Name, displacement); 92 | } 93 | 94 | ret = SymUnloadModule64(GetCurrentProcess(), mod_base); 95 | if (!ret) { 96 | fprintf(stderr, "SymUnloadModule64() failed, %u\n", GetLastError()); 97 | } 98 | } while (0); 99 | 100 | ret = SymCleanup(GetCurrentProcess()); 101 | if (!ret) { 102 | fprintf(stderr, "SymCleanup() failed, %u\n", GetLastError()); 103 | } 104 | 105 | return 0; 106 | } 107 | -------------------------------------------------------------------------------- /tools/win32_symbolize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # 3 | # Authors: Mateusz Jurczyk (mjurczyk@google.com) 4 | # Gynvael Coldwind (gynvael@google.com) 5 | # 6 | # Copyright 2013-2018 Google LLC 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http:#www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# Reference at the very beginning of a line: "image.ext+HEXOFFSET".
_REF_AT_START = re.compile(r"([a-zA-Z0-9]+\.[a-zA-Z]+)\+([0-9a-fA-F]+).*")
# Reference preceded by a character that cannot belong to an image name.
_REF_INSIDE = re.compile(
    r".*[^a-zA-Z0-9.]+([a-zA-Z0-9]+\.[a-zA-Z]+)\+([0-9a-fA-F]+).*")


def _symbolize_line(line, symbols_path):
    """Return *line* with its ``image.ext+offset`` reference symbolized.

    The line is returned unchanged when it holds no reference, when no
    ``.pdb`` file for the image exists under *symbols_path*, or when the
    native symbolizer cannot be run or exits with a non-zero code. At most
    one reference per line is resolved.
    """
    match = _REF_AT_START.match(line) or _REF_INSIDE.match(line)
    if match is None:
        return line

    image_name = match.group(1)
    offset = match.group(2)

    # Look up a corresponding pdb file.
    file_name = os.path.splitext(image_name)[0]
    pdb_path = symbols_path + "/" + file_name + ".pdb"
    if not os.path.isfile(pdb_path):
        sys.stderr.write("PDB file \"%s\" for module \"%s\" not found\n"
                         % (pdb_path, image_name))
        return line

    # The native helper is expected next to this script.
    symbolizer = (os.path.dirname(os.path.realpath(__file__))
                  + "\\win32_symbolize.exe")
    try:
        # universal_newlines makes the pipes return text on Python 2 and 3.
        p = subprocess.Popen([symbolizer, pdb_path, offset],
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                             universal_newlines=True)
    except OSError as error:
        sys.stderr.write("Unable to run native symbolizer \"%s\": %s\n"
                         % (symbolizer, error))
        return line

    stdout, stderr = p.communicate()
    if p.returncode != 0:
        sys.stderr.write("Native symbolizer failed with code %u: \"%s\"\n"
                         % (p.returncode, stderr))
        return line

    return line.replace("%s+%s" % (image_name, offset),
                        "(%.8x) %s!%s" % (int(offset, 16), file_name,
                                          stdout.strip()))


def main(argv):
    """Print a text log with module-relative addresses symbolized.

    Args:
        argv: Command line; ``argv[1]`` is the path of the text log and
            ``argv[2]`` the directory holding the ``.pdb`` files.

    Every input line is written to stdout, stripped of surrounding
    whitespace. Exits with status 1 when arguments are missing or the input
    file cannot be opened.
    """
    if len(argv) < 3:
        sys.stderr.write("Usage: %s /path/to/log /path/to/symbols/dir\n"
                         % argv[0])
        sys.exit(1)

    try:
        f = open(argv[1], "r")
    except IOError:
        sys.stderr.write("Unable to open input file \"%s\"\n" % argv[1])
        sys.exit(1)

    symbols_path = argv[2]
    with f:
        for line in f:
            # Display the final version of the line. sys.stdout.write() is
            # used because it works on both Python 2 and Python 3.
            sys.stdout.write(_symbolize_line(line, symbols_path).strip() + "\n")


if __name__ == "__main__":
    main(sys.argv)