├── .flake8 ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── LICENSE ├── README.rst ├── docs ├── .gitignore ├── Makefile ├── conf.py ├── index.rst └── requirements.txt ├── ptracer ├── __init__.py ├── _lltraceback.c ├── _ptracer.py ├── _syscall.py └── ptrace │ ├── __init__.py │ ├── _defs_linux.py │ ├── _defs_linux_64.py │ ├── _gen_defs_linux_64.py │ ├── _ptrace.c │ ├── defs.py │ ├── memory.py │ ├── platform.py │ ├── ptrace.py │ ├── syscalldef.py │ └── syscalls.py ├── setup.py ├── tests ├── __init__.py ├── test_lltraceback.py ├── test_ptrace.py └── test_ptracer.py ├── tools ├── extract_ptrace_constants.py └── requirements.txt └── tox.ini /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = E402,E731 3 | exclude = .git,__pycache__,build,dist,.eggs,.tox,ptracer/ptrace/_gen_defs*,yacctab.py,lextab.py 4 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | build: 13 | runs-on: ubuntu-latest 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | python-version: [2.7, 3.7, 3.8, 3.9, "3.10"] 18 | 19 | steps: 20 | - uses: actions/checkout@v2 21 | - name: Set up Python ${{ matrix.python-version }} 22 | uses: actions/setup-python@v2 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | - name: Install dependencies 26 | if: matrix.python-version == '2.7' 27 | run: | 28 | python -m pip install --upgrade pip 29 | pip install mock 30 | - name: Tests 31 | run: | 32 | python setup.py test 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *._* 2 | *.pyc 3 | *.pyo 4 | *.so 5 | *~ 6 | .#* 7 | .DS_Store 8 | \#*# 
9 | /test*.py 10 | /.local 11 | /build 12 | __pycache__/ 13 | /*.egg 14 | /*.egg-info 15 | /dist 16 | /.cache 17 | /.eggs 18 | /.tox 19 | *,cover 20 | .coverage 21 | /lextab.py 22 | /yacctab.py 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2017-present Pinterest Inc. 2 | 3 | Apache License 4 | Version 2.0, January 2004 5 | http://www.apache.org/licenses/ 6 | 7 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 8 | 9 | 1. Definitions. 10 | 11 | "License" shall mean the terms and conditions for use, reproduction, 12 | and distribution as defined by Sections 1 through 9 of this document. 13 | 14 | "Licensor" shall mean the copyright owner or entity authorized by 15 | the copyright owner that is granting the License. 16 | 17 | "Legal Entity" shall mean the union of the acting entity and all 18 | other entities that control, are controlled by, or are under common 19 | control with that entity. For the purposes of this definition, 20 | "control" means (i) the power, direct or indirect, to cause the 21 | direction or management of such entity, whether by contract or 22 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 23 | outstanding shares, or (iii) beneficial ownership of such entity. 24 | 25 | "You" (or "Your") shall mean an individual or Legal Entity 26 | exercising permissions granted by this License. 27 | 28 | "Source" form shall mean the preferred form for making modifications, 29 | including but not limited to software source code, documentation 30 | source, and configuration files. 31 | 32 | "Object" form shall mean any form resulting from mechanical 33 | transformation or translation of a Source form, including but 34 | not limited to compiled object code, generated documentation, 35 | and conversions to other media types. 
36 | 37 | "Work" shall mean the work of authorship, whether in Source or 38 | Object form, made available under the License, as indicated by a 39 | copyright notice that is included in or attached to the work 40 | (an example is provided in the Appendix below). 41 | 42 | "Derivative Works" shall mean any work, whether in Source or Object 43 | form, that is based on (or derived from) the Work and for which the 44 | editorial revisions, annotations, elaborations, or other modifications 45 | represent, as a whole, an original work of authorship. For the purposes 46 | of this License, Derivative Works shall not include works that remain 47 | separable from, or merely link (or bind by name) to the interfaces of, 48 | the Work and Derivative Works thereof. 49 | 50 | "Contribution" shall mean any work of authorship, including 51 | the original version of the Work and any modifications or additions 52 | to that Work or Derivative Works thereof, that is intentionally 53 | submitted to Licensor for inclusion in the Work by the copyright owner 54 | or by an individual or Legal Entity authorized to submit on behalf of 55 | the copyright owner. For the purposes of this definition, "submitted" 56 | means any form of electronic, verbal, or written communication sent 57 | to the Licensor or its representatives, including but not limited to 58 | communication on electronic mailing lists, source code control systems, 59 | and issue tracking systems that are managed by, or on behalf of, the 60 | Licensor for the purpose of discussing and improving the Work, but 61 | excluding communication that is conspicuously marked or otherwise 62 | designated in writing by the copyright owner as "Not a Contribution." 63 | 64 | "Contributor" shall mean Licensor and any individual or Legal Entity 65 | on behalf of whom a Contribution has been received by Licensor and 66 | subsequently incorporated within the Work. 67 | 68 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 69 | this License, each Contributor hereby grants to You a perpetual, 70 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 71 | copyright license to reproduce, prepare Derivative Works of, 72 | publicly display, publicly perform, sublicense, and distribute the 73 | Work and such Derivative Works in Source or Object form. 74 | 75 | 3. Grant of Patent License. Subject to the terms and conditions of 76 | this License, each Contributor hereby grants to You a perpetual, 77 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 78 | (except as stated in this section) patent license to make, have made, 79 | use, offer to sell, sell, import, and otherwise transfer the Work, 80 | where such license applies only to those patent claims licensable 81 | by such Contributor that are necessarily infringed by their 82 | Contribution(s) alone or by combination of their Contribution(s) 83 | with the Work to which such Contribution(s) was submitted. If You 84 | institute patent litigation against any entity (including a 85 | cross-claim or counterclaim in a lawsuit) alleging that the Work 86 | or a Contribution incorporated within the Work constitutes direct 87 | or contributory patent infringement, then any patent licenses 88 | granted to You under this License for that Work shall terminate 89 | as of the date such litigation is filed. 90 | 91 | 4. Redistribution. 
You may reproduce and distribute copies of the 92 | Work or Derivative Works thereof in any medium, with or without 93 | modifications, and in Source or Object form, provided that You 94 | meet the following conditions: 95 | 96 | (a) You must give any other recipients of the Work or 97 | Derivative Works a copy of this License; and 98 | 99 | (b) You must cause any modified files to carry prominent notices 100 | stating that You changed the files; and 101 | 102 | (c) You must retain, in the Source form of any Derivative Works 103 | that You distribute, all copyright, patent, trademark, and 104 | attribution notices from the Source form of the Work, 105 | excluding those notices that do not pertain to any part of 106 | the Derivative Works; and 107 | 108 | (d) If the Work includes a "NOTICE" text file as part of its 109 | distribution, then any Derivative Works that You distribute must 110 | include a readable copy of the attribution notices contained 111 | within such NOTICE file, excluding those notices that do not 112 | pertain to any part of the Derivative Works, in at least one 113 | of the following places: within a NOTICE text file distributed 114 | as part of the Derivative Works; within the Source form or 115 | documentation, if provided along with the Derivative Works; or, 116 | within a display generated by the Derivative Works, if and 117 | wherever such third-party notices normally appear. The contents 118 | of the NOTICE file are for informational purposes only and 119 | do not modify the License. You may add Your own attribution 120 | notices within Derivative Works that You distribute, alongside 121 | or as an addendum to the NOTICE text from the Work, provided 122 | that such additional attribution notices cannot be construed 123 | as modifying the License. 
124 | 125 | You may add Your own copyright statement to Your modifications and 126 | may provide additional or different license terms and conditions 127 | for use, reproduction, or distribution of Your modifications, or 128 | for any such Derivative Works as a whole, provided Your use, 129 | reproduction, and distribution of the Work otherwise complies with 130 | the conditions stated in this License. 131 | 132 | 5. Submission of Contributions. Unless You explicitly state otherwise, 133 | any Contribution intentionally submitted for inclusion in the Work 134 | by You to the Licensor shall be under the terms and conditions of 135 | this License, without any additional terms or conditions. 136 | Notwithstanding the above, nothing herein shall supersede or modify 137 | the terms of any separate license agreement you may have executed 138 | with Licensor regarding such Contributions. 139 | 140 | 6. Trademarks. This License does not grant permission to use the trade 141 | names, trademarks, service marks, or product names of the Licensor, 142 | except as required for reasonable and customary use in describing the 143 | origin of the Work and reproducing the content of the NOTICE file. 144 | 145 | 7. Disclaimer of Warranty. Unless required by applicable law or 146 | agreed to in writing, Licensor provides the Work (and each 147 | Contributor provides its Contributions) on an "AS IS" BASIS, 148 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 149 | implied, including, without limitation, any warranties or conditions 150 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 151 | PARTICULAR PURPOSE. You are solely responsible for determining the 152 | appropriateness of using or redistributing the Work and assume any 153 | risks associated with Your exercise of permissions under this License. 154 | 155 | 8. Limitation of Liability. 
In no event and under no legal theory, 156 | whether in tort (including negligence), contract, or otherwise, 157 | unless required by applicable law (such as deliberate and grossly 158 | negligent acts) or agreed to in writing, shall any Contributor be 159 | liable to You for damages, including any direct, indirect, special, 160 | incidental, or consequential damages of any character arising as a 161 | result of this License or out of the use or inability to use the 162 | Work (including but not limited to damages for loss of goodwill, 163 | work stoppage, computer failure or malfunction, or any and all 164 | other commercial damages or losses), even if such Contributor 165 | has been advised of the possibility of such damages. 166 | 167 | 9. Accepting Warranty or Additional Liability. While redistributing 168 | the Work or Derivative Works thereof, You may choose to offer, 169 | and charge a fee for, acceptance of support, warranty, indemnity, 170 | or other liability obligations and/or rights consistent with this 171 | License. However, in accepting such obligations, You may act only 172 | on Your own behalf and on Your sole responsibility, not on behalf 173 | of any other Contributor, and only if You agree to indemnify, 174 | defend, and hold each Contributor harmless for any liability 175 | incurred by, or claims asserted against, such Contributor by reason 176 | of your accepting any such warranty or additional liability. 177 | 178 | END OF TERMS AND CONDITIONS 179 | 180 | APPENDIX: How to apply the Apache License to your work. 181 | 182 | To apply the Apache License to your work, attach the following 183 | boilerplate notice, with the fields enclosed by brackets "[]" 184 | replaced with your own identifying information. (Don't include 185 | the brackets!) The text should be enclosed in the appropriate 186 | comment syntax for the file format. 
We also recommend that a 187 | file or class name and description of purpose be included on the 188 | same "printed page" as the copyright notice for easier 189 | identification within third-party archives. 190 | 191 | Copyright (C) 2017-present Pinterest Inc. 192 | 193 | Licensed under the Apache License, Version 2.0 (the "License"); 194 | you may not use this file except in compliance with the License. 195 | You may obtain a copy of the License at 196 | 197 | http://www.apache.org/licenses/LICENSE-2.0 198 | 199 | Unless required by applicable law or agreed to in writing, software 200 | distributed under the License is distributed on an "AS IS" BASIS, 201 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 202 | See the License for the specific language governing permissions and 203 | limitations under the License. 204 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ptracer -- a library for ptrace-based tracing of Python programs 2 | ================================================================ 3 | 4 | Ptracer is a library providing on-demand system call tracing in Python 5 | programs. 6 | 7 | 8 | Basic Usage 9 | ----------- 10 | 11 | .. code-block:: python 12 | 13 | import traceback 14 | import ptracer 15 | 16 | def callback(syscall): 17 | print('{}({}) -> {}'.format( 18 | syscall.name, 19 | ', '.join(repr(arg.value) for arg in syscall.args), 20 | syscall.result.text)) 21 | print('Traceback: ') 22 | print(''.join(traceback.format_list(syscall.traceback))) 23 | 24 | with ptracer.context(callback): 25 | open('/dev/null', 'wb') 26 | 27 | 28 | Filtering 29 | --------- 30 | 31 | Ptracer allows elaborate syscall filtering via the *filter* argument: 32 | 33 | .. 
code-block:: python 34 | 35 | flt = [ 36 | ptracer.SysCallPattern( 37 | name='open', 38 | args=[ 39 | re.compile(b'/tmp/.*'), 40 | lambda arg: arg.value & os.O_WRONLY 41 | ], 42 | result=lambda res: res.value > 0 43 | ) 44 | ] 45 | 46 | with ptracer.context(callback, filter=flt): 47 | # traced code 48 | ... 49 | 50 | 51 | In the above example, ptracer will invoke the callback only for successful 52 | attempts to open files in the "/tmp" directory for writing. 53 | 54 | 55 | Documentation 56 | ------------- 57 | 58 | The documentation is available on 59 | `ptracer.readthedocs.io <https://ptracer.readthedocs.io>`_. 60 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | _templates 3 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = python -m sphinx 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
16 | 17 | .PHONY: help 18 | help: 19 | @echo "Please use \`make <target>' where <target> is one of" 20 | @echo " html to make standalone HTML files" 21 | @echo " dirhtml to make HTML files named index.html in directories" 22 | @echo " singlehtml to make a single large HTML file" 23 | @echo " pickle to make pickle files" 24 | @echo " json to make JSON files" 25 | @echo " htmlhelp to make HTML files and a HTML help project" 26 | @echo " qthelp to make HTML files and a qthelp project" 27 | @echo " applehelp to make an Apple Help Book" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " epub3 to make an epub3" 31 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 32 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 33 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 34 | @echo " text to make text files" 35 | @echo " man to make manual pages" 36 | @echo " texinfo to make Texinfo files" 37 | @echo " info to make Texinfo files and run them through makeinfo" 38 | @echo " gettext to make PO message catalogs" 39 | @echo " changes to make an overview of all changed/added/deprecated items" 40 | @echo " xml to make Docutils-native XML files" 41 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 42 | @echo " linkcheck to check all external links for integrity" 43 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 44 | @echo " coverage to run coverage check of the documentation (if enabled)" 45 | @echo " dummy to check syntax errors of document sources" 46 | 47 | .PHONY: clean 48 | clean: 49 | rm -rf $(BUILDDIR)/* 50 | 51 | .PHONY: html 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 
56 | 57 | .PHONY: dirhtml 58 | dirhtml: 59 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 60 | @echo 61 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 62 | 63 | .PHONY: singlehtml 64 | singlehtml: 65 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 66 | @echo 67 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 68 | 69 | .PHONY: pickle 70 | pickle: 71 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 72 | @echo 73 | @echo "Build finished; now you can process the pickle files." 74 | 75 | .PHONY: json 76 | json: 77 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 78 | @echo 79 | @echo "Build finished; now you can process the JSON files." 80 | 81 | .PHONY: htmlhelp 82 | htmlhelp: 83 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 84 | @echo 85 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 86 | ".hhp project file in $(BUILDDIR)/htmlhelp." 87 | 88 | .PHONY: qthelp 89 | qthelp: 90 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 91 | @echo 92 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 93 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 94 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/asyncpg.qhcp" 95 | @echo "To view the help file:" 96 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/asyncpg.qhc" 97 | 98 | .PHONY: applehelp 99 | applehelp: 100 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 101 | @echo 102 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 103 | @echo "N.B. You won't be able to view it unless you put it in" \ 104 | "~/Library/Documentation/Help or install it in your application" \ 105 | "bundle." 106 | 107 | .PHONY: devhelp 108 | devhelp: 109 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 110 | @echo 111 | @echo "Build finished." 
112 | @echo "To view the help file:" 113 | @echo "# mkdir -p $$HOME/.local/share/devhelp/asyncpg" 114 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/asyncpg" 115 | @echo "# devhelp" 116 | 117 | .PHONY: epub 118 | epub: 119 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 120 | @echo 121 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 122 | 123 | .PHONY: epub3 124 | epub3: 125 | $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 126 | @echo 127 | @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." 128 | 129 | .PHONY: latex 130 | latex: 131 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 132 | @echo 133 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 134 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 135 | "(use \`make latexpdf' here to do that automatically)." 136 | 137 | .PHONY: latexpdf 138 | latexpdf: 139 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 140 | @echo "Running LaTeX files through pdflatex..." 141 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 142 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 143 | 144 | .PHONY: latexpdfja 145 | latexpdfja: 146 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 147 | @echo "Running LaTeX files through platex and dvipdfmx..." 148 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 149 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 150 | 151 | .PHONY: text 152 | text: 153 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 154 | @echo 155 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 156 | 157 | .PHONY: man 158 | man: 159 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 160 | @echo 161 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 162 | 163 | .PHONY: texinfo 164 | texinfo: 165 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 166 | @echo 167 | @echo "Build finished. 
The Texinfo files are in $(BUILDDIR)/texinfo." 168 | @echo "Run \`make' in that directory to run these through makeinfo" \ 169 | "(use \`make info' here to do that automatically)." 170 | 171 | .PHONY: info 172 | info: 173 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 174 | @echo "Running Texinfo files through makeinfo..." 175 | make -C $(BUILDDIR)/texinfo info 176 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 177 | 178 | .PHONY: gettext 179 | gettext: 180 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 181 | @echo 182 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 183 | 184 | .PHONY: changes 185 | changes: 186 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 187 | @echo 188 | @echo "The overview file is in $(BUILDDIR)/changes." 189 | 190 | .PHONY: linkcheck 191 | linkcheck: 192 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 193 | @echo 194 | @echo "Link check complete; look for any errors in the above output " \ 195 | "or in $(BUILDDIR)/linkcheck/output.txt." 196 | 197 | .PHONY: doctest 198 | doctest: 199 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 200 | @echo "Testing of doctests in the sources finished, look at the " \ 201 | "results in $(BUILDDIR)/doctest/output.txt." 202 | 203 | .PHONY: coverage 204 | coverage: 205 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 206 | @echo "Testing of coverage in the sources finished, look at the " \ 207 | "results in $(BUILDDIR)/coverage/python.txt." 208 | 209 | .PHONY: xml 210 | xml: 211 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 212 | @echo 213 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 214 | 215 | .PHONY: pseudoxml 216 | pseudoxml: 217 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 218 | @echo 219 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 
220 | 221 | .PHONY: dummy 222 | dummy: 223 | $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy 224 | @echo 225 | @echo "Build finished. Dummy builder generates no files." 226 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import sys 5 | 6 | import alabaster 7 | import sphinx_rtd_theme # noqa 8 | 9 | sys.path.insert(0, os.path.abspath('..')) 10 | 11 | version_file = os.path.join(os.path.dirname(os.path.dirname(__file__)), 12 | 'ptracer', '__init__.py') 13 | 14 | with open(version_file, 'r') as f: 15 | for line in f: 16 | if line.startswith('__version__ ='): 17 | _, _, version = line.partition('=') 18 | version = version.strip(" \n'\"") 19 | break 20 | else: 21 | raise RuntimeError( 22 | 'could not determine the version from ptracer/__init__.py') 23 | 24 | # -- General configuration ------------------------------------------------ 25 | 26 | extensions = [ 27 | 'sphinx.ext.autodoc', 28 | 'sphinx.ext.viewcode', 29 | 'sphinx.ext.intersphinx', 30 | ] 31 | 32 | add_module_names = False 33 | 34 | templates_path = ['_templates'] 35 | source_suffix = '.rst' 36 | master_doc = 'index' 37 | project = 'ptracer' 38 | copyright = '2017-present, Pinterest Inc' 39 | author = 'Pinterest Inc.' 
40 | release = version 41 | language = None 42 | exclude_patterns = ['_build'] 43 | pygments_style = 'sphinx' 44 | todo_include_todos = False 45 | suppress_warnings = ['image.nonlocal_uri'] 46 | 47 | # -- Options for HTML output ---------------------------------------------- 48 | 49 | html_theme = 'sphinx_rtd_theme' 50 | html_theme_path = [alabaster.get_path()] 51 | html_title = 'Ptracer Documentation' 52 | html_short_title = 'ptracer' 53 | html_static_path = [] 54 | html_show_sourcelink = False 55 | html_show_sphinx = False 56 | html_show_copyright = True 57 | htmlhelp_basename = 'ptracerdoc' 58 | 59 | 60 | # -- Options for LaTeX output --------------------------------------------- 61 | 62 | latex_elements = {} 63 | 64 | latex_documents = [ 65 | (master_doc, 'ptracer.tex', 'Ptracer Documentation', 66 | author, 'manual'), 67 | ] 68 | 69 | 70 | # -- Options for manual page output --------------------------------------- 71 | 72 | man_pages = [ 73 | (master_doc, 'ptracer', 'Ptracer Documentation', 74 | [author], 1) 75 | ] 76 | 77 | 78 | # -- Options for Texinfo output ------------------------------------------- 79 | 80 | texinfo_documents = [ 81 | (master_doc, 'ptracer', 'Ptracer Documentation', 82 | author, 'ptracer', 83 | 'Ptracer is a library providing on-demand system call tracing in ' 84 | 'Python programs.', 85 | 'Miscellaneous'), 86 | ] 87 | 88 | # -- Options for intersphinx ---------------------------------------------- 89 | 90 | intersphinx_mapping = {'python': ('https://docs.python.org/3', None)} 91 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | ptracer 3 | ======= 4 | 5 | .. module:: ptracer 6 | :synopsis: On-demand system call tracing in Python programs. 7 | 8 | .. 
currentmodule:: ptracer 9 | 10 | **ptracer** is a library providing on-demand, programmatic system call tracing 11 | in Python programs using 12 | `ptrace <https://man7.org/linux/man-pages/man2/ptrace.2.html>`_. 13 | 14 | **ptracer** works on Python 2.7 and Python 3.5 or later. Currently, only 15 | 64-bit Linux platforms are supported. 16 | 17 | 18 | .. _ptracer-installation: 19 | 20 | Installation 21 | ============ 22 | 23 | **ptracer** has no external dependencies and the recommended way to 24 | install it is to use **pip**: 25 | 26 | .. code-block:: bash 27 | 28 | $ pip install ptracer 29 | 30 | 31 | Building from source 32 | -------------------- 33 | 34 | If you want to build **ptracer** from a Git checkout you will need: 35 | 36 | * A working C compiler. 37 | * CPython header files. These can usually be obtained by installing 38 | the relevant Python development package: **python-dev**/**python3-dev** 39 | on Debian/Ubuntu, **python-devel**/**python3-devel** on RHEL/Fedora. 40 | 41 | Once the above requirements are satisfied, use the usual ``setup.py`` commands 42 | or ``pip install -e .`` to install the newly built version in development mode. 43 | 44 | 45 | Running tests 46 | ------------- 47 | 48 | To execute the testsuite simply run: 49 | 50 | .. code-block:: bash 51 | 52 | $ python setup.py test 53 | 54 | 55 | .. _ptracer-usage: 56 | 57 | ptracer Usage 58 | ============= 59 | 60 | The most common way of tracing a block of code is to surround it with the 61 | :func:`context` context manager: 62 | 63 | .. 
code-block:: python 64 | 65 | import traceback 66 | import ptracer 67 | 68 | def callback(syscall): 69 | print('{}({}) -> {}'.format( 70 | syscall.name, 71 | ', '.join(repr(arg.value) for arg in syscall.args), 72 | syscall.result.text)) 73 | print('Traceback: ') 74 | print(''.join(traceback.format_list(syscall.traceback))) 75 | 76 | with ptracer.context(callback): 77 | open('/dev/null', 'wb') 78 | 79 | ``ptracer`` also provides the explicit :func:`enable` and 80 | :func:`disable` functions to begin and terminate tracing. 81 | 82 | 83 | Filtering 84 | --------- 85 | 86 | Ptracer allows elaborate syscall filtering via the *filter* argument: 87 | 88 | .. code-block:: python 89 | 90 | flt = [ 91 | ptracer.SysCallPattern( 92 | name='open', 93 | args=[ 94 | re.compile(b'/tmp/.*'), 95 | lambda arg: arg.value & os.O_WRONLY 96 | ], 97 | result=lambda res: res.value > 0 98 | ) 99 | ] 100 | 101 | with ptracer.context(callback, filter=flt): 102 | # traced code 103 | ... 104 | 105 | 106 | In the above example, ptracer will invoke the callback only for successful 107 | attempts to open files in the "/tmp" directory for writing. 108 | 109 | See the documentation for the 110 | :class:`SysCallPattern` class for more 111 | information on setting up filters. 112 | 113 | 114 | .. _ptracer-api-reference: 115 | 116 | Module Reference 117 | ================ 118 | 119 | .. function:: context(callback, filter=None) 120 | 121 | Set up and return a tracing context object that should be used as a context 122 | manager. Tracing will begin once the context manager block is entered, 123 | and will terminate on block exit. 124 | 125 | The *callback* parameter specifies a callable that should accept a 126 | :class:`SysCall` instance as a single argument. The *callback* is 127 | invoked asynchronously in a thread separate from the traced program. 
128 | 129 | If *filter* is not ``None``, it is expected to contain a 130 | :class:`SysCallPattern` instance or an iterable of ``SysCallPattern`` 131 | instances. The *callback* will be called if the syscall matches any of 132 | the provided patterns. If *filter* is ``None``, no filtering is done, 133 | and *callback* will be invoked for every syscall. 134 | 135 | 136 | .. function:: enable(callback, filter=None) 137 | 138 | Start tracing of the current program immediately. The *callback* and 139 | *filter* arguments have the same meaning as in :func:`context`. To stop 140 | tracing call :func:`disable`. 141 | 142 | 143 | .. function:: disable() 144 | 145 | Stop tracing of the current program. 146 | 147 | 148 | .. class:: SysCall 149 | 150 | A description of a system call performed by a program. ``SysCall`` 151 | instances are passed to the callback passed to :func:`context` or 152 | :func:`enable`. 153 | 154 | .. attribute:: name 155 | 156 | The name of the system call. If the name could not be identified, 157 | the property will contain ``'<syscallnumber>'``, where ``syscallnumber`` 158 | is a platform-specific integer representing the system call. 159 | 160 | .. attribute:: pid 161 | 162 | The system identifier of the OS thread in which the system call 163 | was performed. 164 | 165 | .. attribute:: args 166 | 167 | A list of :class:`SysCallArg` instances representing the system call 168 | arguments. The values of the arguments are taken *after* the system 169 | call exit. 170 | 171 | .. attribute:: result 172 | 173 | An instance of :class:`SysCallResult` representing the system call 174 | return value. 175 | 176 | .. attribute:: traceback 177 | 178 | A list of stack trace entries similar to the one returned by 179 | :func:`traceback.extract_stack <python:traceback.extract_stack>`. 180 | 181 | The trace corresponds to the call stack which triggered the system call. 182 | 183 | 184 | .. class:: SysCallArg 185 | 186 | A description of a system call argument. 
Instances of :class:`SysCall` 187 | contain a list of ``SysCallArg`` objects in the ``args`` attribute. 188 | 189 | .. attribute:: name 190 | 191 | The name of the syscall parameter. If the name could not be identified, 192 | this property will contain ``paramN`` for the N-th argument. 193 | 194 | .. attribute:: type 195 | 196 | The type of the syscall parameter represented by a :class:`CType` 197 | instance. If the real type could not be identified, the type will 198 | be reported as ``unsigned long``. 199 | 200 | .. attribute:: raw_value 201 | 202 | An integer representing the raw value of the syscall argument. 203 | 204 | .. attribute:: value 205 | 206 | An object representing the unpacked value of the syscall argument 207 | according to its type. For pointer values, this will be the dereferenced 208 | value. Known types will be converted into corresponding Python values. 209 | 210 | 211 | .. class:: SysCallResult 212 | 213 | A description of a system call return value. Instances of :class:`SysCall` 214 | contain a ``SysCallResult`` object in the ``result`` attribute. 215 | 216 | .. attribute:: type 217 | 218 | The type of the syscall return value represented by a :class:`CType` 219 | instance. If the real type could not be identified, the type will 220 | be reported as ``unsigned long``. 221 | 222 | .. attribute:: raw_value 223 | 224 | An integer representing the raw value of the syscall return value. 225 | 226 | .. attribute:: value 227 | 228 | An object representing the unpacked value of the syscall return value 229 | according to its type. For pointer values, this will be the dereferenced 230 | value. Known types will be converted into corresponding Python values. 231 | 232 | 233 | .. class:: CType 234 | 235 | A description of a system call value type. 236 | 237 | .. attribute:: names 238 | 239 | A list of tokens in the C declaration of the type. For example, 240 | ``'unsigned long'`` will be represented as ``['unsigned', 'long']``. 241 | 242 | .. 
attribute:: ctype 243 | 244 | A :mod:`ctypes` data type. 245 | 246 | .. attribute:: ptr_indirection 247 | 248 | The number of pointer indirections. For example, a ``'const char **'`` 249 | type will have ``ptr_indirection`` of ``2``, and the ``ctype`` attribute 250 | set to :class:`ctypes.c_char`. 251 | 252 | 253 | .. class:: SysCallPattern(name=None, args=None, result=None) 254 | 255 | An object used to match system calls. *name*, *args*, and *result* specify 256 | the *patterns* for the corresponding attributes of the :class:`SysCall` 257 | object. If specified, *args* should be a list of patterns matching the 258 | order of syscall arguments, and not all arguments have to be listed. 259 | Each pattern value can be: 260 | 261 | - A callable that receives a :class:`SysCallArg` or a :class:`SysCallResult` 262 | instance and returns ``True`` when the value matches, and ``False`` 263 | otherwise. 264 | 265 | - An object with a ``match()`` method that receives an unpacked value 266 | of a syscall attribute and returns ``True`` when the value matches, and 267 | ``False`` otherwise. 268 | A regular expression object (see :mod:`re`) can be used. 269 | For example: ``SysCallPattern(name=re.compile('open.*'))``. 270 | 271 | - Any Python object, which is compared with the unpacked value directly. 272 | 273 | .. method:: match(syscall) 274 | 275 | Return ``True`` if *syscall* matches the pattern and ``False`` otherwise. 276 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | sphinx_rtd_theme 3 | -------------------------------------------------------------------------------- /ptracer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017-present Pinterest Inc. 
2 | # 3 | # This module is part of ptracer and is released under 4 | # the Apache 2.0 License: http://www.apache.org/licenses/LICENSE-2.0 5 | 6 | 7 | __version__ = '0.6.1' 8 | __all__ = ['context', 'enable', 'disable', 'SysCallPattern'] 9 | 10 | 11 | try: 12 | import Queue as queue 13 | except ImportError: 14 | import queue 15 | import multiprocessing 16 | import threading 17 | 18 | from . import _ptracer 19 | from ._ptracer import PtracerError # NOQA 20 | from ._syscall import SysCallPattern # NOQA 21 | from . import _lltraceback 22 | 23 | 24 | class TracingContext(object): 25 | def __init__(self): 26 | self.enabled = False 27 | 28 | def enable(self, handler_cb, filter=None): 29 | if self.enabled: 30 | raise RuntimeError('tracing context is already enabled') 31 | 32 | self.enabled = True 33 | self.thread_stop_event = threading.Event() 34 | 35 | debugger_start_event = threading.Event() 36 | 37 | # Debugger error queue. 38 | self.error_queue = multiprocessing.Queue() 39 | 40 | if isinstance(filter, SysCallPattern): 41 | filter = [filter] 42 | 43 | self.ptrace_thread = threading.Thread( 44 | target=_ptracer._tracing_thread, 45 | args=(handler_cb, self.thread_stop_event, debugger_start_event, 46 | {_lltraceback.gettid(): threading.current_thread().ident}, 47 | filter, self.error_queue)) 48 | 49 | self.ptrace_thread.start() 50 | 51 | # Wait for debugger to start 52 | if not debugger_start_event.wait(1): 53 | try: 54 | self.disable() 55 | except Exception: 56 | raise 57 | else: 58 | raise PtracerError('Unhandled exception in ptrace process') 59 | else: 60 | try: 61 | # Perform a magic syscall to enable 62 | # syscall callback invocation. 63 | open(b'\x01\x02\x03', 'r') 64 | except IOError: 65 | pass 66 | 67 | def disable(self): 68 | if not self.enabled: 69 | return 70 | 71 | try: 72 | # Notify the debugger we're not tracing anymore. 
73 | open(b'\x03\x02\x01', 'r') 74 | except IOError: 75 | pass 76 | 77 | self.enabled = False 78 | self.thread_stop_event.set() 79 | self.thread_stop_event = None 80 | self.ptrace_thread.join() 81 | self.ptrace_thread = None 82 | 83 | try: 84 | error = self.error_queue.get_nowait() 85 | except queue.Empty: 86 | error = None 87 | 88 | self.error_queue.close() 89 | self.error_queue = None 90 | 91 | if error is not None: 92 | raise error 93 | 94 | 95 | class context(object): 96 | """Tracing context manager.""" 97 | 98 | def __init__(self, handler_cb, filter=None): 99 | self.handler_cb = handler_cb 100 | self.filter = filter 101 | 102 | def __enter__(self): 103 | _context.enable(self.handler_cb, filter=self.filter) 104 | 105 | def __exit__(self, exc_type, exc_value, exc_tb): 106 | _context.disable() 107 | 108 | 109 | _context = TracingContext() 110 | enable = _context.enable 111 | disable = _context.disable 112 | -------------------------------------------------------------------------------- /ptracer/_lltraceback.c: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2017-present Pinterest Inc. 
2 | * 3 | * This module is part of ptracer and is released under 4 | * the Apache 2.0 License: http://www.apache.org/licenses/LICENSE-2.0 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #ifdef __linux__ 15 | #include 16 | #endif 17 | #ifdef __APPLE__ 18 | #include 19 | #endif 20 | #ifdef __DragonFly__ 21 | #include 22 | #endif 23 | #ifdef __FreeBSD__ 24 | #include 25 | #endif 26 | #ifdef __NetBSD__ 27 | #include 28 | #endif 29 | 30 | #include 31 | 32 | #include "Python.h" 33 | #include "frameobject.h" 34 | 35 | 36 | #if PY_MAJOR_VERSION >= 3 37 | # define PYSTRING_CHECK PyUnicode_Check 38 | #else 39 | # define PYSTRING_CHECK PyString_Check 40 | #endif 41 | 42 | // bpo-42262 added Py_XNewRef() to Python 3.10.0a3 43 | #if PY_VERSION_HEX < 0x030A00A3 && !defined(Py_XNewRef) 44 | static inline PyObject* _Py_XNewRef(PyObject *obj) 45 | { 46 | Py_XINCREF(obj); 47 | return obj; 48 | } 49 | #define Py_XNewRef(obj) _Py_XNewRef((PyObject*)(obj)) 50 | #endif 51 | 52 | // bpo-40429 added PyThreadState_GetFrame() to Python 3.9.0b1 53 | #if PY_VERSION_HEX < 0x030900B1 54 | static inline PyFrameObject* 55 | PyThreadState_GetFrame(PyThreadState *tstate) 56 | { 57 | assert(tstate != NULL); 58 | return (PyFrameObject *)Py_XNewRef(tstate->frame); 59 | } 60 | #endif 61 | 62 | 63 | static long 64 | _portable_gettid(void) 65 | { 66 | long tid = -1; 67 | 68 | #if defined(__linux__) 69 | tid = syscall(__NR_gettid); 70 | #elif defined (__APPLE__) 71 | tid = mach_thread_self(); 72 | // On Mach thread_t is a refcounted resource (a "send right"), 73 | // so we need to "release" it. 
74 | mach_port_deallocate(mach_task_self(), tid); 75 | #elif defined (__DragonFly__) 76 | tid = lwp_gettid(); 77 | #elif defined (__FreeBSD__) 78 | thr_self(&tid); 79 | #elif defined (__NetBSD__) 80 | tid = _lwp_self(); 81 | #elif defined (__OpenBSD__) 82 | tid = getthrid(); 83 | #else 84 | errno = ENOSYS; 85 | #endif 86 | 87 | return tid; 88 | } 89 | 90 | 91 | static PyObject* 92 | lltraceback_gettid(PyObject *self) 93 | { 94 | return PyLong_FromLong(_portable_gettid()); 95 | } 96 | 97 | 98 | struct lltraceback_thread_map_entry { 99 | long kernel_tid; 100 | long python_tid; 101 | }; 102 | 103 | 104 | struct lltraceback_thread_map { 105 | size_t count; 106 | size_t capacity; 107 | struct lltraceback_thread_map_entry *entries; 108 | }; 109 | 110 | 111 | struct lltraceback_state { 112 | int enabled; 113 | int ctlreadfd; 114 | int ctlwritefd; 115 | int inputfd; 116 | int outputfd; 117 | PyInterpreterState *interp; 118 | pthread_t thread_id; 119 | struct lltraceback_thread_map thread_map; 120 | }; 121 | 122 | 123 | static struct lltraceback_state _state; 124 | 125 | 126 | static struct lltraceback_thread_map_entry * 127 | lltraceback_thread_map_find(struct lltraceback_thread_map *map, 128 | long kernel_tid) 129 | { 130 | size_t i; 131 | for (i = 0; i < map->count; i++) { 132 | if (map->entries[i].kernel_tid == kernel_tid) { 133 | return map->entries + i; 134 | } 135 | } 136 | 137 | return NULL; 138 | } 139 | 140 | static long 141 | lltraceback_thread_map_get(struct lltraceback_thread_map *map, long kernel_tid) 142 | { 143 | struct lltraceback_thread_map_entry *entry; 144 | 145 | entry = lltraceback_thread_map_find(map, kernel_tid); 146 | if (entry != NULL) { 147 | return entry->python_tid; 148 | } else { 149 | return 0L; 150 | } 151 | } 152 | 153 | static int 154 | lltraceback_thread_map_insert(struct lltraceback_thread_map *map, 155 | long kernel_tid, long python_tid) 156 | { 157 | if (map->count == map->capacity) { 158 | struct lltraceback_thread_map_entry *new_map; 
159 | size_t count = map->capacity + 100; 160 | size_t sz = sizeof(struct lltraceback_thread_map_entry) * count; 161 | if (map->entries == NULL) { 162 | new_map = PyMem_Malloc(sz); 163 | } else { 164 | new_map = PyMem_Realloc(map->entries, sz); 165 | } 166 | if (new_map == NULL) { 167 | return -1; 168 | } 169 | map->entries = new_map; 170 | map->capacity = count; 171 | } 172 | 173 | map->entries[map->count].kernel_tid = kernel_tid; 174 | map->entries[map->count].python_tid = python_tid; 175 | map->count += 1; 176 | 177 | return 0; 178 | } 179 | 180 | static int 181 | lltraceback_thread_map_set(struct lltraceback_thread_map *map, 182 | long kernel_tid, long python_tid) 183 | { 184 | struct lltraceback_thread_map_entry *ex; 185 | 186 | ex = lltraceback_thread_map_find(map, kernel_tid); 187 | if (ex == NULL) { 188 | return lltraceback_thread_map_insert(map, kernel_tid, python_tid); 189 | } else { 190 | ex->python_tid = python_tid; 191 | return 0; 192 | } 193 | } 194 | 195 | 196 | static Py_ssize_t 197 | _read(int fd, char *buf, size_t len) 198 | { 199 | Py_ssize_t res; 200 | 201 | do { 202 | res = read(fd, buf, len); 203 | } while (res < 0 && errno == EINTR); 204 | 205 | return res; 206 | } 207 | 208 | 209 | static Py_ssize_t 210 | _write(int fd, const char *buf, size_t count) 211 | { 212 | Py_ssize_t res; 213 | 214 | do { 215 | res = write(fd, buf, count); 216 | } while (res < 0 && errno == EINTR); 217 | 218 | return res; 219 | } 220 | 221 | 222 | static void * 223 | _fatal_error(const char *msg, int err) 224 | { 225 | fprintf(stderr, "fatal error in lltraceback utility thread: %s (errno: %d)", 226 | msg, err); 227 | return NULL; 228 | } 229 | 230 | 231 | static ssize_t 232 | _write_int32(int fd, int32_t i) 233 | { 234 | uint32_t n = htonl((uint32_t)i); 235 | 236 | return _write(fd, (char *)&n, 4); 237 | } 238 | #define write_int32(fd, i) _write_int32((fd), (i)); 239 | 240 | static ssize_t 241 | _write_string(int fd, PyObject *text) 242 | { 243 | Py_ssize_t size; 244 
| #if PY_MAJOR_VERSION >= 3 245 | const char *s; 246 | s = PyUnicode_AsUTF8AndSize(text, &size); 247 | #else 248 | char *s; 249 | PyString_AsStringAndSize(text, &s, &size); 250 | #endif 251 | write_int32(fd, (int32_t)size); 252 | return _write(fd, s, (size_t)size); 253 | } 254 | #define write_string(fd, s) _write_string((fd), (s)); 255 | 256 | 257 | static ssize_t 258 | _write_cstring(int fd, const char *s, Py_ssize_t len) 259 | { 260 | write_int32(fd, (int32_t)len); 261 | return _write(fd, s, (size_t)len); 262 | } 263 | #define write_cstring(fd, s, l) _write_cstring((fd), (s), (l)); 264 | 265 | 266 | static int 267 | dump_frame(int fd, PyFrameObject *frame) 268 | { 269 | PyCodeObject *code; 270 | int lineno; 271 | int v, len; 272 | char lineno_str[11]; // ULONG_MAX is 10 chars long in base 10. 273 | char *lineno_ptr = &lineno_str[10]; 274 | 275 | lineno_str[10] = 0; 276 | 277 | code = frame->f_code; 278 | if (code == NULL || code->co_filename == NULL || 279 | !PYSTRING_CHECK(code->co_filename)) 280 | { 281 | write_cstring(fd, "", 0); 282 | } else { 283 | write_string(fd, code->co_filename); 284 | } 285 | 286 | #if (PY_MAJOR_VERSION <= 2 && PY_MINOR_VERSION < 7) \ 287 | || (PY_MAJOR_VERSION >= 3 && PY_MINOR_VERSION < 2) 288 | lineno = PyCode_Addr2Line(code, frame->f_lasti); 289 | #else 290 | lineno = PyFrame_GetLineNumber(frame); 291 | #endif 292 | 293 | len = 0; 294 | v = lineno; 295 | do { 296 | *lineno_ptr = (char)('0' + (v % 10)); 297 | lineno_ptr -= 1; 298 | v /= 10; 299 | len += 1; 300 | } while (v); 301 | 302 | write_cstring(fd, lineno_ptr + 1, len); 303 | 304 | if (code == NULL || code->co_name == NULL || 305 | !PYSTRING_CHECK(code->co_name)) 306 | { 307 | write_cstring(fd, "", 0); 308 | } else { 309 | write_string(fd, code->co_name); 310 | } 311 | 312 | return 0; 313 | } 314 | 315 | static int 316 | dump_traceback(int fd, PyThreadState *tstate) 317 | { 318 | PyFrameObject *frame; 319 | PyFrameObject *top_frame = NULL; 320 | int depth; 321 | 322 | if 
(tstate != NULL) { 323 | top_frame = PyThreadState_GetFrame(tstate); 324 | } 325 | 326 | if (top_frame == NULL) { 327 | write_int32(fd, 0); 328 | return 0; 329 | } 330 | 331 | depth = 0; 332 | frame = top_frame; 333 | while (frame != NULL) { 334 | if (!PyFrame_Check(frame)) 335 | break; 336 | frame = frame->f_back; 337 | depth++; 338 | } 339 | 340 | write_int32(fd, depth); 341 | write_int32(fd, 3); 342 | 343 | frame = top_frame; 344 | while (frame != NULL) { 345 | if (!PyFrame_Check(frame)) 346 | break; 347 | if (dump_frame(fd, frame) < 0) { 348 | return -1; 349 | } 350 | frame = frame->f_back; 351 | } 352 | 353 | return 0; 354 | } 355 | 356 | 357 | static void * 358 | lltraceback_thread(void *arg) 359 | { 360 | struct lltraceback_state *state = arg; 361 | int status; 362 | fd_set rfds; 363 | char buf[8]; 364 | ssize_t read, total_read; 365 | int64_t _tid; 366 | long _thread_id; 367 | int nfds; 368 | PyThreadState *tstate; 369 | 370 | FD_ZERO(&rfds); 371 | read = 0; 372 | total_read = 0; 373 | 374 | if (state->inputfd > state->ctlreadfd) { 375 | nfds = state->inputfd + 1; 376 | } else { 377 | nfds = state->ctlreadfd + 1; 378 | } 379 | 380 | for (;;) { 381 | FD_SET(state->inputfd, &rfds); 382 | FD_SET(state->ctlreadfd, &rfds); 383 | 384 | status = select(nfds, &rfds, NULL, NULL, NULL); 385 | if (status < 0) { 386 | return _fatal_error("while selecting from input pipe", (errno)); 387 | } 388 | 389 | if (FD_ISSET(state->ctlreadfd, &rfds)) { 390 | break; 391 | } 392 | 393 | read = _read(state->inputfd, buf + total_read, 394 | (size_t)(8 - total_read)); 395 | if (read < 0) { 396 | if (errno == EAGAIN) { 397 | continue; 398 | } else { 399 | return _fatal_error("while reading from input pipe", (errno)); 400 | } 401 | } 402 | 403 | total_read += read; 404 | if (total_read < 8) { 405 | continue; 406 | } else { 407 | total_read = 0; 408 | } 409 | 410 | _tid = ((int64_t)htonl(*(uint32_t *)buf) << 32) 411 | | htonl(*(uint32_t *)(buf + 4)); 412 | 413 | _thread_id = 
lltraceback_thread_map_get(&state->thread_map, _tid); 414 | if (_thread_id == 0L) { 415 | if (dump_traceback(state->outputfd, NULL) < 0) { 416 | break; 417 | } 418 | } else { 419 | tstate = PyInterpreterState_ThreadHead(state->interp); 420 | // Find the requested thread state. 421 | while (tstate != NULL && (long)tstate->thread_id != _thread_id) { 422 | tstate = PyThreadState_Next(tstate); 423 | } 424 | if (dump_traceback(state->outputfd, tstate) < 0) { 425 | break; 426 | } 427 | } 428 | } 429 | 430 | return NULL; 431 | } 432 | 433 | 434 | static PyObject* 435 | _new_thread_hook(PyObject *self, PyObject *args) 436 | { 437 | long _tid; 438 | int res; 439 | int err = 0; 440 | PyThreadState *tstate; 441 | PyObject *pyres = NULL; 442 | PyObject *sys = NULL; 443 | PyObject *sys_settrace = NULL; 444 | PyObject *frame = NULL; 445 | PyObject *event = NULL; 446 | PyObject *arg = NULL; 447 | 448 | if (!PyArg_ParseTuple(args, 449 | "OOO:_new_thread_hook", &frame, &event, &arg)) 450 | { 451 | goto error; 452 | } 453 | 454 | sys = PyImport_ImportModule("sys"); 455 | if (sys == NULL) { 456 | goto error; 457 | } 458 | 459 | sys_settrace = PyObject_GetAttrString(sys, "settrace"); 460 | if (sys_settrace == NULL) { 461 | goto error; 462 | } 463 | 464 | Py_INCREF(Py_None); 465 | pyres = PyObject_CallFunctionObjArgs(sys_settrace, Py_None, NULL); 466 | Py_DECREF(Py_None); 467 | if (pyres == NULL) { 468 | goto error; 469 | } 470 | 471 | _tid = _portable_gettid(); 472 | if (_tid < 0) { 473 | PyErr_SetFromErrno(PyExc_OSError); 474 | goto error; 475 | } 476 | 477 | tstate = PyThreadState_Get(); 478 | 479 | res = lltraceback_thread_map_set( 480 | &_state.thread_map, _tid, (long)tstate->thread_id); 481 | if (res == -1) { 482 | goto error; 483 | } 484 | 485 | goto finally; 486 | 487 | error: 488 | err = 1; 489 | 490 | finally: 491 | Py_XDECREF(pyres); 492 | Py_XDECREF(sys); 493 | Py_XDECREF(sys_settrace); 494 | 495 | if (err == 1) { 496 | return NULL; 497 | } else { 498 | Py_RETURN_NONE; 499 
| } 500 | } 501 | 502 | 503 | static PyObject* 504 | lltraceback_start_thread(PyObject *self, PyObject *args, PyObject *kwargs) 505 | { 506 | static char *kwlist[] = {"inputfd", "outputfd", "thread_map", NULL}; 507 | int inputfd, outputfd, status, err = 0; 508 | int controlfd[2]; 509 | PyThreadState *tstate = PyThreadState_Get(); 510 | PyObject *thread_map = NULL; 511 | PyObject *threading = NULL; 512 | PyObject *threading_settrace = NULL; 513 | PyObject *new_thread_hook_cb = NULL; 514 | PyObject *res = NULL; 515 | 516 | static PyMethodDef new_thread_hook_def = { 517 | "_new_thread_hook", (PyCFunction)_new_thread_hook, METH_VARARGS 518 | }; 519 | 520 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, 521 | "ii|O:start_thread", kwlist, &inputfd, &outputfd, &thread_map)) 522 | { 523 | return NULL; 524 | } 525 | 526 | if (pipe(controlfd) != 0) { 527 | PyErr_SetFromErrno(PyExc_OSError); 528 | goto error; 529 | } 530 | 531 | if (fcntl(controlfd[0], F_SETFL, O_NONBLOCK) != 0) { 532 | PyErr_SetFromErrno(PyExc_OSError); 533 | goto error; 534 | } 535 | 536 | if (fcntl(inputfd, F_SETFL, O_NONBLOCK) != 0) { 537 | PyErr_SetFromErrno(PyExc_OSError); 538 | goto error; 539 | } 540 | 541 | _state.ctlreadfd = controlfd[0]; 542 | _state.ctlwritefd = controlfd[1]; 543 | _state.inputfd = inputfd; 544 | _state.outputfd = outputfd; 545 | _state.interp = tstate->interp; 546 | 547 | if (thread_map != NULL) { 548 | PyObject *key, *value; 549 | long kernel_tid, python_tid; 550 | Py_ssize_t pos = 0; 551 | 552 | if (!PyDict_Check(thread_map)) { 553 | PyErr_SetString(PyExc_ValueError, "thread_map must be a dict"); 554 | goto error; 555 | } 556 | 557 | while (PyDict_Next(thread_map, &pos, &key, &value)) { 558 | kernel_tid = PyLong_AsLong(key); 559 | python_tid = PyLong_AsLong(value); 560 | 561 | lltraceback_thread_map_set( 562 | &_state.thread_map, kernel_tid, python_tid); 563 | } 564 | } 565 | 566 | lltraceback_thread_map_set( 567 | &_state.thread_map, _portable_gettid(), 
(long)tstate->thread_id); 568 | 569 | status = pthread_create(&_state.thread_id, NULL, 570 | lltraceback_thread, &_state); 571 | if (status != 0) { 572 | PyErr_SetFromErrno(PyExc_OSError); 573 | goto error; 574 | } 575 | 576 | threading = PyImport_ImportModule("threading"); 577 | if (threading == NULL) { 578 | goto error; 579 | } 580 | 581 | threading_settrace = PyObject_GetAttrString(threading, "settrace"); 582 | if (threading_settrace == NULL) { 583 | goto error; 584 | } 585 | 586 | new_thread_hook_cb = PyCFunction_New(&new_thread_hook_def, self); 587 | if (new_thread_hook_cb == NULL) { 588 | goto error; 589 | } 590 | 591 | res = PyObject_CallFunctionObjArgs( 592 | threading_settrace, new_thread_hook_cb, NULL); 593 | if (res == NULL) { 594 | goto error; 595 | } 596 | Py_DECREF(res); 597 | 598 | goto finally; 599 | 600 | error: 601 | memset(&_state, 0, sizeof(struct lltraceback_state)); 602 | err = 1; 603 | 604 | finally: 605 | Py_XDECREF(threading_settrace); 606 | Py_XDECREF(threading); 607 | Py_XDECREF(thread_map); 608 | Py_XDECREF(new_thread_hook_cb); 609 | 610 | if (err) { 611 | return NULL; 612 | } else { 613 | Py_RETURN_NONE; 614 | } 615 | } 616 | 617 | 618 | static PyObject* 619 | lltraceback_stop_thread(PyObject *self) 620 | { 621 | void *retval; 622 | 623 | if (_write(_state.ctlwritefd, "\x01", 1) < 0) { 624 | PyErr_SetFromErrno(PyExc_OSError); 625 | return NULL; 626 | } 627 | 628 | if (pthread_join(_state.thread_id, &retval) != 0) { 629 | PyErr_SetFromErrno(PyExc_OSError); 630 | return NULL; 631 | } 632 | 633 | close(_state.ctlreadfd); 634 | close(_state.ctlwritefd); 635 | 636 | Py_RETURN_NONE; 637 | } 638 | 639 | 640 | PyDoc_STRVAR(module_doc, 641 | "low-level traceback helper"); 642 | 643 | static PyMethodDef module_methods[] = { 644 | {"start_thread", 645 | (PyCFunction)lltraceback_start_thread, METH_VARARGS | METH_KEYWORDS, 646 | PyDoc_STR("start lltraceback thread")}, 647 | {"stop_thread", 648 | (PyCFunction)lltraceback_stop_thread, METH_NOARGS, 649 
| PyDoc_STR("stop lltraceback thread")}, 650 | {"gettid", 651 | (PyCFunction)lltraceback_gettid, METH_NOARGS, 652 | PyDoc_STR("return kernel thread identifier for the current thread")}, 653 | {NULL, NULL} /* sentinel */ 654 | }; 655 | 656 | #if PY_MAJOR_VERSION >= 3 657 | static struct PyModuleDef module_def = { 658 | PyModuleDef_HEAD_INIT, 659 | "_lltraceback", 660 | module_doc, 661 | 0, /* non negative size to be able to unload the module */ 662 | module_methods, 663 | NULL, 664 | NULL, 665 | NULL, 666 | NULL 667 | }; 668 | #endif 669 | 670 | 671 | PyMODINIT_FUNC 672 | #if PY_MAJOR_VERSION >= 3 673 | PyInit__lltraceback(void) 674 | #else 675 | init_lltraceback(void) 676 | #endif 677 | { 678 | PyObject *m; 679 | 680 | #if PY_MAJOR_VERSION >= 3 681 | m = PyModule_Create(&module_def); 682 | #else 683 | m = Py_InitModule3("_lltraceback", module_methods, module_doc); 684 | #endif 685 | if (m == NULL) { 686 | #if PY_MAJOR_VERSION >= 3 687 | return NULL; 688 | #else 689 | return; 690 | #endif 691 | } 692 | 693 | #if PY_MAJOR_VERSION >= 3 694 | return m; 695 | #else 696 | return; 697 | #endif 698 | } 699 | -------------------------------------------------------------------------------- /ptracer/_ptracer.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017-present Pinterest Inc. 
2 | # 3 | # This module is part of ptracer and is released under 4 | # the Apache 2.0 License: http://www.apache.org/licenses/LICENSE-2.0 5 | 6 | 7 | from __future__ import print_function 8 | 9 | import errno 10 | import fcntl 11 | import linecache 12 | import logging 13 | import multiprocessing 14 | import os 15 | try: 16 | import Queue as queue 17 | except ImportError: 18 | import queue 19 | import select 20 | import signal 21 | import struct 22 | import threading 23 | import time 24 | import traceback 25 | 26 | from ptracer import _lltraceback 27 | from ptracer import ptrace 28 | 29 | 30 | logger = logging.getLogger('ptracer') 31 | 32 | 33 | class PtracerError(Exception): 34 | def __init__(self, msg, orig_exc=None): 35 | super(PtracerError, self).__init__(msg) 36 | self.orig_exc = orig_exc 37 | 38 | def __str__(self): 39 | if self.orig_exc is not None: 40 | return '{}\n{}'.format(self.args[0], self.orig_exc) 41 | else: 42 | return self.args[0] 43 | 44 | 45 | # The tracing thread is run parallel to the traced thread, 46 | # the syscall callback will be called in this thread. 47 | def _tracing_thread(handler_cb, thread_stop_event, debugger_start_event, 48 | thread_map, syscall_filter, error_queue): 49 | 50 | # The main syscall queue. 51 | syscall_queue = multiprocessing.Queue() 52 | 53 | # Pipes for communication with lltraceback helper. 54 | stack_response_read, stack_response_write = os.pipe() 55 | stack_request_read, stack_request_write = os.pipe() 56 | 57 | # Debugger process start request event. 58 | dbgproc_start = multiprocessing.Event() 59 | # Debugger process stop request event. 60 | dbgproc_stop = multiprocessing.Event() 61 | # Debugger process started response event. 62 | dbgproc_started = multiprocessing.Event() 63 | 64 | # The actual tracing is done by a subprocess. 65 | # It is necessary because of the GIL, as a ptrace-stopped thread 66 | # holding the GIL would block the tracer thread as well. 
67 | ptrace_process = multiprocessing.Process( 68 | target=_tracing_process, 69 | args=(os.getpid(), dbgproc_started, dbgproc_start, dbgproc_stop, 70 | stack_request_write, stack_response_read, 71 | syscall_queue, syscall_filter, error_queue)) 72 | 73 | ptrace_process.start() 74 | 75 | if hasattr(ptrace, 'set_ptracer'): 76 | # On systems with Yama LSM enabled in mode 1 (e.g. Ubuntu Trusty), 77 | # PTRACE_ATTACH will fail with EPERM unless we call PR_SET_PTRACER 78 | # with the PID of the tracing process. 79 | try: 80 | ptrace.set_ptracer(ptrace_process.pid) 81 | except OSError as e: 82 | pass 83 | 84 | dbgproc_start.set() 85 | 86 | # The lltraceback thread is a low-level GIL-independent thread 87 | # that is used to dump the current call stack in a given Python thread. 88 | _lltraceback.start_thread( 89 | stack_request_read, stack_response_write, thread_map) 90 | 91 | try: 92 | # Wait for debugger to start 93 | if not dbgproc_started.wait(1): 94 | # If the debugger has not started in 1 second, assume 95 | # it died and bail out. 96 | return 97 | 98 | # Notify the main thread that we're ready. 99 | debugger_start_event.set() 100 | 101 | while True: 102 | if thread_stop_event.is_set() or not ptrace_process.is_alive(): 103 | # The tracing context has exited, stop the debugger. 104 | dbgproc_stop.set() 105 | 106 | while True: 107 | # Drain the syscall queue. 
108 | try: 109 | syscall = syscall_queue.get(timeout=0.1) 110 | except queue.Empty: 111 | break 112 | else: 113 | handler_cb(syscall) 114 | break 115 | 116 | try: 117 | event = syscall_queue.get_nowait() 118 | except queue.Empty: 119 | time.sleep(0.05) 120 | continue 121 | 122 | try: 123 | handler_cb(event) 124 | except Exception as e: 125 | logger.exception('EXCEPTION IN SYSCALL CALLBACK') 126 | finally: 127 | dbgproc_stop.set() 128 | _lltraceback.stop_thread() 129 | ptrace_process.join(1) 130 | if ptrace_process.exitcode is None: 131 | ptrace_process.terminate() 132 | ptrace_process.join(1) 133 | if ptrace_process.exitcode is None: 134 | os.kill(ptrace_process.pid, signal.SIGKILL) 135 | 136 | os.close(stack_response_read) 137 | os.close(stack_response_write) 138 | os.close(stack_request_read) 139 | os.close(stack_request_write) 140 | 141 | 142 | def _tracing_process(pid, dbgproc_started, dbgproc_start, dbgproc_stop, 143 | stack_request_pipe, stack_response_pipe, 144 | syscall_queue, syscall_filter, error_queue): 145 | # The tracing process consists of two threads: 146 | # the first reads the call stacks from the tracee, and the second 147 | # does the actual tracing. 148 | 149 | if not dbgproc_start.wait(1): 150 | # The parent failed to continue the startup. 
151 | err = PtracerError('Debugger startup handshake failed') 152 | error_queue.put_nowait(err) 153 | return 154 | 155 | stack_queue = queue.Queue() 156 | 157 | dbgthread_stop = threading.Event() 158 | 159 | debugger_thread = threading.Thread( 160 | target=_debugger_thread, 161 | args=(pid, dbgproc_started, dbgthread_stop, 162 | stack_request_pipe, stack_queue, 163 | syscall_queue, syscall_filter, error_queue), 164 | name='pytracer-debugger') 165 | 166 | debugger_thread.daemon = True 167 | debugger_thread.start() 168 | 169 | try: 170 | _read_callstacks(stack_response_pipe, stack_queue, debugger_thread, 171 | dbgproc_stop) 172 | 173 | except Exception: 174 | logger.debug('Unhandled exception in ptrace process', exc_info=True) 175 | err = PtracerError('Unhandled exception in ptrace process', 176 | orig_exc=traceback.format_exc()) 177 | error_queue.put_nowait(err) 178 | 179 | finally: 180 | syscall_queue.close() 181 | 182 | if debugger_thread.is_alive(): 183 | # Unblock the debugger if it is waiting on the stack queue 184 | stack_queue.put_nowait(None) 185 | dbgthread_stop.set() 186 | debugger_thread.join() 187 | 188 | 189 | def _read_callstacks(stack_response_pipe, stack_queue, debugger_thread, 190 | dbgproc_stop): 191 | buf = b'' 192 | stacklen = -1 193 | tuplesize = 0 194 | elemlen = -1 195 | required_len = 4 196 | stack = [] 197 | entry = [] 198 | 199 | fcntl.fcntl(stack_response_pipe, fcntl.F_SETFL, os.O_NONBLOCK) 200 | 201 | # The call stack format is as follows: 202 | # stack_length:uint32_t 203 | # entry_tuple_len:uint32_t 204 | # ( 205 | # (item_length:uint32_t 206 | # item_data:char[item_length]) * entry_tuple_len 207 | # ) * stack_length 208 | while True: 209 | ready, _, _ = select.select([stack_response_pipe], [], [], 1.0) 210 | 211 | if ready: 212 | try: 213 | buf += os.read(stack_response_pipe, 4096) 214 | except OSError as e: 215 | if e.errno == errno.EAGAIN: 216 | pass 217 | else: 218 | raise 219 | else: 220 | while len(buf) >= required_len: 221 | if 
stacklen == -1: 222 | stacklen = struct.unpack('!i', buf[:4])[0] 223 | buf = buf[4:] 224 | if stacklen == 0: 225 | # No stack could be extracted. 226 | stack_queue.put_nowait([]) 227 | stacklen = -1 228 | 229 | elif tuplesize == 0: 230 | tuplesize = struct.unpack('!i', buf[:4])[0] 231 | buf = buf[4:] 232 | 233 | elif elemlen == -1: 234 | elemlen = struct.unpack('!i', buf[:4])[0] 235 | buf = buf[4:] 236 | required_len = elemlen 237 | 238 | else: 239 | elem = buf[:elemlen] 240 | buf = buf[elemlen:] 241 | 242 | if len(entry) == 1: 243 | elem = int(elem) 244 | else: 245 | elem = elem.decode('utf-8') 246 | entry.append(elem) 247 | if len(entry) == tuplesize: 248 | if tuplesize == 3: 249 | entry.append( 250 | linecache.getline(entry[0], entry[1])) 251 | 252 | stack.append(tuple(entry)) 253 | entry = [] 254 | if len(stack) == stacklen: 255 | stack_queue.put_nowait(list(reversed(stack))) 256 | stack = [] 257 | stacklen = -1 258 | tuplesize = 0 259 | 260 | elemlen = -1 261 | required_len = 4 262 | 263 | if not debugger_thread.is_alive(): 264 | # The debugger thread has stopped. 265 | break 266 | 267 | if dbgproc_stop.is_set(): 268 | # We were asked to stop by the traced process. 
269 | break 270 | 271 | 272 | def _debugger_thread(main_pid, dbgproc_started, dbgthread_stop, 273 | stack_request_pipe, stack_queue, 274 | syscall_queue, syscall_filter, error_queue): 275 | try: 276 | _debugger_thread_inner(main_pid, dbgproc_started, dbgthread_stop, 277 | stack_request_pipe, stack_queue, syscall_queue, 278 | syscall_filter) 279 | except Exception: 280 | logger.debug('Unhandled exception in ptrace process', exc_info=True) 281 | err = PtracerError('Unhandled exception in ptrace process', 282 | orig_exc=traceback.format_exc()) 283 | error_queue.put_nowait(err) 284 | 285 | 286 | def _debugger_thread_inner(main_pid, dbgproc_started, dbgthread_stop, 287 | stack_request_pipe, stack_queue, 288 | syscall_queue, syscall_filter): 289 | ptrace_options = ptrace.PTRACE_O_TRACECLONE 290 | # Attach to the tracee and wait for it to stop. 291 | ptrace.attach_and_wait(main_pid, ptrace_options) 292 | 293 | if syscall_filter is not None: 294 | filter_ = lambda sc: any(m.match(sc) for m in syscall_filter) 295 | else: 296 | filter_ = None 297 | 298 | syscall_trap = signal.SIGTRAP | 0x80 299 | enabled = False 300 | signum = 0 301 | syscall_state = {} 302 | sigstop_received = set() 303 | 304 | processes = {main_pid} 305 | mem_fds = {} 306 | mem_fds[main_pid] = _open_procmem(main_pid) 307 | 308 | # Notify the parent that we are ready to start tracing. 309 | dbgproc_started.set() 310 | 311 | try: 312 | # Restart the tracee and enter the tracing loop. 313 | ptrace.syscall(main_pid) 314 | 315 | while True: 316 | if dbgthread_stop.is_set(): 317 | break 318 | 319 | pid, status = ptrace.wait(-1) 320 | 321 | if os.WIFEXITED(status) or os.WIFSIGNALED(status): 322 | # Traced thread has died. 
323 | processes.discard(pid) 324 | mem_fd = mem_fds.get(pid) 325 | if mem_fd is not None: 326 | try: 327 | os.close(mem_fd) 328 | except IOError: 329 | pass 330 | if not processes: 331 | break 332 | else: 333 | continue 334 | 335 | elif os.WIFSTOPPED(status): 336 | ptrace_event = ptrace.WPTRACEEVENT(status) 337 | if ptrace_event == ptrace.PTRACE_EVENT_CLONE: 338 | # A new thread has been created. 339 | new_pid = ptrace.geteventmsg(pid) 340 | # See the comment below for the explanation of this check. 341 | if new_pid not in sigstop_received: 342 | ptrace.wait_for_trace_stop(new_pid) 343 | try: 344 | ptrace.syscall(new_pid) 345 | except OSError as e: 346 | if e.errno != errno.ESRCH: 347 | # The new thread might have already died. 348 | raise 349 | else: 350 | sigstop_received.discard(new_pid) 351 | 352 | mem_fds[new_pid] = _open_procmem(new_pid) 353 | 354 | processes.add(new_pid) 355 | ptrace.syscall(pid) 356 | continue 357 | 358 | stopsig = os.WSTOPSIG(status) 359 | if stopsig != syscall_trap: 360 | # Signal-delivery-stop. 361 | 362 | # The special condition below is for cases when we 363 | # receive a SIGSTOP for a newly created thread _before_ 364 | # receiving the PTRACE_EVENT_CLONE event for its parent. 365 | # In this case we must not forward the signal, but 366 | # must record its receipt so that once we _do_ receive 367 | # PTRACE_EVENT_CLONE for the parent, we don't wait for 368 | # SIGSTOP in the child again. 369 | if (stopsig != signal.SIGSTOP or 370 | pid in processes or 371 | all(syscall.name != 'clone' 372 | for syscall in syscall_state.values() 373 | if syscall is not None)): 374 | # forward the signal 375 | signum = stopsig 376 | else: 377 | sigstop_received.add(pid) 378 | else: 379 | # Syscall-stop. 380 | syscall = syscall_state.get(pid) 381 | regs = ptrace.getregs(pid) 382 | mem_fd = mem_fds.get(pid) 383 | 384 | if syscall is None: 385 | # Syscall-enter-stop. 
386 | syscall_state[pid] = ptrace.syscall_enter( 387 | pid, regs, mem_fd) 388 | else: 389 | # Syscall-exit-stop. 390 | ptrace.syscall_exit(syscall, regs, mem_fd) 391 | 392 | if enabled: 393 | # Stop tracing once the tracee executes 394 | # the magic open() in ptracer.disable(). 395 | stop_tracing = ( 396 | syscall.name == 'open' and 397 | syscall.args[0].value == b'\x03\x02\x01' 398 | ) or ( 399 | syscall.name == 'openat' and 400 | syscall.args[1].value == b'\x03\x02\x01' 401 | ) 402 | 403 | if stop_tracing: 404 | break 405 | elif filter_ is None or filter_(syscall): 406 | # Wait for the traceback to arrive. 407 | os.write(stack_request_pipe, 408 | struct.pack('!Q', pid)) 409 | stack = stack_queue.get() 410 | if stack is None: 411 | ptrace.cont(pid) 412 | break 413 | 414 | syscall.traceback = stack 415 | syscall_queue.put_nowait(syscall) 416 | 417 | elif not enabled: 418 | # Start tracing once the tracee executes 419 | # the magic open() in ptracer.enable(). 420 | start_tracing = ( 421 | syscall.name == 'open' and 422 | syscall.args[0].value == b'\x01\x02\x03' 423 | ) or ( 424 | syscall.name == 'openat' and 425 | syscall.args[1].value == b'\x01\x02\x03' 426 | ) 427 | 428 | if start_tracing: 429 | enabled = True 430 | 431 | syscall_state[pid] = None 432 | else: 433 | logger.error('unexpected status of traced process %s: %s', 434 | pid, status) 435 | 436 | # Continue until next syscall. 
def _open_procmem(pid):
    """Open ``/proc/<pid>/mem`` read-only and return its file descriptor.

    Returns ``None`` when the open is refused with ``EACCES``; any other
    failure to open the file is re-raised to the caller.
    """
    try:
        return os.open('/proc/{}/mem'.format(pid), os.O_RDONLY)
    except (OSError, IOError) as e:
        # os.open() reports failures as OSError (which is distinct from
        # IOError on Python 2).  The errno constant is spelled EACCES;
        # ``errno.EACCESS`` does not exist and raised AttributeError here.
        if e.errno != errno.EACCES:
            raise
        logger.debug('cannot access /proc/{}/mem'.format(pid),
                     exc_info=True)
        return None
def match(self, syscall):
    """Return True when *syscall* passes every registered comparator.

    ``self.matcher`` holds ``(getter, checker)`` pairs; for each pair the
    getter is applied to *syscall* and its result is handed to the
    checker.  An empty ``self.matcher`` matches everything.
    """
    return all(
        checker(getter(syscall))
        for getter, checker in self.matcher
    )
def WPTRACEEVENT(status):
    """Extract the ptrace event code from a ``waitpid()`` status word.

    Returns ``status >> 16`` when *status* describes a stop whose stop
    signal is exactly ``SIGTRAP``; returns 0 for any other status.
    """
    stopped_by_sigtrap = (
        os.WIFSTOPPED(status) and
        os.WSTOPSIG(status) == signal.SIGTRAP
    )
    return status >> 16 if stopped_by_sigtrap else 0
2 | # 3 | # This module is part of ptracer and is released under 4 | # the Apache 2.0 License: http://www.apache.org/licenses/LICENSE-2.0 5 | 6 | 7 | from ._gen_defs_linux_64 import * # noqa 8 | 9 | 10 | FUNCTION_REG = 'orig_rax' 11 | ARGS_REGS = ('rdi', 'rsi', 'rdx', 'r10', 'r8', 'r9') 12 | RETURN_REG = 'rax' 13 | -------------------------------------------------------------------------------- /ptracer/ptrace/_ptrace.c: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2017-present Pinterest Inc. 2 | * 3 | * This module is part of ptracer and is released under 4 | * the Apache 2.0 License: http://www.apache.org/licenses/LICENSE-2.0 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #ifdef __linux__ 12 | #include 13 | #endif 14 | 15 | #include "Python.h" 16 | 17 | 18 | #ifdef PR_SET_PTRACER 19 | static PyObject* 20 | _set_ptracer(PyObject *self, PyObject *args, PyObject *kwargs) 21 | { 22 | static char *kwlist[] = {"pid", NULL}; 23 | pid_t pid; 24 | int err = 0; 25 | 26 | if (!PyArg_ParseTupleAndKeywords( 27 | args, kwargs, "i:set_ptracer", kwlist, &pid)) 28 | { 29 | goto error; 30 | } 31 | 32 | if (prctl(PR_SET_PTRACER, pid, 0, 0, 0) < 0) { 33 | PyErr_SetFromErrno(PyExc_OSError); 34 | goto error; 35 | } 36 | 37 | goto finally; 38 | 39 | error: 40 | err = 1; 41 | 42 | finally: 43 | if (err) { 44 | return NULL; 45 | } else { 46 | Py_RETURN_NONE; 47 | } 48 | } 49 | #endif // PR_SET_PTRACER 50 | 51 | 52 | static PyObject* 53 | _ptrace(PyObject *self, PyObject *args, PyObject *kwargs) 54 | { 55 | static char *kwlist[] = {"request", "pid", "addr", "data", NULL}; 56 | unsigned int request; 57 | pid_t pid; 58 | void *addr; 59 | void *data; 60 | int err = 0; 61 | long ptrace_result; 62 | PyObject *result; 63 | 64 | #if UINTPTR_MAX == 0xffffffffffffffff 65 | static const char _ptrace_argfmt[] = "Ii|KK:ptrace"; 66 | #elif UINTPTR_MAX == 0xffffffff 67 | static const char _ptrace_argfmt[] = "Ii|kk:ptrace"; 68 | 
#endif 69 | 70 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, 71 | _ptrace_argfmt, kwlist, &request, &pid, &addr, &data)) 72 | { 73 | goto error; 74 | } 75 | 76 | errno = 0; 77 | ptrace_result = ptrace(request, pid, addr, data); 78 | if (ptrace_result == -1 && errno != 0) { 79 | PyErr_SetFromErrno(PyExc_OSError); 80 | goto error; 81 | } 82 | 83 | result = PyLong_FromLong(ptrace_result); 84 | if (result == NULL) { 85 | goto error; 86 | } 87 | 88 | goto finally; 89 | 90 | error: 91 | err = 1; 92 | 93 | finally: 94 | if (err) { 95 | return NULL; 96 | } else { 97 | return result; 98 | } 99 | } 100 | 101 | 102 | PyDoc_STRVAR(module_doc, 103 | "ptrace binding"); 104 | 105 | static PyMethodDef module_methods[] = { 106 | {"ptrace", 107 | (PyCFunction)_ptrace, METH_VARARGS | METH_KEYWORDS, 108 | PyDoc_STR("process trace")}, 109 | #ifdef PR_SET_PTRACER 110 | {"set_ptracer", 111 | (PyCFunction)_set_ptracer, METH_VARARGS | METH_KEYWORDS, 112 | PyDoc_STR("allow *pid* to trace the current process")}, 113 | #endif 114 | {NULL, NULL} /* sentinel */ 115 | }; 116 | 117 | #if PY_MAJOR_VERSION >= 3 118 | static struct PyModuleDef module_def = { 119 | PyModuleDef_HEAD_INIT, 120 | "_ptrace", 121 | module_doc, 122 | 0, /* non negative size to be able to unload the module */ 123 | module_methods, 124 | NULL, 125 | NULL, 126 | NULL, 127 | NULL 128 | }; 129 | #endif 130 | 131 | 132 | PyMODINIT_FUNC 133 | #if PY_MAJOR_VERSION >= 3 134 | PyInit__ptrace(void) 135 | #else 136 | init_ptrace(void) 137 | #endif 138 | { 139 | PyObject *m; 140 | 141 | #if PY_MAJOR_VERSION >= 3 142 | m = PyModule_Create(&module_def); 143 | #else 144 | m = Py_InitModule3("_ptrace", module_methods, module_doc); 145 | #endif 146 | if (m == NULL) { 147 | #if PY_MAJOR_VERSION >= 3 148 | return NULL; 149 | #else 150 | return; 151 | #endif 152 | } 153 | 154 | #if PY_MAJOR_VERSION >= 3 155 | return m; 156 | #else 157 | return; 158 | #endif 159 | } 160 | 
def ptrace_read(pid, address, bytecount):
    """Read *bytecount* bytes at *address* from process *pid* via ptrace.

    The range is covered with whole, word-aligned reads and the
    surrounding padding is trimmed afterwards.

    Returns a ``bytearray`` holding the requested bytes; an empty
    ``bytearray`` when *bytecount* is not positive.
    """
    if bytecount <= 0:
        return bytearray()

    # PTRACE_PEEK reads must be aligned on the word boundary, so start
    # at the aligned word containing ``address`` and keep reading until
    # the last requested byte has been covered.
    lpad = address % defs.WORD_SIZE
    first_word = address - lpad
    end = address + bytecount

    data = bytearray()
    for word_address in range(first_word, end, defs.WORD_SIZE):
        data += _ptrace_read_word(pid, word_address)

    # Drop the leading alignment padding and anything past the request.
    return data[lpad:lpad + bytecount]
def ptrace_read_c_string(pid, address, max_size=1024):
    """Read a NUL-terminated string at *address* from process *pid*.

    Memory is fetched one aligned word at a time until a NUL byte is
    found or *max_size* bytes have been collected.

    Returns the bytes before the terminator (the terminator itself is
    not included), never more than *max_size* bytes.
    """
    # PTRACE_PEEK reads must be aligned on the word boundary.
    lpad = address % defs.WORD_SIZE
    word_address = address - lpad

    data = bytearray()
    while len(data) < max_size:
        word = _ptrace_read_word(pid, word_address)
        if lpad:
            # Only the first word can start before ``address``.
            word = word[lpad:]
            lpad = 0

        nulpos = word.find(b'\x00')
        if nulpos != -1:
            data += word[:nulpos]
            break

        data += word
        word_address += defs.WORD_SIZE

    # The last word may have carried the buffer past the limit.
    return bytes(data[:max_size])
2 | # 3 | # This module is part of ptracer and is released under 4 | # the Apache 2.0 License: http://www.apache.org/licenses/LICENSE-2.0 5 | 6 | 7 | import ctypes 8 | import sys 9 | 10 | 11 | PLATFORM = None 12 | WORD_SIZE = ctypes.sizeof(ctypes.c_void_p) 13 | BITS = WORD_SIZE * 8 14 | 15 | if sys.platform.startswith('linux'): 16 | PLATFORM = 'linux' 17 | 18 | if PLATFORM is None or BITS != 64: 19 | raise RuntimeError('unsupported platform: {} ({} bit)'.format( 20 | sys.platform, BITS)) 21 | -------------------------------------------------------------------------------- /ptracer/ptrace/ptrace.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017-present Pinterest Inc. 2 | # 3 | # This module is part of ptracer and is released under 4 | # the Apache 2.0 License: http://www.apache.org/licenses/LICENSE-2.0 5 | 6 | 7 | import ctypes 8 | import errno 9 | import os 10 | import signal 11 | 12 | from .defs import * # noqa 13 | 14 | from . 
def getfpregs(pid):
    """Return the floating-point registers of tracee *pid*.

    The result is a ``defs.user_fpregs_struct`` filled in by ptrace.
    """
    regs = defs.user_fpregs_struct()
    # The floating-point set is fetched with PTRACE_GETFPREGS;
    # PTRACE_GETREGS would write the general-purpose registers into
    # this buffer instead.
    # NOTE(review): assumes the generated defs module exports
    # PTRACE_GETFPREGS alongside the other PTRACE_* constants - confirm.
    _ptrace(defs.PTRACE_GETFPREGS, pid, 0, ctypes.addressof(regs))
    return regs
def _wait_for_trace_stop(pid):
    """Wait until tracee *pid* has stopped with a stop signal.

    Returns immediately if the tracee is already stopped by one of the
    stop signals.  Otherwise waits on *pid* and requires the resulting
    status to be a stop caused by ``SIGSTOP``.

    Raises ``OSError`` if the tracee exited, was killed by a signal, or
    stopped with an unexpected signal or status.
    """
    try:
        # First, check if the tracee is already stopped.
        siginfo = getsiginfo(pid)
    except OSError as e:
        if e.errno == errno.ESRCH:
            # The tracee is still running, so we'll wait
            pass
        else:
            raise
    else:
        # Normally, PTRACE_ATTACH will send a SIGSTOP to the tracee,
        # which we will see here.  However, on some kernels the actual
        # signal may sometimes be SIGTRAP, and that seems to happen
        # when the previous tracer had died without calling PTRACE_DETACH
        # on this process first.  In this case, we need to restart the
        # process and wait for the real SIGSTOP.
        if siginfo.si_signo == signal.SIGTRAP:
            cont(pid, siginfo.si_signo)
        elif is_stop_signal(siginfo.si_signo):
            return
        else:
            raise OSError('traced process has stopped with an unexpected '
                          'signal {}'.format(siginfo.si_signo))

    pid, status = wait(pid)

    if os.WIFEXITED(status):
        raise OSError('traced process {} has exited with exit code {}'.format(
            pid, os.WEXITSTATUS(status)))

    elif os.WIFSIGNALED(status):
        # The message takes exactly two values; a third placeholder here
        # made str.format() raise IndexError instead of this OSError.
        raise OSError('traced process {} has been killed by '
                      'signal {}'.format(pid, os.WTERMSIG(status)))

    if not os.WIFSTOPPED(status):
        raise OSError('waitpid({}) returned an unexpected status {}'.format(
            pid, hex(status)))

    stopsig = os.WSTOPSIG(status)
    if stopsig != signal.SIGSTOP:
        raise OSError('waitpid({}) returned an unexpected status {}'.format(
            pid, hex(status)))
class CType(object):
    """Description of a C type used in a syscall signature."""

    __slots__ = ('names', 'ctype', 'ptr_indirection')

    def __init__(self, names, ctype, ptr_indirection):
        # names: the tokens of the C type name, joined with spaces in repr().
        self.names = names
        # ctype: the class used to decode raw values of this type.
        self.ctype = ctype
        # ptr_indirection: pointer depth; 0 means a plain value.
        self.ptr_indirection = ptr_indirection

    def __repr__(self):
        return '<CType {}>'.format(' '.join(self.names))


class SysCallParamSig(object):
    """Name and type of one formal syscall parameter."""

    __slots__ = ('name', 'type')

    def __init__(self, name, type):
        self.name = name
        self.type = type

    def __repr__(self):
        return '<SysCallParamSig {} {}>'.format(self.type, self.name)


class SysCallSig(object):
    """Signature of a syscall: its name, parameters and result type."""

    __slots__ = ('name', 'params', 'result')

    def __init__(self, name, params, result):
        self.name = name
        self.params = params
        self.result = result


class SysCallArg(object):
    """One actual syscall argument, as a raw and a decoded value."""

    def __init__(self, name, type, raw_value, value):
        self.name = name
        self.type = type
        self.raw_value = raw_value
        self.value = value

    def __repr__(self):
        return '<SysCallArg {}={!r}>'.format(self.name, self.value)


class SysCallResult(object):
    """The result of a syscall, as a raw and a decoded value."""

    def __init__(self, type, raw_value, value):
        self.type = type
        self.raw_value = raw_value
        self.value = value

    def __repr__(self):
        return '<SysCallResult {!r}>'.format(self.value)


class SysCall(object):
    """A single observed syscall: name, arguments, result and origin pid."""

    def __init__(self, name, args, result, pid, traceback=None):
        self.name = name
        # The name wrapped as a SysCallArg, so it exposes ``.value``
        # just like an argument does.
        self._name = SysCallArg(None, None, self.name, self.name)
        self.pid = pid
        self.args = args
        self.result = result
        self.traceback = traceback

    def __repr__(self):
        return '<SysCall {}>'.format(self.name)
def syscall_exit(syscall, regs, mem_fd=None):
    """Update *syscall* in place with the state seen at syscall exit.

    Pointer and array arguments are read again from the tracee, and
    ``syscall.result`` is set to a ``SysCallResult`` built from the
    return register.

    *regs* is the register set captured at the exit stop.  *mem_fd* is
    passed through to ``memory.read_c_type_ptr`` for every memory read.

    Returns the same *syscall* object.
    """
    signature = defs.SYSCALLS.get(syscall.name, _unknown_syscall)

    for i, param in enumerate(signature.params):
        ptype = param.type
        if (not ptype.ptr_indirection and
                not issubclass(ptype.ctype, ctypes.Array)):
            # Plain values were fully captured at syscall entry.
            continue

        raw_value = getattr(regs, defs.ARGS_REGS[i])
        if raw_value != 0:
            value = memory.read_c_type_ptr(
                syscall.pid, raw_value, ptype.ctype,
                ptype.ptr_indirection, mem_fd)
        else:
            value = None

        arg = syscall.args[i]
        arg.raw_value = raw_value
        arg.value = value

    restype = signature.result
    raw_result = getattr(regs, defs.RETURN_REG)

    if restype.ptr_indirection:
        if raw_result != 0:
            # read_c_type_ptr() expects the ctypes class, not the CType
            # wrapper; mem_fd is passed as for the argument reads above.
            value = memory.read_c_type_ptr(
                syscall.pid, raw_result, restype.ctype,
                restype.ptr_indirection, mem_fd)
        else:
            value = None
    else:
        value = restype.ctype(raw_result).value

    syscall.result = syscalldef.SysCallResult(restype, raw_result, value)

    return syscall
def suite():
    """Return the suite of tests discovered under the current directory.

    Discovery starts at ``'.'`` and collects files named ``test_*.py``.
    """
    return unittest.TestLoader().discover('.', pattern='test_*.py')
class TestLLTraceback(unittest.TestCase):
    """Exercise the ``ptracer._lltraceback`` helper thread over pipes."""

    def test_lltraceback(self):
        """Request one stack dump and check that it is not empty.

        The thread id is written to the control pipe as ``'!Q'``.  The
        reply is read from the output pipe as a stack depth, a tuple
        length, and then one length-prefixed item per tuple slot; every
        length is a 4-byte ``'!L'`` value.
        """
        def read_uint32(fd):
            return struct.unpack('!L', os.read(fd, 4))[0]

        control_read, control_write = os.pipe()
        output_read, output_write = os.pipe()

        thread_id = ptracer._lltraceback.gettid()
        ptracer._lltraceback.start_thread(control_read, output_write)
        os.write(control_write, struct.pack('!Q', thread_id))

        stack_depth = read_uint32(output_read)
        tuple_length = read_uint32(output_read)

        stack = []
        for _ in range(stack_depth):
            entry = []
            for position in range(tuple_length):
                item_len = read_uint32(output_read)
                item_data = os.read(output_read, item_len)
                if position == 1:
                    # The second slot is the line number.
                    entry.append(int(item_data))
                else:
                    entry.append(item_data.decode('utf8'))

            if len(entry) < 4:
                # Fill in the source line from the file name and number.
                entry.append(linecache.getline(entry[0], entry[1]))

            stack.append(entry)

        ptracer._lltraceback.stop_thread()

        self.assertGreater(len(stack), 0)
#
# This module is part of ptracer and is released under
# the Apache 2.0 License: http://www.apache.org/licenses/LICENSE-2.0


import errno
import multiprocessing
import os
import signal
import time
import unittest

from ptracer import ptrace


# Syscall names the assertions below care about.
_IO_SYSCALLS = frozenset(['open', 'openat', 'write', 'close'])


def _traced_child():
    # Child-process body: request tracing, stop so the parent can take
    # over, then perform a small, predictable sequence of I/O syscalls.
    # Kept at module level so it stays picklable under multiprocessing
    # start methods other than "fork".
    ptrace.traceme()
    os.kill(os.getpid(), signal.SIGSTOP)

    with open('/dev/null', 'w') as f:
        f.write('foo')

    rd, wr = os.pipe()
    os.close(rd)
    os.close(wr)


def _sleeping_child():
    # Child-process body for the attach test: just stay alive briefly.
    time.sleep(0.1)


def _kill_quietly(pid):
    # SIGKILL *pid*; a process that has already gone away (ESRCH) is fine.
    try:
        os.kill(pid, signal.SIGKILL)
    except OSError as e:
        if e.errno != errno.ESRCH:
            raise


class TestPtrace(unittest.TestCase):
    def _check_dev_null_io(self, syscalls):
        # Verify the I/O syscalls recorded from _traced_child() and return
        # the open/openat call for any extra, test-specific assertions.
        syscalls = [s for s in syscalls if s.name in _IO_SYSCALLS]

        # open + write + close of /dev/null, then one close per pipe end.
        self.assertEqual(len(syscalls), 5)

        open_call, write_call, close_call = syscalls[:3]

        # openat() takes the path as its second argument, open() as first.
        if open_call.name == 'openat':
            self.assertEqual(open_call.args[1].value, b'/dev/null')
        else:
            self.assertEqual(open_call.args[0].value, b'/dev/null')

        fno = open_call.result.value
        self.assertGreater(fno, 0)

        self.assertEqual(write_call.args[0].value, fno)
        self.assertEqual(write_call.args[2].value, 3)
        self.assertEqual(write_call.result.value, 3)

        self.assertEqual(close_call.args[0].value, fno)

        return open_call

    def test_ptrace_syscalls(self):
        # Started before the ``try`` so the cleanup below never refers to
        # an unbound name or to a process that was never launched.
        process = multiprocessing.Process(target=_traced_child)
        process.start()

        try:
            pid, status = os.waitpid(process.pid, 0)

            self.assertTrue(os.WIFSTOPPED(status))
            stopsig = os.WSTOPSIG(status)
            self.assertEqual(stopsig, signal.SIGSTOP)

            ptrace.setoptions(process.pid, ptrace.PTRACE_O_TRACESYSGOOD)

            syscalls = []
            in_syscall = None

            while True:
                ptrace.syscall(process.pid)
                pid, status = os.waitpid(process.pid, 0)

                if os.WIFEXITED(status):
                    break

                self.assertTrue(os.WIFSTOPPED(status))

                # With PTRACE_O_TRACESYSGOOD set above, syscall stops are
                # expected to report SIGTRAP with bit 0x80 set.
                stopsig = os.WSTOPSIG(status)
                self.assertTrue(stopsig & 0x80)
                self.assertEqual(stopsig & 0x7F, signal.SIGTRAP)

                regs = ptrace.getregs(process.pid)
                if not in_syscall:
                    syscall = ptrace.syscall_enter(process.pid, regs)
                    syscalls.append(syscall)
                    in_syscall = syscall
                else:
                    ptrace.syscall_exit(in_syscall, regs)
                    in_syscall = None

        finally:
            _kill_quietly(process.pid)

        open_call = self._check_dev_null_io(syscalls)
        self.assertIsNotNone(open_call.result.type)

    def test_ptrace_attach(self):
        process = multiprocessing.Process(target=_sleeping_child)
        process.start()

        try:
            ptrace.attach_and_wait(process.pid)
            ptrace.cont(process.pid)
        finally:
            _kill_quietly(process.pid)

    def test_ptrace_procmem(self):
        process = multiprocessing.Process(target=_traced_child)
        process.start()

        # Bound up front so the cleanup can tell "never opened" apart from
        # an open descriptor instead of failing with NameError.
        mem_fd = None

        try:
            pid, status = os.waitpid(process.pid, 0)
            ptrace.setoptions(process.pid, ptrace.PTRACE_O_TRACESYSGOOD)

            syscalls = []
            in_syscall = None

            mem_fd = os.open('/proc/{}/mem'.format(pid), os.O_RDONLY)

            while True:
                ptrace.syscall(process.pid)
                pid, status = ptrace.wait(process.pid)

                if os.WIFEXITED(status):
                    break

                regs = ptrace.getregs(process.pid)
                if not in_syscall:
                    syscall = ptrace.syscall_enter(process.pid, regs, mem_fd)
                    syscalls.append(syscall)
                    in_syscall = syscall
                else:
                    ptrace.syscall_exit(in_syscall, regs, mem_fd)
                    in_syscall = None

        finally:
            try:
                if mem_fd is not None:
                    os.close(mem_fd)
            finally:
                # Kill the child even if closing the descriptor failed.
                _kill_quietly(process.pid)

        self._check_dev_null_io(syscalls)


# The lines below belong to the next file of this dump and are preserved
# verbatim behind a comment marker.
# --------------------------------------------------------------------------------
# /tests/test_ptracer.py:
# --------------------------------------------------------------------------------
# 1 | # Copyright (C) 2017-present Pinterest Inc.
#
# This module is part of ptracer and is released under
# the Apache 2.0 License: http://www.apache.org/licenses/LICENSE-2.0


import errno
import os
import re
import threading
import unittest

try:
    from unittest import mock
except ImportError:
    import mock

import ptracer


eperm_mock = mock.Mock(
    side_effect=OSError(errno.EPERM, 'Operation not permitted'))


def _open_and_close(path, mode):
    # Open *path* and close it again straight away; the tests only care
    # about the syscalls this produces.
    f = open(path, mode)
    f.close()


class TestPtracer(unittest.TestCase):
    def _assert_raises_regex(self, exc_type, pattern):
        # Python 2.7 only provides assertRaisesRegexp, while Python 3.12
        # removed that spelling in favour of assertRaisesRegex; use
        # whichever one this interpreter has.
        method = getattr(self, 'assertRaisesRegex', None)
        if method is None:
            method = self.assertRaisesRegexp
        return method(exc_type, pattern)

    def _assert_tracing_fails(self):
        # Shared body of the failure tests: with one ptrace primitive
        # patched to raise EPERM, the context must raise PtracerError.
        with self._assert_raises_regex(ptracer.PtracerError,
                                       'Operation not permitted'):
            with ptracer.context(lambda s: None):
                _open_and_close('/dev/zero', 'r')

    @mock.patch('ptracer.ptrace.attach_and_wait', eperm_mock)
    def test_ptracer__fail_01(self):
        self._assert_tracing_fails()

    @mock.patch('ptracer.ptrace.syscall', eperm_mock)
    def test_ptracer__fail_02(self):
        self._assert_tracing_fails()

    @mock.patch('ptracer.ptrace.syscall_exit', eperm_mock)
    def test_ptracer__fail_03(self):
        self._assert_tracing_fails()

    @mock.patch('ptracer.ptrace.ptrace.getsiginfo', eperm_mock)
    def test_ptracer__fail_04(self):
        self._assert_tracing_fails()

    def test_ptracer_basic(self):
        syscalls = []

        with ptracer.context(syscalls.append):
            _open_and_close('/dev/zero', 'r')

        self.assertGreater(len(syscalls), 0)

    def test_ptracer_filter_01(self):
        syscalls = []

        def _trace(pattern):
            # Start every run from an empty list while keeping the same
            # list object that ``syscalls.append`` is bound to.
            syscalls[:] = []

            with ptracer.context(syscalls.append, filter=pattern):
                _open_and_close('/dev/null', 'w')
                _open_and_close('/dev/zero', 'r')
                try:
                    open('/dev/nonexistent', 'r')
                except IOError:
                    pass

        # Name regex only: all three open attempts match.
        _trace([
            ptracer.SysCallPattern(name=re.compile('op.*'))
        ])

        self.assertEqual(len(syscalls), 3)

        # Literal match on the path argument.
        _trace([
            ptracer.SysCallPattern(
                name=re.compile('openat'),
                args=[
                    None,
                    b'/dev/null'
                ]
            )
        ])

        self.assertEqual(len(syscalls), 1)

        # Same pattern as the previous run, repeated unchanged.
        _trace([
            ptracer.SysCallPattern(
                name=re.compile('openat'),
                args=[
                    None,
                    b'/dev/null'
                ]
            )
        ])

        self.assertEqual(len(syscalls), 1)

        # Regex match on the path argument.
        _trace([
            ptracer.SysCallPattern(
                name=re.compile('openat'),
                args=[
                    None,
                    re.compile(b'.*/null'),
                ]
            )
        ])

        self.assertEqual(len(syscalls), 1)

        # Callable predicate on the flags argument.
        _trace([
            ptracer.SysCallPattern(
                name=re.compile('openat'),
                args=[
                    None,
                    None,
                    lambda arg: arg.value & os.O_WRONLY
                ]
            )
        ])

        self.assertEqual(len(syscalls), 1)

        # Callable predicate on the result: only the failed open matches.
        _trace([
            ptracer.SysCallPattern(
                name=re.compile('op.*'),
                result=lambda res: res.value < 0
            )
        ])

        self.assertEqual(len(syscalls), 1)

    def test_ptracer_threading(self):
        syscalls = []

        def _thread():
            _open_and_close('/dev/zero', 'r')

        flt = ptracer.SysCallPattern(
            name='openat',
            args=[
                None,
                b'/dev/zero'
            ]
        )

        with ptracer.context(syscalls.append, filter=flt):
            thread = threading.Thread(target=_thread)
            thread.start()
            thread.join()

        self.assertEqual(len(syscalls), 1)


# The lines below belong to the next file of this dump and are preserved
# verbatim behind a comment marker.
# --------------------------------------------------------------------------------
# /tools/extract_ptrace_constants.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
#
# Copyright (C) 2017-present Pinterest Inc.
#
# This module is part of ptracer and is released under
# the Apache 2.0 License: http://www.apache.org/licenses/LICENSE-2.0


import argparse
import ctypes
import operator
import os
import re
import tempfile

import pycparser
from pycparserext import ext_c_parser
from pycparser import c_ast


def main():
    """Generate the ptrace definitions module and print it to stdout.

    Reads sys/ptrace.h, sys/user.h, signal.h and the kernel syscall
    headers named on the command line, and prints the generated Python
    source assembled from them.
    """
    parser = argparse.ArgumentParser(
        description='generate ptrace/defs.py from sys/ptrace.h')
    parser.add_argument('-I', type=str, help='include path',
                        dest='include_path', nargs='*', default=[
                            '/usr/src/linux/include',
                            '/usr/src/linux/arch/x86/include/',
                            '/usr/src/linux/arch/x86/include/generated/'
                        ])
    parser.add_argument('--ptrace-h', type=str, dest='ptrace_h',
                        default='/usr/include/sys/ptrace.h',
                        help='path to sys/ptrace.h')
    parser.add_argument('--user-h', type=str, dest='user_h',
                        default='/usr/include/sys/user.h',
                        help='path to sys/user.h')
    parser.add_argument('--signal-h', type=str, dest='signal_h',
                        default='/usr/include/signal.h',
                        help='path to signal.h')
    parser.add_argument('--linux-src', type=str, dest='linux_tree',
                        default='/usr/src/linux',
                        help='path to linux kernel source tree')

    args = parser.parse_args()

    output = []
    output.append('# Automatically generated from system headers.')
    output.append('# DO NOT EDIT.')
    output.append('\nimport ctypes')
    output.append(
        '\nfrom .syscalldef import CType, SysCallSig, SysCallParamSig')

    output.extend(read_ptrace_h(args.ptrace_h, args.include_path))
    output.extend(read_user_h(args.user_h, args.include_path))
    output.extend(read_signal_h(args.signal_h, args.include_path))

    # Generic syscalls.
    syscalls_h = os.path.join(args.linux_tree, 'include/linux/syscalls.h')
    # Arch-specific syscalls.
    arch_syscalls_h = os.path.join(
        args.linux_tree, 'arch/x86/include/asm/syscalls.h')
    output.extend(read_syscalls_h([syscalls_h, arch_syscalls_h],
                                  args.include_path))

    unistd_h = os.path.join(
        args.linux_tree, 'arch/x86/include/generated/uapi/asm/unistd_64.h')
    output.extend(read_unistd_h(unistd_h, args.include_path))

    print('\n'.join(output))


def die(msg):
    """Abort generation by raising ``ValueError(msg)``."""
    raise ValueError(msg)


def get_header_text(path, include_path=None, defines=None,
                    strip_includes=False):
    """Return the C-preprocessed text of the header at *path*.

    *include_path* and *defines* are iterables turned into ``-I`` and
    ``-D`` preprocessor arguments; ``None`` means "none".  With
    *strip_includes* set, every ``#include`` directive is removed first
    and the preprocessor runs on a temporary copy of the header.
    """
    cpp_args = []

    for ip in include_path or ():
        cpp_args.extend(['-I', ip])

    for d in defines or ():
        cpp_args.extend(['-D', d])

    tmp_path = None

    try:
        if strip_includes:
            with open(path, 'r') as f:
                text = f.read()

            text = re.sub(r'#include.*', '', text)
            tfile, tmp_path = tempfile.mkstemp()
            try:
                os.write(tfile, text.encode('utf-8'))
            finally:
                os.close(tfile)

            path = tmp_path

        return pycparser.preprocess_file(path, cpp_args=cpp_args)
    finally:
        # Covers both a failed write and a failed preprocessor run, so the
        # temporary copy is never left behind.
        if tmp_path is not None:
            os.unlink(tmp_path)


def read_ptrace_h(path, include_path):
    """Return generated source lines for sys/ptrace.h.

    Emits one ``NAME = value`` line for every enumerator whose name starts
    with ``PTRACE_``, followed by the ctypes definition of
    ``__ptrace_peeksiginfo_args``.
    """
    text = get_header_text(path, include_path)
    parser = ext_c_parser.GnuCParser()
    fileast = parser.parse(text, path)

    if not isinstance(fileast, c_ast.FileAST):
        die('could not parse ptrace.h')

    output = ['\n']

    for decl in fileast.ext:
        if (not isinstance(decl, c_ast.Decl) or
                not isinstance(decl.type, c_ast.Enum)):
            continue

        for item in decl.type.values.enumerators:
            if item.name.startswith('PTRACE_'):
                output.append('{} = {}'.format(
                    item.name, render_const(item.value)))

    typedefs = parse_typedefs(fileast)
    structs = {'__ptrace_peeksiginfo_args'}
    output.extend(parse_structs(fileast, structs, typedefs))

    return output
def render_const(value):
    """Render an enumerator value expression as source text.

    Constants render as their literal text and binary operations as
    ``left op right`` (recursively); anything else aborts via ``die``.
    """
    if isinstance(value, c_ast.Constant):
        return value.value

    if isinstance(value, c_ast.BinaryOp):
        return '{} {} {}'.format(
            render_const(value.left), value.op, render_const(value.right))

    die('unexpected constant value: {!r}'.format(value))


def _read_header_structs(path, include_path, structs, header_name):
    # Shared implementation of read_user_h() and read_signal_h():
    # preprocess and parse the header, then emit ctypes definitions for
    # the requested struct names.  *header_name* is only used in the
    # error message.
    text = get_header_text(path, include_path)
    parser = ext_c_parser.GnuCParser()
    fileast = parser.parse(text, path)

    if not isinstance(fileast, c_ast.FileAST):
        die('could not parse {}'.format(header_name))

    typedefs = parse_typedefs(fileast)
    return parse_structs(fileast, structs, typedefs)


def read_user_h(path, include_path):
    """Return generated source lines for the register structs in user.h."""
    structs = {'user_regs_struct', 'user_fpregs_struct'}
    return _read_header_structs(path, include_path, structs, 'user.h')


def read_signal_h(path, include_path):
    """Return generated source lines for ``siginfo_t`` from signal.h."""
    structs = {'siginfo_t'}
    return _read_header_structs(path, include_path, structs, 'signal.h')


def read_syscalls_h(paths, include_path, defines=None):
    """Return the source lines of the ``SYSCALLS`` dict literal.

    Every header in *paths* contributes one entry per ``sys_*`` prototype.
    *defines* is an optional iterable of preprocessor definitions.
    """
    if defines is None:
        defines = []

    output = ['\n\nSYSCALLS = {']

    for path in paths:
        output.extend(_read_syscalls_h(path, include_path, defines))

    output.append('}')

    return output


def _read_syscalls_h(path, include_path, defines):
    # Emit one ``'name': SysCallSig(...)`` entry per ``sys_*`` prototype
    # found in *path*.
    #
    # NOTE(review): the leading whitespace inside the emitted-text
    # literals of this function is reproduced exactly as found; confirm
    # the intended indentation against the checked-in generated module.
    output = []

    text = get_header_text(path, include_path, defines=defines,
                           strip_includes=True)

    parser = pycparser.CParser()

    # The header is preprocessed with its #include lines stripped, so the
    # kernel types its prototypes mention are declared by hand here.
    prelude = [
        'typedef unsigned int qid_t;',
        'typedef long time_t;',
        'typedef unsigned int uid_t;',
        'typedef unsigned int gid_t;',
        'typedef unsigned short old_uid_t;',
        'typedef unsigned short old_gid_t;',
        'typedef int pid_t;',
        'typedef void *cap_user_header_t;',
        'typedef void *cap_user_data_t;',
        'typedef unsigned long old_sigset_t;',
        'typedef int timer_t;',
        'typedef int clockid_t;',
        'typedef unsigned int u32;',
        'typedef unsigned int __u32;',
        'typedef int __s32;',
        'typedef unsigned long long u64;',
        'typedef unsigned long sigset_t;',
        'typedef unsigned int size_t;',
        'typedef struct siginfo_t siginfo_t;',
        'typedef struct sigset_t sigset_t;',
        'typedef struct fd_set fd_set;',
        'typedef void *__sighandler_t;',
        'typedef long long off_t;',
        'typedef long long loff_t;',
        'typedef unsigned short umode_t;',
        'typedef unsigned long aio_context_t;',
        'typedef int key_t;',
        'typedef int mqd_t;',
        'typedef int key_serial_t;',
    ]

    text = '\n'.join(prelude) + '\n' + text
    # Drop the kernel-only annotations.
    text = re.sub(r'asmlinkage|__user', '', text)
    # Give unnamed double-pointer parameters a placeholder name.
    text = re.sub(r'\*\s*\*\s*(\W)', '**__foo\\1', text)

    fileast = parser.parse(text, path)

    if not isinstance(fileast, c_ast.FileAST):
        die('could not parse syscalls.h')

    typedefs = parse_typedefs(fileast)

    for decl in fileast.ext:
        if (not isinstance(decl, c_ast.Decl) or
                not isinstance(decl.type, c_ast.FuncDecl)):
            continue

        if not decl.name.startswith('sys_'):
            continue

        name = decl.name[len('sys_'):]

        output.append(' {!r}: SysCallSig('.format(name))
        output.append(' {!r},'.format(name))
        output.append(' params=[')

        # A prototype written with empty parentheses has no parameter
        # list node at all.
        arglist = decl.type.args
        params = arglist.params if arglist is not None else []

        for param in params:
            ctype, ptr_indirection = get_ctypes_type(param.type, typedefs)
            pdecl = [
                'SysCallParamSig(',
                ' {!r},'.format(param.name),
                ' CType(',
                ' {!r},'.format(render_type(param.type)),
                ' {},'.format(ctype),
                ' {}'.format(ptr_indirection),
                ' )',
                '),',
            ]

            output.extend(' {}'.format(p) for p in pdecl)

        output.append(' ],')

        ctype, ptr_indirection = get_ctypes_type(decl.type.type, typedefs)
        output.append(' result=CType({!r}, {}, {})'.format(
            render_type(decl.type.type), ctype, ptr_indirection
        ))
        output.append(' ),')

    return output


def read_unistd_h(path, include_path):
    """Return the source lines of the ``SYSCALL_NUMBERS`` dict literal.

    One ``number: 'name'`` entry is emitted for every
    ``#define __NR_<name> <number>`` line in *path*.  *include_path* is
    accepted for symmetry with the other readers and is not used.
    """
    with open(path, 'r') as f:
        text = f.read()

    output = ['\nSYSCALL_NUMBERS = {']

    # NOTE(review): entry indentation reproduced exactly as found.
    for name, no in re.findall(r'#define\s+__NR_(\w+)\s+(\d+)', text):
        output.append(' {}: {!r},'.format(no, name))

    output.append('}')

    return output


def parse_typedefs(fileast):
    """Map every top-level typedef name to its rendered type tokens."""
    typedefs = {}

    for decl in fileast.ext:
        if isinstance(decl, c_ast.Typedef):
            typedefs[decl.name] = render_type(decl.type)

    return typedefs


def parse_structs(fileast, structs, typedefs):
    """Return ctypes class definitions for the structs named in *structs*.

    Considers top-level struct declarations and typedefs.  Each selected
    struct yields one class per nested definition followed by its own.
    """
    output = []

    for decl in fileast.ext:
        is_struct_decl = (isinstance(decl, c_ast.Decl) and
                          isinstance(decl.type, c_ast.Struct))
        if not is_struct_decl and not isinstance(decl, c_ast.Typedef):
            continue

        _, struct_name = get_struct_and_name(decl)
        if struct_name not in structs:
            continue

        definitions = parse_struct(decl, typedefs)

        # NOTE(review): indentation of the emitted class body is
        # reproduced exactly as found.
        for name, base, fields in definitions:
            output.append('\n\nclass {}({}):'.format(name, base))
            output.append(' _fields_ = (')
            for field_name, field_type in fields:
                output.append(' ({!r}, {}),'.format(
                    field_name, field_type))
            output.append(' )')

    return output


# Running counter used to name anonymous structs and unions.
_anon_struct_ctr = 1


def get_struct_and_name(decl):
    """Return ``(struct_node, name)`` for a declaration or typedef.

    Unnamed structs receive a generated ``_anon_<n>`` name; every such
    call advances the module-level counter.
    """
    if isinstance(decl, c_ast.Typedef):
        struct = decl.type.type
        struct_name = decl.name
    else:
        struct = decl.type
        struct_name = struct.name

    if not struct_name:
        global _anon_struct_ctr

        struct_name = '_anon_{}'.format(_anon_struct_ctr)
        _anon_struct_ctr += 1

    return struct, struct_name


def parse_struct(decl, typedefs, is_union=False):
    """Return ``(name, base, fields)`` tuples for *decl*.

    Nested struct and union definitions come first so that each one is
    defined before the field that refers to it; the entry for *decl*
    itself is last.
    """
    definitions = []

    struct, struct_name = get_struct_and_name(decl)

    fields = []

    for field_decl in struct.decls:
        field_type = field_decl.type.type

        if isinstance(field_type, c_ast.Union):
            definitions.extend(
                parse_struct(field_decl.type, typedefs, is_union=True))
            # The nested definition just appended is this field's type.
            ctype = definitions[-1][0]
        elif isinstance(field_type, c_ast.Struct):
            definitions.extend(
                parse_struct(field_decl.type, typedefs))
            ctype = definitions[-1][0]
        else:
            ctype = get_final_ctypes_type(field_decl.type, typedefs)

        fields.append((field_decl.name, ctype))

    base = 'ctypes.Union' if is_union else 'ctypes.Structure'
    definitions.append((struct_name, base, fields))

    return definitions


# Sequence of C type tokens -> name of the ctypes type to emit.
ctype_map = {
    ('void',): 'ctypes.c_long',
    ('char',): 'ctypes.c_char',
    ('unsigned', 'char',): 'ctypes.c_char',
    ('unsigned', 'short'): 'ctypes.c_ushort',
    ('unsigned', 'short', 'int'): 'ctypes.c_ushort',
    ('unsigned', 'int'): 'ctypes.c_uint',
    ('unsigned',): 'ctypes.c_uint',
    ('unsigned', 'long'): 'ctypes.c_ulong',
    ('unsigned', 'long', 'int'): 'ctypes.c_ulong',
    ('unsigned', 'long', 'long',): 'ctypes.c_ulonglong',
    ('unsigned', 'long', 'long', 'int'): 'ctypes.c_ulonglong',
    ('__uint64_t',): 'ctypes.c_uint64',
    ('__uint32_t',): 'ctypes.c_uint32',
    ('__uint16_t',): 'ctypes.c_uint16',
    ('short',): 'ctypes.c_short',
    ('short', 'int'): 'ctypes.c_short',
    ('int',): 'ctypes.c_int',
    ('signed', 'int'): 'ctypes.c_int',
    ('long',): 'ctypes.c_long',
    ('long', 'int'): 'ctypes.c_long',
    ('long', 'long'): 'ctypes.c_longlong',
    ('long', 'long', 'int'): 'ctypes.c_longlong',
    ('__int64_t',): 'ctypes.c_int64',
    ('__int32_t',): 'ctypes.c_int32',
    ('__int16_t',): 'ctypes.c_int16',
}


def get_ctypes_type(typedecl, typedefs):
    """Return ``(ctype_expr, ptr_indirection)`` for a type declaration.

    *ctype_expr* is the source text of a ctypes type and
    *ptr_indirection* the number of pointer levels layered on top of it.
    Structs and unions are reported as ``ctypes.c_void_p``.  Unknown
    types and unsupported syntax abort via ``die``.
    """
    ptr_indirection = 0

    if isinstance(typedecl, c_ast.TypeDecl):
        inner = typedecl.type

        if isinstance(inner, c_ast.IdentifierType):
            tnames = inner.names

            # Expand a trailing typedef name until a base type is reached;
            # names that follow ``struct``/``union`` are tags, not
            # typedefs, and are left alone.
            while ((len(tnames) == 1 and tnames[0] in typedefs) or
                    (tnames[-1] in typedefs and tnames[-2] not in
                        {'struct', 'union'})):
                tnames = list(tnames[:-1]) + list(typedefs[tnames[-1]])

            # A typedef may itself resolve to a pointer type.
            if tnames[-1] == '*':
                ptr_indirection = 1
                tnames = tnames[:-1]

            if len(tnames) > 1 and tnames[-2] in ('struct', 'union'):
                ctype = 'ctypes.c_void_p'
                ptr_indirection = 0
            else:
                ctype = ctype_map.get(tuple(tnames))
                if ctype is None:
                    die('unrecognized C type: {}'.format(' '.join(tnames)))

        elif isinstance(inner, (c_ast.Struct, c_ast.Union)):
            ctype = 'ctypes.c_void_p'

        else:
            die('unexpected syntax in type declaration: {!r}'.format(
                typedecl.type))

    elif isinstance(typedecl, c_ast.PtrDecl):
        ctype, ptr_indirection = get_ctypes_type(
            typedecl.type, typedefs)

        # c_void_p is already a pointer, so no extra level is counted.
        if ctype != 'ctypes.c_void_p':
            ptr_indirection += 1

    elif isinstance(typedecl, c_ast.ArrayDecl):
        array_type, ptr_indirection = get_ctypes_type(typedecl.type, typedefs)
        dim = fold_const_expr(typedecl.dim, typedefs)
        ctype = '{} * {}'.format(array_type, dim)

    else:
        die('unexpected syntax in type declaration: {!r}'.format(
            typedecl))

    return ctype, ptr_indirection
def get_final_ctypes_type(typedecl, typedefs):
    """Return the ctypes type expression to use for a struct field.

    Any pointer collapses to ``ctypes.c_char_p`` when it points at
    ``char`` and to ``ctypes.c_void_p`` otherwise.
    """
    ctype, ptr_indirection = get_ctypes_type(typedecl, typedefs)

    if ptr_indirection:
        if ctype == 'ctypes.c_char':
            ctype = 'ctypes.c_char_p'
        else:
            ctype = 'ctypes.c_void_p'

    return ctype


# Binary operators that fold_const_expr() can evaluate.
_binopmap = {
    '+': operator.add,
    '-': operator.sub,
    '*': operator.mul,
    '/': operator.floordiv,
    '<<': operator.lshift,
    '>>': operator.rshift
}


# Unary operators that fold_const_expr() can evaluate.
_unopmap = {
    '+': operator.pos,
    '-': operator.neg,
    'sizeof': ctypes.sizeof,
}


def _parse_c_int(text):
    # Parse a C integer literal: decimal, hex (0x..), binary (0b..) or
    # octal (leading 0), with optional u/U/l/L suffixes.
    digits = re.sub(r'[uUlL]+$', '', text)

    if len(digits) > 1 and digits[0] == '0' and digits[1] not in 'xXbB':
        # C spells octal with a bare leading zero, which int(x, 0) rejects.
        return int(digits, 8)

    return int(digits, 0)


def _parse_c_char(text):
    # Parse a C character literal such as 'a' into its integer value.
    # Anything that is not a single plain character goes through int(),
    # which is what every char literal went through before.
    body = text
    if len(text) >= 2 and text[0] == "'" and text[-1] == "'":
        body = text[1:-1]

    if len(body) == 1:
        return ord(body)

    return int(body)


def _parse_c_float(text):
    # Parse a C floating literal, ignoring f/F/l/L suffixes.
    return float(text.rstrip('fFlL'))


# Constant type name -> parser for the literal text.
_literalmap = {
    'int': _parse_c_int,
    'char': _parse_c_char,
    'float': _parse_c_float,
}


def _literal_parser(type_name):
    # Look up the parser for a constant's type name.  Names qualified by
    # a literal suffix (e.g. 'unsigned long int', 'long double') fall back
    # to the parser of their base type.
    literal_parser = _literalmap.get(type_name)

    if literal_parser is None and type_name:
        if type_name.endswith('int'):
            literal_parser = _literalmap['int']
        elif type_name.endswith('double'):
            literal_parser = _literalmap['float']

    return literal_parser


def fold_const_expr(expr, typedefs):
    """Evaluate a constant C expression and return its Python value.

    Handles the binary operators in ``_binopmap``, the unary operators in
    ``_unopmap`` (including ``sizeof``), and integer, character and
    floating literals.  A type name, as found under ``sizeof``,
    evaluates to the matching ctypes class.  Anything else aborts via
    ``die``.
    """
    if isinstance(expr, c_ast.BinaryOp):
        left = fold_const_expr(expr.left, typedefs)
        right = fold_const_expr(expr.right, typedefs)
        oper = _binopmap.get(expr.op)
        if oper is None:
            die('cannot fold binop with {!r}'.format(expr.op))

        result = oper(left, right)

    elif isinstance(expr, c_ast.UnaryOp):
        operand = fold_const_expr(expr.expr, typedefs)
        oper = _unopmap.get(expr.op)

        if oper is None:
            die('cannot fold unop with {!r}'.format(expr.op))

        result = oper(operand)

    elif isinstance(expr, c_ast.Constant):
        lit_type = _literal_parser(expr.type)
        if lit_type is None:
            die('unexpected constant type: {!r}'.format(expr.type))
        result = lit_type(expr.value)

    elif isinstance(expr, c_ast.Typename):
        # sizeof operand: turn 'ctypes.<name>' into the ctypes class so
        # that ctypes.sizeof can be applied to it.
        result = get_final_ctypes_type(expr.type, typedefs)
        _, _, typ = result.rpartition('.')
        result = getattr(ctypes, typ)

    else:
        die('cannot fold {!r} expr'.format(expr))

    return result


def render_type(typedecl):
    """Render a type AST node as a list of C type tokens.

    Pointers append ``'*'``, arrays append ``'[]'`` or ``'[N]'``, and
    function types append a single ``'(param, ...)'`` token after the
    return type.  Unsupported nodes abort via ``die``.
    """
    res = []

    if isinstance(typedecl, (c_ast.TypeDecl, c_ast.Typename)):
        res.extend(typedecl.quals)
        res.extend(render_type(typedecl.type))

    elif isinstance(typedecl, c_ast.Decl):
        # Named function parameter: only its type contributes tokens.
        res.extend(render_type(typedecl.type))

    elif isinstance(typedecl, c_ast.PtrDecl):
        res.extend(typedecl.quals)
        res.extend(render_type(typedecl.type))
        res.append('*')

    elif isinstance(typedecl, c_ast.IdentifierType):
        res.extend(typedecl.names)

    elif isinstance(typedecl, c_ast.Struct):
        res.extend(['struct', typedecl.name])

    elif isinstance(typedecl, c_ast.Union):
        res.extend(['union', typedecl.name])

    elif isinstance(typedecl, (c_ast.FuncDecl, ext_c_parser.FuncDeclExt)):
        ret = render_type(typedecl.type)
        # A declarator written with empty parentheses has no parameter
        # list node at all.
        arglist = typedecl.args
        params = arglist.params if arglist is not None else []
        args = [' '.join(render_type(param)) for param in params]
        ret.append('({})'.format(', '.join(args)))

        res.extend(ret)

    elif isinstance(typedecl, c_ast.ArrayDecl):
        res.extend(render_type(typedecl.type))
        if typedecl.dim is None:
            res.append('[]')
        elif isinstance(typedecl.dim, c_ast.Constant):
            res.append('[{}]'.format(typedecl.dim.value))
        else:
            die('non-constant dimension in array declaration')

    else:
        die('unexpected {!r}'.format(typedecl))

    return res


if __name__ == '__main__':
    main()


# The lines below are the remaining files of this dump, preserved
# verbatim behind a comment marker.
# --------------------------------------------------------------------------------
# /tools/requirements.txt:
# --------------------------------------------------------------------------------
# 1 | pycparserext==2016.2
# 2 |
# --------------------------------------------------------------------------------
# /tox.ini:
# --------------------------------------------------------------------------------
# 1 | [tox]
# 2 | envlist=py27,py37,py38,py39,py310
# 3 |
# 4 | [testenv]
# 5 | commands=python setup.py test
# 6 |
# --------------------------------------------------------------------------------