├── .gitignore ├── .travis.yml ├── Dockerfile ├── LICENSE.md ├── README.md ├── bin └── sibyl ├── doc ├── ADD_ABI.md ├── ADD_TEST.md ├── ADVANCED_USE.md ├── CHANGELOG.md ├── CONFIG.md ├── LEARNING.md ├── TESTING.md └── img │ ├── ghidra_screen.png │ ├── ghidra_screen2.png │ └── ida_screen.png ├── ext ├── ghidra │ ├── ExportFunction.java │ └── find.py ├── ida │ └── find.py ├── pin_tracer │ ├── Makefile │ └── pin_tracer.cpp └── radare2 │ └── r2_find.py ├── setup.py ├── sibyl ├── __init__.py ├── abi │ ├── __init__.py │ ├── abi.py │ ├── arm.py │ ├── mep.py │ ├── mips.py │ └── x86.py ├── actions │ ├── __init__.py │ ├── action.py │ ├── config.py │ ├── find.py │ ├── func.py │ └── learn.py ├── commons.py ├── config.py ├── engine │ ├── __init__.py │ ├── engine.py │ ├── miasm.py │ └── qemu.py ├── heuristics │ ├── __init__.py │ ├── arch.py │ ├── csts.py │ ├── func.py │ └── heuristic.py ├── learn │ ├── __init__.py │ ├── findref.py │ ├── generator │ │ ├── __init__.py │ │ ├── generator.py │ │ ├── pythongenerator.py │ │ └── templates.py │ ├── learn.py │ ├── learnexception.py │ ├── replay.py │ ├── trace.py │ └── tracer │ │ ├── __init__.py │ │ ├── miasm.py │ │ ├── pin.py │ │ └── tracer.py ├── test │ ├── __init__.py │ ├── ctype.py │ ├── stdlib.py │ ├── string.py │ └── test.py └── testlauncher.py └── test ├── find ├── Makefile ├── __init__.py ├── run_ctests.py ├── test_ctype.c ├── test_stdlib.c ├── test_string.c └── test_stub.c ├── learn ├── Makefile ├── __init__.py ├── add.c ├── add.h ├── copy_struct.c ├── copy_struct.h ├── deref_struct.c ├── deref_struct.h ├── doublePtr.c ├── doublePtr.h ├── my_strcpy.c ├── my_strcpy.h ├── my_strlen.c ├── my_strlen.h ├── numerous_arguments.c ├── numerous_arguments.h ├── run_tests.py ├── several_traces.c └── several_traces.h ├── run_all_tests.py └── utils ├── __init__.py └── log.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 
5 | # C extensions 6 | *.so 7 | 8 | # Emacs 9 | *~ 10 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: python 3 | python: 4 | - "2.7" 5 | addons: 6 | apt: 7 | packages: 8 | - make 9 | - gcc 10 | - python-virtualenv 11 | - unzip 12 | - gcc-multilib 13 | before_script: 14 | - "cd .." 15 | # make virtual env 16 | - "python /usr/lib/python2.7/dist-packages/virtualenv.py virtualenv;" 17 | - "cd virtualenv;" 18 | - "source bin/activate;" 19 | # install elfesteem 20 | - "git clone https://github.com/serpilliere/elfesteem elfesteem && cd elfesteem && python setup.py install && cd ..;" 21 | # install pyparsing 22 | - "pip install pyparsing" 23 | # install miasm 24 | - "cd ..;git clone -b 'v0.1.1' https://github.com/cea-sec/miasm miasm && cd miasm;" 25 | - "python setup.py build build_ext -I$(pwd)/../virtualenv/include -L$(pwd)/../virtualenv/tinycc" 26 | - "python setup.py install" 27 | - "cd ..;" 28 | # install unicorn 29 | - "git clone https://github.com/unicorn-engine/unicorn && cd unicorn && make && make -C bindings/python && export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(pwd) && cd ..;" 30 | - "cd unicorn/bindings/python && python setup.py install && cd ../../../;" 31 | - "python -c 'import unicorn'" 32 | # install pycparser 33 | - "pip install pycparser" 34 | # install pycrypto 35 | - "pip install pycrypto" 36 | # install Sibyl 37 | - "cd Sibyl && python setup.py install && cd ..;" 38 | # get tests 39 | - "git clone https://github.com/commial/Sibyl-tests sibyl-tests;" 40 | # prepare env 41 | - "export SIBYLTEST=$(pwd)/sibyl-tests" 42 | - "export SIBYL=$(pwd)/Sibyl" 43 | script: 44 | # Sibyl regression tests 45 | - "cd $SIBYL/test && python run_all_tests.py;" 46 | # Sibyl bigger test 47 | - "cd $SIBYLTEST && ./run.sh;" 48 | # Sibyl regression tests with heuristics, may not end in reasonnable time 49 | - "cd $SIBYL/test 
&& python run_all_tests.py -f -a;" -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # This file is part of Sibyl. 2 | # Copyright 2017 Camille MOUGEY 3 | # 4 | # Sibyl is free software: you can redistribute it and/or modify it 5 | # under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation, either version 3 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # Sibyl is distributed in the hope that it will be useful, but WITHOUT 10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 11 | # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 12 | # License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with Sibyl. If not, see . 16 | 17 | FROM miasm/tested:latest 18 | MAINTAINER Camille Mougey 19 | USER root 20 | 21 | # Get unicorn 22 | RUN apt-get install -y python-pip &&\ 23 | pip install --pre unicorn 24 | 25 | # Get Sibyl 26 | ADD https://github.com/cea-sec/Sibyl/archive/master.tar.gz /opt/Sibyl.tar.gz 27 | RUN cd /opt &&\ 28 | tar xzvf Sibyl.tar.gz &&\ 29 | rm Sibyl.tar.gz &&\ 30 | mv Sibyl-master Sibyl &&\ 31 | chown -Rh miasm2 Sibyl &&\ 32 | cd Sibyl &&\ 33 | python setup.py install 34 | 35 | # Prepare the environment 36 | WORKDIR /opt/Sibyl 37 | USER miasm2 38 | 39 | CMD ["/usr/local/bin/sibyl"] 40 | -------------------------------------------------------------------------------- /bin/sibyl: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python2 2 | 3 | # This file is part of Sibyl. 
4 | # Copyright 2014 - 2017 Camille MOUGEY 5 | # 6 | # Sibyl is free software: you can redistribute it and/or modify it 7 | # under the terms of the GNU General Public License as published by 8 | # the Free Software Foundation, either version 3 of the License, or 9 | # (at your option) any later version. 10 | # 11 | # Sibyl is distributed in the hope that it will be useful, but WITHOUT 12 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 13 | # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 14 | # License for more details. 15 | # 16 | # You should have received a copy of the GNU General Public License 17 | # along with Sibyl. If not, see . 18 | "Sibyl client" 19 | 20 | import sys 21 | 22 | from sibyl.commons import print_table 23 | from sibyl.actions import ACTIONS, load_action 24 | 25 | if __name__ == "__main__": 26 | 27 | if len(sys.argv) < 2: 28 | print "Usage: %s [action]\n" % sys.argv[0] 29 | print "Actions:" 30 | # Sort actions by name and print them 31 | actions = [(action.name, action.desc) 32 | for action in sorted(ACTIONS, 33 | key=lambda action: action.name)] 34 | print_table(actions, 35 | title=False, 36 | separator=" ", 37 | level=1) 38 | exit(0) 39 | 40 | action = sys.argv[1] 41 | 42 | # Try to guess action 43 | guessed = [act for act in ACTIONS if act.name.startswith(action)] 44 | if len(guessed) == 1: 45 | # Action found, redirect to it 46 | load_action(guessed[0], sys.argv[2:]) 47 | else: 48 | # Action not found 49 | if len(guessed) == 0: 50 | print "Unknown action: %s" % action 51 | else: 52 | print "Ambiguous action: %s" % " ".join(guess.name 53 | for guess in guessed) 54 | exit(-1) 55 | 56 | -------------------------------------------------------------------------------- /doc/ADD_ABI.md: -------------------------------------------------------------------------------- 1 | Adding an ABI 2 | ------------- 3 | 4 | ### Overview 5 | 6 | The ABI descriptions in Sibyl are quite simple for now. 
7 | They are located in `sibyl/abi`, and all inherit from `sibyl.abi.abi:ABI`. 8 | 9 | The convention is to regroup them by relative architecture. 10 | 11 | ### Add an ABI 12 | 13 | Here is a commented fake ABI, where arguments are first passed by registers, and 14 | then by stack: 15 | 16 | ```Python 17 | class ABI_CUSTOM(abi.ABIRegsStack): 18 | 19 | # Map argument number -> register name 20 | regs_mapping = ["A0", "A1", "A2", "A3"] 21 | # Associate this ABI to a given architecture, to be used when this 22 | # architecture is recognized 23 | arch = ["mips32b", "mips32l"] 24 | 25 | # Indicate how the return address has to be set (stack, specific register, 26 | # ...) 27 | def set_ret(self, ret_addr): 28 | self.jitter.cpu.RA = ret_addr 29 | 30 | # Indicate how an element is pushed on the stack, for stack based arguments 31 | def vm_push(self, element): 32 | self.jitter.push_uint32_t(element) 33 | ``` 34 | 35 | Finally, the class just has to be added to the `sibyl.abi:ABIS` list to be 36 | considered. 37 | -------------------------------------------------------------------------------- /doc/ADD_TEST.md: -------------------------------------------------------------------------------- 1 | Adding a test case 2 | ------------------ 3 | 4 | ### Overview 5 | 6 | Available test cases are in the directory _sibyl/test/_. 7 | A test has to extend the class _sibyl.test.test.Test_ and provide at least: 8 | 9 | * _func_: the name of the function to test 10 | * _tests_: a `TestSetTest` instance, composed of (_init_, _check_) methods 11 | respectively called to initialize the VM and check the resulting state 12 | 13 | Finally, the class has to be "announced", by being in the `TESTS` (list) 14 | variable of the module. 
15 | 16 | ### Example 17 | 18 | Here is a commented case: 19 | ```Python 20 | class TestA64l(test.Test): 21 | 22 | my_string = "v/" 23 | value = 123 24 | 25 | # Test 26 | def init(self): 27 | # Alloc a string thanks to a common API, in read only 28 | self.my_addr = self._alloc_string(self.my_string) 29 | # Set the first argument independently of ABI 30 | self._add_arg(0, self.my_addr) 31 | 32 | def check(self): 33 | # Get the result independently of ABI 34 | result = self._get_result() 35 | # Check the expected result, and verify memory 36 | return all([result == self.value, 37 | self._ensure_mem(self.my_addr, self.my_string)]) 38 | 39 | # Properties 40 | func = "a64l" 41 | tests = TestSetTest(init, check) 42 | ``` 43 | 44 | A more elaborated test can be found in _sibyl/test/ctype.py_. 45 | 46 | ### TestSetTest 47 | 48 | In order to avoid false positives, it is recommended to use multiple tests. For 49 | instance, testing a `strlen` with at least two strings (different in size) will 50 | avoid finding functions always returning the same number. 51 | 52 | To do so, `TestSetTest` instances can be combined through the `&` and `|` operators, 53 | respectively imposing the success of both tests, or the success of one of them. 54 | 55 | For instance (`TestStrlen`): 56 | ```Python 57 | tests = TestSetTest(init, check) & TestSetTest(init2, check2) 58 | ``` 59 | 60 | Tests to run can also be chosen on the fly, depending on previous test results. 61 | In fact, `Test.tests` acts as a generator. It is asked to produce a `(init, 62 | check)` couple only after the previous check. The `func` attribute is read only 63 | on success, so it can be changed at the same time as the test strategy if needed. 64 | 65 | For instance, `TestIsCharset` in `ctype.py` implements a test strategy based on 66 | a decision tree. 67 | 68 | ### Subscribing custom tests 69 | 70 | To avoid modifying the sibyl package for each new test, one can add them in the 71 | configuration file. 
72 | 73 | In the section `tests`, one just has to add: 74 | ``` 75 | [tests] 76 | name = path/to/source.py 77 | ``` 78 | 79 | Where `source.py` offers a `TESTS` variable. 80 | 81 | Here are two examples of organisation: 82 | * A file with custom tests implementation, offering `TESTS` at its end 83 | * A directory with several tests implementations, and a single file merging them 84 | in its `TESTS` variable 85 | 86 | For more detail on configuration, please consult the relative documentation. 87 | 88 | Once the configuration is done, the new tests should appear in the `Tests 89 | availables` section of `sibyl config`, and in the help of `--tests` options of 90 | `sibyl find` under name `name`. 91 | 92 | Without specifying tests (ie. all tests) or with `-t name`, these tests 93 | will be used in the identification. 94 | 95 | 96 | ### Debugging its tests 97 | 98 | A few tricks can be used to debug the tests. 99 | 100 | The Python `pdb` module is a good start to obtain and inspect the context in 101 | `init` or `check` methods. 102 | 103 | As errors are masked, because they are considered as a recognition failure, one 104 | would probably want to avoid this exception catching. To do this, remove error 105 | catching code in `sibyl/engine/miasm.py::MiasmEngine.run` (if you're using one 106 | of the Miasm jitters). 107 | 108 | Adding jitter log could also help, for instance by adding in this same method: 109 | ```Python 110 | self.jitter.jit.log_mn = True 111 | self.jitter.jit.log_regs = True 112 | ``` 113 | 114 | Please refer to Miasm for more information on this. 
115 | 116 | Finally, it is often easier to: 117 | * deactivate multiprocessing (`find -p`) 118 | * use only your function, on one test (`find -t name addr1`) 119 | * deactivate timeout (`find -t 0`) 120 | -------------------------------------------------------------------------------- /doc/ADVANCED_USE.md: -------------------------------------------------------------------------------- 1 | Advanced use 2 | ------------ 3 | 4 | ### Architecture selection 5 | 6 | The architecture might be automatically recognized from the binary (for 7 | instance, using ELF or PE information). 8 | 9 | If not, the architecture can be specified / forced using the `--architecture` 10 | argument of `find`. 11 | 12 | ### Mapping shift 13 | 14 | It may happen that the file format is not correctly recognized (for instance, 15 | in firmware), and that having the binary mapped at a specific address modifies the 16 | function behavior (ie. they use absolute address). 17 | 18 | For these cases, the option `--mapping-base` can be used to specify a base 19 | address for the binary mapping. 20 | 21 | ### Emulation engine 22 | A few emulation engines are supported. Through the `--jitter` option, one can 23 | specify: 24 | 25 | * `python`: use a full Python emulation 26 | * `tcc` or `gcc`: use a C compiler to JiT code (thanks to Miasm) 27 | * `LLVM`: use LLVM JiT capabilities (thanks to Miasm) 28 | * `qemu`: use the Unicorn (http://www.unicorn-engine.org/) QEMU binding 29 | 30 | Empirically, the `qemu` jitter happens to be the fastest, but requires an 31 | additional dependency. In addition, it may not support a custom architecture 32 | added to Miasm. 33 | 34 | The second fastest jitter is `gcc`, because of the repeated call to the same 35 | function (and its cache). In addition, it requires a very common dependency. 36 | 37 | ### Function heuristic 38 | 39 | The `sibyl func` action provides a way to find possible function addresses. 
It 40 | uses heuristics, which can be individually activated or de-activated using `-e` 41 | and `-d` options. 42 | 43 | For instance, the `recursive_call` heuristic may take a long time to end, 44 | whereas the `pattern_matching` one is very fast but inaccurate. 45 | 46 | The full list can be obtained in the `--help` description. 47 | 48 | If an IDA or GHIDRA installation is detected, it will be used primarily as an 49 | addresses source. They can be enabled / disabled through `-e`/`-d` options. 50 | 51 | ### Addresses specification 52 | 53 | The targeted addresses can be specified in three ways: 54 | * using the addresses, such as `sibyl find my_binary 0x11223344 0x22334455 12345` 55 | * using a file, such as `sibyl find my_binary /tmp/addrs` 56 | * using stdin, such as `sibyl func my_binary | sibyl find my_binary -` 57 | 58 | ### ABI selection 59 | 60 | The ABI can be specified or overwritten thanks to the `--abi` option of `sibyl 61 | find`. 62 | 63 | If only one ABI is available for the target architecture, it will be selected 64 | automatically. Otherwise, the command line will ask for more precision. 65 | 66 | The chosen ABI is indicated if the verbosity level is high enough. 67 | 68 | ### Linking with other tools 69 | 70 | Sibyl output is intended to be human readable. 
71 | 72 | But, depending on the usage, some options are provided for an easier linking: 73 | * `sibyl find` can deliver results in JSON format (`-o JSON`) 74 | * `sibyl config` can be requested for direct value, or possible value of a 75 | configuration element (`-V element`) 76 | * the `sibyl` module can be used as an API 77 | -------------------------------------------------------------------------------- /doc/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | Changelog 2 | --------- 3 | 4 | ### Version 0.2 5 | 6 | * Land the new learning module (initial version from [@graux-pierre](https://github.com/graux-pierre)) 7 | * Add documentation on how-to-learn a function, associated principle and limits 8 | * Mine function address from IDA in sibyl func 9 | * Support multiple strategy for trace pruning 10 | * Add support for API stubbing 11 | * Radare2 find script from [@m-1-k-3](https://github.com/m-1-k-3) 12 | * Toshiba MeP support from [@guedou](https://github.com/guedou) 13 | 14 | Minors : 15 | 16 | * Various fixes from [@serpilliere](https://github.com/serpilliere) 17 | * Reflect API changes for Miasm v0.1.1 18 | * Add configuration associated with PIN (PIN_ROOT + tracer path) 19 | * Add regression tests for the learning module 20 | * Support expanduser in config 21 | * Restrict `bzero` implementation to avoid false positive 22 | * Adds support for function returning a non-allocated pointer 23 | 24 | ### Version 0.1 25 | 26 | This is the initial release, including: 27 | 28 | * Sibyl as a Python module 29 | * CLI `sibyl` 30 | * IDA stub 31 | * Configuration management 32 | * Support for Miasm, QEMU engine 33 | * Support for a few ABI 34 | * Support for a few functions of _string.h_, _ctype.h_ and _stdlib.h_ 35 | * Regression tests 36 | * PoC of a learning module 37 | * Documentation 38 | -------------------------------------------------------------------------------- /doc/CONFIG.md: 
-------------------------------------------------------------------------------- 1 | Configuration 2 | ------------- 3 | 4 | ### Configuration files 5 | 6 | The default Sibyl configuration can be overwritten with a configuration file. 7 | 8 | These files are taken into account if they are located in any of the locations (and 9 | in the same order) returned by `sibyl config` when no configuration is 10 | available: 11 | ``` 12 | $ sibyl config 13 | No configuration file found. Supported paths: 14 | /etc/sibyl.conf 15 | /etc/sibyl/sibyl.conf 16 | /usr/local/etc/sibyl.conf 17 | /usr/local/etc/sibyl/sibyl.conf 18 | /home/user/sibyl.conf 19 | /home/user/.sibyl.conf 20 | ... 21 | ``` 22 | 23 | The resulting configuration file can be obtained through `sibyl config -d`. 24 | 25 | ### Default configuration 26 | 27 | The default configuration is equivalent to: 28 | 29 | ```Python 30 | [find] 31 | jit_engine = qemu,miasm 32 | stubs = $MIASM/os_dep/win_api_x86_32.py,$MIASM/os_dep/linux_stdlib.py 33 | 34 | [tests] 35 | ctype = $SIBYL/test/ctype.py 36 | string = $SIBYL/test/string.py 37 | stdlib = $SIBYL/test/stdlib.py 38 | 39 | [miasm] 40 | jit_engine = gcc,llvm,tcc,python 41 | 42 | [pin] 43 | root = $PIN_ROOT 44 | tracer = $SIBYL/ext/pin_tracer/pin_tracer.so 45 | 46 | [learn] 47 | prune_strategy = branch 48 | prune_keep = 1 49 | prune_keep_max = 5 50 | 51 | [ida] 52 | idaq64 = 53 | ``` 54 | 55 | ### Section 'find' 56 | 57 | This section is relative to the `find` action. 58 | 59 | The `jit_engine` parameter is a list, separated by ',', of jitter engine 60 | preference. 61 | If the first engine is not available, then the second is used, and so on. 62 | The keyword `miasm` can be used to stand for the Miasm elected engine. 63 | 64 | To know the jitter engine elected, use `sibyl config -V jit_engine`. 65 | 66 | The `stubs` parameter is a list, separated by ',', of Python file paths. These 67 | files can implement stubs (as Python functions with the correct name). 
These 68 | stubs will be used to emulate external APIs, on supported jitter engines, during 69 | the `find` action. 70 | 71 | ### Section 'tests' 72 | 73 | This section links to available test sets. By default, only Sibyl ones are 74 | present. 75 | 76 | The syntax is: `name = path/to/file.py`. 77 | 78 | The list of registered tests can be obtained with 79 | `sibyl config -V available_tests_keys`. 80 | 81 | For more information on tests, please refer to the corresponding documentation. 82 | 83 | ### Section 'miasm' 84 | 85 | This section highlights options relative to Miasm use. 86 | 87 | The `jit_engine` parameter is a list, separated by ',', of jitter engine 88 | preference when Miasm is used. 89 | If the first engine is not available, then the second is used, and so on. 90 | 91 | To know the jitter engine elected, use `sibyl config -V miasm_engine`. 92 | 93 | ### Section 'pin' 94 | 95 | This section contains options relative to PIN use. 96 | 97 | The `root` parameter is the root path of the Intel Pin installation (the one 98 | containing the `pin` binary). 99 | By default, the environment variable `$PIN_ROOT` is used (if it exists). 100 | If `pin` is already in the user's path, this parameter can be ignored. 101 | 102 | The `tracer` parameter is the path of the compiled version of the tracer 103 | `ext/pin_tracer/pin_tracer.cpp`, which will probably look like 104 | `/path/to/sibyl/ext/pin_tracer/pin_tracer.so`. 105 | 106 | ### Section 'learn' 107 | 108 | This section contains options relative to the `learn` action. 109 | 110 | The `prune_strategy` parameter indicates which strategy should be used to prune 111 | the obtained snapshots. Current supported values are `branch`, `keep`, `keepall`. 112 | 113 | The `prune_keep` value specifies the number of snapshots to keep per pruning. 114 | 115 | The `prune_keep_max` value specifies the overall maximum number of snapshots to 116 | keep. `0` means no limit. 
117 | 118 | Please refer to the related documentation for more information. 119 | 120 | ### Section 'ida' 121 | 122 | This section contains options relative to IDA use. 123 | 124 | The `idaq64` parameter is the path of the `idaq64` binary. It will be used to 125 | find the executable if it is not in the `$PATH`. 126 | 127 | ### Section 'ghidra' 128 | 129 | This section contains options relative to GHIDRA use. 130 | 131 | The `headless` parameter is the path of the `analyzeHeadless` binary (usually at 132 | `GHIDRA_PATH/support/analyzeHeadless`). It will be used to find the executable 133 | if it is not in the `$PATH`. 134 | 135 | The `export_function` is the path of the script used to discover function 136 | (launched as a *GHIDRA preScript*). The one provided with Sibyl is named 137 | `ExportFunction.java` (`ext/ghidra/ExportFunction.java`). 138 | 139 | 140 | ### Configuration overview 141 | 142 | Using `sibyl config` without option, one can obtain: 143 | * the configuration file used, if any 144 | * available configuration file paths 145 | * elected jit engine 146 | * loaded Tests, associated to their names 147 | 148 | ### API 149 | 150 | Sibyl configuration is available from `sibyl.config:config`. 
151 | 152 | This `Config` instance provides: 153 | * `jit_engine`: Name of engine to use for jit 154 | * `available_tests`: dictionary mapping test group name to corresponding classes 155 | 156 | ### Path handling 157 | 158 | These rules are applied to paths: 159 | * the token `$SIBYL` can be used to point to Sibyl installation dir; 160 | * the token `$MIASM` can be used to point to Miasm2 installation dir; 161 | * `~` or `~user` are replaced with the `user` home directory; 162 | * Environment variables are expanded; 163 | 164 | -------------------------------------------------------------------------------- /doc/LEARNING.md: -------------------------------------------------------------------------------- 1 | Learning 2 | -------- 3 | 4 | ### Overview 5 | 6 | The learning module can be used to automatically create a new test from an 7 | existing binary. It requires: 8 | 9 | * A binary implementing the targeted function 10 | * At least one call to this function with working arguments 11 | * The targeted function prototype, including involved structures 12 | 13 | With this information, the module will run an instrumented version of the 14 | function, and collect *semantic side effects*. 15 | 16 | Then, these side effects are abstracted in an architecture agnostic form, close 17 | to the C language. 18 | 19 | During the recognition phase, this form is derived according to the expected 20 | memory layout: structure padding, `int` size, etc. 21 | 22 | If there are multiple calls to the function, Sibyl will apply a pruning policy 23 | to keep only the relevant ones, according to the associated configuration. 24 | 25 | :warning: Depending on the target binary, a few precautions should be taken; 26 | indeed, depending on the used *tracer*, the binary might be run in an 27 | unsandboxed environment. 
28 | 29 | ### Example 30 | 31 | Let's 32 | use 33 | [SoftFp, 2016/12/20 release](https://bellard.org/softfp/softfp-2016-12-20.tar.gz), 34 | a software floating point library, as an example. 35 | 36 | We will target a few arithmetic functions, and use the regression test 37 | `softfptest` to retrieve calls to these functions. 38 | 39 | #### Setup 40 | 41 | `softfptest` is slightly modified before being used. Indeed, we only need a few 42 | run of the main loop to obtain a fairly amount of calls to the targeted 43 | functions. As a result, the learning process will be faster. 44 | 45 | Function prototypes are also needed: 46 | 47 | ```C 48 | typedef unsigned long int uint64_t; 49 | typedef unsigned int uint32_t; 50 | 51 | typedef uint64_t sfloat64; 52 | 53 | typedef enum { 54 | RM_RNE, 55 | RM_RTZ, 56 | RM_RDN, 57 | RM_RUP, 58 | RM_RMM, 59 | } RoundingModeEnum; 60 | 61 | 62 | sfloat64 add_sf64(sfloat64 a, sfloat64 b, RoundingModeEnum rm, uint32_t *pfflags); 63 | sfloat64 mul_sf64(sfloat64 a, sfloat64 b, RoundingModeEnum rm, uint32_t *pfflags); 64 | sfloat64 div_sf64(sfloat64 a, sfloat64 b, RoundingModeEnum rm, uint32_t *pfflags); 65 | sfloat64 sqrt_sf64(sfloat64 a, RoundingModeEnum rm, uint32_t *pfflags); 66 | sfloat64 fma_sf64(sfloat64 a, sfloat64 b, sfloat64 c, RoundingModeEnum rm, uint32_t *pfflags); 67 | sfloat64 min_sf64(sfloat64 a, sfloat64 b, uint32_t *pfflags); 68 | sfloat64 max_sf64(sfloat64 a, sfloat64 b, uint32_t *pfflags); 69 | ``` 70 | 71 | Also, the *PIN tracer* has to be compiled: 72 | 73 | ``` 74 | $ cd ext/pin_tracer 75 | $ PIN_ROOT=/opt/... make 76 | ... 77 | ``` 78 | 79 | And the configuration set accordingly (see [the associated documentation](CONFIG.md) for more detail). 80 | 81 | #### Options 82 | 83 | The target action is `learn`. 84 | 85 | In this example, the *tracer* used is *PIN*, for performance reasons and because 86 | the target binary is available on a supported architecture. In other cases, the 87 | *Miasm* tracer is still available. 
88 | 89 | A lot of calls return zero (due to the architecture of the regression test). To 90 | ignore them (they are mostly irrelevant and pollute the resulting tests), 91 | `--avoid-null` (`-z`) is used. 92 | 93 | The result is dumped in a Python file: `-o float_{NAME}.py`. 94 | 95 | #### Learning 96 | 97 | The complete command line is: 98 | ``` 99 | $ sibyl learn -v -z {FUNC_NAME} softfptest soft.h -o float_{NAME}.py 100 | ``` 101 | 102 | One may notice that a few warnings are displayed: 103 | ``` 104 | WARNING: argument pfflags not used?! 105 | ``` 106 | 107 | Indeed, Sibyl has detected that the `pfflags` argument seems to not be used in 108 | any of the calls kept. This could indicate a lack of call examples, a too 109 | restrictive implementation, or a useless argument. 110 | 111 | #### Obtained test 112 | 113 | The resulting test looks like: 114 | ```Python 115 | from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE 116 | 117 | from sibyl.test.test import TestHeader, TestSetTest 118 | 119 | class Testmax_sf64(TestHeader): 120 | '''This is an auto-generated class, using the Sibyl learn module''' 121 | 122 | func = "max_sf64" 123 | header = ''' 124 | typedef unsigned long int uint64_t; 125 | ... 126 | ''' 127 | 128 | def init1(self): 129 | 130 | self._add_arg(0, 0) # arg0_a 131 | self._add_arg(1, 9218868437227405312) # arg1_b 132 | 133 | def check1(self): 134 | return all(( 135 | # Check output value 136 | self._get_result() == 0x7ff0000000000000, 137 | )) 138 | 139 | ... 
140 | tests = TestSetTest(init1, check1) & TestSetTest(init2, check2) & TestSetTest(init3, check3) & TestSetTest(init4, check4) & TestSetTest(init5, check5) 141 | 142 | TESTS = [Testmax_sf64] 143 | ``` 144 | 145 | When type understanding is needed, the tests are a bit more complicated (from `mul_sf64`): 146 | ```Python 147 | def init2(self): 148 | # arg3_pfflags 149 | base0_ptr_size = self.field_addr("arg3_pfflags", "*(arg3_pfflags)") + self.sizeof("*(arg3_pfflags)") 150 | base0_ptr = self._alloc_mem(base0_ptr_size, read=True, write=True) 151 | 152 | self._add_arg(0, 0) # arg0_a 153 | self._add_arg(1, 9218868437227405312) # arg1_b 154 | self._add_arg(2, 0) # arg2_rm 155 | self._add_arg(3, base0_ptr) # arg3_pfflags 156 | 157 | # *(arg3_pfflags) = 0x0 158 | self._write_mem(base0_ptr, self.pack(0x0, self.sizeof("*(arg3_pfflags)"))) 159 | 160 | self.base0_ptr = base0_ptr 161 | 162 | def check2(self): 163 | return all(( 164 | # Check output value 165 | self._get_result() == 0x7ff8000000000000, 166 | # *(arg3_pfflags) == 0x10 167 | self._ensure_mem(self.base0_ptr, self.pack(0x10, self.sizeof("*(arg3_pfflags)"))), 168 | )) 169 | 170 | ``` 171 | 172 | #### Replay 173 | 174 | Outputs are directly usable as Sibyl test. To regroup them in a common test set, one can create a Python script merging `TESTS` list from the different scripts, as: 175 | 176 | ```Python 177 | out = [] 178 | for f in ["add", "mul", "div", "sqrt", "fma", "min", "max"]: 179 | execfile("float_%s_sf64.py" % f) 180 | out += TESTS 181 | 182 | TESTS = out 183 | ``` 184 | 185 | To inform Sibyl about this new test set, a line is added in the configuration 186 | (see [the associated documentation](CONFIG.md) for more detail): 187 | 188 | ```Python 189 | [tests] 190 | sfloat = /path/to/float.py 191 | ``` 192 | 193 | The tests are now detected by Sibyl, as stated by this command line: 194 | ``` 195 | $ sibyl config 196 | ... 
197 | sfloat (7) 198 | add_sf64, mul_sf64, div_sf64, sqrt_sf64, fma_sf64, min_sf64, max_sf64 199 | ``` 200 | 201 | At this stage, they are replayable on new binaries, for instance on an obfuscated version of `softfptest`: 202 | ``` 203 | $ sibyl func softfptest.obfu | sibyl find -v -t sfloat -b ABI_AMD64_SYSTEMV softfptest.obfu - 204 | Guessed architecture: x86_64 205 | Found 405 addresses 206 | Found 7 test cases 207 | 0x004330d0 : max_sf64 208 | 0x0042a9a0 : mul_sf64 209 | 0x00431e30 : min_sf64 210 | 0x0042bc90 : fma_sf64 211 | 0x00430c70 : sqrt_sf64 212 | 0x00429270 : add_sf64 213 | 0x0042ee70 : div_sf64 214 | ``` 215 | 216 | ### Known limitations 217 | 218 | The learning module has known limitations. 219 | 220 | As mentioned in the previous section, it is necessary to have a working binary, 221 | which call the function with valid arguments. 222 | 223 | In addition, this binary must be *traceable*, which could not be the case, 224 | depending on the architecture. 225 | 226 | The limitation of Sibyl are also applied to this module; for instance and for 227 | now, there is no support of floating argument, or ABI specificity such as 228 | structure in-lining in arguments. 229 | 230 | For now Sibyl does not track, and then does not support, functions using an 231 | allocator for their semantic use (for example, a function allocating a new 232 | structure through `malloc`). 233 | -------------------------------------------------------------------------------- /doc/TESTING.md: -------------------------------------------------------------------------------- 1 | Testing 2 | ------- 3 | 4 | ### Integrated regression tests 5 | 6 | Sibyl is provided with a few regression tests. 7 | 8 | To test a Sibyl installation: 9 | 10 | ``` 11 | $ cd c_tests && python run_ctests.py 12 | ... 13 | ``` 14 | 15 | Heuristics can be tested by using `-f` and `-a` options, respectively for 16 | functions and architecture guessing. 17 | 18 | One should have at least a few functions detected. 
Depending on your system, the 19 | package `libc6-dev-i386` may be required to build the tests. 20 | 21 | Depending on the current Sibyl state, some functions can be misdetected or 22 | absent. 23 | 24 | ### External regression tests 25 | 26 | Sibyl commits go through a CI process, which includes tests on real programs. 27 | These tests are available 28 | on [Sibyl-tests](https://github.com/commial/Sibyl-tests) repository. 29 | 30 | 31 | ### Learning tests 32 | 33 | As Learning documentation, this part will completely change soon. 34 | -------------------------------------------------------------------------------- /doc/img/ghidra_screen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cea-sec/Sibyl/14866eb8ef3a65fcc4535faaf76eb42faf64d313/doc/img/ghidra_screen.png -------------------------------------------------------------------------------- /doc/img/ghidra_screen2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cea-sec/Sibyl/14866eb8ef3a65fcc4535faaf76eb42faf64d313/doc/img/ghidra_screen2.png -------------------------------------------------------------------------------- /doc/img/ida_screen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cea-sec/Sibyl/14866eb8ef3a65fcc4535faaf76eb42faf64d313/doc/img/ida_screen.png -------------------------------------------------------------------------------- /ext/ghidra/ExportFunction.java: -------------------------------------------------------------------------------- 1 | import ghidra.program.model.address.Address; 2 | import ghidra.program.model.listing.*; 3 | import ghidra.app.util.headless.HeadlessScript; 4 | 5 | /* 6 | * Print out found functions' address. Do not consider external functions. 7 | * This script can be run as a preScript, Ghidra already have functions at this point. 
8 | */ 9 | public class ExportFunction extends HeadlessScript { 10 | 11 | @Override 12 | public void run() throws Exception { 13 | // Find and print found functions 14 | Listing listing = currentProgram.getListing(); 15 | FunctionIterator iter = listing.getFunctions(true); 16 | while (iter.hasNext() && !monitor.isCancelled()) { 17 | Function f = iter.next(); 18 | if (f.isExternal()) { 19 | continue; 20 | } 21 | /* 22 | * Let's consider already labeled functions 23 | String fName = f.getName(); 24 | if (!fName.startsWith("FUN_")) { 25 | continue; 26 | } 27 | */ 28 | Address entry = f.getEntryPoint(); 29 | if (entry != null) { 30 | println(String.format("0x%x", entry.getOffset())); 31 | } 32 | } 33 | setHeadlessContinuationOption(HeadlessContinuationOption.ABORT_AND_DELETE); 34 | 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /ext/ghidra/find.py: -------------------------------------------------------------------------------- 1 | #Sibyl "find" launcher 2 | #@author MOUGEY Camille 3 | #@category FunctionID 4 | 5 | import json 6 | import os 7 | import subprocess 8 | import time 9 | 10 | # Find SIBYL find.py 11 | identify_binary = "sibyl" 12 | env = os.environ 13 | 14 | # Sibyl launching 15 | def exec_cmd(command_line): 16 | """Launch the command line @command_line""" 17 | global env 18 | process = subprocess.Popen(command_line, 19 | stdout=subprocess.PIPE, 20 | env=env) 21 | 22 | result, _ = process.communicate() 23 | 24 | if process.returncode != 0: 25 | # An error occured 26 | raise RuntimeError("An error occured, please consult the console") 27 | 28 | return result 29 | 30 | def available_tests(): 31 | """Get the available tests""" 32 | global identify_binary 33 | command_line = [identify_binary, "config", "-V", "available_tests_keys"] 34 | return eval(exec_cmd(command_line)) 35 | 36 | def parse_output(command_line): 37 | """Parse the output of find.py""" 38 | result = exec_cmd(command_line) 39 | 40 | for result 
def handle_found(addr, candidates):
    """Report a match and annotate the GHIDRA listing accordingly.

    The candidate names are printed, then written as a plate comment
    ("Sibyl - name1,name2") on the code unit located at @addr.
    @addr: address of the function analyzed
    @candidates: list of string of possible matched functions
    """
    names = ",".join(candidates)
    print("Found %s at %s" % (names, hex(addr)))

    code_unit = currentProgram.getListing().getCodeUnitAt(toAddr(addr))
    code_unit.setComment(code_unit.PLATE_COMMENT, "Sibyl - %s" % names)
70 | High number means speed; low number means less ressources and higher 71 | frequency of report 72 | """ 73 | 74 | # Check Sibyl availability 75 | global identify_binary 76 | if not identify_binary: 77 | raise ValueError("A valid Sibyl path to find.py must be supplied") 78 | 79 | # Get binary information 80 | filename = str(currentProgram.getExecutablePath()) 81 | nb_func = len(funcs) 82 | 83 | # Prepare run 84 | starttime = time.time() 85 | nb_found = 0 86 | add_map = [] 87 | if isinstance(map_addr, int): 88 | add_map = ["-m", hex(map_addr)] 89 | 90 | # Launch identification 91 | monitor.setMessage("Launch identification on %d function(s)" % nb_func) 92 | options = ["-a", architecture, "-b", abi, "-o", "JSON"] 93 | for test_name in test_set: 94 | options += ["-t", test_name] 95 | if jitter is not None: 96 | options += ["-j", jitter] 97 | options += add_map 98 | res = {} 99 | 100 | for i in xrange(0, len(funcs), buf_size): 101 | # Build command line 102 | addresses = funcs[i:i + buf_size] 103 | command_line = [identify_binary, "find"] 104 | command_line += options 105 | command_line += [filename] 106 | command_line += addresses 107 | 108 | # Call Sibyl and keep only stdout 109 | for addr, candidates in parse_output(command_line): 110 | handle_found(addr, candidates) 111 | res[addr] = candidates 112 | nb_found += 1 113 | 114 | # Print current status and estimated time 115 | curtime = (time.time() - starttime) 116 | maxi = min(i + buf_size, len(funcs)) 117 | estimatedtime = (curtime * nb_func) / maxi 118 | remaintime = estimatedtime - curtime 119 | monitor.setMessage("Current: %.02f%% (FUN_%s)| Estimated time remaining: %.02fs" % (((100. /nb_func) * maxi), 120 | addresses[-1], 121 | remaintime)) 122 | if monitor.isCancelled(): 123 | print "Early break asked by the user" 124 | break 125 | 126 | print "Finished ! 
if __name__ == "__main__":
    # Script entry point: map the GHIDRA language onto a Miasm architecture
    # and a Sibyl ABI, collect the target function addresses, ask which test
    # sets to enable, then launch the identification.

    # Local import: the script only imports json, os, subprocess and time at
    # its top, and `os` has no `exit` function.
    import sys

    # The language string is split on its last "/" into processor and ABI
    processor_name, abi = str(currentProgram.getLanguage()).rsplit("/", 1)
    m_arch = GHIDRAArch2MiasmArch.get(processor_name, None)
    if m_arch is None:
        # The lookup result has to be tested: processor_name itself is never
        # None, so testing it would let unsupported architectures through.
        popup("Unsupported architecture: %s" % processor_name)
        sys.exit(0)

    s_abi = GHIDRAABI2SibylABI.get((m_arch, abi), None)
    if s_abi is None:
        popup("Unsupported ABI: (%s, %s)" % (m_arch, abi))
        sys.exit(0)

    monitor.setMessage("Get functions address...")
    cur, whole = "Current function", "Whole program"
    choice = askChoice("Target", "Target function(s)", [cur, whole], cur)
    if choice == cur:
        current_func = getFunctionContaining(currentAddress)
        if current_func is None:
            # The cursor is outside of any known function
            popup("No function found at the current address")
            sys.exit(0)
        addrs = ["0x%x" % current_func.entryPoint.getOffset()]
    else:
        addrs = []
        for func in currentProgram.getListing().getFunctions(True):
            if func.isExternal():
                continue

            # Already labeled functions (name not starting with "FUN_") are
            # deliberately kept in the target list.

            addr = func.getEntryPoint()
            if addr is not None:
                addrs.append("0x%x" % addr.getOffset())

    monitor.setMessage("Get available tests...")
    AVAILABLE_TESTS = available_tests()
    testset = askChoices(
        "Test set", "Testsets to enable", AVAILABLE_TESTS, AVAILABLE_TESTS
    )

    launch_on_funcs(m_arch, s_abi, addrs, testset)
$(CONFIG_ROOT)/makefile.config 4 | include $(TOOLS_ROOT)/Config/makefile.default.rules 5 | endif 6 | 7 | all: intel64 8 | 9 | intel64: 10 | ifeq ("$(PIN_ROOT)", "") 11 | # PIN_ROOT has to point the root directory of pin-3.0-76991-gcc-linux 12 | $(error PIN_ROOT variable is not set) 13 | endif 14 | $(MAKE) TARGET=intel64 obj-intel64/pin_tracer.so 15 | mv obj-intel64/pin_tracer.so . 16 | 17 | clean: 18 | rm -rf obj-intel64 pin_tracer.so 19 | -------------------------------------------------------------------------------- /ext/pin_tracer/pin_tracer.cpp: -------------------------------------------------------------------------------- 1 | /* The file should be compiled with pin-3.0-76991-gcc-linux */ 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "pin.H" 9 | 10 | /* Set pin option and gobal variable for the address of the traced function */ 11 | KNOB KnobFunctionAddr(KNOB_MODE_WRITEONCE, "pintool", 12 | "a", "0x0", "function address to trace"); 13 | uint64_t functionAddr = 0x0; 14 | 15 | /* Set pin option and gobal variable for the output file */ 16 | KNOB KnobOutputFile(KNOB_MODE_WRITEONCE, "pintool", 17 | "o", "out.trace", "outputfile"); 18 | FILE * trace; 19 | 20 | /* Boolean variable. True if traced function is currently executed (then tracer have to produce output), false else. */ 21 | unsigned char instrument = 0; 22 | 23 | /* Value of RSP at the beginning of each function execution. 
24 | It is used to detect the end of the function (initial rsp lower than current rsp)*/ 25 | ADDRINT rspInit; 26 | 27 | 28 | /* Error checking functions */ 29 | 30 | #define check_error(func, func_name) { \ 31 | if((func) < 0){ \ 32 | perror(func_name); \ 33 | abort(); \ 34 | }} 35 | 36 | #define check_fprintf_error(func) check_error(func, "fprintf") 37 | #define check_snprintf_error(func) check_error(func, "snprintf") 38 | 39 | 40 | /* Print a memory read record */ 41 | VOID RecordMemRead(VOID * ip, VOID * addr, UINT32 size, char* disass) 42 | { 43 | if( instrument ) { 44 | switch(size){ 45 | case 1: check_fprintf_error(fprintf(trace,"R %p %x %x %p\n", addr, size, *(uint8_t*)addr, ip)); break; 46 | case 2: check_fprintf_error(fprintf(trace,"R %p %x %x %p\n", addr, size, *(uint16_t*)addr, ip));break; 47 | case 4: check_fprintf_error(fprintf(trace,"R %p %x %x %p\n", addr, size, *(uint32_t*)addr, ip));break; 48 | case 8: check_fprintf_error(fprintf(trace,"R %p %x %lx %p\n", addr, size, *(uint64_t*)addr, ip));break; 49 | case 16: check_fprintf_error(fprintf(trace,"R %p %x %lx%016lx %p\n", addr, size, *(((uint64_t*)addr)+1), *(uint64_t*)addr, ip));break; 50 | case 32: check_fprintf_error(fprintf(trace,"R %p %x %lx%032lx %p\n", addr, size, *(((uint64_t*)addr)+1), *(uint64_t*)addr, ip));break; 51 | default: fprintf(stderr, "abort: read size is not managed ([%p]%u @%p(%s))\n",addr,size,ip,disass);abort(); 52 | } 53 | } 54 | } 55 | 56 | /* Global variables used to communicate between RecordMemWriteContent and RecordMemWriteAddr functions */ 57 | VOID * last_addr; 58 | UINT32 last_size; 59 | 60 | /* Print the written value and the address of the instruction of a memory write record */ 61 | VOID RecordMemWriteContent(VOID * ip) 62 | { 63 | if( instrument ) { 64 | switch(last_size){ 65 | case 1: check_fprintf_error(fprintf(trace,"%x %p\n", *(uint8_t*)last_addr, ip));break; 66 | case 2: check_fprintf_error(fprintf(trace,"%x %p\n", *(uint16_t*)last_addr, ip));break; 67 | case 
4: check_fprintf_error(fprintf(trace,"%x %p\n", *(uint32_t*)last_addr, ip));break; 68 | case 8: check_fprintf_error(fprintf(trace,"%lx %p\n", *(uint64_t*)last_addr, ip));break; 69 | case 16: check_fprintf_error(fprintf(trace,"%lx%016lx %p\n", *(((uint64_t*)last_addr)+1), *(uint64_t*)last_addr, ip));break; 70 | case 32: check_fprintf_error(fprintf(trace,"%lx%032lx %p\n", *(((uint64_t*)last_addr)+1), *(uint64_t*)last_addr, ip));break; 71 | default: fprintf(stderr, "abort: write size is not managed ([%p]%i @%p)\n", last_addr, last_size, ip); abort(); 72 | } 73 | } 74 | } 75 | 76 | /* Print the written address and size of a memory write record */ 77 | VOID RecordMemWriteAddr(VOID * ip, VOID * addr, UINT32 size) 78 | { 79 | if( instrument ) { 80 | switch(size){ 81 | case 1: check_fprintf_error(fprintf(trace,"W %p %x ", addr, size));break; 82 | case 2: check_fprintf_error(fprintf(trace,"W %p %x ", addr, size));break; 83 | case 4: check_fprintf_error(fprintf(trace,"W %p %x ", addr, size));break; 84 | case 8: check_fprintf_error(fprintf(trace,"W %p %x ", addr, size));break; 85 | case 16: check_fprintf_error(fprintf(trace,"W %p %x ", addr, size));break; 86 | case 32: check_fprintf_error(fprintf(trace,"W %p %x ", addr, size));break; 87 | default: fprintf(stderr, "abort: write size is not managed ([%p]%i @%p)\n", addr, size, ip); abort(); 88 | } 89 | last_size = size; 90 | last_addr = addr; 91 | } 92 | } 93 | 94 | VOID DumpRegsI(VOID * ip, ADDRINT rax, ADDRINT rbx, ADDRINT rcx, ADDRINT rdx, ADDRINT rsi, ADDRINT rdi, ADDRINT rbp, ADDRINT rsp, ADDRINT r8, ADDRINT r9, ADDRINT r10, ADDRINT r11, ADDRINT r12, ADDRINT r13, ADDRINT r14, ADDRINT r15) { 95 | /* If the function is not already begun and IP is at its first instruction, 96 | we log input registers */ 97 | if( !instrument && (uint64_t)ip == functionAddr ) { 98 | instrument = 1; 99 | check_fprintf_error(fprintf(trace,"I %lx %lx %lx %lx %lx %lx %lx %lx %lx %lx %lx %lx %lx %lx %lx %lx\n", rax, rbx, rcx, rdx, rsi, rdi, rbp, 
rsp, r8, r9, r10, r11, r12, r13, r14, r15)); 100 | rspInit = rsp; 101 | } 102 | } 103 | 104 | VOID DumpCall(VOID* ip, ADDRINT rsp) { 105 | /* If the function was running*/ 106 | if( instrument ) 107 | fprintf(trace, "CALL %p %p\n", ip, (void*) rsp); 108 | } 109 | 110 | VOID DumpRet(VOID* ip, ADDRINT rsp, ADDRINT rax) { 111 | /* If the function was running*/ 112 | if( instrument ) 113 | fprintf(trace, "RET %p %lx %lx\n", ip, rsp, rax); 114 | } 115 | 116 | 117 | VOID DumpRegsO(VOID * ip, ADDRINT rax, ADDRINT rbx, ADDRINT rcx, ADDRINT rdx, ADDRINT rsi, ADDRINT rdi, ADDRINT rbp, ADDRINT rsp, ADDRINT r8, ADDRINT r9, ADDRINT r10, ADDRINT r11, ADDRINT r12, ADDRINT r13, ADDRINT r14, ADDRINT r15) { 118 | /* If the function was running*/ 119 | if( instrument ) { 120 | /* Log the executed instruction address */ 121 | fprintf(trace,"@ %p\n", ip); 122 | 123 | /* If the function is finished */ 124 | if( rspInit < rsp ) { 125 | /* Log output registers */ 126 | instrument = 0; 127 | check_fprintf_error(fprintf(trace,"O %lx %lx %lx %lx %lx %lx %lx %lx %lx %lx %lx %lx %lx %lx %lx %lx\n", rax, rbx, rcx, rdx, rsi, rdi, rbp, rsp, r8, r9, r10, r11, r12, r13, r14, r15)); 128 | } 129 | } 130 | } 131 | 132 | /* Is called for every instruction and instruments reads and writes accesses */ 133 | VOID Instruction(INS ins, VOID *v) 134 | { 135 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)DumpRegsI, 136 | IARG_INST_PTR, 137 | IARG_REG_VALUE, REG_RAX, 138 | IARG_REG_VALUE, REG_RBX, 139 | IARG_REG_VALUE, REG_RCX, 140 | IARG_REG_VALUE, REG_RDX, 141 | IARG_REG_VALUE, REG_RSI, 142 | IARG_REG_VALUE, REG_RDI, 143 | IARG_REG_VALUE, REG_RBP, 144 | IARG_REG_VALUE, REG_RSP, 145 | IARG_REG_VALUE, REG_R8, 146 | IARG_REG_VALUE, REG_R9, 147 | IARG_REG_VALUE, REG_R10, 148 | IARG_REG_VALUE, REG_R11, 149 | IARG_REG_VALUE, REG_R12, 150 | IARG_REG_VALUE, REG_R13, 151 | IARG_REG_VALUE, REG_R14, 152 | IARG_REG_VALUE, REG_R15, 153 | IARG_END); 154 | 155 | IPOINT ipt; 156 | if(INS_HasFallThrough(ins)) ipt = 
IPOINT_AFTER; 157 | else if(INS_IsBranchOrCall(ins) || INS_IsSysret(ins) || INS_IsRet(ins)) ipt = IPOINT_TAKEN_BRANCH; 158 | else ipt = IPOINT_BEFORE; 159 | 160 | INS_InsertCall(ins, ipt, (AFUNPTR)DumpRegsO, 161 | IARG_INST_PTR, 162 | IARG_REG_VALUE, REG_RAX, 163 | IARG_REG_VALUE, REG_RBX, 164 | IARG_REG_VALUE, REG_RCX, 165 | IARG_REG_VALUE, REG_RDX, 166 | IARG_REG_VALUE, REG_RSI, 167 | IARG_REG_VALUE, REG_RDI, 168 | IARG_REG_VALUE, REG_RBP, 169 | IARG_REG_VALUE, REG_RSP, 170 | IARG_REG_VALUE, REG_R8, 171 | IARG_REG_VALUE, REG_R9, 172 | IARG_REG_VALUE, REG_R10, 173 | IARG_REG_VALUE, REG_R11, 174 | IARG_REG_VALUE, REG_R12, 175 | IARG_REG_VALUE, REG_R13, 176 | IARG_REG_VALUE, REG_R14, 177 | IARG_REG_VALUE, REG_R15, 178 | IARG_END); 179 | 180 | 181 | if (INS_IsCall(ins)) { 182 | // We cannot use INS_DirectBranchOrCallTargetAddress -> CALL RAX 183 | INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)DumpCall, 184 | IARG_INST_PTR, 185 | IARG_REG_VALUE, REG_RSP, 186 | IARG_END); 187 | 188 | } 189 | 190 | if (INS_IsRet(ins)) { 191 | INS_InsertCall(ins, IPOINT_TAKEN_BRANCH, (AFUNPTR)DumpRet, 192 | IARG_INST_PTR, 193 | IARG_REG_VALUE, REG_RSP, 194 | IARG_REG_VALUE, REG_RAX, 195 | IARG_END); 196 | 197 | } 198 | 199 | /* Iterate over each memory operand of the instruction */ 200 | UINT32 memOperands = INS_MemoryOperandCount(ins); 201 | for (UINT32 memOp = 0; memOp < memOperands; memOp++) { 202 | 203 | /* If it is a read operand, log it */ 204 | if (INS_MemoryOperandIsRead(ins, memOp)) { 205 | INS_InsertPredicatedCall(ins, IPOINT_BEFORE, (AFUNPTR)RecordMemRead, 206 | IARG_INST_PTR, 207 | IARG_MEMORYOP_EA, memOp, 208 | IARG_MEMORYREAD_SIZE, 209 | IARG_PTR,strdup(INS_Disassemble(ins).c_str()), 210 | IARG_END); 211 | } 212 | 213 | /* If it is a write operand, log it */ 214 | if (INS_MemoryOperandIsWritten(ins, memOp)) { 215 | IPOINT ipt; 216 | ipt = INS_HasFallThrough(ins)? 
/* Tool entry point: read the options, open the trace file, register the
   instruction, image and exit callbacks, then hand control over to Pin. */
int main(int argc, char *argv[])
{
    /* A non-zero PIN_Init result is handled by printing the usage */
    if (PIN_Init(argc, argv)) return Usage();

    /* Address of the function to trace ("-a" option) */
    functionAddr = KnobFunctionAddr.Value();

    /* Trace output file ("-o" option); the tool cannot work without it */
    trace = fopen(KnobOutputFile.Value().c_str(), "w");
    if( trace == NULL ){
        perror("fopen");
        abort();
    }

    /* Per-instruction instrumentation; Fini closes the trace file */
    INS_AddInstrumentFunction(Instruction, 0);
    PIN_AddFiniFunction(Fini, 0);

    /* InstImage logs the symbols of each image */
    PIN_InitSymbols();
    IMG_AddInstrumentFunction(InstImage, 0);

    PIN_StartProgram();

    /* The code below is only a safety net, as stated by the message */
    fprintf(stderr, "This point should never be reached");
    abort();

    return 0;
}
def parse_output(command_line):
    """Run @command_line and parse its JSON output.

    The output is expected to be a JSON object whose "results" entry is a
    list of objects having an "address" and a "functions" field.
    Yield (address, list of candidate names as str) for each result having at
    least one candidate; results without candidate are skipped.
    @command_line: command line (list of str) to launch
    """
    raw_output = exec_cmd(command_line)

    # A dedicated name is used for the loop variable, instead of rebinding the
    # raw output while iterating over its decoded form
    for entry in json.loads(raw_output)["results"]:
        address, candidates = entry["address"], entry["functions"]
        if candidates:
            # A real list is needed: callers both join and store the names,
            # which a lazy map() object would only allow once
            yield address, [str(candidate) for candidate in candidates]
61 | Print and rename the function at @addr 62 | @addr: address of the function analyzed 63 | @candidates: list of string of possible matched functions 64 | """ 65 | print "[+] Found %s at %s" % (",".join(candidates), hex(addr)) 66 | #rename the functions in r2 67 | r2.cmd('afn ' + ",".join(candidates) +'_sibyl ' +hex(addr)) 68 | # setup flags in r2 69 | r2.cmd('f ' + ",".join(candidates) +'_sibyl @ ' +hex(addr)) 70 | #write IDA pro batch file to be able to import the stuff to ida 71 | #in IDA use + and copy the content from the generated file 72 | f.write("MakeName(" +hex(addr) +", \"" +",".join(candidates) +'_sibyl' +"\");\n") 73 | 74 | 75 | def launch_on_funcs(architecture, abi, funcs, test_set, map_addr=None, 76 | jitter=None, buf_size=2000): 77 | """Launch identification on functions. 78 | @architecture: str standing for current architecture 79 | @abi: str standing for expected ABI 80 | @funcs: list of function addresses (int) to check 81 | @test_set: list of test sets to run 82 | Optional arguments: 83 | @map_addr: (optional) the base address where the binary has to be loaded if 84 | format is not recognized 85 | @jitter: (optional) jitter engine to use (gcc, tcc, llvm, python, qemu) 86 | @buf_size: (optional) number of argument to pass to each instance of sibyl. 
87 | High number means speed; low number means less ressources and higher 88 | frequency of report 89 | """ 90 | 91 | # Check Sibyl availability 92 | global identify_binary 93 | if not identify_binary: 94 | raise ValueError("[-] A valid Sibyl path to find.py must be supplied") 95 | 96 | # Get binary information 97 | bin_details=r2.cmdj('oj') 98 | filename = bin_details[0]['uri'] 99 | nb_func = r2.cmd('aflc') 100 | 101 | # Prepare run 102 | starttime = time.time() 103 | nb_found = 0 104 | add_map = [] 105 | if isinstance(map_addr, int): 106 | add_map = ["-m", hex(map_addr)] 107 | 108 | # Launch identification 109 | nb_func = int(nb_func) 110 | print "[*] Launch identification on %d function(s)" % nb_func 111 | options = ["-a", architecture, "-b", abi, "-o", "JSON"] 112 | for test_name in test_set: 113 | options += ["-t", test_name] 114 | if jitter is not None: 115 | options += ["-j", jitter] 116 | options += add_map 117 | res = {} 118 | 119 | for i in xrange(0, len(funcs), buf_size): 120 | # Build command line 121 | addresses = funcs[i:i + buf_size] 122 | command_line = [identify_binary, "find"] 123 | command_line += options 124 | command_line += [filename] 125 | command_line += addresses 126 | 127 | # Call Sibyl and keep only stdout 128 | for addr, candidates in parse_output(command_line): 129 | handle_found(addr, candidates) 130 | res[addr] = candidates 131 | nb_found += 1 132 | 133 | # Print current status and estimated time 134 | curtime = (time.time() - starttime) 135 | maxi = min(i + buf_size, len(funcs)) 136 | estimatedtime = (curtime * nb_func) / maxi 137 | remaintime = estimatedtime - curtime 138 | print "[*] Current: %.02f%% (sub_%s)| Estimated time remaining: %.02fs" % (((100. /nb_func) * maxi), 139 | addresses[-1], 140 | remaintime) 141 | 142 | print "[*] Finished ! 
def architecture(bin_info):
    """Guess the Sibyl architecture name and ABI from radare2 information.

    @bin_info: dict (radare2 "ij" output) whose "bin" entry provides the
    "arch", "endian" and "bits" fields
    Return a tuple (architecture name, ABI name).
    "ppc" and 16-bit x86 are reported as not supported on the console, but
    their names are still returned.
    Raise ValueError when no architecture name can be derived, instead of
    failing on unbound local variables.
    """
    bin_desc = bin_info['bin']
    processor_name = bin_desc['arch']
    endian = bin_desc['endian']
    bits = bin_desc['bits']
    print("[*] Architecture: %s / Endianess: %s / Bits: %s" % (processor_name, endian, bits))

    # Stay None as long as the target is not recognized
    name = abi = None
    is_big = (endian == "big")

    if processor_name == "arm":
        # TODO ARM/thumb
        # hack for thumb: set armt = True in globals :/
        is_armt = globals().get('armt', False)

        abi = "ABI_ARM"
        name = ("armt" if is_armt else "arm") + ("b" if is_big else "l")

    elif processor_name == "mips":
        abi = "ABI_MIPS_O32"
        name = "mips32b" if is_big else "mips32l"

    elif processor_name == "ppc": # currently not supported
        abi = "ABI_PPC"
        name = "ppc32b" if is_big else "ppc32l"
        print("[-] not supported")

    elif processor_name == "x86" and endian == "little":
        if bits == 32:
            name = "x86_32"
            abi = "ABIStdCall_x86_32"
            #abi = "ABIFastCall_x86_32" #currently we have to do this manually
        elif bits == 64: #untested and unknown if this is correct
            name = "x86_64"
            abi = "ABI_AMD64_SYSTEMV"
        elif bits == 16: #untested and unknown if this is correct
            name = "x86_16"
            abi = "" #untested, no ABI available
            print("[-] not supported")

    if name is None:
        # Unknown processor, endianness or word size
        print("[-] not supported")
        raise ValueError(
            "Unsupported architecture: %s / %s / %s" % (processor_name,
                                                        endian, bits)
        )

    return name, abi
if __name__ == '__main__':
    # Script entry point, to be launched from a radare2 session: check that
    # functions are known, prepare the globals used by the helpers
    # (r2, f, identify_binary, env) and launch the identification.

    r2 = r2pipe.open()

    # Ask radare2 for the number of functions once, and reuse the answer
    function_count = r2.cmd('aflc')
    print('\n[*] Found ' + function_count + ' functions')

    if int(function_count) == 0:
        print('\n[-] no functions found for analyzing ... try to analyze the binary first')
        # sys.exit is always available, unlike the `exit` helper of `site`
        sys.exit(0)

    # we create an IDA batch file for auto renaming the functions in IDA pro
    f = open('ida_batch_sibyl.txt', 'w', 0)
    try:
        # Find SIBYL find.py
        identify_binary = "sibyl"
        env = os.environ
        AVAILABLE_TESTS = available_tests()

        main()
    finally:
        # Close the batch file even if the identification fails
        f.close()
See the GNU General Public 14 | # License for more details. 15 | # 16 | # You should have received a copy of the GNU General Public License 17 | # along with Sibyl. If not, see . 18 | 19 | from distutils.core import setup 20 | 21 | setup( 22 | name='sibyl', 23 | version='0.1', 24 | author='Camille MOUGEY', 25 | author_email='commial@gmail.com', 26 | url='https://github.com/cea-sec/sibyl', 27 | download_url='https://github.com/cea-sec/sibyl/tarball/master', 28 | license='GPLv3+', 29 | description='A Miasm2 based function divination', 30 | long_description=""" 31 | Sibyl is a tool aiming at recognizing functions in binaries based on their side 32 | effects, by running them in a sandboxed environment.""", 33 | keywords=["reverse engineering", "emulation"], 34 | install_requires=[ 35 | 'miasm2', 36 | ], 37 | packages=['sibyl', 'sibyl/abi', 'sibyl/engine', 'sibyl/learn', 38 | 'sibyl/learn/tracer', 'sibyl/learn/generator', 39 | 'sibyl/heuristics', 'sibyl/test', 'sibyl/actions'], 40 | scripts=['bin/sibyl'], 41 | ) 42 | -------------------------------------------------------------------------------- /sibyl/__init__.py: -------------------------------------------------------------------------------- 1 | # This file is part of Sibyl. 2 | # Copyright 2014 Camille MOUGEY 3 | # 4 | # Sibyl is free software: you can redistribute it and/or modify it 5 | # under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation, either version 3 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # Sibyl is distributed in the hope that it will be useful, but WITHOUT 10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 11 | # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 12 | # License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with Sibyl. If not, see . 
16 | 17 | 18 | """This package provides methods for identifying common functions. 19 | It works by JITting them thanks to Miasm2 engine and comparing their side 20 | effects with its signature base. 21 | """ 22 | 23 | __all__ = [] 24 | -------------------------------------------------------------------------------- /sibyl/abi/__init__.py: -------------------------------------------------------------------------------- 1 | # This file is part of Sibyl. 2 | # Copyright 2014 Camille MOUGEY 3 | # 4 | # Sibyl is free software: you can redistribute it and/or modify it 5 | # under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation, either version 3 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # Sibyl is distributed in the hope that it will be useful, but WITHOUT 10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 11 | # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 12 | # License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with Sibyl. If not, see . 16 | 17 | from sibyl.abi.x86 import ABIS as ABIS_X86 18 | from sibyl.abi.arm import ABIS as ABIS_ARM 19 | from sibyl.abi.mep import ABIS as ABIS_MEP 20 | from sibyl.abi.mips import ABIS as ABIS_MIPS 21 | ABIS = ABIS_X86 + ABIS_ARM + ABIS_MEP + ABIS_MIPS 22 | __all__ = ["ABIS"] 23 | -------------------------------------------------------------------------------- /sibyl/abi/abi.py: -------------------------------------------------------------------------------- 1 | # This file is part of Sibyl. 2 | # Copyright 2014 Camille MOUGEY 3 | # 4 | # Sibyl is free software: you can redistribute it and/or modify it 5 | # under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation, either version 3 of the License, or 7 | # (at your option) any later version. 
# ---- sibyl/abi/abi.py ----


class ABI(object):
    """Base class of every calling convention (ABI).

    Subclasses list the architecture names they apply to in `arch` and
    implement argument passing, call preparation and result retrieval.
    """

    # Associated architectures
    arch = []

    def __init__(self, jitter, ira):
        """Bind the ABI to an execution context
        @jitter: jitter whose CPU and stack are manipulated by the ABI
        @ira: IR analysis object (its `ret_reg` is read by subclasses)
        """
        self.ira = ira
        self.jitter = jitter

    def reset(self):
        "Reset the current ABI"

    def add_arg(self, number, element):
        """Add a function argument
        @number: argument number (start 0)
        @element: argument
        """
        raise NotImplementedError("Abstract method")

    def prepare_call(self, ret_addr):
        """Prepare the call to a function
        @ret_addr: return address
        """
        raise NotImplementedError("Abstract method")

    def get_result(self):
        """Return the function result value, as int"""
        raise NotImplementedError("Abstract method")


class ABIRegsStack(ABI):
    """ABI passing the first arguments through registers and the remaining
    ones on the stack.

    Subclasses provide `regs_mapping`, `vm_push` and `set_ret`.
    """

    regs_mapping = None  # Register mapping (list of str)
    args = None  # order => element

    def __init__(self, *args, **kwargs):
        super(ABIRegsStack, self).__init__(*args, **kwargs)
        self.args = {}

    def reset(self):
        "Forget every argument registered so far"
        self.args = {}

    def add_arg(self, number, element):
        """Register the integer @element as argument number @number.
        Non-integer values are not supported (NotImplementedError)."""
        if not isinstance(element, (int, long)):
            raise NotImplementedError()
        self.args[number] = element

    def vm_push(self, element):
        "Push @element on the stack (architecture specific)"
        raise NotImplementedError("Abstract method")

    def set_ret(self, element):
        "Install the return address @element (architecture specific)"
        raise NotImplementedError("Abstract method")

    def prepare_call(self, ret_addr):
        """Place the registered arguments, then the return address @ret_addr.

        Arguments are ranked by ascending argument number; the rank (not the
        number itself) selects the register.
        """
        ranked = [self.args[number] for number in sorted(self.args.keys())]

        # Walk from the last argument to the first one, so that stack
        # arguments are pushed in reverse order
        rank = len(ranked)
        while rank > 0:
            rank -= 1
            value = ranked[rank]
            if rank >= len(self.regs_mapping):
                # Stack argument
                self.vm_push(value)
            else:
                # Regs argument
                setattr(self.jitter.cpu, self.regs_mapping[rank], value)

        self.set_ret(ret_addr)

    def get_result(self):
        "Read the register designated by `ira.ret_reg` on the jitter CPU"
        ret_reg_name = self.ira.ret_reg.name
        return getattr(self.jitter.cpu, ret_reg_name)


# ---- sibyl/abi/arm.py ----
from sibyl.abi import abi


class ABI_ARM(abi.ABIRegsStack):
    """ARM calling convention: arguments in R0-R3 then on the stack, return
    address in LR"""

    arch = ["arml", "armb", "armtl", "armtb"]
    regs_mapping = ["R0", "R1", "R2", "R3"]

    def vm_push(self, element):
        "Push @element through the jitter's push_uint32_t"
        self.jitter.push_uint32_t(element)

    def set_ret(self, ret_addr):
        "Store the return address @ret_addr in LR"
        self.jitter.cpu.LR = ret_addr


ABIS = [ABI_ARM]
# ---- sibyl/abi/mep.py ----
# Author: Guillaume VALADON
from sibyl.abi import abi


class ABI_MEP(abi.ABIRegsStack):
    """MeP calling convention: arguments in R1-R4 then on the stack, return
    address in LP"""

    arch = ["mepl", "mepb"]
    regs_mapping = ["R1", "R2", "R3", "R4"]

    def vm_push(self, element):
        "Push @element through the jitter's push_uint32_t"
        self.jitter.push_uint32_t(element)

    def set_ret(self, ret_addr):
        "Store the return address @ret_addr in LP"
        self.jitter.cpu.LP = ret_addr


ABIS = [ABI_MEP]
# ---- sibyl/abi/mips.py ----
from sibyl.abi import abi


class ABI_MIPS_O32(abi.ABIRegsStack):
    """MIPS O32 calling convention: arguments in A0-A3 then on the stack,
    return address in RA"""

    arch = ["mips32b", "mips32l"]
    regs_mapping = ["A0", "A1", "A2", "A3"]

    def vm_push(self, element):
        "Push @element through the jitter's push_uint32_t"
        self.jitter.push_uint32_t(element)

    def set_ret(self, ret_addr):
        "Store the return address @ret_addr in RA"
        self.jitter.cpu.RA = ret_addr


ABIS = [ABI_MIPS_O32]
# ---- sibyl/abi/x86.py ----
from sibyl.abi import abi


class ABIRegsStack_x86(abi.ABIRegsStack):
    "Common base of the x86 ABIs: the return address lives on the stack"

    def set_ret(self, ret_addr):
        "Push the return address @ret_addr"
        self.vm_push(ret_addr)


class ABIStdCall_x86_32(ABIRegsStack_x86):
    "x86 32-bit stdcall: every argument goes on the stack"

    arch = ["x86_32"]
    regs_mapping = []  # Stack only

    def vm_push(self, element):
        "Push @element through the jitter's push_uint32_t"
        self.jitter.push_uint32_t(element)


class ABIFastCall_x86_32(ABIRegsStack_x86):
    "x86 32-bit fastcall: ECX and EDX first, then the stack"

    arch = ["x86_32"]
    regs_mapping = ["ECX", "EDX"]

    def vm_push(self, element):
        "Push @element through the jitter's push_uint32_t"
        self.jitter.push_uint32_t(element)


class ABI_AMD64_SYSTEMV(ABIRegsStack_x86):
    "x86 64-bit System V: six register arguments, then the stack"

    arch = ["x86_64"]
    regs_mapping = ["RDI", "RSI", "RDX", "RCX", "R8", "R9"]

    def vm_push(self, element):
        "Push @element through the jitter's push_uint64_t"
        self.jitter.push_uint64_t(element)


class ABI_AMD64_MS(ABIRegsStack_x86):
    "x86 64-bit Microsoft: four register arguments, then the stack"

    arch = ["x86_64"]
    regs_mapping = ["RCX", "RDX", "R8", "R9"]

    def vm_push(self, element):
        "Push @element through the jitter's push_uint64_t"
        self.jitter.push_uint64_t(element)

    def set_ret(self, ret_addr):
        "Reserve the shadow space, then push the return address @ret_addr"
        # Shadow stack reservation: 0x20 bytes
        for _ in xrange(4):
            self.vm_push(0)
        super(ABI_AMD64_MS, self).set_ret(ret_addr)


ABIS = [ABIStdCall_x86_32, ABIFastCall_x86_32, ABI_AMD64_SYSTEMV, ABI_AMD64_MS]
# ---- sibyl/actions/__init__.py ----
"Sibyl actions implementations"

from collections import namedtuple
from importlib import import_module

# Description of an available action: command name, short description,
# module (relative to sibyl.actions) and name of the implementing class
ActionDesc = namedtuple("ActionDesc", ["name", "desc", "module", "classname"])

ACTIONS = [
    ActionDesc(name="config", desc="Configuration management",
               module="config", classname="ActionConfig"),
    ActionDesc(name="find", desc="Function guesser",
               module="find", classname="ActionFind"),
    ActionDesc(name="func", desc="Function discovering",
               module="func", classname="ActionFunc"),
    ActionDesc(name="learn", desc="Learn a new function",
               module="learn", classname="ActionLearn"),
]


def load_action(actiondesc, args):
    """Import the module implementing @actiondesc and instantiate its class
    @actiondesc: ActionDesc instance
    @args: arguments forwarded to the action's constructor
    Return the created action instance
    """
    # The module is imported only now, so unused actions cost nothing
    module = import_module(".%s" % actiondesc.module, "sibyl.actions")
    action_cls = getattr(module, actiondesc.classname)
    return action_cls(args)
# ---- sibyl/actions/action.py ----
import sys
import argparse


class Action(object):
    """Parent class for actions.

    A subclass declares its command line through `_args_` and implements
    `run`; instantiating the action parses the command line and runs it.
    """

    _name_ = ""
    _desc_ = ""
    _args_ = []  # List of (*args, **kwargs)

    def __init__(self, command_line):
        """Parse @command_line (list of str) then launch the action"""
        program = "%s %s" % (sys.argv[0], self._name_)
        parser = argparse.ArgumentParser(prog=program)
        for positional, options in self._args_:
            parser.add_argument(*positional, **options)

        # Parsed options are available to `run` through self.args
        self.args = parser.parse_args(command_line)
        self.run()

    def run(self):
        "Action body, to be provided by subclasses"
        raise NotImplementedError("Abstract method")

    @property
    def name(self):
        """Action name"""
        return self._name_

    @property
    def description(self):
        """Action description"""
        return self._desc_
# ---- sibyl/actions/config.py ----
import os

from sibyl.config import config, config_paths
from sibyl.actions.action import Action


class ActionConfig(Action):
    """Configuration management"""

    _name_ = "config"
    _desc_ = "Configuration management"
    _args_ = [
        (("-V", "--value"), {"help": "Return the value of a specific option"}),
        (("-d", "--dump"), {"help": "Dump the current configuration",
                            "action": "store_true"}),
    ]

    def run(self):
        """Dump the configuration, print one option, or show a summary,
        depending on the command line"""
        if self.args.dump:
            print("\n".join(config.dump()))
        elif self.args.value:
            option = self.args.value
            if option.endswith("_keys") and hasattr(config, option[:-5]):
                # "<option>_keys" asks for the keys of a dict-like option
                val = getattr(config, option[:-5]).keys()
            elif hasattr(config, option):
                val = getattr(config, option)
            else:
                val = "ERROR"
            print(val)
        else:
            self.show()

    def show(self):
        """Print a human readable summary of the current configuration"""
        # Configuration files
        files = [fpath for fpath in config_paths if os.path.isfile(fpath)]
        if not files:
            print("No configuration file found. Supported paths:")
            print("\t" + "\n\t".join(config_paths))
        else:
            print("Configuration loaded from %s" % ", ".join(files))

        # Jitter engine
        # Work on a copy: expanding "miasm" below must not alter the list
        # held by the configuration object
        engines = list(config.config["jit_engine"])
        if "miasm" in engines:
            idx = engines.index("miasm")
            engines[idx:idx + 1] = config.config["miasm_engine"]
        print("Jitter engine (preference order): %s" % ", ".join(engines))
        print("Elected jitter engine: %s" % config.jit_engine)

        # Stubbing
        stubs = config.stubs
        if stubs:
            print("API stubbing activated on supported jitter, from %d files" % len(stubs))
        else:
            print("API stubbing is deactivated")

        # PIN
        if (config.pin_root and
            os.path.exists(os.path.join(config.pin_root, "pin"))):
            print("PIN root path found at: %s" % config.pin_root)
        else:
            print("PIN root path not found")
        if (config.pin_tracer and
            os.path.exists(config.pin_tracer)):
            print("PIN tracer found at: %s" % config.pin_tracer)
        else:
            print("PIN tracer not found")

        # Learn
        print("Learn's pruning strategy: %s/%d/%d" % (config.prune_strategy,
                                                      config.prune_keep,
                                                      config.prune_keep_max))

        # IDA
        idaq64_path = config.idaq64_path
        if idaq64_path:
            # The message used to be built but never printed
            print("IDA has been found at: %s" % idaq64_path)
        else:
            print("IDA has been not found")

        # GHIDRA Headless
        ghidra_headless_path = config.ghidra_headless_path
        if ghidra_headless_path:
            print("GHIDRA analyzeHeadless has been found at: %s" % ghidra_headless_path)
        else:
            print("GHIDRA analyzeHeadless has been not found")

        # Tests
        print("Tests availables:")
        for name, tests in config.available_tests.iteritems():
            print("\t%s (%d)" % (name, len(tests)))
            print("\t\t" + ", ".join(test.func for test in tests))
# ---- sibyl/actions/find.py ----
import logging
import json
import sys
from collections import namedtuple

from miasm2.analysis.machine import Machine
from miasm2.analysis.binary import Container

from sibyl.config import config
from sibyl.testlauncher import TestLauncher
from sibyl.abi import ABIS
from sibyl.heuristics.arch import ArchHeuristic
from sibyl.commons import print_table
from sibyl.actions.action import Action

# Message exchanged with workers
MessageTaskDone = namedtuple("MessageTaskDone", ["address", "results"])


class FakeProcess(object):
    """Mock simulating Process API in monoprocess mode"""

    def __init__(self, target, args):
        self.target = target
        self.args = args

    def start(self, *args, **kwargs):
        "Run the target synchronously, in the current process"
        self.target(*self.args)

    def join(self, *args, **kwargs):
        "Nothing to wait for: the work is already done when start() returns"
        pass


class ActionFind(Action):
    """Action for actually launching function guessing"""

    _name_ = "find"
    _desc_ = "Function guesser"
    _args_ = [
        # Mandatory
        (["filename"], {"help": "File to load"}),
        (["address"], {"help": "Address of the function under test. Allowed" \
                       " formats are '112233', '0x11223344', '-' for stdin " \
                       "and 'filename' for a file containing addresses",
                       "nargs": "+"}),
        # Optional
        (["-a", "--architecture"], {"help": "Target architecture",
                                    "choices": Machine.available_machine()}),
        (["-b", "--abi"], {"help": "ABI to use",
                           "choices": [x.__name__ for x in ABIS]}),
        (["-t", "--tests"], {"help": "Tests to run (default is all)",
                             "choices": config.available_tests.keys(),
                             "default": [],
                             "action": "append"}),
        (["-v", "--verbose"], {"help": "Verbose mode (use multiple time to " \
                               "increase verbosity level)",
                               "action": "count",
                               "default": 0}),
        (["-i", "--timeout"], {"help": "Test timeout (in seconds)",
                               "default": 2,
                               "type": int}),
        (["-m", "--mapping-base"], {"help": "Binary mapping address",
                                    "default": "0"}),
        (["-j", "--jitter"], {"help": "Jitter engine (override default one)",
                              "choices": ["gcc", "tcc", "llvm", "python", "qemu"],
                              "default": config.jit_engine}),
        (["-p", "--monoproc"], {"help": "Launch tests in a single process " \
                                "(mainly for debug purpose)",
                                "action": "store_true"}),
        (["-o", "--output-format"], {"help": "Output format",
                                     "choices": ["JSON", "human"],
                                     "default": "human"}),
    ]

    def do_test(self, addr_queue, msg_queue):
        """Multi-process worker for launching on functions
        @addr_queue: queue of addresses to test; None asks the worker to stop
        @msg_queue: queue receiving one MessageTaskDone per address, then
        None once the worker is over
        """

        # Init components
        tl = TestLauncher(self.args.filename, self.machine, self.abicls,
                          self.tests, self.args.jitter, self.map_addr)

        # Activatate logging INFO on at least -vv
        if self.args.verbose > 1:
            tl.logger.setLevel(logging.INFO)

        # Main loop
        while True:
            address = addr_queue.get()
            if address is None:
                break
            possible_funcs = tl.run(address, timeout_seconds=self.args.timeout)
            msg_queue.put(MessageTaskDone(address, possible_funcs))

        # Signal to master the end
        msg_queue.put(None)

    def _parse_addresses(self):
        """Return the addresses designated on the command line, as ints.

        Each positional value is an integer literal, '-' (one address per
        line on stdin) or the name of a file holding one address per line.
        Every source is accumulated, in command line order; blank lines are
        ignored.
        """
        addresses = []
        for address in self.args.address:
            if address == '-':
                # Use stdin
                lines = sys.stdin
            else:
                try:
                    addresses.append(int(address, 0))
                    continue
                except ValueError:
                    # Not a number: consider it as a file name
                    pass
                with open(address) as fdesc:
                    lines = fdesc.readlines()
            addresses.extend(int(line, 0) for line in lines if line.strip())
        return addresses

    def run(self):
        """Launch search"""

        # Import multiprocessing only when required
        from multiprocessing import cpu_count, Queue, Process

        # Parse args
        self.map_addr = int(self.args.mapping_base, 0)
        if self.args.monoproc:
            cpu_count = lambda: 1
            Process = FakeProcess

        # Architecture
        architecture = False
        if self.args.architecture:
            architecture = self.args.architecture
        else:
            with open(self.args.filename) as fdesc:
                architecture = ArchHeuristic(fdesc).guess()
            if not architecture:
                raise ValueError("Unable to recognize the architecture, please specify it")
            if self.args.verbose > 0:
                print("Guessed architecture: %s" % architecture)

        self.machine = Machine(architecture)
        if not self.args.address:
            print("No function address provided. Use 'sibyl func' to discover addresses")
            sys.exit(-1)
        addresses = self._parse_addresses()
        if self.args.verbose > 0:
            print("Found %d addresses" % len(addresses))

        # Select ABI
        if self.args.abi is None:
            candidates = set(abicls for abicls in ABIS
                             if architecture in abicls.arch)
            if not candidates:
                raise ValueError("No ABI for architecture %s" % architecture)
            if len(candidates) > 1:
                print("Please specify the ABI:")
                print("\t" + "\n\t".join(cand.__name__ for cand in candidates))
                sys.exit(0)
            abicls = candidates.pop()
        else:
            for abicls in ABIS:
                if self.args.abi == abicls.__name__:
                    break
            else:
                raise ValueError("Unknown ABI name: %s" % self.args.abi)
        self.abicls = abicls

        # Select Test set
        self.tests = []
        for tname, tcases in config.available_tests.iteritems():
            if not self.args.tests or tname in self.args.tests:
                self.tests += tcases
        if self.args.verbose > 0:
            print("Found %d test cases" % len(self.tests))

        # Prepare multiprocess
        cpu_c = cpu_count()
        addr_queue = Queue()
        msg_queue = Queue()
        processes = []

        # Add tasks
        for address in addresses:
            addr_queue.put(address)

        # Add poison pill: one per worker
        for _ in xrange(cpu_c):
            addr_queue.put(None)

        # Launch workers
        for _ in xrange(cpu_c):
            p = Process(target=self.do_test, args=(addr_queue, msg_queue))
            processes.append(p)
            p.start()
        addr_queue.close()

        # Get results
        nb_poison = 0
        results = {}  # address -> possible functions
        while nb_poison < cpu_c:
            msg = msg_queue.get()
            # Poison pill: a worker is over
            if msg is None:
                nb_poison += 1
                continue

            # Save result
            results[msg.address] = msg.results

            # Display status if needed
            if self.args.verbose > 0:
                sys.stdout.write("\r%d / %d" % (len(results), len(addresses)))
                sys.stdout.flush()
            if msg.results and self.args.output_format == "human":
                prefix = ""
                if self.args.verbose > 0:
                    # Overwrite the progress counter
                    prefix = "\r"
                print(prefix + "0x%08x : %s" % (msg.address, ",".join(msg.results)))

        # Clean output if needed
        if self.args.verbose > 0:
            print("")

        # End connexions
        msg_queue.close()
        msg_queue.join_thread()

        addr_queue.join_thread()
        for p in processes:
            p.join()

        if not addr_queue.empty():
            raise RuntimeError("An error occured: queue is not empty")

        # Print final results
        if self.args.output_format == "JSON":
            # Expand results to always have the same key, and address as int
            print(json.dumps({"information": {"total_count": len(addresses),
                                              "test_cases": len(self.tests)},
                              "results": [{"address": addr, "functions": result}
                                          for addr, result in results.iteritems()],
            }))
        elif self.args.output_format == "human" and self.args.verbose > 0:
            # Summarize results
            title = ["Address", "Candidates"]
            ligs = [title]

            ligs += [["0x%08x" % addr, ",".join(result)]
                     for addr, result in sorted(results.iteritems(),
                                                key=lambda x: x[0])
                     if result]
            print_table(ligs, separator="| ")
16 | 17 | import os 18 | 19 | from miasm2.analysis.machine import Machine 20 | from miasm2.analysis.binary import Container 21 | 22 | from sibyl.config import config, config_paths 23 | from sibyl.actions.action import Action 24 | from sibyl.heuristics.func import FuncHeuristic, ida_funcs, ghidra_funcs 25 | from sibyl.heuristics.arch import ArchHeuristic 26 | 27 | 28 | heur_names = FuncHeuristic(None, None, "").heuristic_names 29 | 30 | class ActionFunc(Action): 31 | """Function discovering""" 32 | 33 | _name_ = "func" 34 | _desc_ = "Function discovering" 35 | _args_ = [ 36 | # Mandatory 37 | (["filename"], {"help": "File to load"}), 38 | # Optional 39 | (["-a", "--architecture"], {"help": "Target architecture", 40 | "choices": Machine.available_machine()}), 41 | (["-v", "--verbose"], {"help": "Verbose mode", 42 | "action": "store_true"}), 43 | (["-d", "--disable-heuristic"], {"help": "Disable an heuristic", 44 | "action": "append", 45 | "choices": heur_names, 46 | "default": []}), 47 | (["-e", "--enable-heuristic"], {"help": "Enable an heuristic", 48 | "action": "append", 49 | "choices": heur_names, 50 | "default": []}), 51 | ] 52 | 53 | def run(self): 54 | # Architecture 55 | architecture = False 56 | if self.args.architecture: 57 | architecture = self.args.architecture 58 | else: 59 | with open(self.args.filename) as fdesc: 60 | architecture = ArchHeuristic(fdesc).guess() 61 | if not architecture: 62 | raise ValueError("Unable to recognize the architecture, please specify it") 63 | if self.args.verbose: 64 | print "Guessed architecture: %s" % architecture 65 | 66 | cont = Container.from_stream(open(self.args.filename)) 67 | machine = Machine(architecture) 68 | addr_size = machine.ira().pc.size / 4 69 | fh = FuncHeuristic(cont, machine, self.args.filename) 70 | 71 | # Default: force only IDA or GHIDRA if available 72 | if config.idaq64_path: 73 | fh.heuristics = [ida_funcs] 74 | elif config.ghidra_headless_path: 75 | fh.heuristics = [ghidra_funcs] 76 | 77 | # 
Enable / disable heuristics 78 | for name in self.args.enable_heuristic: 79 | heur = fh.name2heuristic(name) 80 | if heur not in fh.heuristics: 81 | fh.heuristics.append(heur) 82 | for name in self.args.disable_heuristic: 83 | heur = fh.name2heuristic(name) 84 | fh.heuristics.remove(heur) 85 | 86 | if self.args.verbose: 87 | print "Heuristics to run: %s" % ", ".join(fh.heuristic_names) 88 | 89 | 90 | # Launch guess 91 | fmt = "0x{:0%dx}" % addr_size 92 | for addr in fh.guess(): 93 | print fmt.format(addr) 94 | -------------------------------------------------------------------------------- /sibyl/actions/learn.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | 4 | from miasm2.analysis.binary import Container 5 | 6 | from sibyl.actions.action import Action 7 | from sibyl.learn.tracer import AVAILABLE_TRACER 8 | from sibyl.learn.generator import AVAILABLE_GENERATOR 9 | from sibyl.learn.learn import TestCreator 10 | from sibyl.abi.x86 import ABI_AMD64_SYSTEMV 11 | 12 | 13 | class ActionLearn(Action): 14 | """Automatic learning of a new function from one or multiple call""" 15 | 16 | _name_ = "learn" 17 | _desc_ = "Learn a new function" 18 | _args_ = [ 19 | # Mandatory 20 | (["functionname"], {"help": "Name of the learned function"}), 21 | (["program"], {"help": "Program used to learn the function, currently" \ 22 | " only x86 64 programs are supported"}), 23 | (["headerfile"], {"help": ".h header containing function declaration" \ 24 | " and associated types"}), 25 | # Optional 26 | (["-a", "--address"], {"help": "Address of the learned function. If " \ 27 | "not set, the corresponding symbol address is used."}), 28 | (["-t", "--trace"], {"help": "Used tracer. Available: " \ 29 | ", ".join(AVAILABLE_TRACER.keys()), 30 | "default": "pin", 31 | "choices": AVAILABLE_TRACER.keys()}), 32 | (["-g", "--generator"], {"help": "Used generator. 
Available: " \ 33 | ", ".join(AVAILABLE_GENERATOR.keys()), 34 | "default": "python", 35 | "choices": AVAILABLE_GENERATOR.keys()}), 36 | (["-v", "--verbose"], {"help": "Verbose mode (use multiple time to " \ 37 | "increase verbosity level)", 38 | "action": "count", 39 | "default": 0}), 40 | (["-m", "--main"], {"help": "Address of the function that calls the" \ 41 | "learned function. Use by and only by the miasm tracer."}), 42 | (["-o", "--output"], {"help": "Output file. Class is printed to stdout" \ 43 | "if no output file is specified.", 44 | "default": None}), 45 | (["-z", "--avoid-null"], {"help": "If set, do not consider runs "\ 46 | "returning a null value", 47 | "action": "store_true"}), 48 | ] 49 | 50 | def run(self): 51 | # Currently only AMD64 SYSTEMV ABI is supported by the learning module 52 | abi = ABI_AMD64_SYSTEMV 53 | 54 | # Currently only x86_64 is supported by the learning module 55 | machine = "x86_64" 56 | 57 | if self.args.trace != "miasm" and self.args.main != None: 58 | raise ValueError("Main argument is only used by miasm tracer") 59 | 60 | main = int(self.args.main, 0) if self.args.main else None 61 | 62 | # If function address is not set then use the symbol address 63 | if self.args.address is None: 64 | cont = Container.from_stream(open(self.args.program)) 65 | address = cont.loc_db.get_name_offset(self.args.functionname) 66 | if address is None: 67 | raise ValueError("Symbol %s does not exists in %s" % (self.args.functionname, self.args.program)) 68 | else: 69 | address = int(self.args.address, 0) 70 | 71 | 72 | testcreator = TestCreator(self.args.functionname, address, 73 | self.args.program, self.args.headerfile, 74 | AVAILABLE_TRACER[self.args.trace], 75 | AVAILABLE_GENERATOR[self.args.generator], 76 | main, abi, machine, self.args.avoid_null) 77 | 78 | if self.args.verbose == 0: 79 | testcreator.logger.setLevel(logging.WARN) 80 | if self.args.verbose == 1: 81 | testcreator.logger.setLevel(logging.INFO) 82 | elif self.args.verbose == 
2: 83 | testcreator.logger.setLevel(logging.DEBUG) 84 | 85 | createdTest = testcreator.create_test() 86 | 87 | if self.args.output: 88 | open(self.args.output, "w+").write(createdTest) 89 | else: 90 | print createdTest 91 | 92 | -------------------------------------------------------------------------------- /sibyl/commons.py: -------------------------------------------------------------------------------- 1 | """Common / shared elements""" 2 | import logging 3 | try: 4 | import pycparser 5 | except ImportError: 6 | pycparser = None 7 | else: 8 | from miasm2.core.ctypesmngr import c_to_ast, CTypeFunc 9 | from miasm2.core.objc import ObjCPtr, ObjCArray 10 | 11 | def init_logger(name): 12 | logger = logging.getLogger(name) 13 | 14 | console_handler = logging.StreamHandler() 15 | log_format = "%(levelname)-5s: %(message)s" 16 | console_handler.setFormatter(logging.Formatter(log_format)) 17 | logger.addHandler(console_handler) 18 | 19 | logger.setLevel(logging.ERROR) 20 | return logger 21 | 22 | 23 | class TimeoutException(Exception): 24 | """Exception to be called on timeouts""" 25 | pass 26 | 27 | 28 | END_ADDR = 0x1337babe 29 | 30 | def print_table(ligs, title=True, separator='|', level=0, align=""): 31 | "Print nicely @ligs. 
class HeaderFile(object):
    """Abstract representation of a Header file"""

    def __init__(self, header_data, ctype_manager):
        """Parse @header_data to fill @ctype_manager
        @header_data: str of a C-like header file
        @ctype_manager: miasm2.core.objc.CTypesManager instance
        Raise ImportError if the pycparser module is not available
        """
        # Check the dependency first: parse_header already relies on
        # pycparser, so a later check could never produce this message
        if pycparser is None:
            raise ImportError("pycparser module is needed to parse header file")

        self.data = header_data
        self.ctype_manager = ctype_manager

        self.ast = self.parse_header(header_data)
        self.ctype_manager.types_ast.add_c_decl(header_data)
        self.functions = {} # function name -> FuncPrototype

        self.parse_functions()

    @staticmethod
    def parse_header(header_data):
        """Return the AST corresponding to @header_data
        @header_data: str of a C-like header file
        """
        # We can't use add_c_decl, because we need the AST to get back
        # function's arguments name
        parser = pycparser.c_parser.CParser()
        return c_to_ast(parser, header_data)

    def parse_functions(self):
        """Search for function declarations

        Fill self.functions with one FuncPrototype per function declared at
        the top level of the header
        """
        func_nodes = (pycparser.c_ast.FuncDecl, pycparser.c_ast.FuncDef)

        for ext in self.ast.ext:
            if not (isinstance(ext, pycparser.c_ast.Decl) and
                    isinstance(ext.type, func_nodes)):
                continue
            func_name = ext.name
            objc_func = self.ctype_manager.get_objc(CTypeFunc(func_name))

            # The parameter list node may be None when the declaration is
            # written with empty parentheses
            param_list = ext.type.args
            params = param_list.params if param_list is not None else []

            args_order = []
            args = {}
            for i, param in enumerate(params):
                args_order.append(param.name)
                # objc_func.args[i] is indexed as a pair; only its second
                # element (the argument type) is kept
                args[param.name] = objc_func.args[i][1]

            self.functions[func_name] = FuncPrototype(func_name,
                                                      objc_func.type_ret,
                                                      *args_order, **args)
class MiasmEngine(Engine):
    """Engine based on Miasm"""

    def __init__(self, machine, jit_engine):
        """Instanciate a MiasmEngine
        @machine: object providing the jitter(@jit_engine) factory
        @jit_engine: name of the jitter backend: "python", "llvm", "tcc" or
        "gcc"; any other value raises a ValueError
        """
        jitter = machine.jitter(jit_engine)
        # Stop the emulation once the sentinel address END_ADDR is reached
        jitter.set_breakpoint(END_ADDR, MiasmEngine._code_sentinelle)
        self.jitter = jitter

        # Signal handling
        #
        # Due to Python signal handling implementation, signals aren't handled
        # nor passed to Jitted code in case of registration with signal API
        if jit_engine == "python":
            signal.signal(signal.SIGALRM, MiasmEngine._timeout)
        elif jit_engine in ["llvm", "tcc", "gcc"]:
            self.jitter.vm.set_alarm()
        else:
            raise ValueError("Unknown engine: %s" % jit_engine)

        # The parent constructor is the one setting self.logger
        super(MiasmEngine, self).__init__(machine)


    @staticmethod
    def _code_sentinelle(jitter):
        """Breakpoint callback registered on END_ADDR: clear the jitter's
        run flag, reset its pc to 0 and return True"""
        jitter.run = False
        jitter.pc = 0
        return True

    @staticmethod
    def _timeout(signum, frame):
        """SIGALRM handler (used with the "python" backend): raise a
        TimeoutException"""
        raise TimeoutException()

    def run(self, address, timeout_seconds):
        """Run the jitter from @address
        @address: address given to the jitter's init_run
        @timeout_seconds: delay given to signal.alarm before the run
        Return True if the run ended without raising, False otherwise
        """
        self.jitter.init_run(address)

        try:
            signal.alarm(timeout_seconds)
            self.jitter.continue_run()
        except (AssertionError, RuntimeError, ValueError,
                KeyError, IndexError, TimeoutException) as _:
            # Errors listed here are silently reported as a failed run
            return False
        except Exception as error:
            # Any other error is logged before being reported as a failure
            self.logger.exception(error)
            return False
        finally:
            # Always cancel the pending alarm, whatever the outcome
            signal.alarm(0)

        return True

    def restore_snapshot(self, memory=True):
        """Restore the state saved in self.vm_mem / self.vm_regs (the
        attributes written by Engine.take_snapshot)
        @memory: if False, memory pages are left untouched; registers and
        exception flags are restored in both cases
        """
        # Restore memory
        if memory:
            self.jitter.vm.reset_memory_page_pool()
            self.jitter.vm.reset_code_bloc_pool()
            for addr, metadata in self.vm_mem.iteritems():
                self.jitter.vm.add_memory_page(addr,
                                               metadata["access"],
                                               metadata["data"])

        # Restore registers
        self.jitter.cpu.init_regs()
        self.jitter.cpu.set_gpreg(self.vm_regs)

        # Reset intern elements
        self.jitter.vm.set_exception(0)
        self.jitter.cpu.set_exception(0)
        self.jitter.bs._atomic_mode = False
# Function epilogs binary patterns
# arch name -> list of regexp expression
func_epilogs = {
    'x86_32': [
        r"\xc9\xc3", # leave; ret
        r"([^\x41][\x50-\x5f]{1}|\x41[\x50-\x5f])\xc3", # pop reg; ret
        # NOTE(review): the ',' inside [\x83,\x81] is a literal comma in the
        # character class; kept as is, confirm whether it is intended
        r"[^\x48][\x83,\x81]\xc4([\x00-\xff]{1}|[\x00-\xff]{4})\xc3", # add esp, imm; retq
    ],
    'arml': [
        # Each pattern needs its own trailing comma: adjacent string literals
        # are otherwise concatenated into a single regexp
        r"[\x00-\xff]{2}\xbd\xe8\x1e\xff\x2f\xe1", # pop {xxx}; bx lr
        r"\x04\xe0\x9d\xe4\x1e\xff\x2f\xe1", # pop {xxx}; bx lr
    ],
}
def _virt_find(virt, pattern):
    """Search @pattern in elfesteem @virt instance
    Inspired from elf_init.virt.find

    Yield one (match iterator, base virtual address) couple per program
    header of @virt.parent; nothing is yielded if there is no usable header
    """
    regexp = re.compile(pattern)
    for segment in virt.parent.ph:
        header = segment.ph
        # Ignore segments without any virtual extent
        if header.vaddr + header.memsz <= 0:
            continue
        data = virt.parent.content[header.offset:header.offset + header.filesz]
        yield regexp.finditer(data), header.vaddr


def pattern_matching(func_heur):
    """Search for function by pattern matching

    Return a dict {address: 1} with one entry per prolog pattern match
    """

    # Retrieve info
    architecture = func_heur.machine.name
    prologs = csts.func_prologs.get(architecture, [])
    if not prologs:
        # Without any known prolog the joined pattern would be "()", which
        # matches at every offset and votes for every address
        return {}
    data = func_heur.cont.bin_stream.bin.virt

    addresses = {}

    # Search for function prologs
    pattern = "(" + ")|(".join(prologs) + ")"
    for find_iter, vaddr_base in _virt_find(data, pattern):
        for match in find_iter:
            addresses[match.start() + vaddr_base] = 1

    return addresses
"""Use IDA heuristics to find functions""" 132 | 133 | idaq64_path = config.idaq64_path 134 | if not idaq64_path: 135 | return {} 136 | 137 | # Prepare temporary files: script and output 138 | tmp_script = tempfile.NamedTemporaryFile(suffix=".py", delete=True) 139 | tmp_out = tempfile.NamedTemporaryFile(suffix=".addr", delete=True) 140 | 141 | tmp_script.write("""idaapi.autoWait() 142 | open("%s", "w").write("\\n".join("0x%%x" %% x for x in Functions())) 143 | Exit(0) 144 | """ % tmp_out.name) 145 | tmp_script.flush() 146 | 147 | # Launch IDA 148 | env = os.environ.copy() 149 | env["TVHEADLESS"] = "true" 150 | run = subprocess.Popen([idaq64_path, "-A", 151 | "-OIDAPython:%s" % tmp_script.name, 152 | func_heur.filename], 153 | env=env, 154 | stdout=subprocess.PIPE, 155 | stderr=subprocess.PIPE, 156 | ) 157 | run.communicate() 158 | 159 | # Get back addresses 160 | tmp_out.seek(0) 161 | addresses = {int(x, 16): 1 for x in tmp_out} 162 | 163 | # Clean-up 164 | tmp_script.close() 165 | tmp_out.close() 166 | 167 | return addresses 168 | 169 | 170 | def ghidra_funcs(func_heur): 171 | """Use GHIDRA heuristics to find functions""" 172 | 173 | ghidra_headless_path = config.ghidra_headless_path 174 | if not ghidra_headless_path: 175 | return {} 176 | 177 | # Prepare temporary files: GHIDRA project location and output 178 | tmp_project_location = tempfile.mkdtemp(prefix="sibyl_ghidra_fakeproj") 179 | tmp_log = tempfile.NamedTemporaryFile(suffix=".log", delete=True) 180 | 181 | # Launch GHIDRA 182 | env = os.environ.copy() 183 | script_path = os.path.dirname(config.ghidra_export_function) 184 | script_name = os.path.basename(config.ghidra_export_function) 185 | run = subprocess.Popen( 186 | [ 187 | ghidra_headless_path, tmp_project_location, "fakeproj", 188 | "-import", func_heur.filename, 189 | "-preScript", script_name, 190 | "-scriptPath", script_path, 191 | "-scriptlog", tmp_log.name, 192 | ], 193 | env=env, 194 | stdout=subprocess.PIPE, 195 | stderr=subprocess.PIPE, 196 | 
class FuncHeuristic(Heuristic):
    """Provide heuristic for function start address detection"""

    # Enabled passes
    heuristics = [
        named_symbols,
        pattern_matching,
        recursive_call,
        ida_funcs,
        ghidra_funcs,
    ]

    def __init__(self, cont, machine, filename):
        """
        @cont: miasm2's Container instance
        @machine: miasm2's Machine instance
        @filename: target's filename
        """
        super(FuncHeuristic, self).__init__()
        self.cont = cont
        self.machine = machine
        self.filename = filename

    def do_votes(self):
        """Call recursive_call at the end

        recursive_call takes the addresses found by the other passes as
        input, so it is kept out of the generic voting pass and launched
        afterwards on every address with a positive vote
        """
        enabled = self.heuristics
        do_recursive = recursive_call in enabled
        had_own_list = "heuristics" in self.__dict__

        if do_recursive:
            # Hide recursive_call through a temporary instance attribute:
            # the list in @enabled may be the one shared at class level and
            # must not be modified
            self.heuristics = [heur for heur in enabled
                               if heur is not recursive_call]
        try:
            super(FuncHeuristic, self).do_votes()
        finally:
            if do_recursive:
                if had_own_list:
                    self.heuristics = enabled
                else:
                    del self.heuristics

        addresses = self._votes

        if do_recursive:
            new_addresses = recursive_call(self,
                                           [addr
                                            for addr, vote in addresses.items()
                                            if vote > 0])
            for addr, vote in new_addresses.items():
                addresses[addr] = addresses.get(addr, 0) + vote
        self._votes = addresses

    def guess(self):
        """Yield every address with a positive cumulative vote"""
        for address, value in self.votes.items():
            # Heuristic may vote negatively
            if value > 0:
                yield address
@property
def votes(self):
    """Cumulative votes for each candidates

    Votes are computed on first access, through do_votes, then reused
    """
    # Compare with None: an empty dict is a valid, already computed, result
    # and must not trigger a new run of every heuristic
    if self._votes is None:
        self.do_votes()
    return self._votes
class Printer(object):
    '''
    Accumulate blocks of text, prefixing them with the current indentation
    The resulting text is retrieved with dump()
    '''

    default_indentation_size = 4

    def __init__(self, indentation_size=default_indentation_size):
        '''
        @indentation_size: number of spaces added by one indentation level
        '''
        self._indentation_size = indentation_size
        self._indentation_level = 0
        self._whitespace = ""
        self._print = ""

    def dump(self):
        '''Return the text accumulated so far'''
        return self._print

    def sub_lvl(self, n=1):
        '''Remove @n indentation levels
        Raise a RuntimeError, leaving the printer unchanged, if the
        indentation would become negative'''
        new_level = self._indentation_level - self._indentation_size * n

        # Validate before updating, to keep a coherent state on error
        if new_level < 0:
            raise RuntimeError("indentation level negative")

        self._indentation_level = new_level
        self._whitespace = " "*self._indentation_level

    def add_lvl(self, n=1):
        '''Add @n indentation levels'''
        self._indentation_level += self._indentation_size * n
        self._whitespace = " "*self._indentation_level

    def add_block(self, block):
        '''Append @block, each of its lines being indented at the current
        level'''
        indented = (self._whitespace + block).replace('\n', '\n'+self._whitespace)
        # Drop the indentation added after a final newline
        self._print += indented.rstrip(' ')

    def add_empty_line(self):
        '''Append a newline'''
        self._print += '\n'

    def add_lower_block(self, block, n=1):
        '''Append @block @n levels below the current indentation'''
        self.sub_lvl(n)
        self.add_block(block)
        self.add_lvl(n)

    def add_upper_block(self, block, n=1):
        '''Append @block @n levels above the current indentation'''
        self.add_lvl(n)
        self.add_block(block)
        self.sub_lvl(n)
def __init__(self, functionname, address, program, header_filename,
             tracer_class, generator_class, main_address, abicls, machine,
             avoid_null):
    """
    @functionname: name of the symbol of the learned function
    @address: address of the learned function in the program
    @program: program that uses the learned function
    @header_filename: file containing headers for the targeted function
    @tracer_class: class of the tracer used to run the program
    @generator_class: class of the generator used to create the test
    @main_address: address where the tracer has to begin, if none the tracer begins at the entry point
    @abicls: class of the ABI used by the program
    @machine: machine used by the program
    @avoid_null: if set, do not consider snapshots returning a null value
    """
    self.functionname = functionname
    self.address = address
    self.program = program
    self.header_filename = header_filename
    self.tracer_class = tracer_class
    self.generator_class = generator_class
    self.main_address = main_address
    self.abicls = abicls
    self.machine = machine
    self.types = None
    self.avoid_null = avoid_null

    self.learnexceptiontext = []

    self.logger = logging.getLogger("testcreator")
    # The "testcreator" logger is shared by every instance: attach the
    # console handler only once, otherwise each new TestCreator would make
    # every message be printed one more time
    if not self.logger.handlers:
        console_handler = logging.StreamHandler()
        log_format = "%(levelname)-5s: %(message)s"
        console_handler.setFormatter(logging.Formatter(log_format))
        self.logger.addHandler(console_handler)
    self.logger.setLevel(logging.INFO)
def prune_snapshots(self):
    '''Prune available snapshots according to the pruning politics

    Consume self.trace_iter and store the kept snapshots in self.trace.
    Supported values of config.prune_strategy:
    - "branch": keep up to config.prune_keep snapshots per set of covered
      edges, and at most config.prune_keep_max snapshots (if set)
    - "keepall": keep every snapshot
    - "keep": keep the first config.prune_keep snapshots
    Raise a ValueError on an unknown strategy, and a RuntimeError if no
    snapshot remains
    '''

    self.logger.info("Parsing and prunning snapshots: strategy %s, " \
                     "with %d elements keeped each time",
                     config.prune_strategy,
                     config.prune_keep)
    trace = Trace()
    ignored = None

    # Prune depending on the strategy
    if config.prune_strategy == "branch":
        ignored = 0
        already_keeped = {} # path -> seen number
        for snapshot in self.trace_iter:
            # TODO use abi
            if self.avoid_null and snapshot.output_reg["RAX"] == 0:
                ignored += 1
                continue

            path = frozenset(snapshot.paths.edges())
            current = already_keeped.get(path, 0)
            if current < config.prune_keep:
                # not enough sample of this current snapshot branch coverage
                trace.append(snapshot)
            else:
                ignored += 1
            already_keeped[path] = current + 1
            if config.prune_keep_max and len(trace) >= config.prune_keep_max:
                self.logger.info("Max number of snapshot reached!")
                break

    elif config.prune_strategy == "keepall":
        # Do not remove any snapshot; fill the Trace instance rather than
        # building a plain list, as later steps call Trace methods on it
        for snapshot in self.trace_iter:
            trace.append(snapshot)
        ignored = 0
    elif config.prune_strategy == "keep":
        # Remove all snapshot but one or a few (according to config)
        for snapshot in self.trace_iter:
            trace.append(snapshot)
            if len(trace) >= config.prune_keep:
                break
    else:
        raise ValueError("Unsupported strategy type: %s" % config.prune_strategy)

    self.trace = trace
    if ignored is None:
        # The "keep" strategy stops early and does not count ignored ones
        ignored = "unknown"
    self.logger.info("Keeped: %d, Ignored: %s", len(self.trace),
                     ignored)

    # If the trace is empty, test can not be created
    if not self.trace:
        raise RuntimeError(
            "Test can not be created: function seems not to be called or " \
            "the prune politic is too restrictive")
class LearnException(Exception):
    """Base class of the errors reported while learning a function
    The description is available through the `info` attribute"""

    def __init__(self, info):
        """@info: str describing the problem"""
        super(LearnException, self).__init__()
        self.info = info

    def repr_class_name(self):
        """Return the name used by __repr__, overridden by subclasses"""
        return "LearnException"

    def __repr__(self):
        return self.repr_class_name() + "(" + self.info + ")"


class ReturnPointerException(LearnException):
    """The return value might be a pointer"""

    def __init__(self):
        super(ReturnPointerException, self).__init__(
            "return value might be a pointer")

    def repr_class_name(self):
        return "ReturnPointerException"


class ReturnValueException(LearnException):
    """The return value is incorrect after replaying a snapshot"""

    def __init__(self):
        # super() must be given this very class: naming the sibling
        # ReturnPointerException raises a TypeError, as self is not one
        super(ReturnValueException, self).__init__(
            "return value is incorrect after replaying snapshot, the function might return nthing")

    def repr_class_name(self):
        return "ReturnValueException"
class Replay(object):
    '''
    Class used to run a snapshot and check that it recognize or not a given
    function code
    Potential replay errors are stored in self.replayexception
    '''

    def __init__(self, testcreator, replayed_snapshot):
        '''
        @testcreator: TestCreator instance with associated information
        @replayed_snapshot: snapshot to be used
        '''
        self.isFuncFound = False
        self.filename = testcreator.program
        self.learned_addr = testcreator.address
        self.snapshot = replayed_snapshot
        # List of strings describing why the replay failed
        self.replayexception = []
        self.abicls = testcreator.abicls
        self.machine = Machine(testcreator.machine)
        self.trace = testcreator.trace
        self.logger = testcreator.logger
        self.ira = self.machine.ira()
        # Explicit integer division (presumably a bit size turned into a
        # byte size)
        self.ptr_size = self.ira.sizeof_pointer() // 8

    def use_snapshot(self, jitter):
        '''Initilize the VM with the snapshot informations'''
        for reg, value in self.snapshot.input_reg.items():
            setattr(jitter.cpu, reg, value)

        # Set values for input memory
        for addr, mem in self.snapshot.in_memory.items():
            assert mem.access != 0
            if not jitter.vm.is_mapped(addr, mem.size):
                jitter.vm.add_memory_page(addr, mem.access, mem.data)
            else:
                # TODO the memory page may already be set with different
                # access rights. However delete page does not exist, so
                # the data is written in place whatever the rights are
                jitter.vm.set_mem(addr, mem.data)

    def compare_snapshot(self, jitter):
        '''Compare the expected result with the real one to determine if
        the function is recognize or not

        Return True if every output register and output memory block
        matches; each mismatch is described in self.replayexception
        '''
        func_found = True

        for reg, value in self.snapshot.output_reg.items():
            found = getattr(jitter.cpu, reg)
            if value != found:
                self.replayexception += ["output register %s wrong : %i expected, %i found" % (reg, value, found)]
                func_found = False

        for addr, mem in self.snapshot.out_memory.items():
            self.logger.debug("Check @%s, %s bytes: %r", hex(addr), hex(mem.size), mem.data[:0x10])
            found = jitter.vm.get_mem(addr, mem.size)
            if mem.data != found:
                # The block is compared as a whole: report its base address
                self.replayexception += ["output memory wrong at 0x%x: %s expected, %s found" % (addr, repr(mem.data), repr(found))]
                func_found = False

        return func_found

    def end_func(self, jitter):
        '''Breakpoint callback reached when the replayed function returns'''
        if jitter.vm.is_mapped(getattr(jitter.cpu, self.ira.ret_reg.name), 1):
            self.replayexception += ["return value might be a pointer"]

        self.isFuncFound = self.compare_snapshot(jitter)

        # Stop the emulation
        jitter.run = False
        return False

    def run(self):
        '''Main function that is in charge of running the test and return the result:
        true if the snapshot has recognized the function, false else.'''

        # Retrieve miasm tools
        jitter = self.machine.jitter(config.miasm_engine)

        # Close the binary as soon as it is loaded
        with open(self.filename, "rb") as fdesc:
            vm_load_elf(jitter.vm, fdesc.read())

        # Init segment
        jitter.ir_arch.do_stk_segm = True
        jitter.ir_arch.do_ds_segm = True
        jitter.ir_arch.do_str_segm = True
        jitter.ir_arch.do_all_segm = True

        FS_0_ADDR = 0x7ff70000
        jitter.cpu.FS = 0x4
        jitter.cpu.set_segm_base(jitter.cpu.FS, FS_0_ADDR)
        jitter.vm.add_memory_page(
            FS_0_ADDR + 0x28, PAGE_READ, "\x42\x42\x42\x42\x42\x42\x42\x42", "Stack canary FS[0x28]")

        # Init the jitter with the snapshot
        self.use_snapshot(jitter)

        # Get the return address for our breakpoint
        return_addr = struct.unpack("P", jitter.vm.get_mem(jitter.cpu.RSP,
                                                           0x8))[0]
        jitter.add_breakpoint(return_addr, self.end_func)

        # Run the execution
        jitter.init_run(self.learned_addr)

        # The replay is complete only if end_func stopped the emulation.
        # This is checked explicitly instead of through an assert, which
        # would be stripped when running with -O
        try:
            jitter.continue_run()
            completed = not jitter.run
        except AssertionError:
            completed = False

        if not completed:
            # set the replayexception to the correct error
            exception_flags = jitter.vm.get_exception()
            if exception_flags & EXCEPT_ACCESS_VIOL:
                self.replayexception += ["access violation"]
            elif exception_flags & EXCEPT_DIV_BY_ZERO:
                self.replayexception += ["division by zero"]
            elif exception_flags & EXCEPT_PRIV_INSN:
                self.replayexception += ["execution of private instruction"]
            else:
                self.replayexception += ["exception no %i" % exception_flags]
            self.isFuncFound = False

        return self.isFuncFound
class Trace(list):
    '''List of snapshot'''

    def __init__(self, *args, **kwargs):
        super(Trace, self).__init__(*args, **kwargs)
        # Image name -> symbol name -> in-memory address
        self.symbols = {}

    def add_symbol(self, image_name, symbol_name, symbol_addr):
        """Add the symbol:addr from the image image_name"""
        self.symbols.setdefault(image_name, dict())[symbol_name] = symbol_addr

    def symbol_to_address(self, symbol_name, image_name=None):
        """Get the corresponding in-memory address from a symbol, or None if not found
        If image_name is set, restrict to the given image only
        Raise a ValueError if several images define the symbol and no image
        is specified
        """
        if image_name is not None:
            # An unknown image has no symbol: return None as documented
            # instead of raising a KeyError
            return self.symbols.get(image_name, {}).get(symbol_name, None)

        found = None
        for symbols in self.symbols.values():
            if symbol_name in symbols:
                if found is not None:
                    raise ValueError("At least two symbols for this symbol")
                found = symbols[symbol_name]
        return found

    def clean(self):
        '''Try to remove all implementation dependant elements from the trace

        Snapshots are cleaned in place. The returned Trace references the
        same (cleaned) snapshots and owns a copy of the symbols.
        '''

        clean_trace = Trace()
        for snapshot in self:
            # Snapshot.clean works in place and returns nothing: append the
            # snapshot itself, not the result of the call
            snapshot.clean()
            clean_trace.append(snapshot)
        clean_trace.symbols = self.symbols.copy()
        return clean_trace


class MemoryAccess(object):
    '''Represent a memory block, read or write by the learned function'''

    def __init__(self, size, data, access):
        '''
        @size: number of bytes of the block
        @data: content of the block
        @access: combination of PAGE_READ / PAGE_WRITE
        '''
        self.size = size
        self.data = data
        self.access = access

    def __str__(self):
        rights = []
        if self.access & PAGE_READ:
            rights.append("READ")
        if self.access & PAGE_WRITE:
            rights.append("WRITE")

        return "size: " + str(self.size) + ", data: " + repr(self.data) + ", access: " + " ".join(rights)

    def __repr__(self):
        return "<" + str(self) + ">"


class Snapshot(object):
    '''State recorded around one execution of the learned function:
    input / output registers, memory accesses, executed paths and calls'''

    @classmethod
    def get_byte(cls, value, byte):
        '''Return the byte @byte of the value'''
        return struct.pack('@B', (value & (0xFF << (8 * byte))) >> (8 * byte))

    @classmethod
    def unpack_ptr(cls, value):
        '''Decode @value as a native pointer'''
        return struct.unpack('@P', value)[0]

    def __init__(self, abicls, machine):
        self.abicls = abicls

        self.input_reg = {}
        self.output_reg = {}

        self._previous_addr = 0
        self._current_addr = 0
        self._instr_count = 0
        self._pending_call = []
        # Function addr -> list of information on calls
        self.function_calls = {}
        self.paths = DiGraph()

        # Address -> MemoryAccess
        self.in_memory = {}
        self.out_memory = {}

        self._ira = Machine(machine).ira()
        self._ptr_size = self._ira.sizeof_pointer() // 8
        self.sp = self._ira.sp.name

    def add_input_register(self, reg_name, reg_value):
        self.input_reg[reg_name] = reg_value

    def add_output_register(self, reg_name, reg_value):
        self.output_reg[reg_name] = reg_value

    def add_memory_read(self, address, size, value):
        '''Record the read of the @size bytes of @value at @address'''
        for i in range(size):
            cur_addr = address + i
            cur_byte = Snapshot.get_byte(value, i)
            self.out_memory[cur_addr] = MemoryAccess(1,
                                                     cur_byte,
                                                     0,  # Output access never used
            )

            if cur_addr not in self.in_memory:
                self.in_memory[cur_addr] = MemoryAccess(1,
                                                        cur_byte,
                                                        PAGE_READ,
                )
            else:
                self.in_memory[cur_addr].access |= PAGE_READ

    def add_memory_write(self, address, size, value):
        '''Record the write of the @size bytes of @value at @address'''
        for i in range(size):
            cur_addr = address + i
            self.out_memory[cur_addr] = MemoryAccess(1,
                                                     Snapshot.get_byte(value, i),
                                                     0,  # Output access never used
            )

            if cur_addr not in self.in_memory:
                # The value is not used by the test; a byte string keeps the
                # type consistent with get_byte for later agglomeration
                self.in_memory[cur_addr] = MemoryAccess(1,
                                                        b"\x00",
                                                        PAGE_WRITE,
                )
            else:
                self.in_memory[cur_addr].access |= PAGE_WRITE

    def add_executed_instruction(self, address):
        '''
        Function called to signal that the address has been executed
        This function has to be called in the order of their executed instruction
        Else paths can not be updated correctly
        '''
        self._previous_addr = self._current_addr
        self._current_addr = address
        self.paths.add_uniq_edge(self._previous_addr, self._current_addr)
        self._instr_count += 1

        # Resolve call destination
        if (self._pending_call and
            self._previous_addr == self._pending_call[-1]["caller_addr"]):
            info = self._pending_call[-1]
            info["dest"] = address
            info["beg"] = self._instr_count

    def add_call(self, caller_addr, stack_ptr):
        '''
        Function call, target is not determined yet
        called *before* instruction execution
        '''
        info = {"stack_ptr": stack_ptr,
                "caller_addr": caller_addr,
        }
        self._pending_call.append(info)

    def add_ret(self, ret_addr, stack_ptr, value):
        '''
        Function ret
        called *after* instruction execution
        '''
        # Find corresponding call
        assert self._pending_call
        assert self._pending_call[-1]["stack_ptr"] >= stack_ptr

        info = self._pending_call.pop()
        info["end"] = self._instr_count
        info["ret"] = value
        self.function_calls.setdefault(info["dest"], list()).append(info)

    def clean(self):
        """Clean the snapshot for further uses (in place, returns nothing)"""

        self.agglomerate_memory(self.in_memory)
        self.agglomerate_memory(self.out_memory)

    def agglomerate_memory(self, mem):
        '''
        Assuming @mem is only composed of non-overlapping block
        this function agglomerate contiguous blocks having the same access right
        '''
        for addr in sorted(mem):
            # Skip blocks already merged into a previous one
            if addr not in mem:
                continue

            cur_mem = mem[addr]
            end_addr = addr + cur_mem.size
            while end_addr in mem:
                next_mem = mem[end_addr]

                # If access change, do not agglomerate
                if cur_mem.access != next_mem.access:
                    break

                cur_mem.size += next_mem.size
                cur_mem.data += next_mem.data
                del mem[end_addr]
                end_addr += next_mem.size
class CustomEmulatedSymbExec(EmulatedSymbExec):
    '''New emulator that trap all memory read and write which is needed by the miasm tracer'''

    def __init__(self, *args, **kwargs):
        super(CustomEmulatedSymbExec, self).__init__(*args, **kwargs)

        self._read_callback = set()
        self._write_callback = set()

    def add_read_call(self, callback):
        '''Add a new callback used each time a read happens'''
        self._read_callback.add(callback)

    def remove_read_callback(self, callback):
        '''Remove a read callback'''
        self._read_callback.remove(callback)

    def add_write_call(self, callback):
        '''Add a new callback used each time a write happens'''
        self._write_callback.add(callback)

    def remove_write_callback(self, callback):
        '''Remove a write callback'''
        self._write_callback.remove(callback)

    def _func_read(self, expr_mem):
        '''Function call for each read. We overwrite it to intercept the read'''
        for callback in self._read_callback:
            callback(self, expr_mem)

        return super(CustomEmulatedSymbExec, self)._func_read(expr_mem)

    def _func_write(self, symb_exec, dest, data):
        '''Function call for each write. We overwrite it to intercept the write'''
        for callback in self._write_callback:
            callback(self, dest, data)

        super(CustomEmulatedSymbExec, self)._func_write(symb_exec, dest, data)


class TracerMiasm(Tracer):

    '''Tracer that uses miasm'''

    def __init__(self, *args, **kwargs):
        super(TracerMiasm, self).__init__(*args, **kwargs)

        self.isTracing = False
        self.trace = None
        # Set by begin_func for each execution of the traced function
        self.current_snapshot = None
        self.old_ret_addr = None

    def read_callback(self, symb_exec, expr_mem):
        '''Read callback that add the read event to the snapshot'''
        addr = int(expr_mem.ptr)
        size = expr_mem.size // 8
        # The memory content is reversed before the hexadecimal decoding
        value = int(symb_exec.cpu.get_mem(addr, size)[::-1].encode("hex"), 16)

        self.current_snapshot.add_memory_read(addr, size, value)

    def write_callback(self, symb_exec, dest, data):
        '''Write callback that add the write event to the snapshot'''
        addr = int(dest.ptr)
        size = data.size // 8
        value = int(data.arg.arg)

        self.current_snapshot.add_memory_write(addr, size, value)

    def exec_callback(self, jitter):
        '''Callback called before each bloc execution'''
        self.current_snapshot.add_executed_instruction(jitter.pc)
        return True

    def begin_func(self, jitter):
        '''
        Function called by miasm at the begin of every execution of the traced function
        '''
        # Replace the return address by a fake one, on which end_func is
        # registered as a breakpoint
        self.old_ret_addr = jitter.pop_uint64_t()
        jitter.push_uint64_t(0x1337beef)

        self.isTracing = True

        self.current_snapshot = Snapshot(self.abicls, self.machine)

        # Add the breakpoint to watch every memory read and write
        jitter.jit.symbexec.add_read_call(self.read_callback)
        jitter.jit.symbexec.add_write_call(self.write_callback)

        # Called before the execution of each basic bloc
        jitter.exec_cb = self.exec_callback

        for reg_name in self.reg_list:
            self.current_snapshot.add_input_register(
                reg_name, getattr(jitter.cpu, reg_name))

        return True

    def end_func(self, jitter):
        '''
        Function called by miasm at the end of every execution of the traced function
        '''

        # Resume the execution at the real return address
        jitter.pc = self.old_ret_addr

        for reg_name in self.reg_list:
            self.current_snapshot.add_output_register(
                reg_name, getattr(jitter.cpu, reg_name))

        jitter.exec_cb = None

        # Remove memory breakpoints
        jitter.jit.symbexec.remove_read_callback(self.read_callback)
        jitter.jit.symbexec.remove_write_callback(self.write_callback)

        self.trace.append(self.current_snapshot)

        self.isTracing = False

        return True

    def end_do_trace(self, jitter):
        '''
        Function called by miasm at the end of the program's execution
        '''
        jitter.run = False
        return False

    def do_trace(self):
        '''Run miasm and construct the trace'''

        self.trace = Trace()

        # Retrieve miasm tools
        machine = Machine(self.machine)
        jitter = machine.jitter("python")

        # Set the jitter to use our custom emulator
        jitter.jit.symbexec = CustomEmulatedSymbExec(
            jitter.cpu, jitter.vm, jitter.jit.ir_arch, {})
        jitter.jit.symbexec.enable_emulated_simplifications()
        jitter.jit.symbexec.reset_regs()

        # Close the binary as soon as it is loaded
        with open(self.program, "rb") as fdesc:
            elf = vm_load_elf(jitter.vm, fdesc.read())

        # Init segment
        jitter.ir_arch.do_stk_segm = True
        jitter.ir_arch.do_ds_segm = True
        jitter.ir_arch.do_str_segm = True
        jitter.ir_arch.do_all_segm = True

        FS_0_ADDR = 0x7ff70000
        jitter.cpu.FS = 0x4
        jitter.cpu.set_segm_base(jitter.cpu.FS, FS_0_ADDR)
        jitter.vm.add_memory_page(
            FS_0_ADDR + 0x28, PAGE_READ, "\x42\x42\x42\x42\x42\x42\x42\x42")

        # Init stack and push main args
        jitter.init_stack()
        jitter.push_uint64_t(1)
        jitter.vm.add_memory_page(0x800000, PAGE_READ, self.program)
        jitter.push_uint64_t(0x800000)
        # Fake return address of the program, trapped by end_do_trace
        jitter.push_uint64_t(0xDEADDEAD)

        jitter.add_breakpoint(0xDEADDEAD, self.end_do_trace)
        jitter.add_breakpoint(0x1337beef, self.end_func)
        jitter.add_breakpoint(self.address, self.begin_func)

        # Run the execution
        if self.main_address is None:
            jitter.init_run(elf.Ehdr.entry)
        else:
            jitter.init_run(self.main_address)

        jitter.continue_run()
        assert jitter.run == False
        return self.trace
Please "\ 34 | "update the associated configuration" % pintool) 35 | 36 | cmd = [os.path.join(config.pin_root, "pin"), "-ifeellucky", "-t", 37 | pintool, "-a", "0x%x" % self.address, "-o", tmpName, 38 | "--", self.program] 39 | self._run_cmd(cmd) 40 | 41 | return tmpName 42 | 43 | def __parse_pin_output_file(self, traceFile): 44 | '''Parse the file created by the pintool in order to construct the trace''' 45 | 46 | trace = Trace() 47 | 48 | # Statefull elements 49 | started = False 50 | current_image_name = None 51 | 52 | # State machine for parsing 53 | for line in traceFile: 54 | infos = line.strip().split(' ') 55 | entry_type = infos[0] 56 | 57 | 58 | # Image loaded in memory 59 | # IMG 60 | if entry_type == "IMG": 61 | img_name = infos[1] 62 | current_image_name = img_name 63 | continue 64 | 65 | # Symbol entry 66 | # S 67 | elif entry_type == 'S': 68 | assert current_image_name is not None 69 | symbol_name = infos[2] 70 | symbol_addr = int(infos[1], 16) 71 | trace.add_symbol(current_image_name, symbol_name, symbol_addr) 72 | continue 73 | 74 | values = [int(v, 16) for v in infos[1:]] 75 | 76 | # Start of the learned function 77 | # Fields are registers value 78 | if entry_type == 'I': 79 | if not started: 80 | started = True 81 | current_snapshot = Snapshot(self.abicls, self.machine) 82 | 83 | for i, reg_name in enumerate(self.reg_list): 84 | current_snapshot.add_input_register(reg_name, values[i]) 85 | 86 | # Executed instructions address 87 | elif entry_type == '@': 88 | if started: 89 | current_snapshot.add_executed_instruction(values[0]) 90 | 91 | # Memory read access 92 | # Fields are read address, read size and read value 93 | elif entry_type == 'R': 94 | if started: 95 | current_snapshot.add_memory_read( 96 | values[0], values[1], values[2]) 97 | 98 | # Memory write access 99 | # Fields are writen address, writen size and writen value 100 | elif entry_type == 'W': 101 | if started: 102 | current_snapshot.add_memory_write( 103 | values[0], values[1], 
class Tracer(object):

    '''
    Abstract class used to represent a tracer
    A tracer is a class that run a program and log the executed instruction and the memory read and write and compile all these informations in a trace class
    '''

    # Registers saved at the beginning and at the end of the traced function
    reg_list = ["RAX", "RBX", "RCX", "RDX", "RSI", "RDI", "RBP", "RSP", "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15"]

    def __init__(self, program, address, main_address, abicls, machine):
        '''
        @program: traced program
        @address: address of the traced function
        @main_address: address where the tracer has to begin, if none the tracer begins at the entry point
        @abicls: class of the ABI used by the program
        @machine: machine used by the program
        '''
        self.program = os.path.abspath(program)
        self.address = address
        self.main_address = main_address
        self.abicls = abicls
        self.machine = machine

    def do_trace(self):
        '''
        Abstract method.
        Should return the trace of the program
        '''

        raise NotImplementedError("Abstract method")

    @staticmethod
    def _run_cmd(cmd):
        '''
        Runs the command @cmd
        Return stdout
        A non-empty stderr is only reported on the standard output, no
        exception is raised
        '''
        run = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
        stdout, stderr = run.communicate()

        stdout = stdout.strip()
        stderr = stderr.strip()
        # Single-argument print calls behave the same with the Python 2
        # statement and the Python 3 function
        if stdout:
            print(stdout)

        if stderr:
            print("STDERR is not empty")
            print(stderr)

        return stdout
class TestIsCharset(Test):
    """Test for character classification routines:
    isalnum, isalpha, isascii, isblank, iscntrl, isdigit, isgraph, islower,
    isprint, ispunct, isspace, isupper, isxdigit

    Each step calls the candidate with one character; the next step depends
    on whether it returned 1 or 0. Any other returned value rejects the
    candidate.

    Decision tree (character tested, then outcome for 1 / 0):
    255 (not ASCII)
    |-- 1: rejected
    `-- 0: g
        |-- 1: 0
        |   |-- 1: ' '
        |   |   |-- 1: \\x00
        |   |   |   |-- 1: isascii
        |   |   |   `-- 0: isprint
        |   |   `-- 0: !
        |   |       |-- 1: isgraph
        |   |       `-- 0: isalnum
        |   `-- 0: A
        |       |-- 1: isalpha
        |       `-- 0: islower
        `-- 0: \\t
            |-- 1: ' '
            |   |-- 1: \\n
            |   |   |-- 1: isspace
            |   |   `-- 0: isblank
            |   `-- 0: iscntrl
            `-- 0: A
                |-- 1: 0
                |   |-- 1: isxdigit
                |   `-- 0: isupper
                `-- 0: 0
                    |-- 1: isdigit
                    `-- 0: .
                        |-- 1: ispunct
                        `-- 0: rejected
    """

    def reset_full(self, *args, **kwargs):
        super(TestIsCharset, self).reset_full(*args, **kwargs)
        # Reset tests tree
        self.cur_tree = self.decision_tree
        self.next_test = self.cur_tree["t"]
        self.tests = TestSetGenerator(self.test_iter())

    def reset(self, *args, **kwargs):
        # Between two subtests only the base state is reset: the position
        # in the decision tree has to be kept
        super(TestIsCharset, self).reset_full(*args, **kwargs)

    def check_gen(self, result=None):
        """Use the result of the last call to move in the decision tree

        Return False when the candidate is rejected, True when a function
        has been identified (self.func is set) or when another test is
        needed (self.next_test is set)
        """
        if result is None:
            result = self._get_result()

        # Returned values should be 0 or 1
        if result not in [0, 1]:
            return False

        # Browse decision tree
        key = "g" if result == 1 else "b"
        next_tree = self.cur_tree[key]

        if next_tree is False:
            # No more candidate
            return False
        elif isinstance(next_tree, str):
            # Candidate found
            self.func = next_tree
            self.next_test = None
            return True
        elif isinstance(next_tree, dict):
            # Browse next candidates
            self.cur_tree = next_tree
            self.next_test = self.cur_tree["t"]
            return True
        raise ValueError("Impossible tree value")

    def test_iter(self):
        """Yield the (init, check) couples to run until a leaf is reached"""
        # The generator simply ends here: raising StopIteration by hand is
        # turned into a RuntimeError by PEP 479
        while self.next_test:
            yield self.next_test

    def init_notascii(self):
        self._add_arg(0, 255)

    def init_g(self):
        self._add_arg(0, ord('g'))

    def init_0(self):
        self._add_arg(0, ord('0'))

    def init_space(self):
        self._add_arg(0, ord(' '))

    def init_x00(self):
        self._add_arg(0, 0)

    def init_exclam(self):
        self._add_arg(0, ord('!'))

    def init_A(self):
        self._add_arg(0, ord('A'))

    def init_tab(self):
        self._add_arg(0, ord('\t'))

    def init_ret(self):
        self._add_arg(0, ord('\n'))

    def init_punct(self):
        self._add_arg(0, ord('.'))

    # "t": (init, check) of the test to run
    # "g": subtree, function name or False when the function returned 1
    # "b": subtree, function name or False when the function returned 0
    decision_tree = {"t": (init_notascii, check_gen),
                     "g": False, # Increase mean cost
                     "b": {"t": (init_g, check_gen),
                           "g": {"t": (init_0, check_gen),
                                 "g": {"t": (init_space, check_gen),
                                       "g": {"t": (init_x00, check_gen),
                                             "g": "isascii",
                                             "b": "isprint"
                                             },
                                       "b": {"t": (init_exclam, check_gen),
                                             "g": "isgraph",
                                             "b": "isalnum"
                                             },
                                       },
                                 "b": {"t": (init_A, check_gen),
                                       "g": "isalpha",
                                       "b": "islower"
                                       },
                                 },
                           "b": {"t": (init_tab, check_gen),
                                 "g": {"t": (init_space, check_gen),
                                       "g": {"t": (init_ret, check_gen),
                                             "g": "isspace",
                                             "b": "isblank"
                                             },
                                       "b": "iscntrl"
                                       },
                                 "b": {"t": (init_A, check_gen),
                                       "g": {"t": (init_0, check_gen),
                                             "g": "isxdigit",
                                             "b": "isupper"
                                             },
                                       "b": {"t": (init_0, check_gen),
                                             "g": "isdigit",
                                             "b": {"t": (init_punct, check_gen),
                                                   "g": "ispunct",
                                                   "b": False
                                                   # Avoid false positive
                                                   },
                                             },
                                       },
                                 },
                           }
                     }
class TestAbs(Test):
    """Test for abs: a positive and a negative integer argument"""

    value = 42

    # Test1
    def init1(self):
        # Positive argument
        self._add_arg(0, self.value + 1)

    def check1(self):
        # The positive argument is expected back unchanged
        result = self._get_result()
        return result == (self.value + 1)

    # Test2
    def init2(self):
        # Negative argument, encoded through _as_int
        self._add_arg(0, self._as_int(-1 * self.value))

    def check2(self):
        # The opposite of the negative argument is expected
        result = self._get_result()
        return result == self.value

    # Properties
    func = "abs"
    tests = TestSetTest(init1, check1) & TestSetTest(init2, check2)


class TestA64l(Test):
    """Test for a64l: the string my_string is expected to give value"""

    my_string = "v/"
    value = 123

    # Test
    def init(self):
        self.my_addr = self._alloc_string(self.my_string)
        self._add_arg(0, self.my_addr)

    def check(self):
        # The input string must be left untouched
        result = self._get_result()
        return all([result == self.value,
                    self._ensure_mem(self.my_addr, self.my_string)])

    # Properties
    func = "a64l"
    tests = TestSetTest(init, check)


class TestAtoi(Test):
    """Test for atoi: a plain number, then a number followed by a '.'"""

    my_string = "44"
    my_string2 = "127.0.0.1"

    # Test
    def my_init(self, string):
        """Allocate @string and use its address as first argument"""
        self.my_addr = self._alloc_string(string)
        self._add_arg(0, self.my_addr)

    def my_check(self, string):
        """Expect the integer found before the first '.' of @string, and an
        untouched input string"""
        result = self._get_result()
        return all([result == int(string.split(".")[0]),
                    self._ensure_mem(self.my_addr, string)])

    # Test1
    def init1(self):
        return self.my_init(self.my_string)

    def check1(self):
        return self.my_check(self.my_string)

    # Test2
    def init2(self):
        return self.my_init(self.my_string2)

    def check2(self):
        return self.my_check(self.my_string2)


    # Properties
    func = "atoi"
    tests = TestSetTest(init1, check1) & TestSetTest(init2, check2)
class Test(object):
    "Main class for tests"

    # Elements to override

    func = ""  # Possible function if test passes
    tests = []  # List of tests (init, check) to pass
    reset_mem = True  # Reset memory between tests

    # First address handed out by the allocation helpers
    _ALLOC_POOL_BASE = 0x20000000

    def init(self):
        "Called for setting up the test case"
        pass

    def check(self):
        """Called to check test result
        Return True if all checks are passed"""
        return True

    def reset_full(self):
        """Reset the test case between two functions"""
        self.alloc_pool = self._ALLOC_POOL_BASE

    def reset(self):
        """Reset the test case between two subtests"""
        self.reset_full()

    # Utils

    def __init__(self, jitter, abi):
        """@jitter: emulation engine whose `vm` receives the test memory
        @abi: calling convention helper (arguments in, result out)"""
        self.jitter = jitter
        self.alloc_pool = self._ALLOC_POOL_BASE
        self.abi = abi

    @staticmethod
    def _access_rights(read, write):
        """Build the page access mask from the @read / @write flags"""
        rights = 0
        if read:
            rights |= PAGE_READ
        if write:
            rights |= PAGE_WRITE
        return rights

    def _reserv_mem(self, size, read=True, write=False):
        """Reserve @size bytes of the allocation pool and return their address

        Nothing is mapped in the VM: only the pool cursor moves. @read and
        @write are accepted for symmetry with the _alloc_* helpers and are
        not used.
        """
        # Memory alignment (a full 16 bytes is added when already aligned)
        size += 16 - size % 16

        to_ret = self.alloc_pool
        self.alloc_pool += size + 1

        return to_ret

    def __alloc_mem(self, mem, read=True, write=False):
        """Map @mem, padded with random bytes, at the pool cursor

        Return the address of the new page and advance the cursor past it."""
        rights = self._access_rights(read, write)

        # Memory alignment: pad with random bytes up to the next multiple of
        # 16 (16 more bytes when already aligned)
        padding = 16 - len(mem) % 16
        mem += "".join(chr(random.randint(0, 255)) for _ in xrange(padding))

        self.jitter.vm.add_memory_page(self.alloc_pool, rights, mem)
        to_ret = self.alloc_pool
        self.alloc_pool += len(mem) + 1

        return to_ret

    def _alloc_mem(self, size, read=True, write=False):
        """Allocate @size random bytes; return their address"""
        mem = "".join(chr(random.randint(0, 255)) for _ in xrange(size))
        return self.__alloc_mem(mem, read=read, write=write)

    def _alloc_string(self, string, read=True, write=False):
        """Allocate @string followed by a NUL byte; return its address"""
        return self.__alloc_mem(string + "\x00", read=read, write=write)

    def _alloc_pointer(self, pointer, read=True, write=False):
        """Allocate a pointer-sized cell holding @pointer; return its address"""
        pointer_size = self.abi.ira.sizeof_pointer()
        return self.__alloc_mem(Test.pack(pointer, pointer_size),
                                read=read,
                                write=write)

    def _write_mem(self, addr, element):
        """Write the raw bytes @element at @addr"""
        self.jitter.vm.set_mem(addr, element)

    def _write_string(self, addr, element):
        """Write @element followed by a NUL byte at @addr"""
        self._write_mem(addr, element + "\x00")

    def _add_arg(self, number, element):
        """Set argument number @number of the tested call to @element"""
        self.abi.add_arg(number, element)

    def _get_result(self):
        """Return the value produced by the tested call"""
        return self.abi.get_result()

    def _ensure_mem(self, addr, element):
        """Return True if memory at @addr equals @element

        A RuntimeError raised by the memory read is reported as False."""
        try:
            return self.jitter.vm.get_mem(addr, len(element)) == element
        except RuntimeError:
            return False

    def _ensure_mem_sparse(self, addr, element, offsets):
        """Compare memory at @addr with @element, one item at a time
        @offsets: offsets to ignore"""
        for i, sub_element in enumerate(element):
            if i in offsets:
                continue
            if not self._ensure_mem(addr + i, sub_element):
                return False
        return True

    def _as_int(self, element):
        """Wrap @element modulo 2**sizeof_int (negative values become their
        unsigned representation)"""
        int_size = self.abi.ira.sizeof_int()
        max_val = 2**int_size
        return (element + max_val) % max_val

    def _to_int(self, element):
        """Convert @element with the modular integer type of sizeof_int bits"""
        int_size = self.abi.ira.sizeof_int()
        return mod_size2int[int_size](element)

    def _memread_pointer(self, addr):
        """Read a pointer-sized value at @addr

        Return False if the memory read raises RuntimeError."""
        # Explicit floor division: the byte count must stay an integer
        pointer_size = self.abi.ira.sizeof_pointer() // 8
        try:
            element = self.jitter.vm.get_mem(addr, pointer_size)
        except RuntimeError:
            return False
        return Test.unpack(element)

    @staticmethod
    def pack(element, size):
        """Encode @element as a little-endian string of @size bits

        Raise ValueError if @element needs more than @size // 8 bytes."""
        nb_bytes = size // 8
        out = ""
        while element != 0:
            out += chr(element % 0x100)
            element >>= 8
            if len(out) > nb_bytes:
                raise ValueError("To big to be packed")
        out = out + "\x00" * (nb_bytes - len(out))
        return out

    @staticmethod
    def unpack(element):
        """Decode the little-endian string @element into an integer"""
        return int(element[::-1].encode("hex"), 16)
class TestSet(object):
    """Stand for a set of test to run, potentially associated to a logic form

    The logic form is represented as a tree, in which nodes are TestSet children
    instance
    """

    def __and__(self, ts):
        return TestSetAnd(self, ts)

    def __or__(self, ts):
        return TestSetOr(self, ts)

    def execute(self, callback):
        """Successive execution of test set (like a visitor on the associated
        tree) through @callback
        @callback: bool func(init, check)
        """
        # Must be raised, not returned: a returned exception instance is
        # truthy and would be taken for a successful test set
        raise NotImplementedError("Abstract method")


class TestSetAnd(TestSet):
    """Logic form : TestSet1 & TestSet2

    Lazy evaluation: if TestSet1 fail, TestSet2 is not launched
    """

    def __init__(self, ts1, ts2):
        super(TestSetAnd, self).__init__()
        assert isinstance(ts1, TestSet)
        assert isinstance(ts2, TestSet)
        self._ts1 = ts1
        self._ts2 = ts2

    def __repr__(self):
        return "%r TS_AND %r" % (self._ts1, self._ts2)

    def execute(self, callback):
        if not self._ts1.execute(callback):
            # Early quit
            return False
        # First test is valid
        return self._ts2.execute(callback)


class TestSetOr(TestSet):
    """Logic form : TestSet1 | TestSet2

    Lazy evaluation: if TestSet1 success, TestSet2 is not launched
    """

    def __init__(self, ts1, ts2):
        super(TestSetOr, self).__init__()
        assert isinstance(ts1, TestSet)
        assert isinstance(ts2, TestSet)
        self._ts1 = ts1
        self._ts2 = ts2

    def __repr__(self):
        return "%r TS_OR %r" % (self._ts1, self._ts2)

    def execute(self, callback):
        if self._ts1.execute(callback):
            # Early quit
            return True
        return self._ts2.execute(callback)


class TestSetTest(TestSet):
    """Terminal node of TestSet

    Stand for a check in a test case

    init: initialization function, called before launching the target address
    check: checking function, verifying the final state
    """

    def __init__(self, init, check):
        super(TestSetTest, self).__init__()
        self._init = init
        self._check = check

    def __repr__(self):
        # The format string needs one placeholder per argument; an empty
        # one makes repr() raise TypeError
        return "<TST %r,%r>" % (self._init, self._check)

    def execute(self, callback):
        return callback(self._init, self._check)


class TestSetGenerator(TestSet):
    """TestSet based using a generator to retrieve tests

    Each generated (init, check) couple is passed to the callback; the
    execution stops at the first failure.
    """

    def __init__(self, generator):
        super(TestSetGenerator, self).__init__()
        self._generator = generator

    def execute(self, callback):
        for (init, check) in self._generator:
            if not callback(init, check):
                return False
        return True
class TestHeader(Test):
    """Test extension with support for header parsing, and handling of struct
    offset, size, ...

    Subclasses set `header` (handed to HeaderFile) and `func` (the prototype
    looked up in that header).
    """

    header = None

    def __init__(self, *args, **kwargs):
        super(TestHeader, self).__init__(*args, **kwargs)
        # Requirement check
        if pycparser is None:
            raise ImportError("pycparser module is needed to launch tests "
                              "based on header files")

        ctype_manager = CTypesManagerNotPacked(CAstTypes(), CTypeAMD64_unk())

        hdr = HeaderFile(self.header, ctype_manager)
        proto = hdr.functions[self.func]

        # Each argument is exposed to C expressions as 'arg<index>_<name>'
        arg_types = {'arg%d_%s' % (i, name): proto.args[name]
                     for i, name in enumerate(proto.args_order)}
        self.c_handler = CHandler(
            hdr.ctype_manager,
            {arg_name: set([arg_type])
             for arg_name, arg_type in arg_types.items()}
        )
        self.expr_types_from_C = arg_types

        # Memoization of the C-expression resolutions below
        self.cache_sizeof = {}
        self.cache_trad = {}
        self.cache_field_addr = {}

    def sizeof(self, Clike):
        """Return the `size` of the type of the C expression @Clike, times 8
        (presumably a byte count turned into bits)"""
        ret = self.cache_sizeof.get(Clike, None)
        if ret is None:
            ret = self.c_handler.c_to_type(
                Clike,
                self.expr_types_from_C
            ).size * 8
            self.cache_sizeof[Clike] = ret
        return ret

    def trad(self, Clike):
        """Translate the C expression @Clike into an expression (cached)"""
        ret = self.cache_trad.get(Clike, None)
        if ret is None:
            ret = self.c_handler.c_to_expr(Clike, self.expr_types_from_C)
            self.cache_trad[Clike] = ret
        return ret

    def field_addr(self, base, Clike, is_ptr=False):
        """Return the integer offset between @Clike and @base

        @base: C expression used as reference
        @Clike: C expression of the accessed element
        @is_ptr: if set, @Clike is used as is; otherwise its address
        ("&(@Clike)") is taken
        """
        key = (base, Clike, is_ptr)
        ret = self.cache_field_addr.get(key, None)
        if ret is None:
            base_expr = self.trad(base)
            if is_ptr:
                access_expr = self.trad(Clike)
            else:
                access_expr = self.trad("&(%s)" % Clike)
            ret = int(expr_simp(access_expr - base_expr))
            self.cache_field_addr[key] = ret
        return ret
class TestLauncher(object):
    "Launch tests for a function and report matching candidates"

    def __init__(self, filename, machine, abicls, tests_cls, engine_name,
                 map_addr=0):
        """@filename: binary to load
        @machine: miasm machine (provides the IRA used by the ABI)
        @abicls: ABI class, instantiated with (jitter, ira)
        @tests_cls: test classes, each instantiated with (jitter, abi)
        @engine_name: "qemu", or a name forwarded to MiasmEngine
        @map_addr: address passed to the container loader
        """

        # Logging facilities
        self.logger = init_logger("testlauncher")

        # Defined from the start, so `possible_funcs` can be read before
        # the first call to run()
        self._possible_funcs = []

        # Prepare JiT engine
        self.machine = machine
        self.init_engine(engine_name)

        # Init and snapshot VM
        self.load_vm(filename, map_addr)
        self.init_stub()
        self.snapshot = self.engine.take_snapshot()

        # Init tests
        self.init_abi(abicls)
        self.initialize_tests(tests_cls)

    def init_stub(self):
        """Initialize stubbing capabilities"""
        if not isinstance(self.engine, MiasmEngine):
            # Unsupported capability
            return

        # Get stubs' implementation
        # NOTE(review): every path listed in config.stubs is executed as
        # Python code; the configuration must be trusted
        context = {}
        for fpath in config.stubs:
            execfile(fpath, context)
        if not context:
            return

        libs = None
        if isinstance(self.ctr, ContainerPE):
            from miasm2.jitter.loader.pe import preload_pe, libimp_pe
            libs = libimp_pe()
            preload_pe(self.jitter.vm, self.ctr.executable, libs)

        elif isinstance(self.ctr, ContainerELF):
            from miasm2.jitter.loader.elf import preload_elf, libimp_elf
            libs = libimp_elf()
            preload_elf(self.jitter.vm, self.ctr.executable, libs)

        else:
            return

        # Add associated breakpoints
        self.jitter.add_lib_handler(libs, context)

    def initialize_tests(self, tests_cls):
        """Instantiate each class of @tests_cls with (jitter, abi)"""
        self.tests = [testcls(self.jitter, self.abi) for testcls in tests_cls]

    def load_vm(self, filename, map_addr):
        """Load @filename in the VM at @map_addr, then set up registers and
        stack"""
        # Binary mode, and the descriptor is released once the container
        # is built
        with open(filename, "rb") as fdesc:
            self.ctr = Container.from_stream(fdesc, vm=self.jitter.vm,
                                             addr=map_addr)
        self.jitter.cpu.init_regs()
        self.jitter.init_stack()

    def init_engine(self, engine_name):
        """Instantiate the engine selected by @engine_name and keep its jitter"""
        if engine_name == "qemu":
            self.engine = QEMUEngine(self.machine)
        else:
            self.engine = MiasmEngine(self.machine, engine_name)
        self.jitter = self.engine.jitter

    def init_abi(self, abicls):
        """Instantiate @abicls on the jitter with the machine's IRA"""
        ira = self.machine.ira()
        self.abi = abicls(self.jitter, ira)

    def launch_tests(self, test, address, timeout_seconds=0):
        """Run every subtest of @test against the code at @address

        If the test set succeeds, @test.func is added to the possible
        functions.
        @timeout_seconds: forwarded to the engine for each run
        """
        # Variables to remind between two "launch_test"
        self._temp_reset_mem = True

        # Reset between functions
        test.reset_full()

        # Callback to launch
        def launch_test(init, check):
            """Launch a test associated with @init, @check"""

            # Reset state
            self.engine.restore_snapshot(memory=self._temp_reset_mem)
            self.abi.reset()
            test.reset()

            # Prepare VM
            init(test)
            self.abi.prepare_call(ret_addr=END_ADDR)

            # Run code
            status = self.engine.run(address, timeout_seconds)
            if not status:
                # Early quit: force a memory reset before the next subtest
                self._temp_reset_mem = True
                return False

            # Check result
            to_ret = check(test)

            # Update flags
            self._temp_reset_mem = test.reset_mem

            return to_ret

        # Launch subtests
        status = test.tests.execute(launch_test)
        if status:
            self._possible_funcs.append(test.func)

    def run(self, address, *args, **kwargs):
        """Launch all tests on @address; return the matching function names

        Extra arguments are forwarded to launch_tests."""
        self._possible_funcs = []

        nb_tests = len(self.tests)
        self.logger.info("Launch tests (%d available functions)", nb_tests)
        starttime = time.time()

        self.engine.prepare_run()
        for test in self.tests:
            self.launch_tests(test, address, *args, **kwargs)

        self.logger.info("Total time: %.4f seconds", time.time() - starttime)
        return self._possible_funcs

    def get_possible_funcs(self):
        """Function names found by the last run()"""
        return self._possible_funcs
    possible_funcs = property(get_possible_funcs)
# C function definition: "<type>[ *]+<name>(...)"; the name is captured
match_C = re.compile(r"\w+[ \*]+(\w+)\(.*\)")
# Prefix marking a custom re-implementation ('my_strlen' stands for strlen)
custom_tag = "my_"
# Functions of the C sources which are never checked
whitelist_funcs = ["main"]


def get_funcs_exe_source(c_file, filename):
    """Get function defined in @c_file

    Return (to_check, symbols):
    - to_check: list of (offset, expected name), for the functions of
      @c_file found in the '.symtab' section of the ELF @filename
    - symbols: name -> set of offsets, for every symbol of '.symtab'
    """
    with open(c_file) as fdesc:
        data = fdesc.read()
    funcs = set(match.group(1) for match in match_C.finditer(data))
    funcs.difference_update(whitelist_funcs)

    # Find corresponding binary offset (an ELF is binary data)
    with open(filename, "rb") as fdesc:
        elf = ELF(fdesc.read())

    to_check = []
    symbols = {}
    for name, symb in elf.getsectionbyname(".symtab").symbols.items():
        offset = symb.value
        if name.startswith("__"):
            name = name[2:]
        symbols.setdefault(name, set()).add(offset)
        if name in funcs:
            if name.startswith(custom_tag):
                ## Custom tags can be used to write equivalent functions like
                ## 'my_strlen' for a custom strlen
                name = name[len(custom_tag):]
            to_check.append((offset, name))
    return to_check, symbols


def get_funcs_heuristics(c_file, filename):
    """Get function from Sibyl heuristics

    Run 'sibyl func' with every heuristic enabled on @filename and keep the
    reported addresses which are known from the symbol table.
    Return the same (to_check, symbols) couple as get_funcs_exe_source.
    """
    # Force the activation of all heuristics
    fh = FuncHeuristic(None, None, "")
    cmd = ["sibyl", "func"]
    for heuristic_name in fh.heuristic_names:
        cmd += ["-e", heuristic_name]
    cmd.append(filename)
    print(" ".join(cmd))
    sibyl = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
    stdout, stderr = sibyl.communicate()
    if stderr:
        raise RuntimeError("Something gone wrong...:\n%s" % stderr)

    # Parse output and merge with symtab (ground truth)
    to_check_symtab, extra = get_funcs_exe_source(c_file, filename)
    addr2name = {addr: name for addr, name in to_check_symtab}
    to_check = []
    for line in stdout.split("\n"):
        if not line:
            continue
        addr = int(line, 0)
        if addr in addr2name:
            # The expected name is the one attached to this address
            to_check.append((addr, addr2name[addr]))

    return to_check, extra


def test_find(args):
    """Compile the C tests, run 'sibyl find' on each binary and report

    @args: parsed options; `func_heuristic` and `arch_heuristic` are read
    Always return False.
    """

    if args.func_heuristic:
        get_funcs = get_funcs_heuristics
    else:
        get_funcs = get_funcs_exe_source

    # Compil tests
    log_info( "Remove old files" )
    os.system("make clean")
    log_info( "Compile C files" )
    status = os.system("make")
    if status:
        log_error("Compilation failed ('make' returned %d)" % status)

    # Find test names
    c_files = []

    for cur_dir, sub_dir, files in os.walk("."):
        c_files += [x for x in files if x.endswith(".c")]

    log_info( "Found:\n\t- " + "\n\t- ".join(c_files) )

    for c_file in c_files:
        filename = c_file[:-2]
        log_info( " %s:" % filename )
        # to_check: (addr, expected found)
        # extra: possible extra match
        to_check, extra = get_funcs(c_file, filename)
        print("\n".join("0x%08x: %s" % (addr, funcname)
                        for (addr, funcname) in to_check))

        # Launch Sibyl
        log_info( "Launch Sibyl" )
        options = ["-j", "gcc", "-i", "5", "-b", "ABIStdCall_x86_32"]
        if not args.arch_heuristic:
            options += ["-a", "x86_32"]

        cmd = ["sibyl", "find"] + options + [filename]
        cmd += [hex(addr) for addr, _ in to_check]
        print(" ".join(cmd))
        sibyl = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)

        # Parse result: lines formatted as "<addr> : <func>"
        found = []
        stdout, stderr = sibyl.communicate()
        for line in stdout.split("\n"):
            if not line or " : " not in line:
                continue
            addr, func = line.split(" : ")
            found.append((int(addr, 0), func))

        if sibyl.returncode:
            log_error("Process exits with a %d code" % sibyl.returncode)
            print(stderr)
            exit(sibyl.returncode)

        log_info( "Evaluate results" )
        i = 0

        for element in found:
            if element not in to_check:
                offset, name = element
                if offset in extra.get(name, []):
                    # Present in symtab but not in C source file
                    print("[+] Additionnal found: %s (@0x%08x)" % (name, offset))
                else:
                    alt_names = [aname
                                 for aname, offsets in extra.items()
                                 if offset in offsets]
                    log_error("Bad found: %s (@0x%08x -> '%s')" % (name,
                                                                  offset,
                                                                  ",".join(alt_names)))
            else:
                i += 1
        for element in to_check:
            if element not in found:
                log_error("Unable to find: %s (@0x%08x)" % (element[1], element[0]))

        log_success("Found %d/%d correct elements" % (i, len(to_check)))

    log_info( "Remove old files" )
    os.system("make clean")
    return False
/* Letter (either case) or decimal digit */
int isalnum(int c)
{
    int lower = (c >= 'a') && (c <= 'z');
    int upper = (c >= 'A') && (c <= 'Z');
    int digit = (c >= '0') && (c <= '9');

    return (lower || upper || digit);
}

/* Letter, either case */
int isalpha(int c)
{
    if (c >= 'a' && c <= 'z')
        return 1;
    return (c >= 'A' && c <= 'Z');
}

/* Value in [0, 127] */
int isascii(int c)
{
    if (c < 0)
        return 0;
    return (c < 128);
}

/* Decimal digit */
int isdigit (int c)
{
    if (c < '0')
        return 0;
    return (c <= '9');
}

/* Space or horizontal tabulation */
int isblank(int c)
{
    switch (c) {
    case ' ':
    case '\t':
        return 1;
    default:
        return 0;
    }
}

/* 0x00-0x1F, or 0x7F */
int iscntrl(int c)
{
    if (c == 0x7F)
        return 1;
    return (c >= 0 && c <= 0x1F);
}

/* Lower case letter */
int islower(int c)
{
    if (c < 'a')
        return 0;
    return (c <= 'z');
}

/* 0x20-0x7E, space included */
int isprint(int c)
{
    if (c < 0x20)
        return 0;
    return (c <= 0x7E);
}

/* 0x21-0x7E, space excluded */
int isgraph(int c)
{
    if (c <= 0x20)
        return 0;
    return (c <= 0x7E);
}

/* Graphical character which is neither a letter nor a digit */
int ispunct(int c)
{
    if (!isgraph(c))
        return 0;
    return !isalnum(c);
}

/* 0x09-0x0D, or space */
int isspace(int c)
{
    if (c == 0x20)
        return 1;
    return (c >= 0x09 && c <= 0x0D);
}

/* Upper case letter */
int isupper(int c)
{
    if (c < 'A')
        return 0;
    return (c <= 'Z');
}

/* Hexadecimal digit, either case */
int isxdigit (int c)
{
    int digit = (c >= '0') && (c <= '9');
    int upper = (c >= 'A') && (c <= 'F');
    int lower = (c >= 'a') && (c <= 'f');

    return (digit || upper || lower);
}


int main() {
    return 0;
}
/* Absolute value of @i */
int abs (int i)
{
    if (i < 0)
        return -i;
    return i;
}

#define TABLE_BASE 0x2e
#define TABLE_SIZE 0x4d

#define XX ((char)0x40)


/* Digit value of each character from TABLE_BASE on; XX marks an invalid one */
static const char a64l_table[TABLE_SIZE] =
{
  /* 0x2e */                                                           0,  1,
  /* 0x30 */   2,  3,  4,  5,  6,  7,  8,  9, 10, 11, XX, XX, XX, XX, XX, XX,
  /* 0x40 */  XX, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
  /* 0x50 */  27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, XX, XX, XX, XX, XX,
  /* 0x60 */  XX, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52,
  /* 0x70 */  53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63
};


/*
 * Decode at most 6 characters of @string, 6 bits each, the first character
 * being the least significant. Decoding stops on the first character that
 * is outside of the table or marked XX.
 */
long int a64l(const char *string)
{
    unsigned long int acc = 0ul;
    int bits = 0;
    int pos;

    for (pos = 0; pos < 6; ++pos) {
        unsigned slot;
        unsigned digit;

        slot = string[pos] - TABLE_BASE;
        if ((unsigned int) slot >= TABLE_SIZE)
            break;
        digit = (int) a64l_table[slot];
        if (digit == (int) XX)
            break;
        acc |= digit << bits;
        bits += 6;
    }

    return (long int) acc;
}


/* Convert a string to an int.  */
int atoi(const char *nptr)
{
    long parsed = strtol (nptr, (char **) NULL, 10);

    return (int) parsed;
}



int main() {
    return 0;
}
#define STRTEST "Hello"

/* Continuation point kept by strtok() between two calls */
char * ___strtok;


/**
 * strnicmp - Case insensitive, length-limited string comparison
 * @s1: One string
 * @s2: The other string
 * @len: the maximum number of characters to compare
 *
 * Returns the difference between the last two characters compared
 * (lowered when they differed), 0 when @len is 0.
 */
int strnicmp(const char *s1, const char *s2, size_t len)
{
    /* Unsigned on purpose: the final difference is computed on 0..255 */
    unsigned char a = 0;
    unsigned char b = 0;

    for (; len != 0; --len) {
        a = *s1++;
        b = *s2++;
        if (!a || !b)
            break;
        if (a == b)
            continue;
        a = tolower(a);
        b = tolower(b);
        if (a != b)
            break;
    }
    return (int)a - (int)b;
}

/**
 * strcpy - Copy a %NUL terminated string
 * @dest: Where to copy the string to
 * @src: Where to copy the string from
 *
 * Returns @dest.
 */
char * strcpy(char * dest,const char *src)
{
    char *out = dest;
    char ch;

    do {
        ch = *src++;
        *out++ = ch;
    } while (ch != '\0');

    return dest;
}
/**
 * strncpy - Copy a length-limited, %NUL-terminated string
 * @dest: Where to copy the string to
 * @src: Where to copy the string from
 * @count: The maximum number of bytes to copy
 *
 * Note that unlike userspace strncpy, this does not %NUL-pad the buffer.
 * However, the result is not %NUL-terminated if the source exceeds
 * @count bytes.
 */
char * strncpy(char * dest,const char *src,size_t count)
{
    char *out = dest;

    while (count != 0) {
        count--;
        if ((*out++ = *src++) == '\0')
            break;
    }

    return dest;
}

/**
 * strcat - Append one %NUL-terminated string to another
 * @dest: The string to be appended to
 * @src: The string to append to it
 */
char * strcat(char * dest, const char * src)
{
    char *end = dest;
    char ch;

    /* Reach the terminator of @dest, then copy @src over it */
    while (*end != '\0')
        end++;
    do {
        ch = *src++;
        *end++ = ch;
    } while (ch != '\0');

    return dest;
}

/**
 * strncat - Append a length-limited, %NUL-terminated string to another
 * @dest: The string to be appended to
 * @src: The string to append to it
 * @count: The maximum numbers of bytes to copy
 *
 * Note that in contrast to strncpy, strncat ensures the result is
 * terminated.
 */
char * strncat(char *dest, const char *src, size_t count)
{
    char *end = dest;
    char ch;

    if (count == 0)
        return dest;

    while (*end != '\0')
        end++;
    for (;;) {
        ch = *src++;
        *end++ = ch;
        if (ch == '\0')
            break;
        if (--count == 0) {
            *end = '\0';
            break;
        }
    }

    return dest;
}

/**
 * strcmp - Compare two strings
 * @cs: One string
 * @ct: Another string
 *
 * Returns the difference of the first differing characters, truncated to
 * a signed char; 0 if the strings are equal.
 */
int strcmp(const char * cs,const char * ct)
{
    signed char diff;

    for (;;) {
        diff = *cs - *ct;
        ct++;
        if (diff != 0)
            break;
        if (*cs == '\0')
            break;
        cs++;
    }

    return diff;
}

/**
 * strchr - Find the first occurrence of a character in a string
 * @s: The string to be searched
 * @c: The character to search for
 *
 * The terminating %NUL is part of the search.
 */
char * strchr(const char * s, int c)
{
    while (*s != (char) c) {
        if (*s == '\0')
            return NULL;
        ++s;
    }
    return (char *) s;
}
/**
 * strlen - Find the length of a string
 * @s: The string to be sized
 */
size_t strlen(const char * s)
{
    const char *sc;

    for (sc = s; *sc != '\0'; ++sc)
        /* nothing */;
    return sc - s;
}


/**
 * strrchr - Find the last occurrence of a character in a string
 * @s: The string to be searched
 * @c: The character to search for
 *
 * The terminating %NUL is part of the search. Returns NULL if @c is not
 * found.
 */
char * strrchr(const char * s, int c)
{
    const char *p = s + strlen(s);

    /*
     * Walk back from the terminator. The start of the string is tested
     * before stepping back, so that the cursor never points before @s
     * (forming such a pointer is undefined behaviour).
     */
    for (;;) {
        if (*p == (char)c)
            return (char *)p;
        if (p == s)
            return NULL;
        --p;
    }
}

/**
 * strnlen - Find the length of a length-limited string
 * @s: The string to be sized
 * @count: The maximum number of bytes to search
 */
size_t strnlen(const char * s, size_t count)
{
    const char *sc;

    for (sc = s; count-- && *sc != '\0'; ++sc)
        /* nothing */;
    return sc - s;
}

/**
 * strspn - Calculate the length of the initial substring of @s which only
 *  contain letters in @accept
 * @s: The string to be searched
 * @accept: The string to search for
 */
size_t strspn(const char *s, const char *accept)
{
    const char *p;
    const char *a;
    size_t count = 0;

    for (p = s; *p != '\0'; ++p) {
        for (a = accept; *a != '\0'; ++a) {
            if (*p == *a)
                break;
        }
        /* @accept exhausted: *p is not an accepted character */
        if (*a == '\0')
            return count;
        ++count;
    }

    return count;
}

/**
 * strpbrk - Find the first occurrence of a set of characters
 * @cs: The string to be searched
 * @ct: The characters to search for
 *
 * Returns NULL if no character of @ct is found in @cs.
 */
char * strpbrk(const char * cs,const char * ct)
{
    const char *sc1,*sc2;

    for( sc1 = cs; *sc1 != '\0'; ++sc1) {
        for( sc2 = ct; *sc2 != '\0'; ++sc2) {
            if (*sc1 == *sc2)
                return (char *) sc1;
        }
    }
    return NULL;
}
string to be searched 257 | * @ct: The characters to search for 258 | * 259 | * WARNING: strtok is deprecated, use strsep instead. 260 | */ 261 | char * strtok(char * s,const char * ct) 262 | { 263 | char *sbegin, *send; 264 | 265 | sbegin = s ? s : ___strtok; 266 | if (!sbegin) { 267 | return NULL; 268 | } 269 | sbegin += strspn(sbegin,ct); 270 | if (*sbegin == '\0') { 271 | ___strtok = NULL; 272 | return( NULL ); 273 | } 274 | send = strpbrk( sbegin, ct); 275 | if (send && *send != '\0') 276 | *send++ = '\0'; 277 | ___strtok = send; 278 | return (sbegin); 279 | } 280 | 281 | /** 282 | * strsep - Split a string into tokens 283 | * @s: The string to be searched 284 | * @ct: The characters to search for 285 | * 286 | * strsep() updates @s to point after the token, ready for the next call. 287 | * 288 | * It returns empty tokens, too, behaving exactly like the libc function 289 | * of that name. In fact, it was stolen from glibc2 and de-fancy-fied. 290 | * Same semantics, slimmer shape. ;) 291 | */ 292 | char * strsep(char **s, const char *ct) 293 | { 294 | char *sbegin = *s, *end; 295 | 296 | if (sbegin == NULL) 297 | return NULL; 298 | 299 | end = strpbrk(sbegin, ct); 300 | if (end) 301 | *end++ = '\0'; 302 | *s = end; 303 | 304 | return sbegin; 305 | } 306 | 307 | /** 308 | * memset - Fill a region of memory with the given value 309 | * @s: Pointer to the start of the area. 310 | * @c: The byte to fill the area with 311 | * @count: The size of the area. 312 | * 313 | * Do not use memset() to access IO space, use memset_io() instead. 
/**
 * memset - Fill a region of memory with the given value
 * @s: Pointer to the start of the area.
 * @c: The byte to fill the area with
 * @count: The size of the area.
 *
 * Returns @s.
 */
void *memset(void *s, int c, size_t count)
{
    char *cursor = (char *)s;
    size_t i;

    for (i = 0; i < count; i++)
        cursor[i] = c;

    return s;
}


/* Count the bytes that precede the terminating NUL of @s. */
int my_strlen(const char *s)
{
    int length;

    for (length = 0; s[length] != '\x00'; length++)
        /* nothing */;
    return length;
}

/**
 * memmove - Copy one area of memory to another
 * @dest: Where to copy to
 * @src: Where to copy from
 * @count: The size of the area.
 *
 * Unlike memcpy(), memmove() copes with overlapping areas: it copies
 * forwards when @dest is not above @src and backwards otherwise.
 * Returns @dest.
 */
void *memmove(void *dest, const void *src, size_t count)
{
    char *out;
    const char *in;

    if (dest <= src) {
        out = (char *)dest;
        in = (const char *)src;
        for (; count != 0; count--)
            *out++ = *in++;
    } else {
        /* Start one past the end and walk down. */
        out = (char *)dest + count;
        in = (const char *)src + count;
        for (; count != 0; count--)
            *--out = *--in;
    }

    return dest;
}
/**
 * memcpy - Copy one area of memory to another
 * @dest: Where to copy to
 * @src: Where to copy from
 * @count: The size of the area.
 *
 * Copies forwards, byte by byte, and returns @dest.
 */
void *memcpy(void *dest, const void *src, size_t count)
{
    char *tmp = (char *)dest;
    const char *s = (const char *)src;

    while (count--)
        *tmp++ = *s++;

    return dest;
}

/**
 * my_memcpy - memcpy() variant that copies from the last byte to the first
 * @dest: Where to copy to
 * @src: Where to copy from
 * @count: The size of the area.
 *
 * Returns @dest.
 */
void *my_memcpy(void *dest, const void *src, size_t count)
{
    /*
     * Cast to char pointers before doing any arithmetic: indexing a
     * void pointer is a compiler extension. Starting one past the end
     * and pre-decrementing keeps both cursors inside the buffers, even
     * when count is 0.
     */
    char *tmp = (char *)dest + count;
    const char *s = (const char *)src + count;

    while (count--)
        *--tmp = *--s;

    return dest;
}
17 | */ 18 | 19 | #include 20 | 21 | size_t my_strlen(const char *s) { 22 | return strlen(s); 23 | } 24 | 25 | int main() { 26 | return 0; 27 | } 28 | -------------------------------------------------------------------------------- /test/learn/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CFLAGS = -m64 3 | 4 | SRC = $(wildcard *.c) 5 | PROGRAMS = $(SRC:.c=) 6 | 7 | all: $(PROGRAMS) 8 | 9 | %: %.c 10 | $(CC) $(CFLAGS) $< -o $@ 11 | 12 | clean: 13 | rm -rf $(PROGRAMS) $(CLASS) $(CLASSCOMP) 14 | -------------------------------------------------------------------------------- /test/learn/__init__.py: -------------------------------------------------------------------------------- 1 | from .run_tests import test_learn 2 | -------------------------------------------------------------------------------- /test/learn/add.c: -------------------------------------------------------------------------------- 1 | int add(int a, int b) { 2 | return a+b; 3 | } 4 | 5 | #ifdef __GNUC__ 6 | #ifndef __clang__ 7 | int main(void) __attribute__((optimize("-O0"))); 8 | #endif 9 | #endif 10 | int main(void){ 11 | return add(42,42); 12 | } 13 | -------------------------------------------------------------------------------- /test/learn/add.h: -------------------------------------------------------------------------------- 1 | int add(int a, int b); 2 | -------------------------------------------------------------------------------- /test/learn/copy_struct.c: -------------------------------------------------------------------------------- 1 | #include "copy_struct.h" 2 | 3 | void* my_memcpy(void *dest,const void *src, size_t n) 4 | { 5 | size_t i; 6 | void *tmp = dest; 7 | 8 | for (i = 0;i < n; i++) { 9 | *(char*)dest++ = *(char*)src++; 10 | } 11 | return tmp; 12 | } 13 | 14 | void copy_struct(elem* in, elem* out) { 15 | my_memcpy((char*) out, (char *)in, sizeof(elem)); 16 | } 17 | 18 | #ifdef __GNUC__ 19 | #ifndef __clang__ 20 | 
int main(void) __attribute__((optimize("-O0"))); 21 | #endif 22 | #endif 23 | int main(void) { 24 | elem e1, e2; 25 | e1.a = 4; 26 | copy_struct(&e1, &e2); 27 | return 0; 28 | } 29 | -------------------------------------------------------------------------------- /test/learn/copy_struct.h: -------------------------------------------------------------------------------- 1 | typedef struct elem { 2 | int a; 3 | char* b; 4 | int c[10]; 5 | } elem; 6 | typedef long unsigned int size_t; 7 | void copy_struct(elem* in, elem* out); 8 | -------------------------------------------------------------------------------- /test/learn/deref_struct.c: -------------------------------------------------------------------------------- 1 | #include "deref_struct.h" 2 | 3 | sub_elem* deref_struct(list* l, unsigned int expected) { 4 | int i; 5 | for (;;) { 6 | for (i = 0; i < 10; i++) { 7 | if (l->elem.c[i].b == expected) { 8 | return &(l->elem.c[i]); 9 | } 10 | } 11 | l = l->next; 12 | } 13 | } 14 | 15 | #ifdef __GNUC__ 16 | #ifndef __clang__ 17 | int main(void) __attribute__((optimize("-O0"))); 18 | #endif 19 | #endif 20 | int main(void) { 21 | list tab[3]; 22 | tab[0].next = &tab[1]; 23 | tab[1].next = &tab[2]; 24 | 25 | tab[2].elem.c[4].b = 0x1337; 26 | deref_struct(&tab[0], 0x1337); 27 | return 0; 28 | } 29 | -------------------------------------------------------------------------------- /test/learn/deref_struct.h: -------------------------------------------------------------------------------- 1 | typedef struct sub_elem { 2 | int a; 3 | unsigned int b; 4 | } sub_elem; 5 | 6 | typedef struct elem { 7 | char *a; 8 | sub_elem c[10]; 9 | } elem; 10 | 11 | typedef struct list { 12 | struct list* next; 13 | elem elem; 14 | } list; 15 | 16 | sub_elem* deref_struct(list* l, unsigned int expected); 17 | -------------------------------------------------------------------------------- /test/learn/doublePtr.c: -------------------------------------------------------------------------------- 1 
/*
 * test/learn/doublePtr.c: sum the first @nbElem ints of the array that
 * *@x points to, walking from the last element down to the first.
 */
int doublePtr(int **x, int nbElem)
{
    int total = 0;
    int idx = nbElem - 1;

    while (idx >= 0) {
        total += (*x)[idx];
        idx--;
    }
    return total;
}

/**
 * my_strcpy - Copy a %NUL terminated string
 * @dest: Where to copy the string to
 * @src: Where to copy the string from
 *
 * The terminating NUL is copied too. Returns @dest.
 */
char *my_strcpy(char *dest, const char *src)
{
    char *out = dest;
    char ch;

    do {
        ch = *src++;
        *out++ = ch;
    } while (ch != '\0');
    return dest;
}

/**
 * my_strlen - Find the length of a string
 * @s: The string to be sized
 */
size_t my_strlen(const char *s)
{
    size_t len = 0;

    while (s[len] != '\0')
        len++;
    return len;
}

/* test/learn/numerous_arguments.c: return the sum of the fifteen arguments. */
unsigned int numerous_arguments(unsigned int a, unsigned int b, unsigned int c, unsigned int d, unsigned int e, unsigned int f, unsigned int g, unsigned int h, unsigned int i, unsigned int j, unsigned int k, unsigned int l, unsigned int m, unsigned int n, unsigned int o)
{
    unsigned int total = a;

    total += b;
    total += c;
    total += d;
    total += e;
    total += f;
    total += g;
    total += h;
    total += i;
    total += j;
    total += k;
    total += l;
    total += m;
    total += n;
    total += o;
    return total;
}
def invoke_pin(filename, func_name, header_filename, cont):
    """Return the `sibyl learn` command line that uses the PIN tracer.

    `cont` is not used; it is accepted so that every invoker shares the
    same signature.
    """
    return ["sibyl", "learn", "-t", "pin", func_name, filename, header_filename]


def invoke_miasm(filename, func_name, header_filename, cont):
    """Return the `sibyl learn` command line that uses the Miasm tracer.

    The address of `main` is looked up in `cont` and passed through `-m`.
    """
    main_addr = cont.loc_db.get_name_offset("main")
    return ["sibyl", "learn", "-t", "miasm", "-m", "0x%x" % main_addr,
            func_name, filename, header_filename]


def test_learn(args):
    """Learn a test class for every C sample and check that it matches.

    Each `<name>.c` of the current directory is built with `make`; the
    function `<name>` is then learned with every enabled tracer (Miasm,
    plus PIN when `args.pin_tracer` is set) and the generated class is run
    against the function it was learned from.

    Returns True when at least one sample failed, False otherwise.
    """
    machine = Machine("x86_64")

    # Compile tests
    log_info("Remove old files")
    os.system("make clean")
    log_info("Compile C files")
    status = os.system("make")
    assert status == 0

    # Find test names. Only the current directory is listed: the Makefile
    # builds `*.c` from this directory only and the binaries are opened
    # by bare name below.
    c_files = [name[:-2] for name in os.listdir(".") if name.endswith(".c")]

    # Ways to invoke
    to_invoke = {
        "Miasm": invoke_miasm,
    }
    if args.pin_tracer:
        to_invoke["PIN"] = invoke_pin

    # Learn + test
    fail = False
    for filename in c_files:

        if filename in unsupported:
            log_error("Skip %s (unsupported)" % filename)
            continue

        # The sample is a compiled binary: read it in binary mode
        with open(filename, "rb") as fdesc:
            cont = Container.from_stream(fdesc)

        func_name = filename
        func_addr = cont.loc_db.get_name_offset(func_name)
        header_filename = "%s.h" % filename

        for name, cb in to_invoke.items():
            log_info("Learning %s over %s with %s" % (func_name,
                                                      filename, name))
            cmdline = cb(filename, func_name, header_filename, cont)

            print(" ".join(cmdline))
            sibyl = subprocess.Popen(cmdline, env=os.environ,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)
            stdout, stderr = sibyl.communicate()
            if sibyl.returncode != 0:
                log_error("Failed to learn with error:")
                print(stderr)
                fail = True
                continue

            log_info("Testing generated class")

            # NOTE(review): this executes the source printed by
            # `sibyl learn`; only run it on output of a trusted binary.
            mod = imp.new_module("testclass")
            exec(stdout, mod.__dict__)
            classTest = getattr(mod, "TESTS")[0]
            tl = TestLauncher(filename, machine, ABI_AMD64_SYSTEMV, [classTest],
                              config.jit_engine)

            possible_funcs = tl.run(func_addr)
            if tl.possible_funcs and possible_funcs == [filename]:
                log_success("Generated class recognize the function " \
                            "'%s'" % func_name)
            else:
                log_error("Generated class failed to recognize the function " \
                          "'%s'" % func_name)
                fail = True

    # Clean
    log_info( "Remove old files" )
    os.system("make clean")

    return fail
def run_test(test_func, args):
    """Run `test_func(args)` from inside the directory of its package.

    The directory is derived from `test_func.__module__` (dots become
    path separators, the last component is dropped) and is resolved
    against the current working directory. The previous working
    directory is restored even when `test_func` raises.

    Returns whatever `test_func` returns.
    """
    log_info("Start test: " + test_func.__module__)

    module_path = os.path.dirname(test_func.__module__.replace('.', '/'))
    previous_cwd = os.getcwd()

    os.chdir(os.path.join(previous_cwd, module_path))
    try:
        return test_func(args)
    finally:
        # Do not leave later tests running from the wrong directory
        os.chdir(previous_cwd)


# ANSI escape sequences used to colorize the log output
colors = {"red": "\033[91;1m",
          "end": "\033[0m",
          "green": "\033[92;1m",
          "lightcyan": "\033[96m",
          "blue": "\033[94;1m"}


def log_error(content):
    """Print `content` in red, prefixed with "[-] "."""
    msg = "%(red)s[-] " % colors + content + "%(end)s" % colors
    print(msg)


def log_success(content):
    """Print `content` in green, prefixed with "[+] "."""
    msg = "%(green)s[+] " % colors + content + "%(end)s" % colors
    print(msg)


def log_info(content):
    """Print `content` without color, prefixed with "[+] "."""
    print("[+] " + content)