├── .gitignore
├── .travis.yml
├── Dockerfile
├── LICENSE.md
├── README.md
├── bin
    └── sibyl
├── doc
    ├── ADD_ABI.md
    ├── ADD_TEST.md
    ├── ADVANCED_USE.md
    ├── CHANGELOG.md
    ├── CONFIG.md
    ├── LEARNING.md
    ├── TESTING.md
    └── img
    │   ├── ghidra_screen.png
    │   ├── ghidra_screen2.png
    │   └── ida_screen.png
├── ext
    ├── ghidra
    │   ├── ExportFunction.java
    │   └── find.py
    ├── ida
    │   └── find.py
    ├── pin_tracer
    │   ├── Makefile
    │   └── pin_tracer.cpp
    └── radare2
    │   └── r2_find.py
├── setup.py
├── sibyl
    ├── __init__.py
    ├── abi
    │   ├── __init__.py
    │   ├── abi.py
    │   ├── arm.py
    │   ├── mep.py
    │   ├── mips.py
    │   └── x86.py
    ├── actions
    │   ├── __init__.py
    │   ├── action.py
    │   ├── config.py
    │   ├── find.py
    │   ├── func.py
    │   └── learn.py
    ├── commons.py
    ├── config.py
    ├── engine
    │   ├── __init__.py
    │   ├── engine.py
    │   ├── miasm.py
    │   └── qemu.py
    ├── heuristics
    │   ├── __init__.py
    │   ├── arch.py
    │   ├── csts.py
    │   ├── func.py
    │   └── heuristic.py
    ├── learn
    │   ├── __init__.py
    │   ├── findref.py
    │   ├── generator
    │   │   ├── __init__.py
    │   │   ├── generator.py
    │   │   ├── pythongenerator.py
    │   │   └── templates.py
    │   ├── learn.py
    │   ├── learnexception.py
    │   ├── replay.py
    │   ├── trace.py
    │   └── tracer
    │   │   ├── __init__.py
    │   │   ├── miasm.py
    │   │   ├── pin.py
    │   │   └── tracer.py
    ├── test
    │   ├── __init__.py
    │   ├── ctype.py
    │   ├── stdlib.py
    │   ├── string.py
    │   └── test.py
    └── testlauncher.py
└── test
    ├── find
        ├── Makefile
        ├── __init__.py
        ├── run_ctests.py
        ├── test_ctype.c
        ├── test_stdlib.c
        ├── test_string.c
        └── test_stub.c
    ├── learn
        ├── Makefile
        ├── __init__.py
        ├── add.c
        ├── add.h
        ├── copy_struct.c
        ├── copy_struct.h
        ├── deref_struct.c
        ├── deref_struct.h
        ├── doublePtr.c
        ├── doublePtr.h
        ├── my_strcpy.c
        ├── my_strcpy.h
        ├── my_strlen.c
        ├── my_strlen.h
        ├── numerous_arguments.c
        ├── numerous_arguments.h
        ├── run_tests.py
        ├── several_traces.c
        └── several_traces.h
    ├── run_all_tests.py
    └── utils
        ├── __init__.py
        └── log.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | 
 5 | # C extensions
 6 | *.so
 7 | 
 8 | # Emacs
 9 | *~
10 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | sudo: false
 2 | language: python
 3 | python:
 4 | - "2.7"
 5 | addons:
 6 |     apt:
 7 |         packages:
 8 |             - make
 9 |             - gcc
10 |             - python-virtualenv
11 |             - unzip
12 |             - gcc-multilib
13 | before_script:
14 | - "cd .."
15 | # make virtual env
16 | - "python /usr/lib/python2.7/dist-packages/virtualenv.py virtualenv;"
17 | - "cd virtualenv;"
18 | - "source bin/activate;"
19 | # install elfesteem
20 | - "git clone https://github.com/serpilliere/elfesteem elfesteem && cd elfesteem && python setup.py install && cd ..;"
21 | # install pyparsing
22 | - "pip install pyparsing"
23 | # install miasm
24 | - "cd ..;git clone -b 'v0.1.1' https://github.com/cea-sec/miasm miasm && cd miasm;"
25 | - "python setup.py build build_ext -I$(pwd)/../virtualenv/include -L$(pwd)/../virtualenv/tinycc"
26 | - "python setup.py install"
27 | - "cd ..;"
28 | # install unicorn
29 | - "git clone https://github.com/unicorn-engine/unicorn && cd unicorn && make && make -C bindings/python && export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(pwd) && cd ..;"
30 | - "cd unicorn/bindings/python && python setup.py install && cd ../../../;"
31 | - "python -c 'import unicorn'"
32 | # install pycparser
33 | - "pip install pycparser"
34 | # install pycrypto
35 | - "pip install pycrypto"
36 | # install Sibyl
37 | - "cd Sibyl && python setup.py install && cd ..;"
38 | # get tests
39 | - "git clone https://github.com/commial/Sibyl-tests sibyl-tests;"
40 | # prepare env
41 | - "export SIBYLTEST=$(pwd)/sibyl-tests"
42 | - "export SIBYL=$(pwd)/Sibyl"
43 | script:
44 | # Sibyl regression tests
45 | - "cd $SIBYL/test && python run_all_tests.py;"
46 | # Sibyl bigger test
47 | - "cd $SIBYLTEST && ./run.sh;"
48 | # Sibyl regression tests with heuristics, may not end in reasonable time
49 | - "cd $SIBYL/test && python run_all_tests.py -f -a;"


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # This file is part of Sibyl.
 2 | # Copyright 2017 Camille MOUGEY <camille.mougey@cea.fr>
 3 | #
 4 | # Sibyl is free software: you can redistribute it and/or modify it
 5 | # under the terms of the GNU General Public License as published by
 6 | # the Free Software Foundation, either version 3 of the License, or
 7 | # (at your option) any later version.
 8 | #
 9 | # Sibyl is distributed in the hope that it will be useful, but WITHOUT
10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 | # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
12 | # License for more details.
13 | #
14 | # You should have received a copy of the GNU General Public License
15 | # along with Sibyl. If not, see <http://www.gnu.org/licenses/>.
16 | 
17 | FROM miasm/tested:latest
18 | MAINTAINER Camille Mougey <camille.mougey@cea.fr>
19 | USER root
20 | 
21 | # Get unicorn
22 | RUN apt-get install -y python-pip &&\
23 |     pip install --pre unicorn
24 | 
25 | # Get Sibyl
26 | ADD https://github.com/cea-sec/Sibyl/archive/master.tar.gz /opt/Sibyl.tar.gz
27 | RUN cd /opt &&\
28 |     tar xzvf Sibyl.tar.gz &&\
29 |     rm Sibyl.tar.gz &&\
30 |     mv Sibyl-master Sibyl &&\
31 |     chown -Rh miasm2 Sibyl &&\
32 |     cd Sibyl &&\
33 |     python setup.py install
34 | 
35 | # Prepare the environment
36 | WORKDIR /opt/Sibyl
37 | USER miasm2
38 | 
39 | CMD ["/usr/local/bin/sibyl"]
40 | 


--------------------------------------------------------------------------------
/bin/sibyl:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python2
 2 | 
 3 | # This file is part of Sibyl.
 4 | # Copyright 2014 - 2017 Camille MOUGEY <camille.mougey@cea.fr>
 5 | #
 6 | # Sibyl is free software: you can redistribute it and/or modify it
 7 | # under the terms of the GNU General Public License as published by
 8 | # the Free Software Foundation, either version 3 of the License, or
 9 | # (at your option) any later version.
10 | #
11 | # Sibyl is distributed in the hope that it will be useful, but WITHOUT
12 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 | # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
14 | # License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with Sibyl. If not, see <http://www.gnu.org/licenses/>.
18 | "Sibyl client"
19 | 
20 | import sys
21 | 
22 | from sibyl.commons import print_table
23 | from sibyl.actions import ACTIONS, load_action
24 | 
25 | if __name__ == "__main__":
26 | 
27 |     if len(sys.argv) < 2:
28 |         print "Usage: %s [action]\n" % sys.argv[0]
29 |         print "Actions:"
30 |         # Sort actions by name and print them
31 |         actions = [(action.name, action.desc)
32 |                    for action in sorted(ACTIONS,
33 |                                         key=lambda action: action.name)]
34 |         print_table(actions,
35 |                     title=False,
36 |                     separator=" ",
37 |                     level=1)
38 |         exit(0)
39 | 
40 |     action = sys.argv[1]
41 | 
42 |     # Try to guess action
43 |     guessed = [act for act in ACTIONS if act.name.startswith(action)]
44 |     if len(guessed) == 1:
45 |         # Action found, redirect to it
46 |         load_action(guessed[0], sys.argv[2:])
47 |     else:
48 |         # Action not found
49 |         if len(guessed) == 0:
50 |             print "Unknown action: %s" % action
51 |         else:
52 |             print "Ambiguous action: %s" % " ".join(guess.name
53 |                                                     for guess in guessed)
54 |         exit(-1)
55 | 
56 | 


--------------------------------------------------------------------------------
/doc/ADD_ABI.md:
--------------------------------------------------------------------------------
 1 | Adding an ABI
 2 | -------------
 3 | 
 4 | ### Overview
 5 | 
 6 | The ABI descriptions in Sibyl are quite simple for now.
 7 | They are located in `sibyl/abi`, and all inherit from `sibyl.abi.abi:ABI`.
 8 | 
 9 | The convention is to regroup them by relative architecture.
10 | 
11 | ### Add an ABI
12 | 
13 | Here is a commented fake ABI, where arguments are first passed by registers, and
14 | then by stack:
15 | 
16 | ```Python
17 | class ABI_CUSTOM(abi.ABIRegsStack):
18 | 
19 | 	# Map argument number -> register name
20 |     regs_mapping = ["A0", "A1", "A2", "A3"]
21 | 	# Associate this ABI to a given architecture, to be used when this
22 | 	# achitecture is recognized
23 |     arch = ["mips32b", "mips32l"]
24 | 
25 | 	# Indicate how the return address has to be set (stack, specific register,
26 | 	# ...)
27 |     def set_ret(self, ret_addr):
28 |         self.jitter.cpu.RA = ret_addr
29 | 
30 | 	# Indicate how an element is push on the stack, for stack based arguments
31 |     def vm_push(self, element):
32 |         self.jitter.push_uint32_t(element)
33 | ```
34 | 
35 | Finally, the class just has to be added to the `sibyl.abi:ABIS` list to be
36 | considered.
37 | 


--------------------------------------------------------------------------------
/doc/ADD_TEST.md:
--------------------------------------------------------------------------------
  1 | Adding a test case
  2 | ------------------
  3 | 
  4 | ### Overview
  5 | 
  6 | Available test cases are in the directory _sibyl/test/_.
  7 | A test has to extend the class _sibyl.test.test.Test_ and provide at least:
  8 | 
  9 | * _func_: the name of the function to test
 10 | * _tests_: a `TestSetTest` instance, composed of (_init_, _check_) methods
 11 |   respectively called to initialize the VM and check the resulting state
 12 | 
 13 | Finally, the class has to be "announced", by being in the `TESTS` (list)
 14 | variable of the module.
 15 | 
 16 | ### Example
 17 | 
 18 | Here is a commented case:
 19 | ```Python
 20 | class TestA64l(test.Test):
 21 | 
 22 |     my_string = "v/"
 23 |     value = 123
 24 | 
 25 |     # Test
 26 |     def init(self):
 27 |         # Alloc a string thanks to a common API, in read only
 28 |         self.my_addr = self._alloc_string(self.my_string)
 29 |         # Set the first argument independently of ABI
 30 |         self._add_arg(0, self.my_addr)
 31 | 
 32 |     def check(self):
 33 |         # Get the result independently of ABI
 34 |         result = self._get_result()
 35 |         # Check the expected result, and verify memory
 36 |         return all([result == self.value,
 37 |                     self._ensure_mem(self.my_addr, self.my_string)])
 38 | 
 39 |     # Properties
 40 |     func = "a64l"
 41 |     tests = TestSetTest(init, check)
 42 | ```
 43 | 
 44 | A more elaborated test can be found in _sibyl/test/ctype.py_.
 45 | 
 46 | ### TestSetTest
 47 | 
 48 | In order to avoid false positive, it is recommended to use multiple tests. For
 49 | instance, testing a `strlen` with at least two strings (different in size) will
 50 | avoid finding function returning always the same number.
 51 | 
 52 | To do so, `TestSetTest` instance can be combined through `&` and `|` operator,
 53 | respectively imposing the success of both tests, or the success of one of them.
 54 | 
 55 | For instance (`TestStrlen`):
 56 | ```Python
 57 |     tests = TestSetTest(init, check) & TestSetTest(init2, check2)
 58 | ```
 59 | 
 60 | Tests to run can also be chosen on the fly, depending on previous test results.
 61 | In fact, `Test.tests` acts as a generator. It is asked to produce a `(init,
 62 | check)` couple only after the previous check. The `func` attribute is read only
 63 | on success, so it can be changed at the same time as the test strategy if needed.
 64 | 
 65 | For instance, `TestIsCharset` in `ctype.py` implements a test strategy based on
 66 | a decision tree.
 67 | 
 68 | ### Subscribing custom tests
 69 | 
 70 | To avoid modifying the sibyl package for each new test, one can add them in the
 71 | configuration file.
 72 | 
 73 | In the section `tests`, one just has to add:
 74 | ```
 75 | [tests]
 76 | name = path/to/source.py
 77 | ```
 78 | 
 79 | Where `source.py` offers a `TESTS` variable.
 80 | 
 81 | Here are two examples of organisation:
 82 | * A file with custom tests implementation, offering `TESTS` at its end
 83 | * A directory with several test implementations, and a single file merging them
 84 |   in its `TESTS` variable
 85 | 
 86 | For more detail on configuration, please consult the relative documentation.
 87 | 
 88 | Once the configuration is done, the new tests should appear in the `Tests
 89 | availables` section of `sibyl config`, and in the help of `--tests` options of
 90 | `sibyl find` under name `name`.
 91 | 
 92 | Without specifying tests (ie. all tests) or with `-t name`, these tests
 93 | will be used in the identification.
 94 | 
 95 | 
 96 | ### Debugging its tests
 97 | 
 98 | A few tricks can be used to debug the tests.
 99 | 
100 | The Python `pdb` module is a good start to obtain and inspect the context in
101 | `init` or `check` methods.
102 | 
103 | As errors are masked, because they are considered as a recognition failure, one
104 | would probably want to avoid this exception catching. To do this, remove error
105 | catching code in `sibyl/engine/miasm.py::MiasmEngine.run` (if you're using one
106 | of the Miasm jitter).
107 | 
108 | Adding jitter log could also help, for instance by adding in this same method:
109 | ```Python
110 | self.jitter.jit.log_mn = True
111 | self.jitter.jit.log_regs = True
112 | ```
113 | 
114 | Please refer to Miasm for more information on this.
115 | 
116 | Finally, it is often easier to:
117 | * deactivate multiprocessing (`find -p`)
118 | * use only your function, on one test (`find -t name addr1`)
119 | * deactivate timeout (`find -t 0`)
120 | 


--------------------------------------------------------------------------------
/doc/ADVANCED_USE.md:
--------------------------------------------------------------------------------
 1 | Advanced use
 2 | ------------
 3 | 
 4 | ### Architecture selection
 5 | 
 6 | The architecture might be automatically recognized from the binary (for
 7 | instance, using ELF or PE information).
 8 | 
 9 | If not, the architecture can be specified / forced using the `--architecture`
10 | argument of `find`.
11 | 
12 | ### Mapping shift
13 | 
14 | It may happen that the file format is not correctly recognized (for instance,
15 | in firmware), and that having the binary mapped at a specific address modifies the
16 | function behavior (i.e. they use absolute addresses).
17 | 
18 | For these cases, the option `--mapping-base` can be used to specify a base
19 | address for the binary mapping.
20 | 
21 | ### Emulation engine
22 | A few emulation engines are supported. Through the `--jitter` option, one can
23 | specify:
24 | 
25 | * `python`: use a full Python emulation
26 | * `tcc` or `gcc`: use a C compiler to JiT code (thanks to Miasm)
27 | * `LLVM`: use LLVM JiT capabilities (thanks to Miasm)
28 | * `qemu`: use the Unicorn (http://www.unicorn-engine.org/) QEMU binding
29 | 
30 | Empirically, the `qemu` jitter happens to be the fastest, but requires an
31 | additional dependency. In addition, it may not support a custom architecture
32 | added to Miasm.
33 | 
34 | The second fastest jitter is `gcc`, because of the repeated call to the same
35 | function (and its cache). In addition, it requires a very common dependency.
36 | 
37 | ### Function heuristic
38 | 
39 | The `sibyl func` action provides a way to find possible function addresses.  It
40 | uses heuristics, which can be individually activated or de-activated using `-e`
41 | and `-d` options.
42 | 
43 | For instance, the `recursive_call` heuristic may take a long time to end,
44 | whereas the `pattern_matching` one is very fast but inaccurate.
45 | 
46 | The full list can be obtained in the `--help` description.
47 | 
48 | If an IDA or GHIDRA installation is detected, it will be used primarily as an
49 | addresses source. They can be enabled / disabled through `-e`/`-d` options.
50 | 
51 | ### Addresses specification
52 | 
53 | The targeted addresses can be specified in three ways:
54 | * using the addresses, such as `sibyl find my_binary 0x11223344 0x22334455 12345`
55 | * using a file, such as `sibyl find my_binary /tmp/addrs`
56 | * using stdin, such as `sibyl func my_binary | sibyl find my_binary -`
57 | 
58 | ### ABI selection
59 | 
60 | The ABI can be specified or overwritten thanks to the `--abi` option of `sibyl
61 | find`.
62 | 
63 | If only one ABI is available for the target architecture, it will be selected
64 | automatically. Otherwise, the command line will ask for more precision.
65 | 
66 | The chosen ABI is indicated if the verbosity level is high enough.
67 | 
68 | ### Linking with other tools
69 | 
70 | Sibyl output is intended to be human readable.
71 | 
72 | But, depending on the usage, some options are provided for an easier linking:
73 | * `sibyl find` can deliver results in JSON format (`-o JSON`)
74 | * `sibyl config` can be requested for direct value, or possible value of a
75 |   configuration element (`-V element`)
76 | * the `sibyl` module can be used as an API
77 | 


--------------------------------------------------------------------------------
/doc/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | Changelog
 2 | ---------
 3 | 
 4 | ### Version 0.2
 5 | 
 6 | * Land the new learning module (initial version from [@graux-pierre](https://github.com/graux-pierre))
 7 | * Add documentation on how-to-learn a function, associated principle and limits
 8 | * Mine function address from IDA in sibyl func
 9 | * Support multiple strategy for trace pruning
10 | * Add support for API stubbing
11 | * Radare2 find script from [@m-1-k-3](https://github.com/m-1-k-3)
12 | * Toshiba MeP support from [@guedou](https://github.com/guedou)
13 | 
14 | Minors :
15 | 
16 | * Various fixes from [@serpilliere](https://github.com/serpilliere)
17 | * Reflect API changes for Miasm v0.1.1
18 | * Add configuration associated with PIN (PIN_ROOT + tracer path)
19 | * Add regression tests for the learning module
20 | * Support expanduser in config
21 | * Restrict `bzero` implementation to avoid false positive
22 | * Adds support for function returning a non-allocated pointer
23 | 
24 | ### Version 0.1
25 | 
26 | This is the initial release, including:
27 | 
28 | * Sibyl as a Python module
29 | * CLI `sibyl`
30 | * IDA stub
31 | * Configuration management
32 | * Support for Miasm, QEMU engine
33 | * Support for a few ABI
34 | * Support for a few functions of _string.h_, _ctype.h_ and _stdlib.h_
35 | * Regression tests
36 | * PoC of a learning module
37 | * Documentation
38 | 


--------------------------------------------------------------------------------
/doc/CONFIG.md:
--------------------------------------------------------------------------------
  1 | Configuration
  2 | -------------
  3 | 
  4 | ### Configuration files
  5 | 
  6 | The default Sibyl configuration can be overwritten with a configuration file.
  7 | 
  8 | These files are taken into account if they are located in any of the locations (and
  9 | in the same order) returned by `sibyl config` when no configuration is
 10 | available:
 11 | ```
 12 | $ sibyl config
 13 | No configuration file found. Supported paths:
 14 | 	/etc/sibyl.conf
 15 | 	/etc/sibyl/sibyl.conf
 16 | 	/usr/local/etc/sibyl.conf
 17 | 	/usr/local/etc/sibyl/sibyl.conf
 18 | 	/home/user/sibyl.conf
 19 | 	/home/user/.sibyl.conf
 20 | ...
 21 | ```
 22 | 
 23 | The resulting configuration file can be obtained through `sibyl config -d`.
 24 | 
 25 | ### Default configuration
 26 | 
 27 | The default configuration is equivalent to:
 28 | 
 29 | ```Python
 30 | [find]
 31 | jit_engine = qemu,miasm
 32 | stubs = $MIASM/os_dep/win_api_x86_32.py,$MIASM/os_dep/linux_stdlib.py
 33 | 
 34 | [tests]
 35 | ctype = $SIBYL/test/ctype.py
 36 | string = $SIBYL/test/string.py
 37 | stdlib = $SIBYL/test/stdlib.py
 38 | 
 39 | [miasm]
 40 | jit_engine = gcc,llvm,tcc,python
 41 | 
 42 | [pin]
 43 | root = $PIN_ROOT
 44 | tracer = $SIBYL/ext/pin_tracer/pin_tracer.so
 45 | 
 46 | [learn]
 47 | prune_strategy = branch
 48 | prune_keep = 1
 49 | prune_keep_max = 5
 50 | 
 51 | [ida]
 52 | idaq64 =
 53 | ```
 54 | 
 55 | ### Section 'find'
 56 | 
 57 | This section is relative to the `find` action.
 58 | 
 59 | The `jit_engine` parameter is a list, separated by ',', of jitter engine
 60 | preference.
 61 | If the first engine is not available, then the second is used, and so on.
 62 | The keyword `miasm` can be used to stand for the Miasm elected engine.
 63 | 
 64 | To know the jitter engine elected, use `sibyl config -V jit_engine`.
 65 | 
 66 | The `stubs` parameter is a list, separated by ',' of Python file path. These
 67 | files can implement stubs (as Python function with the correct name). These
 68 | stubs will be used to emulate external APIs, on supported jitter engines, during
 69 | the `find` action.
 70 | 
 71 | ### Section 'tests'
 72 | 
 73 | This section links to available test sets. By default, only Sibyl ones are
 74 | present.
 75 | 
 76 | The syntax is: `name = path/to/file.py`.
 77 | 
 78 | The list of registered tests can be obtained with
 79 | `sibyl config -V available_tests_keys`.
 80 | 
 81 | For more information on tests, please refer to the corresponding documentation.
 82 | 
 83 | ### Section 'miasm'
 84 | 
 85 | This section highlights options relative to Miasm use.
 86 | 
 87 | The `jit_engine` parameter is a list, separated by ',', of jitter engine
 88 | preference when Miasm is used.
 89 | If the first engine is not available, then the second is used, and so on.
 90 | 
 91 | To know the jitter engine elected, use `sibyl config -V miasm_engine`.
 92 | 
 93 | ### Section 'pin'
 94 | 
 95 | This section contains options relative to PIN use.
 96 | 
 97 | The `root` parameter is the root path of the Intel Pin installation (the one
 98 | containing the `pin` binary).
 99 | By default, the environment variable `$PIN_ROOT` is used (if it exists).
100 | If `pin` is already in the user's path, this parameter can be ignored.
101 | 
102 | The `tracer` parameter is the path of the compiled version of the tracer
103 | `ext/pin_tracer/pin_tracer.cpp`, which will probably look like
104 | `/path/to/sibyl/ext/pin_tracer/pin_tracer.so`.
105 | 
106 | ### Section 'learn'
107 | 
108 | This section contains options relative to the `learn` action.
109 | 
110 | The `prune_strategy` parameter indicates which strategy should be used to prune
111 | the obtained snapshots. Current supported values are `branch`, `keep`, `keepall`.
112 | 
113 | The `prune_keep` value specifies the number of snapshots to keep per pruning.
114 | 
115 | The `prune_keep_max` value specifies the overall maximum number of snapshots to
116 | keep. `0` means no limit.
117 | 
118 | Please refer to the related documentation for more information.
119 | 
120 | ### Section 'ida'
121 | 
122 | This section contains options relative to IDA use.
123 | 
124 | The `idaq64` parameter is the path of the `idaq64` binary. It will be used to
125 | find the executable if it is not in the `$PATH`.
126 | 
127 | ### Section 'ghidra'
128 | 
129 | This section contains options relative to GHIDRA use.
130 | 
131 | The `headless` parameter is the path of the `analyzeHeadless` binary (usually at
132 | `GHIDRA_PATH/support/analyzeHeadless`). It will be used to find the executable
133 | if it is not in the `$PATH`.
134 | 
135 | The `export_function` is the path of the script used to discover function
136 | (launched as a *GHIDRA preScript*). The one provided with Sibyl is named
137 | `ExportFunction.java` (`ext/ghidra/ExportFunction.java`).
138 | 
139 | 
140 | ### Configuration overview
141 | 
142 | Using `sibyl config` without option, one can obtain:
143 | * the configuration file used, if any
144 | * available configuration file paths
145 | * elected jit engine
146 | * loaded Tests, associated to their names
147 | 
148 | ### API
149 | 
150 | Sibyl configuration is available from `sibyl.config:config`.
151 | 
152 | This `Config` instance provides:
153 | * `jit_engine`: Name of engine to use for jit
154 | * `available_tests`: dictionary mapping test group name to corresponding classes
155 | 
156 | ### Path handling
157 | 
158 | These rules are applied to paths:
159 | * the token `$SIBYL` can be used to point to Sibyl installation dir;
160 | * the token `$MIASM` can be used to point to Miasm2 installation dir;
161 | * `~` or `~user` are replaced with the `user` home directory;
162 | * Environment variables are expanded;
163 | 
164 | 


--------------------------------------------------------------------------------
/doc/LEARNING.md:
--------------------------------------------------------------------------------
  1 | Learning
  2 | --------
  3 | 
  4 | ### Overview
  5 | 
  6 | The learning module can be used to automatically create a new test from an
  7 | existing binary.  It requires:
  8 | 
  9 | * A binary implementing the targeted function
 10 | * At least one call to this function with working arguments
 11 | * The targeted function prototype, including involved structures
 12 | 
 13 | With this information, the module will run an instrumented version of the
 14 | function, and collect *semantic side effects*.
 15 | 
 16 | Then, these side effects are abstracted in an architecture agnostic form, close
 17 | to the C language.
 18 | 
 19 | During the recognition phase, this form is derived according to the expected
 20 | memory layout: structure padding, `int` size, etc.
 21 | 
 22 | If there are multiple calls to the function, Sibyl will apply a pruning policy
 23 | to keep only the relevant ones, according to the associated configuration.
 24 | 
 25 | :warning: Depending on the target binary, a few precautions should be taken;
 26 | indeed, depending on the used *tracer*, the binary might be run in an
 27 | unsandboxed environment.
 28 | 
 29 | ### Example
 30 | 
 31 | Let's
 32 | use
 33 | [SoftFp, 2016/12/20 release](https://bellard.org/softfp/softfp-2016-12-20.tar.gz),
 34 | a software floating point library, as an example.
 35 | 
 36 | We will target a few arithmetic functions, and use the regression test
 37 | `softfptest` to retrieve calls to these functions.
 38 | 
 39 | #### Setup
 40 | 
 41 | `softfptest` is slightly modified before being used. Indeed, we only need a few
 42 | runs of the main loop to obtain a fair amount of calls to the targeted
 43 | functions. As a result, the learning process will be faster.
 44 | 
 45 | Function prototypes are also needed:
 46 | 
 47 | ```C
 48 | typedef unsigned long int uint64_t;
 49 | typedef unsigned int uint32_t;
 50 | 
 51 | typedef uint64_t sfloat64;
 52 | 
 53 | typedef enum {
 54 |     RM_RNE,
 55 |     RM_RTZ,
 56 |     RM_RDN,
 57 |     RM_RUP,
 58 |     RM_RMM,
 59 | } RoundingModeEnum;
 60 | 
 61 | 
 62 | sfloat64 add_sf64(sfloat64 a, sfloat64 b, RoundingModeEnum rm, uint32_t *pfflags);
 63 | sfloat64 mul_sf64(sfloat64 a, sfloat64 b, RoundingModeEnum rm, uint32_t *pfflags);
 64 | sfloat64 div_sf64(sfloat64 a, sfloat64 b, RoundingModeEnum rm, uint32_t *pfflags);
 65 | sfloat64 sqrt_sf64(sfloat64 a, RoundingModeEnum rm, uint32_t *pfflags);
 66 | sfloat64 fma_sf64(sfloat64 a, sfloat64 b, sfloat64 c, RoundingModeEnum rm, uint32_t *pfflags);
 67 | sfloat64 min_sf64(sfloat64 a, sfloat64 b, uint32_t *pfflags);
 68 | sfloat64 max_sf64(sfloat64 a, sfloat64 b, uint32_t *pfflags);
 69 | ```
 70 | 
 71 | Also, the *PIN tracer* has to be compiled:
 72 | 
 73 | ```
 74 | $ cd ext/pin_tracer
 75 | $ PIN_ROOT=/opt/... make
 76 | ...
 77 | ```
 78 | 
 79 | And the configuration set accordingly (see [the associated documentation](CONFIG.md) for more detail).
 80 | 
 81 | #### Options
 82 | 
 83 | The target action is `learn`.
 84 | 
 85 | In this example, the *tracer* used is *PIN*, for performance reasons and because
 86 | the target binary is available on a supported architecture. In other cases, the
 87 | *Miasm* tracer is still available.
 88 | 
 89 | A lot of calls return zero (due to the architecture of the regression test). To
 90 | ignore them (they are mostly irrelevant and pollute the resulting tests),
 91 | `--avoid-null` (`-z`) is used.
 92 | 
 93 | The result is dumped in a Python file: `-o float_{NAME}.py`.
 94 | 
 95 | #### Learning
 96 | 
 97 | The complete command line is:
 98 | ```
 99 | $ sibyl learn -v -z {FUNC_NAME} softfptest soft.h -o float_{NAME}.py
100 | ```
101 | 
102 | One may notice that a few warnings are displayed:
103 | ```
104 | WARNING: argument pfflags not used?!
105 | ```
106 | 
107 | Indeed, Sibyl has detected that the `pfflags` argument seems to not be used in
108 | any of the calls kept. This could indicate a lack of call examples, a too
109 | restrictive implementation, or a useless argument.
110 | 
111 | #### Obtained test
112 | 
113 | The resulting test looks like:
114 | ```Python
115 | from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE
116 | 
117 | from sibyl.test.test import TestHeader, TestSetTest
118 | 
119 | class Testmax_sf64(TestHeader):
120 |     '''This is an auto-generated class, using the Sibyl learn module'''
121 | 
122 |     func = "max_sf64"
123 |     header = '''
124 | typedef unsigned long int uint64_t;
125 | ...
126 | '''
127 | 
128 |     def init1(self):
129 | 
130 |         self._add_arg(0, 0) # arg0_a
131 |         self._add_arg(1, 9218868437227405312) # arg1_b
132 | 
133 |     def check1(self):
134 |         return all((
135 |             # Check output value
136 |             self._get_result() == 0x7ff0000000000000,
137 |         ))
138 | 
139 | ...
140 | tests =  TestSetTest(init1, check1) & TestSetTest(init2, check2) & TestSetTest(init3, check3) & TestSetTest(init4, check4) & TestSetTest(init5, check5)
141 | 
142 | TESTS = [Testmax_sf64]
143 | ```
144 | 
145 | When type understanding is needed, the tests are a bit more complicated (from `mul_sf64`):
146 | ```Python
147 |    def init2(self):
148 |         # arg3_pfflags
149 |         base0_ptr_size = self.field_addr("arg3_pfflags", "*(arg3_pfflags)") + self.sizeof("*(arg3_pfflags)")
150 |         base0_ptr = self._alloc_mem(base0_ptr_size, read=True, write=True)
151 | 
152 |         self._add_arg(0, 0) # arg0_a
153 |         self._add_arg(1, 9218868437227405312) # arg1_b
154 |         self._add_arg(2, 0) # arg2_rm
155 |         self._add_arg(3, base0_ptr) # arg3_pfflags
156 | 
157 |         # *(arg3_pfflags) = 0x0
158 |         self._write_mem(base0_ptr, self.pack(0x0, self.sizeof("*(arg3_pfflags)")))
159 | 
160 |         self.base0_ptr = base0_ptr
161 | 
162 |     def check2(self):
163 |         return all((
164 |             # Check output value
165 |             self._get_result() == 0x7ff8000000000000,
166 |             # *(arg3_pfflags) == 0x10
167 |             self._ensure_mem(self.base0_ptr, self.pack(0x10, self.sizeof("*(arg3_pfflags)"))),
168 |         ))
169 | 
170 | ```
171 | 
172 | #### Replay
173 | 
Outputs are directly usable as Sibyl tests. To regroup them in a common test set, one can create a Python script merging the `TESTS` lists from the different scripts, as follows:
175 | 
176 | ```Python
177 | out = []
178 | for f in ["add", "mul", "div", "sqrt", "fma", "min", "max"]:
179 |     execfile("float_%s_sf64.py" % f)
180 |     out += TESTS
181 | 
182 | TESTS = out
183 | ```
184 | 
185 | To inform Sibyl about this new test set, a line is added in the configuration
186 | (see [the associated documentation](CONFIG.md) for more detail):
187 | 
188 | ```Python
189 | [tests]
190 | sfloat = /path/to/float.py
191 | ```
192 | 
193 | The tests are now detected by Sibyl, as stated by this command line:
194 | ```
195 | $ sibyl config
196 | ...
197 | 	sfloat (7)
198 | 		add_sf64, mul_sf64, div_sf64, sqrt_sf64, fma_sf64, min_sf64, max_sf64
199 | ```
200 | 
201 | At this stage, they are replayable on new binaries, for instance on an obfuscated version of `softfptest`:
202 | ```
203 | $ sibyl func softfptest.obfu | sibyl find -v -t sfloat -b ABI_AMD64_SYSTEMV softfptest.obfu -
204 | Guessed architecture: x86_64
205 | Found 405 addresses
206 | Found 7 test cases
207 | 0x004330d0 : max_sf64
208 | 0x0042a9a0 : mul_sf64
209 | 0x00431e30 : min_sf64
210 | 0x0042bc90 : fma_sf64
211 | 0x00430c70 : sqrt_sf64
212 | 0x00429270 : add_sf64
213 | 0x0042ee70 : div_sf64
214 | ```
215 | 
216 | ### Known limitations
217 | 
218 | The learning module has known limitations.
219 | 
As mentioned in the previous section, it is necessary to have a working binary
which calls the function with valid arguments.
222 | 
In addition, this binary must be *traceable*, which may not be the case,
depending on the architecture.
225 | 
The limitations of Sibyl also apply to this module; for instance, for now,
there is no support for floating-point arguments, nor for ABI specificities such
as structure in-lining in arguments.
229 | 
For now, Sibyl does not track, and therefore does not support, functions using
an allocator for their semantic use (for example, a function allocating a new
structure through `malloc`).
233 | 


--------------------------------------------------------------------------------
/doc/TESTING.md:
--------------------------------------------------------------------------------
 1 | Testing
 2 | -------
 3 | 
 4 | ### Integrated regression tests
 5 | 
 6 | Sibyl is provided with a few regression tests.
 7 | 
 8 | To test a Sibyl installation:
 9 | 
10 | ```
$ cd test/find && python run_ctests.py
12 | ...
13 | ```
14 | 
15 | Heuristics can be tested by using `-f` and `-a` options, respectively for
16 | functions and architecture guessing.
17 | 
18 | One should have at least a few functions detected. Depending on your system, the
19 | package `libc6-dev-i386` may be required to build the tests.
20 | 
21 | Depending on the current Sibyl state, some functions can be misdetected or
22 | absent.
23 | 
24 | ### External regression tests
25 | 
26 | Sibyl commits go through a CI process, which includes tests on real programs.
These tests are available
in the [Sibyl-tests](https://github.com/commial/Sibyl-tests) repository.
29 | 
30 | 
31 | ### Learning tests
32 | 
Like the Learning documentation, this part will completely change soon.
34 | 


--------------------------------------------------------------------------------
/doc/img/ghidra_screen.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cea-sec/Sibyl/14866eb8ef3a65fcc4535faaf76eb42faf64d313/doc/img/ghidra_screen.png


--------------------------------------------------------------------------------
/doc/img/ghidra_screen2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cea-sec/Sibyl/14866eb8ef3a65fcc4535faaf76eb42faf64d313/doc/img/ghidra_screen2.png


--------------------------------------------------------------------------------
/doc/img/ida_screen.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cea-sec/Sibyl/14866eb8ef3a65fcc4535faaf76eb42faf64d313/doc/img/ida_screen.png


--------------------------------------------------------------------------------
/ext/ghidra/ExportFunction.java:
--------------------------------------------------------------------------------
 1 | import ghidra.program.model.address.Address;
 2 | import ghidra.program.model.listing.*;
 3 | import ghidra.app.util.headless.HeadlessScript;
 4 | 
 5 | /*
 6 |  * Print out found functions' address. Do not consider external functions.
 7 |  * This script can be run as a preScript, Ghidra already have functions at this point.
 8 |  */
 9 | public class ExportFunction extends HeadlessScript {
10 | 
11 | 	@Override
12 | 	public void run() throws Exception {
13 | 		// Find and print found functions
14 | 		Listing listing = currentProgram.getListing();
15 | 		FunctionIterator iter = listing.getFunctions(true);
16 | 		while (iter.hasNext() && !monitor.isCancelled()) {
17 | 			Function f = iter.next();
18 | 			if (f.isExternal()) {
19 | 				continue;
20 | 			}
21 | 			/*
22 | 			 * Let's consider already labeled functions
23 | 			String fName = f.getName();
24 | 			if (!fName.startsWith("FUN_")) {
25 | 				continue;
26 | 			}
27 | 			*/
28 | 			Address entry = f.getEntryPoint();
29 | 			if (entry != null) {
30 | 				println(String.format("0x%x", entry.getOffset()));
31 | 			}
32 | 		}
33 | 		setHeadlessContinuationOption(HeadlessContinuationOption.ABORT_AND_DELETE);
34 | 
35 | 	}
36 | }
37 | 


--------------------------------------------------------------------------------
/ext/ghidra/find.py:
--------------------------------------------------------------------------------
  1 | #Sibyl "find" launcher
  2 | #@author MOUGEY Camille
  3 | #@category FunctionID
  4 | 
  5 | import json
  6 | import os
  7 | import subprocess
  8 | import time
  9 | 
 10 | # Find SIBYL find.py
 11 | identify_binary = "sibyl"
 12 | env = os.environ
 13 | 
 14 | # Sibyl launching
 15 | def exec_cmd(command_line):
 16 |     """Launch the command line @command_line"""
 17 |     global env
 18 |     process = subprocess.Popen(command_line,
 19 |                                stdout=subprocess.PIPE,
 20 |                                env=env)
 21 | 
 22 |     result, _ = process.communicate()
 23 | 
 24 |     if process.returncode != 0:
 25 |         # An error occured
 26 |         raise RuntimeError("An error occured, please consult the console")
 27 | 
 28 |     return result
 29 | 
 30 | def available_tests():
 31 |     """Get the available tests"""
 32 |     global identify_binary
 33 |     command_line = [identify_binary, "config", "-V", "available_tests_keys"]
 34 |     return eval(exec_cmd(command_line))
 35 | 
 36 | def parse_output(command_line):
 37 |     """Parse the output of find.py"""
 38 |     result = exec_cmd(command_line)
 39 | 
 40 |     for result in json.loads(result)["results"]:
 41 |         address, candidates = result["address"], result["functions"]
 42 |         if candidates:
 43 |             yield address, map(str, candidates)
 44 | 
 45 | 
 46 | def handle_found(addr, candidates):
 47 |     """Callback when @candidates have been found for a given address @addr.
 48 |     Print and add an GHIDRA comment at @addr
 49 |     @addr: address of the function analyzed
 50 |     @candidates: list of string of possible matched functions
 51 |     """
 52 |     print("Found %s at %s" % (",".join(candidates), hex(addr)))
 53 |     listing = currentProgram.getListing()
 54 |     codeUnit = listing.getCodeUnitAt(toAddr(addr))
 55 |     codeUnit.setComment(codeUnit.PLATE_COMMENT, "Sibyl - %s" % ",".join(candidates))
 56 | 
 57 | 
def launch_on_funcs(architecture, abi, funcs, test_set, map_addr=None,
                    jitter=None, buf_size=2000):
    """Launch identification on functions.
    @architecture: str standing for current architecture
    @abi: str standing for expected ABI
    @funcs: list of function addresses to check, as strings (they are appended
    as-is to the Sibyl command line, e.g. "0x401000")
    @test_set: list of test sets to run
    Optional arguments:
    @map_addr: (optional) the base address where the binary has to be loaded if
    format is not recognized; only taken into account when it is an int
    @jitter: (optional) jitter engine to use (gcc, tcc, llvm, python, qemu)
    @buf_size: (optional) number of addresses to pass to each instance of sibyl.
    High number means speed; low number means less resources and higher
    frequency of report

    Return a dictionary mapping each address with at least one match to its
    list of candidate names.
    Raise a ValueError if `identify_binary` is empty; errors raised while
    running Sibyl (see parse_output) are propagated.
    """

    # Check Sibyl availability
    global identify_binary
    if not identify_binary:
        raise ValueError("A valid Sibyl path to find.py must be supplied")

    # Get binary information
    filename = str(currentProgram.getExecutablePath())
    nb_func = len(funcs)

    # Prepare run
    starttime = time.time()
    nb_found = 0
    add_map = []
    if isinstance(map_addr, int):
        add_map = ["-m", hex(map_addr)]

    # Launch identification
    monitor.setMessage("Launch identification on %d function(s)" % nb_func)
    # Options shared by every Sibyl invocation; results are requested as JSON
    options = ["-a", architecture, "-b", abi, "-o", "JSON"]
    for test_name in test_set:
        options += ["-t", test_name]
    if jitter is not None:
        options += ["-j", jitter]
    options += add_map
    res = {}

    # Process the addresses by chunks of at most @buf_size elements
    for i in xrange(0, len(funcs), buf_size):
        # Build command line
        addresses = funcs[i:i + buf_size]
        command_line = [identify_binary, "find"]
        command_line += options
        command_line += [filename]
        command_line += addresses

        # Call Sibyl and keep only stdout
        for addr, candidates in parse_output(command_line):
            handle_found(addr, candidates)
            res[addr] = candidates
            nb_found += 1

        # Print current status and estimated time
        curtime = (time.time() - starttime)
        # Number of addresses handled so far (the last chunk may be shorter)
        maxi = min(i + buf_size, len(funcs))
        # Linear extrapolation of the total time from the time spent so far
        estimatedtime = (curtime * nb_func) / maxi
        remaintime = estimatedtime - curtime
        monitor.setMessage("Current: %.02f%% (FUN_%s)| Estimated time remaining: %.02fs" % (((100. /nb_func) * maxi),
                                                                                            addresses[-1],
                                                                                            remaintime))
        # Stop between two chunks if the user cancelled the task
        if monitor.isCancelled():
            print "Early break asked by the user"
            break

    print "Finished ! Found %d candidates in %.02fs" % (nb_found, time.time() - starttime)
    return res
128 | 
129 | 
# Ghidra language identifier (without its trailing ABI part) -> Miasm
# architecture name
GHIDRAArch2MiasmArch = {
    "x86/little/32": "x86_32",
}

# (Miasm architecture name, Ghidra ABI part) -> Sibyl ABI name
GHIDRAABI2SibylABI = {
    ("x86_32", "default"): "ABIStdCall_x86_32",
}


def _main():
    """Script entry point: ask for the target and the test sets, then launch
    Sibyl on the selected function(s).

    The script stops early, after informing the user through a popup, when the
    architecture or the ABI of the current program is not supported, or when
    no function contains the current address.
    """
    # The language is split on its last "/": processor part, then ABI part
    processor_name, abi = str(currentProgram.getLanguage()).rsplit("/", 1)
    m_arch = GHIDRAArch2MiasmArch.get(processor_name, None)
    # The lookup result must be tested here: `processor_name` is never None
    if m_arch is None:
        popup("Unsupported architecture: %s" % processor_name)
        return

    s_abi = GHIDRAABI2SibylABI.get((m_arch, abi), None)
    if s_abi is None:
        popup("Unsupported ABI: (%s, %s)" % (m_arch, abi))
        return

    monitor.setMessage("Get functions address...")
    cur, whole = "Current function", "Whole program"
    choice = askChoice("Target", "Target function(s)", [cur, whole], cur)
    if choice == cur:
        current_func = getFunctionContaining(currentAddress)
        if current_func is None:
            # The cursor is not located inside a known function
            popup("No function found at the current address")
            return
        addrs = ["0x%x" % current_func.entryPoint.getOffset()]
    else:
        addrs = []
        for func in currentProgram.getListing().getFunctions(True):
            if func.isExternal():
                continue

            # Already labeled functions are analyzed too. To ignore them, skip
            # every function whose name does not start with "FUN_".

            addr = func.getEntryPoint()
            if addr is not None:
                addrs.append("0x%x" % addr.getOffset())

    monitor.setMessage("Get available tests...")
    tests = available_tests()
    # Every test set is proposed, and selected by default
    testset = askChoices("Test set", "Testsets to enable", tests, tests)

    launch_on_funcs(m_arch, s_abi, addrs, testset)


if __name__ == "__main__":
    _main()
178 | 


--------------------------------------------------------------------------------
/ext/pin_tracer/Makefile:
--------------------------------------------------------------------------------
# Build the Pin tracer tool (pin_tracer.so) using the makefiles of the Pin kit
# located at $(PIN_ROOT).

# The Pin configuration and default rules are only included when PIN_ROOT is set
ifneq ("$(PIN_ROOT)", "")
	CONFIG_ROOT := $(PIN_ROOT)/source/tools/Config
	include $(CONFIG_ROOT)/makefile.config
	include $(TOOLS_ROOT)/Config/makefile.default.rules
endif

# Default target
all: intel64

# Build the intel64 tool, then move the resulting pin_tracer.so next to this
# Makefile; fail with an explicit error when PIN_ROOT is not set
intel64:
ifeq ("$(PIN_ROOT)", "")
 # PIN_ROOT has to point the root directory of pin-3.0-76991-gcc-linux
	$(error PIN_ROOT variable is not set)
endif
	$(MAKE) TARGET=intel64 obj-intel64/pin_tracer.so
	mv obj-intel64/pin_tracer.so .

# Remove the build directory and the built tool
clean:
	rm -rf obj-intel64 pin_tracer.so
19 | 


--------------------------------------------------------------------------------
/ext/pin_tracer/pin_tracer.cpp:
--------------------------------------------------------------------------------
  1 | /* The file should be compiled with pin-3.0-76991-gcc-linux */
  2 | 
  3 | #include <sys/types.h>
  4 | #include <unistd.h>
  5 | #include <stdio.h>
  6 | #include <stdlib.h>
  7 | #include <string.h>
  8 | #include "pin.H"
  9 | 
/* Set pin option and global variable for the address of the traced function */
 11 | KNOB<uint64_t> KnobFunctionAddr(KNOB_MODE_WRITEONCE, "pintool",
 12 | 	"a", "0x0", "function address to trace");
 13 | uint64_t functionAddr = 0x0;
 14 | 
/* Set pin option and global variable for the output file */
 16 | KNOB<string> KnobOutputFile(KNOB_MODE_WRITEONCE, "pintool",
 17 | 	"o", "out.trace", "outputfile");
 18 | FILE * trace;
 19 | 
/* Boolean variable. True while the traced function is being executed (the tracer then has to produce output), false otherwise. */
 21 | unsigned char instrument = 0;
 22 | 
 23 | /* Value of RSP at the beginning of each function execution.
 24 |    It is used to detect the end of the function (initial rsp lower than current rsp)*/
 25 | ADDRINT rspInit;
 26 | 
 27 | 
 28 | /* Error checking functions */
 29 | 
/* Evaluate `func` (an expression returning a number) once; if the result is
   negative, report the failure with perror(func_name) and abort the process.
   The expansion is a braced block, to be used as a statement. */
#define check_error(func, func_name) { \
if((func) < 0){ \
	perror(func_name); \
	abort(); \
}}

/* Shortcuts of check_error for the return value of fprintf / snprintf */
#define check_fprintf_error(func) check_error(func, "fprintf")
#define check_snprintf_error(func) check_error(func, "snprintf")
 38 | 
 39 | 
 40 | /* Print a memory read record */
 41 | VOID RecordMemRead(VOID * ip, VOID * addr, UINT32 size, char* disass)
 42 | {
 43 | 	if( instrument ) {
 44 | 		switch(size){
 45 | 		case 1: check_fprintf_error(fprintf(trace,"R %p %x %x %p\n", addr, size, *(uint8_t*)addr, ip)); break;
 46 | 		case 2: check_fprintf_error(fprintf(trace,"R %p %x %x %p\n", addr, size, *(uint16_t*)addr, ip));break;
 47 | 		case 4: check_fprintf_error(fprintf(trace,"R %p %x %x %p\n", addr, size, *(uint32_t*)addr, ip));break;
 48 | 		case 8: check_fprintf_error(fprintf(trace,"R %p %x %lx %p\n", addr, size, *(uint64_t*)addr, ip));break;
 49 | 		case 16: check_fprintf_error(fprintf(trace,"R %p %x %lx%016lx %p\n", addr, size, *(((uint64_t*)addr)+1), *(uint64_t*)addr, ip));break;
 50 | 		case 32: check_fprintf_error(fprintf(trace,"R %p %x %lx%032lx %p\n", addr, size, *(((uint64_t*)addr)+1), *(uint64_t*)addr, ip));break;
 51 | 		default: fprintf(stderr, "abort: read size is not managed ([%p]%u @%p(%s))\n",addr,size,ip,disass);abort();
 52 | 		}
 53 | 	}
 54 | }
 55 | 
 56 | /* Global variables used to communicate between RecordMemWriteContent and RecordMemWriteAddr functions */
 57 | VOID * last_addr;
 58 | UINT32 last_size;
 59 | 
 60 | /* Print the written value and the address of the instruction of a memory write record */
 61 | VOID RecordMemWriteContent(VOID * ip)
 62 | {
 63 | 	if( instrument ) {
 64 | 		switch(last_size){
 65 | 		case 1: check_fprintf_error(fprintf(trace,"%x %p\n", *(uint8_t*)last_addr, ip));break;
 66 | 		case 2: check_fprintf_error(fprintf(trace,"%x %p\n", *(uint16_t*)last_addr, ip));break;
 67 | 		case 4: check_fprintf_error(fprintf(trace,"%x %p\n", *(uint32_t*)last_addr, ip));break;
 68 | 		case 8: check_fprintf_error(fprintf(trace,"%lx %p\n", *(uint64_t*)last_addr, ip));break;
 69 | 		case 16: check_fprintf_error(fprintf(trace,"%lx%016lx %p\n", *(((uint64_t*)last_addr)+1), *(uint64_t*)last_addr, ip));break;
 70 | 		case 32: check_fprintf_error(fprintf(trace,"%lx%032lx %p\n", *(((uint64_t*)last_addr)+1), *(uint64_t*)last_addr, ip));break;
 71 | 		default: fprintf(stderr, "abort: write size is not managed ([%p]%i @%p)\n", last_addr, last_size, ip); abort();
 72 | 		}
 73 | 	}
 74 | }
 75 | 
 76 | /* Print the written address and size of a memory write record */
 77 | VOID RecordMemWriteAddr(VOID * ip, VOID * addr, UINT32 size)
 78 | {
 79 | 	if( instrument ) {
 80 | 		switch(size){
 81 | 		case 1: check_fprintf_error(fprintf(trace,"W %p %x ", addr, size));break;
 82 | 		case 2: check_fprintf_error(fprintf(trace,"W %p %x ", addr, size));break;
 83 | 		case 4: check_fprintf_error(fprintf(trace,"W %p %x ", addr, size));break;
 84 | 		case 8: check_fprintf_error(fprintf(trace,"W %p %x ", addr, size));break;
 85 | 		case 16: check_fprintf_error(fprintf(trace,"W %p %x ", addr, size));break;
 86 | 		case 32: check_fprintf_error(fprintf(trace,"W %p %x ", addr, size));break;
 87 | 		default: fprintf(stderr, "abort: write size is not managed ([%p]%i @%p)\n", addr, size, ip); abort();
 88 | 		}
 89 | 		last_size = size;
 90 | 		last_addr = addr;
 91 | 	}
 92 | }
 93 | 
 94 | VOID DumpRegsI(VOID * ip, ADDRINT rax, ADDRINT rbx, ADDRINT rcx, ADDRINT rdx, ADDRINT rsi, ADDRINT rdi, ADDRINT rbp, ADDRINT rsp, ADDRINT r8, ADDRINT r9, ADDRINT r10, ADDRINT r11, ADDRINT r12, ADDRINT r13, ADDRINT r14, ADDRINT r15) {
 95 | 	/* If the function is not already begun and IP is at its first instruction,
 96 | 	   we log input registers */
 97 | 	if( !instrument && (uint64_t)ip == functionAddr ) {
 98 | 		instrument = 1;
 99 | 		check_fprintf_error(fprintf(trace,"I %lx %lx %lx %lx %lx %lx %lx %lx %lx %lx %lx %lx %lx %lx %lx %lx\n", rax, rbx, rcx, rdx, rsi, rdi, rbp, rsp, r8, r9, r10, r11, r12, r13, r14, r15));
100 | 		rspInit = rsp;
101 | 	}
102 | }
103 | 
104 | VOID DumpCall(VOID* ip, ADDRINT rsp) {
105 | 	/* If the function was running*/
106 | 	if( instrument )
107 | 		fprintf(trace, "CALL %p %p\n", ip, (void*) rsp);
108 | }
109 | 
110 | VOID DumpRet(VOID* ip, ADDRINT rsp, ADDRINT rax) {
111 | 	/* If the function was running*/
112 | 	if( instrument )
113 | 		fprintf(trace, "RET %p %lx %lx\n", ip, rsp, rax);
114 | }
115 | 
116 | 
117 | VOID DumpRegsO(VOID * ip, ADDRINT rax, ADDRINT rbx, ADDRINT rcx, ADDRINT rdx, ADDRINT rsi, ADDRINT rdi, ADDRINT rbp, ADDRINT rsp, ADDRINT r8, ADDRINT r9, ADDRINT r10, ADDRINT r11, ADDRINT r12, ADDRINT r13, ADDRINT r14, ADDRINT r15) {
118 | 	/* If the function was running*/
119 | 	if( instrument ) {
120 | 		/* Log the executed instruction address */
121 | 		fprintf(trace,"@ %p\n", ip);
122 | 
123 | 		/* If the function is finished */
124 | 		if( rspInit < rsp ) {
125 | 			/* Log output registers */
126 | 			instrument = 0;
127 | 			check_fprintf_error(fprintf(trace,"O %lx %lx %lx %lx %lx %lx %lx %lx %lx %lx %lx %lx %lx %lx %lx %lx\n", rax, rbx, rcx, rdx, rsi, rdi, rbp, rsp, r8, r9, r10, r11, r12, r13, r14, r15));
128 | 		}
129 | 	}
130 | }
131 | 
/* Instrumentation callback registered through INS_AddInstrumentFunction.
 * For the instruction `ins`, it inserts calls to:
 *  - DumpRegsI before the instruction (detection of the function start);
 *  - DumpRegsO once the instruction is done (executed address, function end);
 *  - DumpCall / DumpRet for call / ret instructions;
 *  - RecordMemRead, RecordMemWriteAddr and RecordMemWriteContent for its
 *    memory operands.
 *
 * ins: instruction to instrument
 * v:   user value given at registration, unused
 */
VOID Instruction(INS ins, VOID *v)
{
	INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)DumpRegsI,
				   IARG_INST_PTR,
				   IARG_REG_VALUE, REG_RAX,
				   IARG_REG_VALUE, REG_RBX,
				   IARG_REG_VALUE, REG_RCX,
				   IARG_REG_VALUE, REG_RDX,
				   IARG_REG_VALUE, REG_RSI,
				   IARG_REG_VALUE, REG_RDI,
				   IARG_REG_VALUE, REG_RBP,
				   IARG_REG_VALUE, REG_RSP,
				   IARG_REG_VALUE, REG_R8,
				   IARG_REG_VALUE, REG_R9,
				   IARG_REG_VALUE, REG_R10,
				   IARG_REG_VALUE, REG_R11,
				   IARG_REG_VALUE, REG_R12,
				   IARG_REG_VALUE, REG_R13,
				   IARG_REG_VALUE, REG_R14,
				   IARG_REG_VALUE, REG_R15,
				   IARG_END);

	/* Choose where DumpRegsO is called: after the instruction when it falls
	   through, on the taken branch for branch / call / sysret / ret
	   instructions, before the instruction otherwise */
	IPOINT ipt;
	if(INS_HasFallThrough(ins)) ipt = IPOINT_AFTER;
	else if(INS_IsBranchOrCall(ins) || INS_IsSysret(ins) || INS_IsRet(ins)) ipt = IPOINT_TAKEN_BRANCH;
	else ipt = IPOINT_BEFORE;

	INS_InsertCall(ins, ipt, (AFUNPTR)DumpRegsO,
				   IARG_INST_PTR,
				   IARG_REG_VALUE, REG_RAX,
				   IARG_REG_VALUE, REG_RBX,
				   IARG_REG_VALUE, REG_RCX,
				   IARG_REG_VALUE, REG_RDX,
				   IARG_REG_VALUE, REG_RSI,
				   IARG_REG_VALUE, REG_RDI,
				   IARG_REG_VALUE, REG_RBP,
				   IARG_REG_VALUE, REG_RSP,
				   IARG_REG_VALUE, REG_R8,
				   IARG_REG_VALUE, REG_R9,
				   IARG_REG_VALUE, REG_R10,
				   IARG_REG_VALUE, REG_R11,
				   IARG_REG_VALUE, REG_R12,
				   IARG_REG_VALUE, REG_R13,
				   IARG_REG_VALUE, REG_R14,
				   IARG_REG_VALUE, REG_R15,
				   IARG_END);


	if (INS_IsCall(ins)) {
		// We cannot use INS_DirectBranchOrCallTargetAddress -> CALL RAX
		INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)DumpCall,
			       IARG_INST_PTR,
			       IARG_REG_VALUE, REG_RSP,
			       IARG_END);

	}

	if (INS_IsRet(ins)) {
		INS_InsertCall(ins, IPOINT_TAKEN_BRANCH, (AFUNPTR)DumpRet,
			       IARG_INST_PTR,
			       IARG_REG_VALUE, REG_RSP,
			       IARG_REG_VALUE, REG_RAX,
			       IARG_END);

	}

	/* Iterate over each memory operand of the instruction */
	UINT32 memOperands = INS_MemoryOperandCount(ins);
	for (UINT32 memOp = 0; memOp < memOperands; memOp++) {

		/* If it is a read operand, log it */
		if (INS_MemoryOperandIsRead(ins, memOp)) {
			/* The disassembly is duplicated with strdup and never freed here.
			   NOTE(review): presumably because the string has to remain
			   valid for every later call of RecordMemRead -- confirm */
			INS_InsertPredicatedCall(ins, IPOINT_BEFORE, (AFUNPTR)RecordMemRead,
									 IARG_INST_PTR,
									 IARG_MEMORYOP_EA, memOp,
									 IARG_MEMORYREAD_SIZE,
									 IARG_PTR,strdup(INS_Disassemble(ins).c_str()),
									 IARG_END);
		}

		/* If it is a write operand, log it */
		if (INS_MemoryOperandIsWritten(ins, memOp)) {
			/* A write record is produced in two steps: the address and size
			   are logged before the instruction (RecordMemWriteAddr), the
			   written value is read back once the instruction is done
			   (RecordMemWriteContent). This `ipt` shadows the outer one. */
			IPOINT ipt;
			ipt = INS_HasFallThrough(ins)? IPOINT_AFTER : IPOINT_TAKEN_BRANCH;
			INS_InsertPredicatedCall(ins, ipt, (AFUNPTR)RecordMemWriteContent,
									 IARG_INST_PTR,
									 IARG_CALL_ORDER, CALL_ORDER_FIRST,
									 IARG_END);
			INS_InsertPredicatedCall(ins, IPOINT_BEFORE, (AFUNPTR)RecordMemWriteAddr,
									 IARG_INST_PTR,
									 IARG_MEMORYOP_EA, memOp,
									 IARG_MEMORYWRITE_SIZE,
									 IARG_CALL_ORDER, CALL_ORDER_LAST,
									 IARG_END);
		}
	}
}
230 | 
231 | void InstImage(IMG img, void *v)
232 | {
233 | 	fprintf(trace,"IMG %s\n", IMG_Name(img).c_str());
234 | 
235 | 	for (SYM sym = IMG_RegsymHead(img); SYM_Valid(sym); sym = SYM_Next(sym)) {
236 | 		fprintf(trace,"S %p %s\n", (void *) SYM_Address(sym),
237 | 			SYM_Name(sym).c_str());
238 | 	}
239 | }
240 | 
/* Callback registered through PIN_AddFiniFunction: close the trace file.
 *
 * code: value given by Pin, unused
 * v:    user value given at registration, unused
 */
VOID Fini(INT32 code, VOID *v)
{
	fclose(trace);
}
245 | 
246 | INT32 Usage()
247 | {
248 | 	PIN_ERROR( "This Pintool prints a trace of the read/write accesses and executed instructions during the execution of a specific function.\n"
249 | 			  + KNOB_BASE::StringKnobSummary() + "\n");
250 | 	return -1;
251 | }
252 | 
/* Entry point of the Pin tool: parse the command line, open the trace file,
 * register the instrumentation callbacks and start the traced program.
 *
 * Return the value of Usage() if the command line is invalid. The process is
 * aborted if the trace file cannot be opened.
 */
int main(int argc, char *argv[])
{
	if (PIN_Init(argc, argv)) return Usage();

	/* Address of the function to trace ("-a" option) */
	functionAddr = KnobFunctionAddr.Value();

	/* Output file ("-o" option) */
	trace = fopen(KnobOutputFile.Value().c_str(), "w");
	if( trace == NULL ){
		perror("fopen");
		abort();
	}

	INS_AddInstrumentFunction(Instruction, 0);
	PIN_AddFiniFunction(Fini, 0);

	/* Symbols are needed by InstImage to log the name of each symbol */
	PIN_InitSymbols();
	IMG_AddInstrumentFunction(InstImage, 0);

	PIN_StartProgram();

	/* The code below is only a safety net in case PIN_StartProgram returns */
	fprintf(stderr, "This point should never be reached");
	abort();

	return 0;
}
278 | 


--------------------------------------------------------------------------------
/ext/radare2/r2_find.py:
--------------------------------------------------------------------------------
  1 | # This file is part of Sibyl.
  2 | # Copyright 2014 Camille MOUGEY <camille.mougey@cea.fr>
  3 | #
  4 | # Sibyl is free software: you can redistribute it and/or modify it
  5 | # under the terms of the GNU General Public License as published by
  6 | # the Free Software Foundation, either version 3 of the License, or
  7 | # (at your option) any later version.
  8 | #
  9 | # Sibyl is distributed in the hope that it will be useful, but WITHOUT
 10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 11 | # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
 12 | # License for more details.
 13 | #
 14 | # You should have received a copy of the GNU General Public License
 15 | # along with Sibyl. If not, see <http://www.gnu.org/licenses/>.
 16 | #
 17 | # Ported to radare2 - Michael Messner @s3cur1ty_de
 18 | 
 19 | import r2pipe
 20 | import json
 21 | import os
 22 | import sys
 23 | import time
 24 | import re
 25 | import subprocess
 26 | 
 27 | # Sibyl launching
 28 | def exec_cmd(command_line):
 29 |     """Launch the command line @command_line"""
 30 |     global env
 31 |     process = subprocess.Popen(command_line,
 32 |                                stdout=subprocess.PIPE,
 33 |                                env=env)
 34 | 
 35 |     result, _ = process.communicate()
 36 | 
 37 |     if process.returncode != 0:
 38 |         # An error occured
 39 |         raise RuntimeError("[-] An error occured, please consult the console")
 40 | 
 41 |     return result
 42 | 
 43 | def available_tests():
 44 |     """Get the available tests"""
 45 |     global identify_binary
 46 |     command_line = [identify_binary, "config", "-V", "available_tests_keys"]
 47 |     return eval(exec_cmd(command_line))
 48 | 
 49 | 
 50 | def parse_output(command_line):
 51 |     """Parse the output of find.py"""
 52 |     result = exec_cmd(command_line)
 53 | 
 54 |     for result in json.loads(result)["results"]:
 55 |         address, candidates = result["address"], result["functions"]
 56 |         if candidates:
 57 |             yield address, map(str, candidates)
 58 | 
 59 | def handle_found(addr, candidates):
 60 |     """Callback when @candidates have been found for a given address @addr.
 61 |     Print and rename the function at @addr
 62 |     @addr: address of the function analyzed
 63 |     @candidates: list of string of possible matched functions
 64 |     """
 65 |     print "[+] Found %s at %s" % (",".join(candidates), hex(addr))
 66 |     #rename the functions in r2
 67 |     r2.cmd('afn ' + ",".join(candidates) +'_sibyl ' +hex(addr))
 68 |     # setup flags in r2
 69 |     r2.cmd('f ' + ",".join(candidates) +'_sibyl @ ' +hex(addr))
 70 |     #write IDA pro batch file to be able to import the stuff to ida
 71 |     #in IDA use <shift>+<F2> and copy the content from the generated file
 72 |     f.write("MakeName(" +hex(addr) +", \"" +",".join(candidates) +'_sibyl' +"\");\n")
 73 | 
 74 | 
def launch_on_funcs(architecture, abi, funcs, test_set, map_addr=None,
                    jitter=None, buf_size=2000):
    """Launch identification on functions.
    @architecture: str standing for current architecture
    @abi: str standing for expected ABI
    @funcs: list of function addresses to check; they are appended as-is to
    the Sibyl command line
    @test_set: list of test sets to run
    Optional arguments:
    @map_addr: (optional) the base address where the binary has to be loaded if
    format is not recognized; only taken into account when it is an int
    @jitter: (optional) jitter engine to use (gcc, tcc, llvm, python, qemu)
    @buf_size: (optional) number of addresses to pass to each instance of sibyl.
    High number means speed; low number means less resources and higher
    frequency of report

    Return a dictionary mapping each address with at least one match to its
    list of candidate names.
    Raise a ValueError if `identify_binary` is empty; errors raised while
    running Sibyl (see parse_output) are propagated.
    """

    # Check Sibyl availability
    global identify_binary
    if not identify_binary:
        raise ValueError("[-] A valid Sibyl path to find.py must be supplied")

    # Get binary information
    bin_details=r2.cmdj('oj')
    filename = bin_details[0]['uri']
    # NOTE(review): the function count comes from the r2 command `aflc`, not
    # from len(funcs); the progress below presumably assumes that @funcs holds
    # every function known to radare2 -- confirm against the caller
    nb_func = r2.cmd('aflc')

    # Prepare run
    starttime = time.time()
    nb_found = 0
    add_map = []
    if isinstance(map_addr, int):
        add_map = ["-m", hex(map_addr)]

    # Launch identification
    nb_func = int(nb_func)
    print "[*] Launch identification on %d function(s)" % nb_func
    # Options shared by every Sibyl invocation; results are requested as JSON
    options = ["-a", architecture, "-b", abi, "-o", "JSON"]
    for test_name in test_set:
        options += ["-t", test_name]
    if jitter is not None:
        options += ["-j", jitter]
    options += add_map
    res = {}

    # Process the addresses by chunks of at most @buf_size elements
    for i in xrange(0, len(funcs), buf_size):
        # Build command line
        addresses = funcs[i:i + buf_size]
        command_line = [identify_binary, "find"]
        command_line += options
        command_line += [filename]
        command_line += addresses

        # Call Sibyl and keep only stdout
        for addr, candidates in parse_output(command_line):
            handle_found(addr, candidates)
            res[addr] = candidates
            nb_found += 1

        # Print current status and estimated time
        curtime = (time.time() - starttime)
        # Number of addresses handled so far (the last chunk may be shorter)
        maxi = min(i + buf_size, len(funcs))
        # Linear extrapolation of the total time from the time spent so far
        estimatedtime = (curtime * nb_func) / maxi
        remaintime = estimatedtime - curtime
        print "[*] Current: %.02f%% (sub_%s)| Estimated time remaining: %.02fs" % (((100. /nb_func) * maxi),
                                                                                     addresses[-1],
                                                                                     remaintime)

    print "[*] Finished ! Found %d candidates in %.02fs" % (nb_found, time.time() - starttime)
    return res
144 | 
def architecture(bin_info):
  """Map the binary information given by radare2 to Sibyl settings.

  @bin_info: dictionary whose 'bin' entry provides the 'arch', 'endian' and
  'bits' values of the binary
  Return the pair (architecture name, ABI name) to pass to Sibyl.
  Raise a ValueError if no architecture name is known for the binary.

  For ARM, Thumb mode cannot be guessed: set a global `armt = True` to select
  the Thumb variants.
  """
  processor_name = bin_info['bin']['arch']
  endian = bin_info['bin']['endian']
  bits = bin_info['bin']['bits']
  print("[*] Architecture: %s / Endianess: %s / Bits: %s" % (processor_name, endian, bits))

  # Stay None when the target is not handled, instead of being left unbound
  name = None
  abi = None
  is_big = (endian == "big")

  if processor_name == "arm":
    # TODO ARM/thumb
    # hack for thumb: set armt = True in globals :/
    is_armt = globals().get('armt', False)

    abi = "ABI_ARM"
    if is_armt:
      name = "armtb" if is_big else "armtl"
    else:
      name = "armb" if is_big else "arml"

  elif processor_name == "mips":
    abi = "ABI_MIPS_O32"
    name = "mips32b" if is_big else "mips32l"

  elif processor_name == "ppc":   # currently not supported
    abi = "ABI_PPC"
    if is_big:
      name = "ppc32b"
    else:
      name = "ppc32l"
      print("[-] not supported")

  elif processor_name == "x86" and endian == "little":
    if bits == 32:
      name = "x86_32"
      abi = "ABIStdCall_x86_32"
      #abi = "ABIFastCall_x86_32" #currently we have to do this manually
    elif bits == 64:  #untested and unknown if this is correct
      name = "x86_64"
      abi = "ABI_AMD64_SYSTEMV"
    elif bits == 16:  #untested and unknown if this is correct
      name = "x86_16"
      abi = ""        #untested, no ABI available
      print("[-] not supported")

  if name is None:
    # Fail with an explicit error rather than an UnboundLocalError on return
    print("[-] not supported")
    raise ValueError("Unsupported architecture: %s / %s / %s" % (processor_name, endian, bits))

  return name, abi
205 | 
206 | ## radare2 interfacing
def main():
  """Run Sibyl on every function already known by radare2

  The function list comes from the r2 command `aflqj` and the target
  architecture / ABI are derived from the output of the r2 command `ij`.
  """
  print("[*] Get already known functions via r2 command aflqj ...")

  known_functions = r2.cmdj("aflqj")
  arch_name, abi_name = architecture(r2.cmdj('ij'))

  # To force a target while testing, override the guessed values here:
  #arch_name = "arml"     # [-a {arml,armb,armtl,armtb,sh4,x86_16,x86_32,x86_64,msp430,mips32b,mips32l,aarch64l,aarch64b,ppc32b,mepl,mepb}]
  #abi_name = "ABI_ARM"   # [-b {ABIStdCall_x86_32,ABIFastCall_x86_32,ABI_AMD64_SYSTEMV,ABI_AMD64_MS,ABI_ARM,ABI_MIPS_O32}]

  # Test sets handed to Sibyl          [-t {stdlib,string,ctype}]
  test_sets = ['string', 'stdlib', 'ctype']

  launch_on_funcs(arch_name, abi_name, known_functions, test_sets)
224 | 
if __name__ == '__main__':

  r2 = r2pipe.open()

  # Ask radare2 only once for the number of functions it knows about
  nb_known_funcs = r2.cmd('aflc')
  print('\n[*] Found ' + nb_known_funcs + ' functions')

  if int(nb_known_funcs) == 0:
    print('\n[-] no functions found for analyzing ... try to analyze the binary first')
    exit(0)

  # we create an IDA batch file for auto renaming the functions in IDA pro
  f = open('ida_batch_sibyl.txt', 'w', 0)

  # The batch file is closed even if the analysis fails half-way
  try:
    # Find SIBYL find.py
    identify_binary = "sibyl"
    env = os.environ
    AVAILABLE_TESTS = available_tests()

    main()
  finally:
    f.close()
245 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | 
 3 | # This file is part of Sibyl.
 4 | # Copyright 2014 - 2017 Camille MOUGEY <camille.mougey@cea.fr>
 5 | #
 6 | # Sibyl is free software: you can redistribute it and/or modify it
 7 | # under the terms of the GNU General Public License as published by
 8 | # the Free Software Foundation, either version 3 of the License, or
 9 | # (at your option) any later version.
10 | #
11 | # Sibyl is distributed in the hope that it will be useful, but WITHOUT
12 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 | # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
14 | # License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with Sibyl. If not, see <http://www.gnu.org/licenses/>.
18 | 
19 | from distutils.core import setup
20 | 
setup(
    name='sibyl',
    version='0.1',
    author='Camille MOUGEY',
    author_email='commial@gmail.com',
    url='https://github.com/cea-sec/sibyl',
    download_url='https://github.com/cea-sec/sibyl/tarball/master',
    license='GPLv3+',
    description='A Miasm2 based function divination',
    long_description="""
Sibyl is a tool aiming at recognizing functions in binaries based on their side
    effects, by running them in a sandboxed environment.""",
    keywords=["reverse engineering", "emulation"],
    install_requires=[
        'miasm2',
    ],
    # Packages are identified by their dotted Python names, not by their
    # filesystem paths
    packages=['sibyl', 'sibyl.abi', 'sibyl.engine', 'sibyl.learn',
              'sibyl.learn.tracer', 'sibyl.learn.generator',
              'sibyl.heuristics', 'sibyl.test', 'sibyl.actions'],
    scripts=['bin/sibyl'],
)
42 | 


--------------------------------------------------------------------------------
/sibyl/__init__.py:
--------------------------------------------------------------------------------
 1 | # This file is part of Sibyl.
 2 | # Copyright 2014 Camille MOUGEY <camille.mougey@cea.fr>
 3 | #
 4 | # Sibyl is free software: you can redistribute it and/or modify it
 5 | # under the terms of the GNU General Public License as published by
 6 | # the Free Software Foundation, either version 3 of the License, or
 7 | # (at your option) any later version.
 8 | #
 9 | # Sibyl is distributed in the hope that it will be useful, but WITHOUT
10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 | # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
12 | # License for more details.
13 | #
14 | # You should have received a copy of the GNU General Public License
15 | # along with Sibyl. If not, see <http://www.gnu.org/licenses/>.
16 | 
17 | 
18 | """This package provides methods for identifying common functions.
19 | It works by JITting them thanks to Miasm2 engine and comparing their side
20 | effects with its signature base.
21 | """
22 | 
23 | __all__ = []
24 | 


--------------------------------------------------------------------------------
/sibyl/abi/__init__.py:
--------------------------------------------------------------------------------
 1 | # This file is part of Sibyl.
 2 | # Copyright 2014 Camille MOUGEY <camille.mougey@cea.fr>
 3 | #
 4 | # Sibyl is free software: you can redistribute it and/or modify it
 5 | # under the terms of the GNU General Public License as published by
 6 | # the Free Software Foundation, either version 3 of the License, or
 7 | # (at your option) any later version.
 8 | #
 9 | # Sibyl is distributed in the hope that it will be useful, but WITHOUT
10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 | # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
12 | # License for more details.
13 | #
14 | # You should have received a copy of the GNU General Public License
15 | # along with Sibyl. If not, see <http://www.gnu.org/licenses/>.
16 | 
17 | from sibyl.abi.x86 import ABIS as ABIS_X86
18 | from sibyl.abi.arm import ABIS as ABIS_ARM
19 | from sibyl.abi.mep import ABIS as ABIS_MEP
20 | from sibyl.abi.mips import ABIS as ABIS_MIPS
# Every ABI class provided by the architecture specific modules imported above
ABIS = ABIS_X86 + ABIS_ARM + ABIS_MEP + ABIS_MIPS
__all__ = ["ABIS"]
23 | 


--------------------------------------------------------------------------------
/sibyl/abi/abi.py:
--------------------------------------------------------------------------------
 1 | # This file is part of Sibyl.
 2 | # Copyright 2014 Camille MOUGEY <camille.mougey@cea.fr>
 3 | #
 4 | # Sibyl is free software: you can redistribute it and/or modify it
 5 | # under the terms of the GNU General Public License as published by
 6 | # the Free Software Foundation, either version 3 of the License, or
 7 | # (at your option) any later version.
 8 | #
 9 | # Sibyl is distributed in the hope that it will be useful, but WITHOUT
10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 | # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
12 | # License for more details.
13 | #
14 | # You should have received a copy of the GNU General Public License
15 | # along with Sibyl. If not, see <http://www.gnu.org/licenses/>.
16 | 
17 | 
class ABI(object):
    "Base class of every ABI (calling convention) implementation"

    # Names of the architectures this ABI applies to
    arch = []

    def __init__(self, jitter, ira):
        # Both objects are only stored here; subclasses decide how to use them
        self.jitter, self.ira = jitter, ira

    def reset(self):
        "Forget the state of the current call; nothing to do in the base class"

    def add_arg(self, number, element):
        """Register one argument of the function to call
        @number: position of the argument (first one is 0)
        @element: value of the argument
        """
        raise NotImplementedError("Abstract method")

    def prepare_call(self, ret_addr):
        """Set up the call to a function
        @ret_addr: address the function has to return to
        """
        raise NotImplementedError("Abstract method")

    def get_result(self):
        """Return, as int, the value returned by the function"""
        raise NotImplementedError("Abstract method")
48 | 
49 | 
class ABIRegsStack(ABI):
    """ABI passing the first arguments through registers and the remaining
    ones through the stack"""

    regs_mapping = None # Registers receiving the first arguments (list of str)
    args = None         # argument number => value

    def __init__(self, *args, **kwargs):
        super(ABIRegsStack, self).__init__(*args, **kwargs)
        self.args = {}

    def add_arg(self, number, element):
        # Only integer arguments are handled
        if not isinstance(element, (int, long)):
            raise NotImplementedError()
        self.args[number] = element

    def vm_push(self, element):
        "Push @element on the stack (implemented by subclasses)"
        raise NotImplementedError("Abstract method")

    def set_ret(self, element):
        "Set the return address to @element (implemented by subclasses)"
        raise NotImplementedError("Abstract method")

    def prepare_call(self, ret_addr):
        # Arguments ordered by their number
        ordered = sorted(self.args)

        # Walk the arguments from the last one to the first one, so stack
        # arguments are pushed in reverse order
        position = len(ordered)
        while position > 0:
            position -= 1
            value = self.args[ordered[position]]

            if position >= len(self.regs_mapping):
                # No register left: stack argument
                self.vm_push(value)
            else:
                # Register argument
                setattr(self.jitter.cpu, self.regs_mapping[position], value)

        self.set_ret(ret_addr)

    def reset(self):
        "Drop every argument registered so far"
        self.args = {}

    def get_result(self):
        # The result is read from the register named by ira.ret_reg
        cpu = self.jitter.cpu
        return getattr(cpu, self.ira.ret_reg.name)
92 | 


--------------------------------------------------------------------------------
/sibyl/abi/arm.py:
--------------------------------------------------------------------------------
 1 | # This file is part of Sibyl.
 2 | # Copyright 2014 Camille MOUGEY <camille.mougey@cea.fr>
 3 | #
 4 | # Sibyl is free software: you can redistribute it and/or modify it
 5 | # under the terms of the GNU General Public License as published by
 6 | # the Free Software Foundation, either version 3 of the License, or
 7 | # (at your option) any later version.
 8 | #
 9 | # Sibyl is distributed in the hope that it will be useful, but WITHOUT
10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 | # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
12 | # License for more details.
13 | #
14 | # You should have received a copy of the GNU General Public License
15 | # along with Sibyl. If not, see <http://www.gnu.org/licenses/>.
16 | 
17 | 
18 | from sibyl.abi import abi
19 | 
20 | 
class ABI_ARM(abi.ABIRegsStack):
    """ARM ABI: arguments in R0-R3 then on the stack (32-bit pushes), return
    address in LR"""

    arch = ["arml", "armb", "armtl", "armtb"]
    regs_mapping = ["R0", "R1", "R2", "R3"]

    def vm_push(self, element):
        "Push @element on the stack as a 32-bit value"
        self.jitter.push_uint32_t(element)

    def set_ret(self, ret_addr):
        "Store the return address @ret_addr in LR"
        self.jitter.cpu.LR = ret_addr


# ABI classes provided by this module
ABIS = [ABI_ARM]
34 | 


--------------------------------------------------------------------------------
/sibyl/abi/mep.py:
--------------------------------------------------------------------------------
 1 | # This file is part of Sibyl.
 2 | # Copyright 2018 Camille MOUGEY <camille.mougey@cea.fr>
 3 | #
 4 | # Sibyl is free software: you can redistribute it and/or modify it
 5 | # under the terms of the GNU General Public License as published by
 6 | # the Free Software Foundation, either version 3 of the License, or
 7 | # (at your option) any later version.
 8 | #
 9 | # Sibyl is distributed in the hope that it will be useful, but WITHOUT
10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 | # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
12 | # License for more details.
13 | #
14 | # You should have received a copy of the GNU General Public License
15 | # along with Sibyl. If not, see <http://www.gnu.org/licenses/>.
16 | #
17 | # Author: Guillaume VALADON <guillaume@valadon.net>
18 | 
19 | 
20 | from sibyl.abi import abi
21 | 
22 | 
class ABI_MEP(abi.ABIRegsStack):
    """MeP ABI: arguments in R1-R4 then on the stack (32-bit pushes), return
    address in LP"""

    arch = ["mepl", "mepb"]
    regs_mapping = ["R1", "R2", "R3", "R4"]

    def vm_push(self, element):
        "Push @element on the stack as a 32-bit value"
        self.jitter.push_uint32_t(element)

    def set_ret(self, ret_addr):
        "Store the return address @ret_addr in LP"
        self.jitter.cpu.LP = ret_addr


# ABI classes provided by this module
ABIS = [ABI_MEP]
36 | 


--------------------------------------------------------------------------------
/sibyl/abi/mips.py:
--------------------------------------------------------------------------------
 1 | # This file is part of Sibyl.
 2 | # Copyright 2014 Camille MOUGEY <camille.mougey@cea.fr>
 3 | #
 4 | # Sibyl is free software: you can redistribute it and/or modify it
 5 | # under the terms of the GNU General Public License as published by
 6 | # the Free Software Foundation, either version 3 of the License, or
 7 | # (at your option) any later version.
 8 | #
 9 | # Sibyl is distributed in the hope that it will be useful, but WITHOUT
10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 | # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
12 | # License for more details.
13 | #
14 | # You should have received a copy of the GNU General Public License
15 | # along with Sibyl. If not, see <http://www.gnu.org/licenses/>.
16 | 
17 | 
18 | from sibyl.abi import abi
19 | 
20 | 
class ABI_MIPS_O32(abi.ABIRegsStack):
    """MIPS O32 ABI: arguments in A0-A3 then on the stack (32-bit pushes),
    return address in RA"""

    arch = ["mips32b", "mips32l"]
    regs_mapping = ["A0", "A1", "A2", "A3"]

    def vm_push(self, element):
        "Push @element on the stack as a 32-bit value"
        self.jitter.push_uint32_t(element)

    def set_ret(self, ret_addr):
        "Store the return address @ret_addr in RA"
        self.jitter.cpu.RA = ret_addr


# ABI classes provided by this module
ABIS = [ABI_MIPS_O32]
34 | 


--------------------------------------------------------------------------------
/sibyl/abi/x86.py:
--------------------------------------------------------------------------------
 1 | # This file is part of Sibyl.
 2 | # Copyright 2014 Camille MOUGEY <camille.mougey@cea.fr>
 3 | #
 4 | # Sibyl is free software: you can redistribute it and/or modify it
 5 | # under the terms of the GNU General Public License as published by
 6 | # the Free Software Foundation, either version 3 of the License, or
 7 | # (at your option) any later version.
 8 | #
 9 | # Sibyl is distributed in the hope that it will be useful, but WITHOUT
10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 | # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
12 | # License for more details.
13 | #
14 | # You should have received a copy of the GNU General Public License
15 | # along with Sibyl. If not, see <http://www.gnu.org/licenses/>.
16 | 
17 | 
18 | from sibyl.abi import abi
19 | 
20 | 
class ABIRegsStack_x86(abi.ABIRegsStack):
    "Common part of the x86 ABIs: the return address lives on the stack"

    def set_ret(self, ret_addr):
        "Push the return address @ret_addr on the stack"
        self.vm_push(ret_addr)
25 | 
26 | 
class ABIStdCall_x86_32(ABIRegsStack_x86):
    "x86 32-bit stdcall: every argument is passed on the stack"

    arch = ["x86_32"]
    regs_mapping = [] # Stack only

    def vm_push(self, element):
        "Push @element on the stack as a 32-bit value"
        self.jitter.push_uint32_t(element)
34 | 
35 | 
class ABIFastCall_x86_32(ABIRegsStack_x86):
    "x86 32-bit fastcall: arguments in ECX and EDX, then on the stack"

    arch = ["x86_32"]
    regs_mapping = ["ECX", "EDX"]

    def vm_push(self, element):
        "Push @element on the stack as a 32-bit value"
        self.jitter.push_uint32_t(element)
43 | 
44 | 
class ABI_AMD64_SYSTEMV(ABIRegsStack_x86):
    "x86 64-bit System V: arguments in RDI, RSI, RDX, RCX, R8, R9, then on the stack"

    arch = ["x86_64"]
    regs_mapping = ["RDI", "RSI", "RDX", "RCX", "R8", "R9"]

    def vm_push(self, element):
        "Push @element on the stack as a 64-bit value"
        self.jitter.push_uint64_t(element)
52 | 
53 | 
class ABI_AMD64_MS(ABIRegsStack_x86):
    "x86 64-bit Microsoft: arguments in RCX, RDX, R8, R9, then on the stack"

    arch = ["x86_64"]
    regs_mapping = ["RCX", "RDX", "R8", "R9"]

    def vm_push(self, element):
        "Push @element on the stack as a 64-bit value"
        self.jitter.push_uint64_t(element)

    def set_ret(self, ret_addr):
        # Shadow stack reservation: 4 * 8 = 0x20 bytes, pushed before the
        # return address
        for _ in range(4):
            self.vm_push(0)
        super(ABI_AMD64_MS, self).set_ret(ret_addr)


# ABI classes provided by this module
ABIS = [ABIStdCall_x86_32, ABIFastCall_x86_32, ABI_AMD64_SYSTEMV, ABI_AMD64_MS]
70 | 


--------------------------------------------------------------------------------
/sibyl/actions/__init__.py:
--------------------------------------------------------------------------------
 1 | # This file is part of Sibyl.
 2 | # Copyright 2014 - 2017 Camille MOUGEY <camille.mougey@cea.fr>
 3 | #
 4 | # Sibyl is free software: you can redistribute it and/or modify it
 5 | # under the terms of the GNU General Public License as published by
 6 | # the Free Software Foundation, either version 3 of the License, or
 7 | # (at your option) any later version.
 8 | #
 9 | # Sibyl is distributed in the hope that it will be useful, but WITHOUT
10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 | # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
12 | # License for more details.
13 | #
14 | # You should have received a copy of the GNU General Public License
15 | # along with Sibyl. If not, see <http://www.gnu.org/licenses/>.
16 | "Sibyl actions implementations"
17 | 
18 | from collections import namedtuple
19 | from importlib import import_module
20 | 
# Description of an action:
# - name: name of the action
# - desc: short description of the action
# - module: name of the module, relative to sibyl.actions, implementing it
# - classname: name of the implementing class inside that module
ActionDesc = namedtuple("ActionDesc", ["name", "desc", "module", "classname"])

# Known actions; their modules are only imported on demand by load_action
ACTIONS = [
    ActionDesc("config", "Configuration management", "config", "ActionConfig"),
    ActionDesc("find", "Function guesser", "find", "ActionFind"),
    ActionDesc("func", "Function discovering", "func", "ActionFunc"),
    ActionDesc("learn", "Learn a new function", "learn", "ActionLearn"),
]
29 | 
def load_action(actiondesc, args):
    """Import the module implementing @actiondesc and instantiate its class
    @actiondesc: description of the action (its 'module' and 'classname'
                 attributes are used)
    @args: arguments given to the action class
    """
    relative_name = ".%s" % actiondesc.module
    module = import_module(relative_name, "sibyl.actions")
    action_cls = getattr(module, actiondesc.classname)
    return action_cls(args)
34 | 


--------------------------------------------------------------------------------
/sibyl/actions/action.py:
--------------------------------------------------------------------------------
 1 | # This file is part of Sibyl.
 2 | # Copyright 2014 - 2017 Camille MOUGEY <camille.mougey@cea.fr>
 3 | #
 4 | # Sibyl is free software: you can redistribute it and/or modify it
 5 | # under the terms of the GNU General Public License as published by
 6 | # the Free Software Foundation, either version 3 of the License, or
 7 | # (at your option) any later version.
 8 | #
 9 | # Sibyl is distributed in the hope that it will be useful, but WITHOUT
10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 | # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
12 | # License for more details.
13 | #
14 | # You should have received a copy of the GNU General Public License
15 | # along with Sibyl. If not, see <http://www.gnu.org/licenses/>.
16 | import sys
17 | import argparse
18 | 
class Action(object):

    "Base class of every Sibyl action"

    _name_ = ""
    _desc_ = ""
    _args_ = []  # List of (*args, **kwargs)

    def __init__(self, command_line):
        """Parse @command_line with the options declared in _args_, then run
        the action
        @command_line: list of arguments (str) dedicated to this action
        """
        # Build the parser from the declared options
        prog_name = "%s %s" % (sys.argv[0], self._name_)
        argparser = argparse.ArgumentParser(prog=prog_name)
        for positional, keywords in self._args_:
            argparser.add_argument(*positional, **keywords)
        self.args = argparser.parse_args(command_line)

        # The action is launched as soon as it is built
        self.run()

    def run(self):
        "Body of the action, to be implemented by subclasses"
        raise NotImplementedError("Abstract method")

    @property
    def name(self):
        """Name of the action"""
        return self._name_

    @property
    def description(self):
        """Description of the action"""
        return self._desc_
50 | 


--------------------------------------------------------------------------------
/sibyl/actions/config.py:
--------------------------------------------------------------------------------
  1 | # This file is part of Sibyl.
  2 | # Copyright 2014 - 2017 Camille MOUGEY <camille.mougey@cea.fr>
  3 | #
  4 | # Sibyl is free software: you can redistribute it and/or modify it
  5 | # under the terms of the GNU General Public License as published by
  6 | # the Free Software Foundation, either version 3 of the License, or
  7 | # (at your option) any later version.
  8 | #
  9 | # Sibyl is distributed in the hope that it will be useful, but WITHOUT
 10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 11 | # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
 12 | # License for more details.
 13 | #
 14 | # You should have received a copy of the GNU General Public License
 15 | # along with Sibyl. If not, see <http://www.gnu.org/licenses/>.
 16 | 
 17 | import os
 18 | 
 19 | from sibyl.config import config, config_paths
 20 | from sibyl.actions.action import Action
 21 | 
 22 | 
 23 | class ActionConfig(Action):
 24 |     """Configuration management"""
 25 | 
 26 |     _name_ = "config"
 27 |     _desc_ = "Configuration management"
 28 |     _args_ = [
 29 |         (("-V", "--value"), {"help": "Return the value of a specific option"}),
 30 |         (("-d", "--dump"), {"help": "Dump the current configuration",
 31 |                             "action": "store_true"}),
 32 |     ]
 33 | 
 34 |     def run(self):
 35 |         if self.args.dump:
 36 |             print "\n".join(config.dump())
 37 |         elif self.args.value:
 38 |             if self.args.value.endswith("_keys") and hasattr(config,
 39 |                                                              self.args.value[:-5]):
 40 |                 val = getattr(config, self.args.value[:-5]).keys()
 41 |             elif hasattr(config, self.args.value):
 42 |                 val = getattr(config, self.args.value)
 43 |             else:
 44 |                 val = "ERROR"
 45 |             print val
 46 |         else:
 47 |             self.show()
 48 | 
 49 |     def show(self):
 50 |         # Configuration files
 51 |         files = [fpath for fpath in config_paths if os.path.isfile(fpath)]
 52 |         if not files:
 53 |             print "No configuration file found. Supported paths:"
 54 |             print "\t" + "\n\t".join(config_paths)
 55 |         else:
 56 |             print "Configuration loaded from %s" % ", ".join(files)
 57 | 
 58 |         # Jitter engine
 59 |         engines = config.config["jit_engine"]
 60 |         if "miasm" in engines:
 61 |             idx = engines.index("miasm")
 62 |             engines[idx:idx + 1] = config.config["miasm_engine"]
 63 |         print "Jitter engine (preference order): %s" % ", ".join(engines)
 64 |         print "Elected jitter engine: %s" % config.jit_engine
 65 | 
 66 |         # Stubbing
 67 |         stubs = config.stubs
 68 |         if stubs:
 69 |             print "API stubbing activated on supported jitter, from %d files" % len(stubs)
 70 |         else:
 71 |             print "API stubbing is deactivated"
 72 | 
 73 |         # PIN
 74 |         if (config.pin_root and
 75 |             os.path.exists(os.path.join(config.pin_root, "pin"))):
 76 |             print "PIN root path found at: %s" % config.pin_root
 77 |         else:
 78 |             print "PIN root path not found"
 79 |         if (config.pin_tracer and
 80 |             os.path.exists(config.pin_tracer)):
 81 |             print "PIN tracer found at: %s" % config.pin_tracer
 82 |         else:
 83 |             print "PIN tracer not found"
 84 | 
 85 |         # Learn
 86 |         print "Learn's pruning strategy: %s/%d/%d" % (config.prune_strategy,
 87 |                                                       config.prune_keep,
 88 |                                                       config.prune_keep_max)
 89 | 
 90 |         # IDA
 91 |         idaq64_path = config.idaq64_path
 92 |         if idaq64_path:
 93 |             "IDA has been found at: %s" % idaq64_path
 94 |         else:
 95 |             print "IDA has been not found"
 96 | 
 97 |         # GHIDRA Headless
 98 |         ghidra_headless_path = config.ghidra_headless_path
 99 |         if ghidra_headless_path:
100 |             print "GHIDRA analyzeHeadless has been found at: %s" % ghidra_headless_path
101 |         else:
102 |             print "GHIDRA analyzeHeadless has been not found"
103 | 
104 |         # Tests
105 |         print "Tests availables:"
106 |         for name, tests in config.available_tests.iteritems():
107 |             print "\t%s (%d)" % (name, len(tests))
108 |             print "\t\t" + ", ".join(test.func for test in tests)
109 | 


--------------------------------------------------------------------------------
/sibyl/actions/find.py:
--------------------------------------------------------------------------------
  1 | # This file is part of Sibyl.
  2 | # Copyright 2014 - 2017 Camille MOUGEY <camille.mougey@cea.fr>
  3 | #
  4 | # Sibyl is free software: you can redistribute it and/or modify it
  5 | # under the terms of the GNU General Public License as published by
  6 | # the Free Software Foundation, either version 3 of the License, or
  7 | # (at your option) any later version.
  8 | #
  9 | # Sibyl is distributed in the hope that it will be useful, but WITHOUT
 10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 11 | # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
 12 | # License for more details.
 13 | #
 14 | # You should have received a copy of the GNU General Public License
 15 | # along with Sibyl. If not, see <http://www.gnu.org/licenses/>.
 16 | 
 17 | import logging
 18 | import json
 19 | import sys
 20 | from collections import namedtuple
 21 | 
 22 | from miasm2.analysis.machine import Machine
 23 | from miasm2.analysis.binary import Container
 24 | 
 25 | from sibyl.config import config
 26 | from sibyl.testlauncher import TestLauncher
 27 | from sibyl.abi import ABIS
 28 | from sibyl.heuristics.arch import ArchHeuristic
 29 | from sibyl.commons import print_table
 30 | from sibyl.actions.action import Action
 31 | 
# Message exchanged with workers: posted by a worker once the function at
# `address` has been tested, `results` being the outcome of the test launcher
MessageTaskDone = namedtuple("MessageTaskDone", ["address", "results"])
 34 | 
 35 | 
 36 | class FakeProcess(object):
 37 |     """Mock simulating Process API in monoprocess mode"""
 38 | 
 39 |     def __init__(self, target, args):
 40 |         self.target = target
 41 |         self.args = args
 42 | 
 43 |     def start(self, *args, **kwargs):
 44 |         self.target(*self.args)
 45 | 
 46 |     def join(self, *args, **kwargs):
 47 |         pass
 48 | 
 49 | 
 50 | class ActionFind(Action):
 51 |     """Action for actually launching function guessing"""
 52 | 
 53 |     _name_ = "find"
 54 |     _desc_ = "Function guesser"
 55 |     _args_ = [
 56 |         # Mandatory
 57 |         (["filename"], {"help": "File to load"}),
 58 |         (["address"], {"help": "Address of the function under test. Allowed" \
 59 |                        " formats are '112233', '0x11223344', '-' for stdin " \
 60 |                        "and 'filename' for a file containing addresses",
 61 |                        "nargs": "+"}),
 62 |         # Optional
 63 |         (["-a", "--architecture"], {"help": "Target architecture",
 64 |                                     "choices": Machine.available_machine()}),
 65 |         (["-b", "--abi"], {"help": "ABI to use",
 66 |                            "choices": [x.__name__ for x in ABIS]}),
 67 |         (["-t", "--tests"], {"help": "Tests to run (default is all)",
 68 |                              "choices": config.available_tests.keys(),
 69 |                              "default": [],
 70 |                              "action": "append"}),
 71 |         (["-v", "--verbose"], {"help": "Verbose mode (use multiple time to " \
 72 |                                "increase verbosity level)",
 73 |                                "action": "count",
 74 |                                "default": 0}),
 75 |         (["-i", "--timeout"], {"help": "Test timeout (in seconds)",
 76 |                                "default": 2,
 77 |                                "type": int}),
 78 |         (["-m", "--mapping-base"], {"help": "Binary mapping address",
 79 |                                     "default": "0"}),
 80 |         (["-j", "--jitter"], {"help": "Jitter engine (override default one)",
 81 |                               "choices": ["gcc", "tcc", "llvm", "python", "qemu"],
 82 |                               "default": config.jit_engine}),
 83 |         (["-p", "--monoproc"], {"help": "Launch tests in a single process " \
 84 |                                 "(mainly for debug purpose)",
 85 |                                 "action": "store_true"}),
 86 |         (["-o", "--output-format"], {"help": "Output format",
 87 |                                      "choices": ["JSON", "human"],
 88 |                                      "default": "human"}),
 89 |     ]
 90 | 
 91 |     def do_test(self, addr_queue, msg_queue):
 92 |         """Multi-process worker for launching on functions"""
 93 | 
 94 |         # Init components
 95 |         tl = TestLauncher(self.args.filename, self.machine, self.abicls,
 96 |                           self.tests, self.args.jitter, self.map_addr)
 97 | 
 98 |         # Activatate logging INFO on at least -vv
 99 |         if self.args.verbose > 1:
100 |             tl.logger.setLevel(logging.INFO)
101 | 
102 |         # Main loop
103 |         while True:
104 |             address = addr_queue.get()
105 |             if address is None:
106 |                 break
107 |             possible_funcs = tl.run(address, timeout_seconds=self.args.timeout)
108 |             msg_queue.put(MessageTaskDone(address, possible_funcs))
109 | 
110 |         # Signal to master the end
111 |         msg_queue.put(None)
112 | 
113 |     def run(self):
114 |         """Launch search"""
115 | 
116 |         # Import multiprocessing only when required
117 |         from multiprocessing import cpu_count, Queue, Process
118 | 
119 |         # Parse args
120 |         self.map_addr = int(self.args.mapping_base, 0)
121 |         if self.args.monoproc:
122 |             cpu_count = lambda: 1
123 |             Process = FakeProcess
124 | 
125 |         # Architecture
126 |         architecture = False
127 |         if self.args.architecture:
128 |             architecture = self.args.architecture
129 |         else:
130 |             with open(self.args.filename) as fdesc:
131 |                 architecture = ArchHeuristic(fdesc).guess()
132 |             if not architecture:
133 |                 raise ValueError("Unable to recognize the architecture, please specify it")
134 |             if self.args.verbose > 0:
135 |                 print "Guessed architecture: %s" % architecture
136 | 
137 |         self.machine = Machine(architecture)
138 |         if not self.args.address:
139 |             print "No function address provided. Use 'sibyl func' to discover addresses"
140 |             exit(-1)
141 |         addresses = []
142 |         for address in self.args.address:
143 |             if address == '-':
144 |                 # Use stdin
145 |                 addresses = [int(addr, 0) for addr in sys.stdin]
146 |                 continue
147 |             try:
148 |                 addresses.append(int(address, 0))
149 |             except ValueError:
150 |                 # File
151 |                 addresses = [int(addr, 0) for addr in open(address)]
152 |         if self.args.verbose > 0:
153 |             print "Found %d addresses" % len(addresses)
154 | 
155 | 
156 |         # Select ABI
157 |         if self.args.abi is None:
158 |             candidates = set(abicls for abicls in ABIS
159 |                              if architecture in abicls.arch)
160 |             if not candidates:
161 |                 raise ValueError("No ABI for architecture %s" % architecture)
162 |             if len(candidates) > 1:
163 |                 print "Please specify the ABI:"
164 |                 print "\t" + "\n\t".join(cand.__name__ for cand in candidates)
165 |                 exit(0)
166 |             abicls = candidates.pop()
167 |         else:
168 |             for abicls in ABIS:
169 |                 if self.args.abi == abicls.__name__:
170 |                     break
171 |             else:
172 |                 raise ValueError("Unknown ABI name: %s" % self.args.abi)
173 |         self.abicls = abicls
174 | 
175 |         # Select Test set
176 |         self.tests = []
177 |         for tname, tcases in config.available_tests.iteritems():
178 |             if not self.args.tests or tname in self.args.tests:
179 |                 self.tests += tcases
180 |         if self.args.verbose > 0:
181 |             print "Found %d test cases" % len(self.tests)
182 | 
183 |         # Prepare multiprocess
184 |         cpu_c = cpu_count()
185 |         addr_queue = Queue()
186 |         msg_queue = Queue()
187 |         processes = []
188 | 
189 |         # Add tasks
190 |         for address in addresses:
191 |             addr_queue.put(address)
192 | 
193 |         # Add poison pill
194 |         for _ in xrange(cpu_c):
195 |             addr_queue.put(None)
196 | 
197 |         # Launch workers
198 |         for _ in xrange(cpu_c):
199 |             p = Process(target=self.do_test, args=(addr_queue, msg_queue))
200 |             processes.append(p)
201 |             p.start()
202 |         addr_queue.close()
203 | 
204 |         # Get results
205 |         nb_poison = 0
206 |         results = {} # address -> possible functions
207 |         while nb_poison < cpu_c:
208 |             msg = msg_queue.get()
209 |             # Poison pill
210 |             if msg is None:
211 |                 nb_poison += 1
212 |                 continue
213 | 
214 |             # Save result
215 |             results[msg.address] = msg.results
216 | 
217 |             # Display status if needed
218 |             if self.args.verbose > 0:
219 |                 sys.stdout.write("\r%d / %d" % (len(results), len(addresses)))
220 |                 sys.stdout.flush()
221 |             if msg.results and self.args.output_format == "human":
222 |                 prefix = ""
223 |                 if self.args.verbose > 0:
224 |                     prefix = "\r"
225 |                 print prefix + "0x%08x : %s" % (msg.address, ",".join(msg.results))
226 | 
227 |         # Clean output if needed
228 |         if self.args.verbose > 0:
229 |             print ""
230 | 
231 |         # End connexions
232 |         msg_queue.close()
233 |         msg_queue.join_thread()
234 | 
235 |         addr_queue.join_thread()
236 |         for p in processes:
237 |             p.join()
238 | 
239 |         if not addr_queue.empty():
240 |             raise RuntimeError("An error occured: queue is not empty")
241 | 
242 |         # Print final results
243 |         if self.args.output_format == "JSON":
244 |             # Expand results to always have the same key, and address as int
245 |             print json.dumps({"information": {"total_count": len(addresses),
246 |                                               "test_cases": len(self.tests)},
247 |                               "results": [{"address": addr, "functions": result}
248 |                                           for addr, result in results.iteritems()],
249 |             })
250 |         elif self.args.output_format == "human" and self.args.verbose > 0:
251 |             # Summarize results
252 |             title = ["Address", "Candidates"]
253 |             ligs = [title]
254 | 
255 |             ligs += [["0x%08x" % addr, ",".join(result)]
256 |                      for addr, result in sorted(results.iteritems(),
257 |                                                 key=lambda x: x[0])
258 |                      if result]
259 |             print_table(ligs, separator="| ")
260 | 
261 | 
262 | 


--------------------------------------------------------------------------------
/sibyl/actions/func.py:
--------------------------------------------------------------------------------
 1 | # This file is part of Sibyl.
 2 | # Copyright 2014 - 2017 Camille MOUGEY <camille.mougey@cea.fr>
 3 | #
 4 | # Sibyl is free software: you can redistribute it and/or modify it
 5 | # under the terms of the GNU General Public License as published by
 6 | # the Free Software Foundation, either version 3 of the License, or
 7 | # (at your option) any later version.
 8 | #
 9 | # Sibyl is distributed in the hope that it will be useful, but WITHOUT
10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 | # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
12 | # License for more details.
13 | #
14 | # You should have received a copy of the GNU General Public License
15 | # along with Sibyl. If not, see <http://www.gnu.org/licenses/>.
16 | 
17 | import os
18 | 
19 | from miasm2.analysis.machine import Machine
20 | from miasm2.analysis.binary import Container
21 | 
22 | from sibyl.config import config, config_paths
23 | from sibyl.actions.action import Action
24 | from sibyl.heuristics.func import FuncHeuristic, ida_funcs, ghidra_funcs
25 | from sibyl.heuristics.arch import ArchHeuristic
26 | 
27 | 
# Names of the known function heuristics, read from a FuncHeuristic built with
# placeholder arguments; used as "choices" of the -d / -e options of ActionFunc
heur_names = FuncHeuristic(None, None, "").heuristic_names
29 | 
class ActionFunc(Action):
    """Function discovering

    Print, one per line, the addresses guessed by the enabled function
    heuristics on the target binary.
    """

    _name_ = "func"
    _desc_ = "Function discovering"
    _args_ = [
        # Mandatory
        (["filename"], {"help": "File to load"}),
        # Optional
        (["-a", "--architecture"], {"help": "Target architecture",
                                    "choices": Machine.available_machine()}),
        (["-v", "--verbose"], {"help": "Verbose mode",
                               "action": "store_true"}),
        (["-d", "--disable-heuristic"], {"help": "Disable an heuristic",
                                         "action": "append",
                                         "choices": heur_names,
                                         "default": []}),
        (["-e", "--enable-heuristic"], {"help": "Enable an heuristic",
                                        "action": "append",
                                        "choices": heur_names,
                                        "default": []}),
    ]

    def run(self):
        """Guess the function addresses of the target file and print them

        Raise ValueError if the architecture is neither given nor guessed
        """
        # Architecture
        architecture = False
        if self.args.architecture:
            architecture = self.args.architecture
        else:
            with open(self.args.filename) as fdesc:
                architecture = ArchHeuristic(fdesc).guess()
            if not architecture:
                raise ValueError("Unable to recognize the architecture, please specify it")
            if self.args.verbose:
                print("Guessed architecture: %s" % architecture)

        # The file is only needed while the container is being built
        with open(self.args.filename) as fdesc:
            cont = Container.from_stream(fdesc)
        machine = Machine(architecture)
        # Number of hexadecimal digits needed to print an address
        addr_size = machine.ira().pc.size // 4
        fh = FuncHeuristic(cont, machine, self.args.filename)

        # Default: force only IDA or GHIDRA if available
        if config.idaq64_path:
            fh.heuristics = [ida_funcs]
        elif config.ghidra_headless_path:
            fh.heuristics = [ghidra_funcs]

        # Enable / disable heuristics
        for name in self.args.enable_heuristic:
            heur = fh.name2heuristic(name)
            if heur not in fh.heuristics:
                fh.heuristics.append(heur)
        for name in self.args.disable_heuristic:
            heur = fh.name2heuristic(name)
            # The heuristic may already be off (for instance when only IDA or
            # GHIDRA has been forced above): disabling it is then a no-op
            if heur in fh.heuristics:
                fh.heuristics.remove(heur)

        if self.args.verbose:
            print("Heuristics to run: %s" % ", ".join(fh.heuristic_names))

        # Launch guess
        fmt = "0x{:0%dx}" % addr_size
        for addr in fh.guess():
            print(fmt.format(addr))
94 | 


--------------------------------------------------------------------------------
/sibyl/actions/learn.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import logging
 3 | 
 4 | from miasm2.analysis.binary import Container
 5 | 
 6 | from sibyl.actions.action import Action
 7 | from sibyl.learn.tracer import AVAILABLE_TRACER
 8 | from sibyl.learn.generator import AVAILABLE_GENERATOR
 9 | from sibyl.learn.learn import TestCreator
10 | from sibyl.abi.x86 import ABI_AMD64_SYSTEMV
11 | 
12 | 
class ActionLearn(Action):
    """Automatic learning of a new function from one or multiple call"""

    _name_ = "learn"
    _desc_ = "Learn a new function"
    _args_ = [
        # Mandatory
        (["functionname"], {"help": "Name of the learned function"}),
        (["program"], {"help": "Program used to learn the function, currently"
                       " only x86 64 programs are supported"}),
        (["headerfile"], {"help": ".h header containing function declaration"
                          " and associated types"}),
        # Optional
        (["-a", "--address"], {"help": "Address of the learned function. If "
                               "not set, the corresponding symbol address is used."}),
        # Explicit '+': adjacent literals would be merged *before* the call
        # to join, turning the whole sentence into the separator
        (["-t", "--trace"], {"help": "Used tracer. Available: " +
                             ", ".join(AVAILABLE_TRACER.keys()),
                             "default": "pin",
                             "choices": AVAILABLE_TRACER.keys()}),
        (["-g", "--generator"], {"help": "Used generator. Available: " +
                                 ", ".join(AVAILABLE_GENERATOR.keys()),
                                 "default": "python",
                                 "choices": AVAILABLE_GENERATOR.keys()}),
        (["-v", "--verbose"], {"help": "Verbose mode (use multiple time to "
                               "increase verbosity level)",
                               "action": "count",
                               "default": 0}),
        (["-m", "--main"], {"help": "Address of the function that calls the "
                            "learned function. Use by and only by the miasm tracer."}),
        (["-o", "--output"], {"help": "Output file. Class is printed to stdout "
                              "if no output file is specified.",
                              "default": None}),
        (["-z", "--avoid-null"], {"help": "If set, do not consider runs "
                                  "returning a null value",
                                  "action": "store_true"}),
    ]

    def run(self):
        """Learn the function and output the generated test class

        The class is written to the --output file if given, printed on stdout
        otherwise.
        Raise ValueError if --main is used without the miasm tracer, or if
        the function symbol cannot be found in the program
        """
        # Currently only AMD64 SYSTEMV ABI is supported by the learning module
        abi = ABI_AMD64_SYSTEMV

        # Currently only x86_64 is supported by the learning module
        machine = "x86_64"

        if self.args.trace != "miasm" and self.args.main is not None:
            raise ValueError("Main argument is only used by miasm tracer")

        main = int(self.args.main, 0) if self.args.main else None

        # If function address is not set then use the symbol address
        if self.args.address is None:
            with open(self.args.program) as fdesc:
                cont = Container.from_stream(fdesc)
            address = cont.loc_db.get_name_offset(self.args.functionname)
            if address is None:
                raise ValueError("Symbol %s does not exists in %s" % (self.args.functionname, self.args.program))
        else:
            address = int(self.args.address, 0)

        testcreator = TestCreator(self.args.functionname, address,
                                  self.args.program, self.args.headerfile,
                                  AVAILABLE_TRACER[self.args.trace],
                                  AVAILABLE_GENERATOR[self.args.generator],
                                  main, abi, machine, self.args.avoid_null)

        # Verbosity: nothing -> WARN, -v -> INFO, -vv and more -> DEBUG
        if self.args.verbose == 0:
            level = logging.WARN
        elif self.args.verbose == 1:
            level = logging.INFO
        else:
            level = logging.DEBUG
        testcreator.logger.setLevel(level)

        createdTest = testcreator.create_test()

        if self.args.output:
            with open(self.args.output, "w+") as fdesc:
                fdesc.write(createdTest)
        else:
            print(createdTest)
91 | 
92 | 


--------------------------------------------------------------------------------
/sibyl/commons.py:
--------------------------------------------------------------------------------
  1 | """Common / shared elements"""
  2 | import logging
  3 | try:
  4 |     import pycparser
  5 | except ImportError:
  6 |     pycparser = None
  7 | else:
  8 |     from miasm2.core.ctypesmngr import c_to_ast, CTypeFunc
  9 |     from miasm2.core.objc import ObjCPtr, ObjCArray
 10 | 
 11 | def init_logger(name):
 12 |     logger = logging.getLogger(name)
 13 | 
 14 |     console_handler = logging.StreamHandler()
 15 |     log_format = "%(levelname)-5s: %(message)s"
 16 |     console_handler.setFormatter(logging.Formatter(log_format))
 17 |     logger.addHandler(console_handler)
 18 | 
 19 |     logger.setLevel(logging.ERROR)
 20 |     return logger
 21 | 
 22 | 
 23 | class TimeoutException(Exception):
 24 |     """Exception to be called on timeouts"""
 25 |     pass
 26 | 
 27 | 
# Address on which MiasmEngine sets the breakpoint that stops a run
# NOTE(review): presumably the fake return address given to tested functions
END_ADDR = 0x1337babe
 29 | 
 30 | def print_table(ligs, title=True, separator='|', level=0, align=""):
 31 |     "Print nicely @ligs. If title, @ligs[0] is title ligne"
 32 |     # Calc max by col
 33 |     columns = [0] * len(ligs[0])
 34 |     for lig in ligs:
 35 |         for index, element in enumerate(lig):
 36 |             columns[index] = max(columns[index], len(element))
 37 | 
 38 |     fmt_l = ["{%d:%s%d}" % (i, align, l + 2) for i, l in enumerate(columns)]
 39 |     fmt = separator.join(fmt_l)
 40 | 
 41 |     tab = "\t" * level
 42 | 
 43 |     for i, lig in enumerate(ligs):
 44 |         if i == 1 and title:
 45 |             print "%s%s" % (tab, "-" * len(fmt.format(*lig)))
 46 |         print "%s%s" % (tab, fmt.format(*lig))
 47 | 
 48 | class HeaderFile(object):
 49 |     """Abstract representation of a Header file"""
 50 | 
 51 |     def __init__(self, header_data, ctype_manager):
 52 |         """Parse @header_data to fill @ctype_manager
 53 |         @header_data: str of a C-like header file
 54 |         @ctype_manager: miasm2.core.objc.CTypesManager instance"""
 55 |         self.data = header_data
 56 |         self.ctype_manager = ctype_manager
 57 | 
 58 |         self.ast = self.parse_header(header_data)
 59 |         self.ctype_manager.types_ast.add_c_decl(header_data)
 60 |         self.functions = {} # function name -> FuncPrototype
 61 | 
 62 |         if pycparser is None:
 63 |             raise ImportError("pycparser module is needed to parse header file")
 64 |         self.parse_functions()
 65 | 
 66 |     @staticmethod
 67 |     def parse_header(header_data):
 68 |         """Return the AST corresponding to @header_data
 69 |         @header_data: str of a C-like header file
 70 |         """
 71 |         # We can't use add_c_decl, because we need the AST to get back
 72 |         # function's arguments name
 73 |         parser = pycparser.c_parser.CParser()
 74 |         return c_to_ast(parser, header_data)
 75 | 
 76 |     def parse_functions(self):
 77 |         """Search for function declarations"""
 78 | 
 79 |         for ext in self.ast.ext:
 80 |             if not (isinstance(ext, pycparser.c_ast.Decl) and
 81 |                     isinstance(ext.type, (pycparser.c_ast.FuncDecl,
 82 |                                           pycparser.c_ast.FuncDef))):
 83 |                 continue
 84 |             func_name = ext.name
 85 |             objc_func = self.ctype_manager.get_objc(CTypeFunc(func_name))
 86 | 
 87 |             args_order = []
 88 |             args = {}
 89 |             for i, param in enumerate(ext.type.args.params):
 90 |                 args_order.append(param.name)
 91 |                 args[param.name] = objc_func.args[i][1]
 92 | 
 93 |             self.functions[func_name] = FuncPrototype(func_name,
 94 |                                                       objc_func.type_ret,
 95 |                                                       *args_order, **args)
 96 | 
 97 | def objc_is_dereferenceable(target_type):
 98 |     """Return True if target_type may be used as a pointer
 99 |     @target_type: ObjC"""
100 |     return isinstance(target_type, (ObjCPtr, ObjCArray))
101 | 
102 | 
class FuncPrototype(object):
    """Prototype of a function: name, return type and typed arguments"""

    def __init__(self, func_name, func_type, *args, **kwargs):
        """Build the prototype of @func_type @func_name(...)
        @func_name: name of the function
        @func_type: return type of the function
        @args: names of the arguments, in declaration order
        @kwargs: argument name -> argument type
        """
        self.func_name, self.func_type = func_name, func_type
        self.args_order = args
        self.args = kwargs

    def __str__(self):
        arguments = ["%s %s" % (self.args[arg_name], arg_name)
                     for arg_name in self.args_order]
        return "%s %s(%s)" % (self.func_type, self.func_name,
                              ", ".join(arguments))
119 | 


--------------------------------------------------------------------------------
/sibyl/engine/__init__.py:
--------------------------------------------------------------------------------
1 | """This module abstracts running engine"""
2 | 
3 | from sibyl.engine.qemu import QEMUEngine
4 | from sibyl.engine.miasm import MiasmEngine
5 | 


--------------------------------------------------------------------------------
/sibyl/engine/engine.py:
--------------------------------------------------------------------------------
 1 | from sibyl.commons import init_logger
 2 | 
 3 | 
 4 | class Engine(object):
 5 |     """Wrapper on execution engine"""
 6 | 
 7 |     def __init__(self, machine):
 8 |         """Instanciate an Engine
 9 |         @machine: miasm2.analysis.machine:Machine instance"""
10 |         self.logger = init_logger(self.__class__.__name__)
11 | 
12 |     def take_snapshot(self):
13 |         self.vm_mem = self.jitter.vm.get_all_memory()
14 |         self.vm_regs = self.jitter.cpu.get_gpreg()
15 | 
16 |     def restore_snapshot(self, memory=True):
17 |         raise NotImplementedError("Abstract method")
18 | 
19 |     def run(self, address, timeout_seconds):
20 |         raise NotImplementedError("Abstract method")
21 | 
22 |     def prepare_run(self):
23 |         pass
24 | 
25 |     def restore_snapshot(self, memory=True):
26 |         raise NotImplementedError("Abstract method")
27 | 


--------------------------------------------------------------------------------
/sibyl/engine/miasm.py:
--------------------------------------------------------------------------------
 1 | import signal
 2 | 
 3 | from sibyl.engine.engine import Engine
 4 | from sibyl.commons import TimeoutException, END_ADDR
 5 | 
 6 | 
 7 | class MiasmEngine(Engine):
 8 |     """Engine based on Miasm"""
 9 | 
10 |     def __init__(self, machine, jit_engine):
11 |         jitter = machine.jitter(jit_engine)
12 |         jitter.set_breakpoint(END_ADDR, MiasmEngine._code_sentinelle)
13 |         self.jitter = jitter
14 | 
15 |         # Signal handling
16 |         #
17 |         # Due to Python signal handling implementation, signals aren't handled
18 |         # nor passed to Jitted code in case of registration with signal API
19 |         if jit_engine == "python":
20 |             signal.signal(signal.SIGALRM, MiasmEngine._timeout)
21 |         elif jit_engine in ["llvm", "tcc", "gcc"]:
22 |             self.jitter.vm.set_alarm()
23 |         else:
24 |             raise ValueError("Unknown engine: %s" % jit_engine)
25 | 
26 |         super(MiasmEngine, self).__init__(machine)
27 | 
28 | 
29 |     @staticmethod
30 |     def _code_sentinelle(jitter):
31 |         jitter.run = False
32 |         jitter.pc = 0
33 |         return True
34 | 
35 |     @staticmethod
36 |     def _timeout(signum, frame):
37 |         raise TimeoutException()
38 | 
39 |     def run(self, address, timeout_seconds):
40 |         self.jitter.init_run(address)
41 | 
42 |         try:
43 |             signal.alarm(timeout_seconds)
44 |             self.jitter.continue_run()
45 |         except (AssertionError, RuntimeError, ValueError,
46 |                 KeyError, IndexError, TimeoutException) as _:
47 |             return False
48 |         except Exception as error:
49 |             self.logger.exception(error)
50 |             return False
51 |         finally:
52 |             signal.alarm(0)
53 | 
54 |         return True
55 | 
56 |     def restore_snapshot(self, memory=True):
57 |         # Restore memory
58 |         if memory:
59 |             self.jitter.vm.reset_memory_page_pool()
60 |             self.jitter.vm.reset_code_bloc_pool()
61 |             for addr, metadata in self.vm_mem.iteritems():
62 |                 self.jitter.vm.add_memory_page(addr,
63 |                                                metadata["access"],
64 |                                                metadata["data"])
65 | 
66 |         # Restore registers
67 |         self.jitter.cpu.init_regs()
68 |         self.jitter.cpu.set_gpreg(self.vm_regs)
69 | 
70 |         # Reset intern elements
71 |         self.jitter.vm.set_exception(0)
72 |         self.jitter.cpu.set_exception(0)
73 |         self.jitter.bs._atomic_mode = False
74 | 


--------------------------------------------------------------------------------
/sibyl/heuristics/__init__.py:
--------------------------------------------------------------------------------
 1 | # This file is part of Sibyl.
 2 | # Copyright 2016 Camille MOUGEY <camille.mougey@cea.fr>
 3 | #
 4 | # Sibyl is free software: you can redistribute it and/or modify it
 5 | # under the terms of the GNU General Public License as published by
 6 | # the Free Software Foundation, either version 3 of the License, or
 7 | # (at your option) any later version.
 8 | #
 9 | # Sibyl is distributed in the hope that it will be useful, but WITHOUT
10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 | # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
12 | # License for more details.
13 | #
14 | # You should have received a copy of the GNU General Public License
15 | # along with Sibyl. If not, see <http://www.gnu.org/licenses/>.
16 | 


--------------------------------------------------------------------------------
/sibyl/heuristics/arch.py:
--------------------------------------------------------------------------------
 1 | "Module for architecture guessing"
 2 | 
 3 | from miasm2.analysis.binary import Container, ContainerUnknown
 4 | 
 5 | from sibyl.heuristics.heuristic import Heuristic
 6 | 
 7 | 
 8 | def container_guess(archinfo):
 9 |     """Use the architecture provided by the container, if any
10 |     @archinfo: ArchHeuristic instance
11 |     """
12 | 
13 |     cont = Container.from_stream(archinfo.stream)
14 | 
15 |     if isinstance(cont, ContainerUnknown) or not cont.arch:
16 |         return {}
17 | 
18 |     return {cont.arch: 1}
19 | 
20 | 
class ArchHeuristic(Heuristic):
    """Heuristics guessing the architecture of a stream"""

    # Passes enabled by default
    heuristics = [container_guess]

    def __init__(self, stream):
        """Prepare the heuristics for @stream
        @stream: stream of the binary, handed to the passes as self.stream"""
        super(ArchHeuristic, self).__init__()
        self.stream = stream
32 | 


--------------------------------------------------------------------------------
/sibyl/heuristics/csts.py:
--------------------------------------------------------------------------------
 1 | "Common constants for heuristics"
 2 | 
 3 | 
 4 | # Function prologs and epilogs binary pattern, mainly ripped from archinfo
 5 | # (project Angr)
 6 | 
 7 | # arch name -> list of regexp expression
 8 | func_prologs = {
 9 |     'x86_32': [
10 |         r"\x55\x8b\xec", # push ebp; mov ebp, esp
11 |         r"\x55\x89\xe5",  # push ebp; mov ebp, esp
12 |     ],
13 |     'arml': [
14 |         r"[\x00-\xff][\x00-\xff]\x2d\xe9",          # stmfd sp!, {xxxxx}
15 |         r"\x04\xe0\x2d\xe5",                        # push {lr}
16 |     ],
17 | }
# arch name -> list of regexp expression matching a function epilog
func_epilogs = {
    'x86_32': [
        r"\xc9\xc3", # leave; ret
        r"([^\x41][\x50-\x5f]{1}|\x41[\x50-\x5f])\xc3", # pop <reg>; ret
        # No separator inside the character class: a ',' there is a literal
        # and would wrongly accept the byte 0x2c as an opcode
        r"[^\x48][\x83\x81]\xc4([\x00-\xff]{1}|[\x00-\xff]{4})\xc3", #  add esp, <siz>; retq
    ],
    'arml': [
        # One pattern per entry: keep the commas, otherwise adjacent literals
        # are merged into a single regexp
        r"[\x00-\xff]{2}\xbd\xe8\x1e\xff\x2f\xe1",  # pop {xxx}; bx lr
        r"\x04\xe0\x9d\xe4\x1e\xff\x2f\xe1",        # pop {lr}; bx lr
    ],
}
29 | 


--------------------------------------------------------------------------------
/sibyl/heuristics/func.py:
--------------------------------------------------------------------------------
  1 | "Module for function address guessing"
  2 | import logging
  3 | import re
  4 | import tempfile
  5 | import subprocess
  6 | import shutil
  7 | import os
  8 | 
  9 | from miasm2.core.asmblock import AsmBlockBad, log_asmblock
 10 | 
 11 | from sibyl.heuristics.heuristic import Heuristic
 12 | import sibyl.heuristics.csts as csts
 13 | from sibyl.config import config
 14 | 
 15 | 
 16 | def recursive_call(func_heur, addresses):
 17 |     """Try to find new functions by following subroutines calls"""
 18 |     # Prepare disassembly engine
 19 |     dis_engine = func_heur.machine.dis_engine
 20 |     cont = func_heur.cont
 21 |     loc_db = cont.loc_db
 22 |     mdis = dis_engine(cont.bin_stream, loc_db=loc_db)
 23 |     mdis.follow_call = True
 24 |     # Launch disassembly
 25 |     cur_log_level = log_asmblock.level
 26 |     log_asmblock.setLevel(logging.CRITICAL)
 27 | 
 28 |     label2block = {}
 29 | 
 30 |     for start_addr in addresses:
 31 |         try:
 32 |             cfg_temp = mdis.dis_multiblock(start_addr)
 33 |         except TypeError as error:
 34 |             log_asmblock.critical("While disassembling: %s", error)
 35 |             continue
 36 | 
 37 |         # Merge label2block, take care of disassembly order due to cache
 38 |         for node in cfg_temp.nodes():
 39 |             label2block.setdefault(node, cfg_temp.loc_key_to_block(node))
 40 |             # Avoid re-disassembling
 41 |             mdis.dont_dis.append(loc_db.get_location_offset(node))
 42 |     log_asmblock.setLevel(cur_log_level)
 43 | 
 44 |     # Find potential addresses
 45 |     addresses = {}
 46 |     for bbl in label2block.itervalues():
 47 |         if len(bbl.lines) == 0:
 48 |             continue
 49 |         last_line = bbl.lines[-1]
 50 |         if last_line.is_subcall():
 51 |             for constraint in bbl.bto:
 52 |                 if constraint.c_t != "c_to" or \
 53 |                    constraint.loc_key not in label2block:
 54 |                     continue
 55 | 
 56 |                 succ = label2block[constraint.loc_key]
 57 |                 # Avoid redirectors
 58 |                 if len(succ.lines) == 0 or succ.lines[0].dstflow():
 59 |                     continue
 60 | 
 61 |                 # Avoid unmapped block and others relative bugs
 62 |                 if isinstance(succ, AsmBlockBad):
 63 |                     continue
 64 | 
 65 |                 addresses[loc_db.get_location_offset(succ.loc_key)] = 1
 66 | 
 67 |     return addresses
 68 | 
 69 | 
 70 | def _virt_find(virt, pattern):
 71 |     """Search @pattern in elfesteem @virt instance
 72 |     Inspired from elf_init.virt.find
 73 |     """
 74 |     regexp = re.compile(pattern)
 75 |     offset = 0
 76 |     sections = []
 77 |     for s in virt.parent.ph:
 78 |         s_max = s.ph.memsz
 79 |         if offset < s.ph.vaddr + s_max:
 80 |             sections.append(s)
 81 | 
 82 |     if not sections:
 83 |         raise StopIteration
 84 |     offset -= sections[0].ph.vaddr
 85 |     if offset < 0:
 86 |         offset = 0
 87 |     for s in sections:
 88 |         data = virt.parent.content[s.ph.offset:s.ph.offset + s.ph.filesz]
 89 |         ret = regexp.finditer(data[offset:])
 90 |         yield ret, s.ph.vaddr
 91 |         offset = 0
 92 | 
 93 | 
 94 | def pattern_matching(func_heur):
 95 |     """Search for function by pattern matching"""
 96 | 
 97 |     # Retrieve info
 98 |     architecture = func_heur.machine.name
 99 |     prologs = csts.func_prologs.get(architecture, [])
100 |     data = func_heur.cont.bin_stream.bin.virt
101 | 
102 |     addresses = {}
103 | 
104 |     # Search for function prologs
105 | 
106 |     pattern = "(" + ")|(".join(prologs) + ")"
107 |     for find_iter, vaddr_base in _virt_find(data, pattern):
108 |         for match in find_iter:
109 |             addr = match.start() + vaddr_base
110 |             addresses[addr] = 1
111 | 
112 |     return addresses
113 | 
114 | 
def named_symbols(func_heur):
    """Return the entry point and the addresses of named symbols, as a dict
    {address: 1}
    @func_heur: object whose `cont` attribute provides `entry_point` and
                `loc_db`
    """
    container = func_heur.cont
    symbols = container.loc_db

    # Start from the entry point
    found = {container.entry_point: 1}

    # Then add every location for which names are available (like 'main')
    for loc_key in symbols.loc_keys:
        if symbols.get_location_names(loc_key) is not None:
            found[symbols.get_location_offset(loc_key)] = 1
    return found
128 | 
129 | 
def ida_funcs(func_heur):
    """Use IDA heuristics to find functions
    @func_heur: object whose `filename` attribute is the binary given to IDA
    Return a dict {address: 1} of the functions reported by IDA; empty if no
    IDA path is configured
    """

    idaq64_path = config.idaq64_path
    if not idaq64_path:
        return {}

    # Prepare temporary files: script and output
    tmp_script = tempfile.NamedTemporaryFile(suffix=".py", delete=True)
    tmp_out = tempfile.NamedTemporaryFile(suffix=".addr", delete=True)

    try:
        # IDAPython script dumping the function addresses in tmp_out
        tmp_script.write("""idaapi.autoWait()
open("%s", "w").write("\\n".join("0x%%x" %% x for x in Functions()))
Exit(0)
""" % tmp_out.name)
        tmp_script.flush()

        # Launch IDA
        env = os.environ.copy()
        env["TVHEADLESS"] = "true"
        run = subprocess.Popen([idaq64_path, "-A",
                                "-OIDAPython:%s" % tmp_script.name,
                                func_heur.filename],
                               env=env,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
        )
        run.communicate()

        # Get back addresses, one per line (blank lines are ignored)
        tmp_out.seek(0)
        addresses = {int(line, 16): 1 for line in tmp_out if line.strip()}
    finally:
        # Clean-up, whatever happened: closing removes the temporary files
        tmp_script.close()
        tmp_out.close()

    return addresses
168 | 
169 | 
def ghidra_funcs(func_heur):
    """Use GHIDRA heuristics to find functions

    @func_heur: FuncHeuristic instance, its filename is the analysed binary
    Return a dict: function address -> vote (always 1); empty if no GHIDRA
    headless path is configured
    """

    ghidra_headless_path = config.ghidra_headless_path
    if not ghidra_headless_path:
        return {}

    script_path = os.path.dirname(config.ghidra_export_function)
    script_name = os.path.basename(config.ghidra_export_function)
    # The script name is matched literally: escape it, as the '.' of its
    # extension would otherwise match any character
    addr_re = re.compile(re.escape(script_name) + "> 0x([0-9a-f]+)")

    # Prepare temporary files: GHIDRA project location and output
    tmp_project_location = tempfile.mkdtemp(prefix="sibyl_ghidra_fakeproj")
    try:
        with tempfile.NamedTemporaryFile(suffix=".log",
                                         delete=True) as tmp_log:
            # Launch GHIDRA
            env = os.environ.copy()
            run = subprocess.Popen(
                [
                    ghidra_headless_path, tmp_project_location, "fakeproj",
                    "-import", func_heur.filename,
                    "-preScript", script_name,
                    "-scriptPath", script_path,
                    "-scriptlog", tmp_log.name,
                ],
                env=env,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
            run.communicate()

            # Get back addresses
            tmp_log.seek(0)
            addresses = {}
            for line in tmp_log:
                match = addr_re.search(line)
                if match:
                    addresses[int(match.group(1), 16)] = 1
    finally:
        # Clean-up, even if GHIDRA could not be launched
        shutil.rmtree(tmp_project_location)

    return addresses
212 | 
213 | 
class FuncHeuristic(Heuristic):
    """Provide heuristic for function start address detection"""

    # Enabled passes
    heuristics = [
        named_symbols,
        pattern_matching,
        recursive_call,
        ida_funcs,
        ghidra_funcs,
    ]

    def __init__(self, cont, machine, filename):
        """
        @cont: miasm2's Container instance
        @machine: miasm2's Machine instance
        @filename: target's filename
        """
        super(FuncHeuristic, self).__init__()
        self.cont = cont
        self.machine = machine
        self.filename = filename

    def do_votes(self):
        """Run the enabled passes, calling recursive_call at the end

        recursive_call is seeded with the addresses the other passes voted
        positively for, so it is kept out of the generic voting loop and run
        afterwards. The list of enabled passes is left untouched: removing
        the pass from it would alter the class-level list shared by every
        instance and disable the pass for all later votes.
        """
        had_own_list = "heuristics" in self.__dict__
        enabled = self.heuristics
        do_recursive = recursive_call in enabled

        # Temporarily hide recursive_call from the parent's voting loop
        self.heuristics = [heur for heur in enabled
                           if heur is not recursive_call]
        try:
            super(FuncHeuristic, self).do_votes()
        finally:
            if had_own_list:
                self.heuristics = enabled
            else:
                # Fall back on the class-level list
                del self.heuristics
        addresses = self._votes

        if do_recursive:
            seeds = [addr for addr, vote in addresses.items() if vote > 0]
            for addr, vote in recursive_call(self, seeds).items():
                addresses[addr] = addresses.get(addr, 0) + vote
        self._votes = addresses

    def guess(self):
        """Yield each address with a strictly positive cumulative vote"""
        for address, value in self.votes.items():
            # Heuristic may vote negatively
            if value > 0:
                yield address
261 | 


--------------------------------------------------------------------------------
/sibyl/heuristics/heuristic.py:
--------------------------------------------------------------------------------
 1 | "Main class for heuristics"
 2 | 
 3 | 
 4 | class Heuristic(object):
 5 |     """Main class for heuristics, handle common methods related to them"""
 6 | 
 7 |     # Enabled passes
 8 |     # passes are functions taking 'self' and returning a dict:
 9 |     #    candidates -> estimated probability
10 |     heuristics = []
11 | 
12 |     def __init__(self):
13 |         self._votes = None
14 | 
15 |     def do_votes(self):
16 |         """Call heuristics and get back votes
17 |         Use a cumulative linear strategy for comparison
18 |         """
19 |         votes = {}
20 |         for heuristic in self.heuristics:
21 |             for name, vote in heuristic(self).iteritems():
22 |                 votes[name] = votes.get(name, 0) + vote
23 |         self._votes = votes
24 | 
25 |     @property
26 |     def votes(self):
27 |         """Cumulative votes for each candidates"""
28 |         if not self._votes:
29 |             self.do_votes()
30 |         return self._votes
31 | 
32 |     @property
33 |     def heuristic_names(self):
34 |         """Return the list of available heuristics"""
35 |         return [func.__name__ for func in self.heuristics]
36 | 
37 |     def name2heuristic(self, name):
38 |         """Return the heuristic named @name"""
39 |         for func in self.heuristics:
40 |             if func.__name__ == name:
41 |                 return func
42 |         else:
43 |             raise KeyError("Unable to find %s" % name)
44 | 
45 |     def guess(self):
46 |         """Return the best candidate"""
47 |         sorted_votes = sorted(self.votes.iteritems(), key=lambda x:x[1])
48 |         if not sorted_votes:
49 |             # No solution
50 |             return False
51 |         best, _ = sorted_votes[-1]
52 |         return best
53 | 


--------------------------------------------------------------------------------
/sibyl/learn/__init__.py:
--------------------------------------------------------------------------------
1 | """This package provides methods for automatically creating new test cases for Sibyl
2 | """
3 | 
4 | __all__ = []
5 | 


--------------------------------------------------------------------------------
/sibyl/learn/generator/__init__.py:
--------------------------------------------------------------------------------
 1 | """This package provides methods for automatically creating new test cases for Sibyl
 2 | """
 3 | """This package provides methods for tracing a program and retrieving:
 4 | - executed instructions
 5 | - memory accesses
 6 | """
 7 | 
 8 | from sibyl.learn.generator.pythongenerator import PythonGenerator
 9 | 
10 | AVAILABLE_GENERATOR = {
11 |     "python": PythonGenerator,
12 | }
13 | 
14 | __all__ = ["AVAILABLE_GENERATOR"]
15 | 


--------------------------------------------------------------------------------
/sibyl/learn/generator/generator.py:
--------------------------------------------------------------------------------
 1 | from miasm2.analysis.machine import Machine
 2 | 
 3 | 
 4 | class Generator(object):
 5 |     '''
 6 |     Abstract class used to represent a generator
 7 |     A generator is a class that create a test from a snapshot
 8 |     Here a test is a sibyl test init function and a sibyl test check function
 9 |     '''
10 | 
11 |     def __init__(self, testcreator):
12 |         '''
13 |         @testcreator: TestCreator instance with associated information
14 |         '''
15 |         self.trace = testcreator.trace
16 |         self.prototype = testcreator.prototype
17 |         self.learnexceptiontext = testcreator.learnexceptiontext
18 |         self.types = testcreator.types
19 |         self.printer = Printer()
20 |         self.headerfile = testcreator.headerfile
21 |         self.ira = Machine(testcreator.machine).ira()
22 |         self.ptr_size = self.ira.sizeof_pointer()/8
23 |         self.logger = testcreator.logger
24 | 
25 |     def generate_test(self):
26 |         '''Abstract method that should return the string corresponding to the code of the init test'''
27 |         raise NotImplementedError("Abstract method")
28 | 
29 | 
class Printer(object):
    '''Accumulate text blocks, prefixing each line with the current indentation'''

    default_indentation_size = 4

    def __init__(self, indentation_size=default_indentation_size):
        '''
        @indentation_size: number of spaces added or removed per level
        '''
        self._indentation_size = indentation_size
        # Current indentation, expressed in number of spaces
        self._indentation_level = 0
        self._whitespace = ""
        self._print = ""

    def dump(self):
        '''Return the text accumulated so far'''
        return self._print

    def sub_lvl(self, n=1):
        '''Remove @n indentation levels
        Raise RuntimeError, leaving the printer unchanged, if the resulting
        indentation would be negative
        '''
        new_level = self._indentation_level - self._indentation_size * n

        # Validate before updating, so that a failure does not leave the
        # printer with a negative level
        if new_level < 0:
            raise RuntimeError("indentation level negative")

        self._indentation_level = new_level
        self._whitespace = " " * new_level

    def add_lvl(self, n=1):
        '''Add @n indentation levels'''
        self._indentation_level += self._indentation_size * n
        self._whitespace = " " * self._indentation_level

    def add_block(self, block):
        '''Append @block, each of its lines being indented at the current level'''
        indented = (self._whitespace + block).replace('\n', '\n' + self._whitespace)
        # A block ending with a newline leaves a dangling indentation: drop it
        self._print += indented.rstrip(' ')

    def add_empty_line(self):
        '''Append an empty line'''
        self._print += '\n'

    def add_lower_block(self, block, n=1):
        '''Append @block @n levels below the current one, then restore the level'''
        self.sub_lvl(n)
        try:
            self.add_block(block)
        finally:
            self.add_lvl(n)

    def add_upper_block(self, block, n=1):
        '''Append @block @n levels above the current one, then restore the level'''
        self.add_lvl(n)
        try:
            self.add_block(block)
        finally:
            self.sub_lvl(n)
70 | 


--------------------------------------------------------------------------------
/sibyl/learn/generator/templates.py:
--------------------------------------------------------------------------------
# Python
# Text fragments of a generated test module. Names written between braces
# ({funcname}, {header}, {testList}) are placeholders -- presumably filled
# through str.format by the Python generator; confirm in pythongenerator.py.

# Import statements of the generated module
imports = """
from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE

from sibyl.test.test import TestHeader, TestSetTest
""".strip()

# Header of the generated class, named after the function: Test{funcname}
classDef = """
class Test{funcname}(TestHeader):
    '''This is an auto-generated class, using the Sibyl learn module'''
"""

# Class attributes: the function name and the {header} text
classAttrib = """    func = "{funcname}"
    header = '''
{header}
'''
""".rstrip()

# Assignment of the {testList} expression to `tests`
classTestList = """
tests = {testList}
""".strip()

# Module-level TESTS list holding the generated class
registerTest = """
TESTS = [Test{funcname}]
""".strip()
26 | 


--------------------------------------------------------------------------------
/sibyl/learn/learn.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import copy
  3 | 
  4 | try:
  5 |     import pycparser
  6 | except ImportError:
  7 |     raise ImportError("pycparser module is needed to learn and generate")
  8 | from miasm2.core.objc import CTypesManagerNotPacked
  9 | from miasm2.core.ctypesmngr import CAstTypes
 10 | from miasm2.arch.x86.ctype import CTypeAMD64_unk
 11 | 
 12 | from sibyl.learn.replay import Replay
 13 | from sibyl.learn.findref import ExtractRef
 14 | from sibyl.learn.trace import Trace
 15 | from sibyl.commons import HeaderFile
 16 | from sibyl.config import config
 17 | 
 18 | 
 19 | class TestCreator(object):
 20 | 
 21 |     """Class used to create a test. Each instance is dedicated to only one learned function"""
 22 | 
 23 |     def __init__(self, functionname, address, program, header_filename,
 24 |                  tracer_class, generator_class, main_address, abicls, machine,
 25 |                  avoid_null):
 26 |         """
 27 |         @functionname: name of the symbol of the learned function
 28 |         @address: address of the learned function in the program
 29 |         @program: program that uses the learned function
 30 |         @header_filename: file containing headers for the targeted function
 31 |         @tracer_class: class of the tracer used to run the program
 32 |         @generator_class: class of the generator used to create the test
 33 |         @main_address: address where the tracer has to begin, if none the tracer begins at the entry point
 34 |         @abicls: class of the ABI used by the program
 35 |         @machine: machine used by the program
 36 |         @avoid_null: if set, do not consider snapshots returning a null value
 37 |         """
 38 |         self.functionname = functionname
 39 |         self.address = address
 40 |         self.program = program
 41 |         self.header_filename = header_filename
 42 |         self.tracer_class = tracer_class
 43 |         self.generator_class = generator_class
 44 |         self.main_address = main_address
 45 |         self.abicls = abicls
 46 |         self.machine = machine
 47 |         self.types = None
 48 |         self.avoid_null = avoid_null
 49 | 
 50 |         self.learnexceptiontext = []
 51 | 
 52 |         self.logger = logging.getLogger("testcreator")
 53 |         console_handler = logging.StreamHandler()
 54 |         log_format = "%(levelname)-5s: %(message)s"
 55 |         console_handler.setFormatter(logging.Formatter(log_format))
 56 |         self.logger.addHandler(console_handler)
 57 |         self.logger.setLevel(logging.INFO)
 58 | 
 59 |     def create_trace(self):
 60 |         '''Create the raw trace'''
 61 | 
 62 |         self.logger.info("Tracing the program")
 63 |         tracer = self.tracer_class(
 64 |             self.program, self.address, self.main_address, self.abicls, self.machine)
 65 |         self.trace_iter = tracer.do_trace()
 66 | 
 67 |     def prune_snapshots(self):
 68 |         '''Prune available snapshots according to the pruning politics'''
 69 | 
 70 |         self.logger.info("Parsing and prunning snapshots: strategy %s, " \
 71 |                          "with %d elements keeped each time",
 72 |                          config.prune_strategy,
 73 |                          config.prune_keep)
 74 |         trace = Trace()
 75 |         ignored = None
 76 | 
 77 |         # Prune depending on the strategy
 78 |         if config.prune_strategy == "branch":
 79 |             ignored = 0
 80 |             already_keeped = {} # path -> seen number
 81 |             for snapshot in self.trace_iter:
 82 |                 # TODO use abi
 83 |                 if self.avoid_null and snapshot.output_reg["RAX"] == 0:
 84 |                     ignored += 1
 85 |                     continue
 86 | 
 87 |                 path = frozenset(snapshot.paths.edges())
 88 |                 current = already_keeped.get(path, 0)
 89 |                 if current < config.prune_keep:
 90 |                     # not enough sample of this current snapshot branch coverage
 91 |                     trace.append(snapshot)
 92 |                 else:
 93 |                     ignored += 1
 94 |                 already_keeped[path] = current + 1
 95 |                 if config.prune_keep_max and len(trace) >= config.prune_keep_max:
 96 |                     self.logger.info("Max number of snapshot reached!")
 97 |                     break
 98 | 
 99 |         elif config.prune_strategy == "keepall":
100 |             # Do not remove any snapshot
101 |             trace = list(self.trace_iter)
102 |             ignored = 0
103 |         elif config.prune_strategy == "keep":
104 |             # Remove all snapshot but one or a few (according to config)
105 |             for i, snapshot in xrange(self.trace):
106 |                 trace.append(snapshot)
107 |                 if len(trace) >= config.prune_keep:
108 |                     break
109 |         else:
110 |             raise ValueError("Unsupported strategy type: %s" % config.prune_strategy)
111 | 
112 |         self.trace = trace
113 |         if ignored is None:
114 |             ignored = "unknown"
115 |         self.logger.info("Keeped: %d, Ignored: %s", len(self.trace),
116 |                          ignored)
117 | 
118 |         # If the trace is empty, test can not be created
119 |         if not self.trace:
120 |             raise RuntimeError(
121 |                 "Test can not be created: function seems not to be called or " \
122 |                 "the prune politic is too restrictive")
123 | 
124 |     def clean_trace(self):
125 |         '''Try to remove all implementation dependant elements from the trace'''
126 | 
127 |         # Turn the trace into an implementation independent one
128 |         self.logger.info("Cleaning snapshots")
129 |         self.trace.clean()
130 | 
131 |     def test_trace(self):
132 |         '''Find snapshots that do not recognize the learned function'''
133 | 
134 |         self.logger.info("Replaying cleaned snapshots")
135 |         to_remove = []
136 |         for i, snapshot in enumerate(self.trace):
137 |             self.logger.info("Replaying snapshot %d", i)
138 |             r = Replay(self, snapshot)
139 |             if not r.run():
140 |                 self.logger.warn("Replay error: %s", ", ".join(r.replayexception))
141 |                 to_remove.append(snapshot)
142 |         for snapshot in to_remove:
143 |             self.trace.remove(snapshot)
144 | 
145 |     def extract_refs(self):
146 |         """Real extraction of input"""
147 | 
148 |         self.logger.info("Extract references from snapshots")
149 |         for i, snapshot in enumerate(self.trace):
150 |             self.logger.info("Extracting snapshot %d", i)
151 |             r = ExtractRef(self, snapshot)
152 |             if not r.run():
153 |                 self.learnexceptiontext += r.replayexception
154 | 
155 |     def create_test_from_trace(self):
156 |         self.logger.info("Generating the final test class")
157 |         generator = self.generator_class(self)
158 |         return generator.generate_test()
159 | 
160 |     def parse_types(self):
161 |         """Extract the prototype of the targeted function and associated type"""
162 |         ctype_manager = CTypesManagerNotPacked(CAstTypes(), CTypeAMD64_unk())
163 |         with open(self.header_filename) as fdesc:
164 |             data = fdesc.read()
165 |             self.headerfile = HeaderFile(data, ctype_manager)
166 | 
167 |         self.prototype = self.headerfile.functions[self.functionname]
168 |         self.types = ctype_manager
169 |         self.logger.info("Found prototype: %s" % self.prototype)
170 | 
171 |     def create_test(self):
172 |         """
173 |         Main function of the trace that is in charge of calling other methods in the right order
174 |         Return a string that correspong to the code of the test class
175 |         """
176 | 
177 |         self.parse_types()
178 | 
179 | 
180 |         self.create_trace()
181 | 
182 |         self.prune_snapshots()
183 | 
184 |         self.clean_trace()
185 | 
186 |         self.test_trace()
187 |         assert len(self.trace) > 0
188 | 
189 |         self.extract_refs()
190 | 
191 |         return self.create_test_from_trace()
192 | 
193 | 


--------------------------------------------------------------------------------
/sibyl/learn/learnexception.py:
--------------------------------------------------------------------------------
 1 | class LearnException(Exception):
 2 | 
 3 |     def __init__(self, info):
 4 |         super(LearnException, self).__init__()        
 5 |         self.info = info
 6 | 
 7 |     def repr_class_name(self):
 8 |         return "LearnException"
 9 | 
10 |     def __repr__(self):
11 |         return self.repr_class_name() + "(" + self.info + ")"
12 | 
13 | 
14 | class ReturnPointerException(LearnException):
15 | 
16 |     def __init__(self):
17 |         super(ReturnPointerException, self).__init__(
18 |             "return value might be a pointer")
19 | 
20 |     def repr_class_name(self):
21 |         return "ReturnPointerException"
22 | 
23 | 
24 | class ReturnValueException(LearnException):
25 | 
26 |     def __init__(self):
27 |         super(ReturnPointerException, self).__init__(
28 |             "return value is incorrect after replaying snapshot, the function might return nthing")
29 | 
30 |     def repr_class_name(self):
31 |         return "ReturnValueException"
32 | 


--------------------------------------------------------------------------------
/sibyl/learn/replay.py:
--------------------------------------------------------------------------------
  1 | import struct
  2 | 
  3 | from miasm2.jitter.loader.elf import vm_load_elf
  4 | from miasm2.analysis.machine import Machine
  5 | from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE, EXCEPT_ACCESS_VIOL, EXCEPT_DIV_BY_ZERO, EXCEPT_PRIV_INSN
  6 | 
  7 | from sibyl.config import config
  8 | 
  9 | 
 10 | class Replay(object):
 11 |     '''
 12 |     Class used to run a snapshot and check that it recognize or not a given function code
 13 |     Potential replay errors are stored in self.learnexception
 14 |     '''
 15 | 
 16 |     def __init__(self, testcreator, replayed_snapshot):
 17 |         '''
 18 |         @testcreator: TestCreator instance with associated information
 19 |         @replayed_snapshot: snapshot to be used
 20 |         '''
 21 |         self.isFuncFound = False
 22 |         self.filename = testcreator.program
 23 |         self.learned_addr = testcreator.address
 24 |         self.snapshot = replayed_snapshot
 25 |         self.replayexception = []
 26 |         self.abicls = testcreator.abicls
 27 |         self.machine = Machine(testcreator.machine)
 28 |         self.trace = testcreator.trace
 29 |         self.logger = testcreator.logger
 30 |         self.ira = self.machine.ira()
 31 |         self.ptr_size = self.ira.sizeof_pointer()/8
 32 | 
 33 |     def use_snapshot(self, jitter):
 34 |         '''Initilize the VM with the snapshot informations'''
 35 |         for reg, value in self.snapshot.input_reg.iteritems():
 36 |             setattr(jitter.cpu, reg, value)
 37 | 
 38 |         # Set values for input memory
 39 |         for addr, mem in self.snapshot.in_memory.iteritems():
 40 |             assert mem.access != 0
 41 |             if not jitter.vm.is_mapped(addr, mem.size):
 42 |                 jitter.vm.add_memory_page(addr, mem.access, mem.data)
 43 |             else:
 44 |                 if jitter.vm.get_mem_access(addr) & 0b11 == mem.access & 0b11:
 45 |                     jitter.vm.set_mem(addr, mem.data)
 46 |                 else:
 47 |                     # TODO memory page is already set but have not the
 48 |                     # same access right. However delete page does not
 49 |                     # exist
 50 |                     jitter.vm.set_mem(addr, mem.data)
 51 | 
 52 |     def compare_snapshot(self, jitter):
 53 |         '''Compare the expected result with the real one to determine if the function is recognize or not'''
 54 |         func_found = True
 55 | 
 56 |         for reg, value in self.snapshot.output_reg.iteritems():
 57 |             if value != getattr(jitter.cpu, reg):
 58 |                 self.replayexception += ["output register %s wrong : %i expected, %i found" % (reg, value, getattr(jitter.cpu, reg))]
 59 |                 func_found = False
 60 | 
 61 |         for addr, mem in self.snapshot.out_memory.iteritems():
 62 |             self.logger.debug("Check @%s, %s bytes: %r", hex(addr), hex(mem.size), mem.data[:0x10])
 63 |             if mem.data != jitter.vm.get_mem(addr, mem.size):
 64 |                 self.replayexception += ["output memory wrong at 0x%x: %s expected, %s found" % (addr + offset, repr(mem.data), repr(jitter.vm.get_mem(addr + offset, mem.size)))]
 65 |                 func_found = False
 66 | 
 67 |         return func_found
 68 | 
 69 |     def end_func(self, jitter):
 70 |         if jitter.vm.is_mapped(getattr(jitter.cpu, self.ira.ret_reg.name), 1):
 71 |             self.replayexception += ["return value might be a pointer"]
 72 | 
 73 |         self.isFuncFound = self.compare_snapshot(jitter)
 74 | 
 75 |         jitter.run = False
 76 |         return False
 77 | 
 78 |     def run(self):
 79 |         '''Main function that is in charge of running the test and return the result:
 80 |         true if the snapshot has recognized the function, false else.'''
 81 | 
 82 |         # Retrieve miasm tools
 83 |         jitter = self.machine.jitter(config.miasm_engine)
 84 | 
 85 |         vm_load_elf(jitter.vm, open(self.filename, "rb").read())
 86 | 
 87 |         # Init segment
 88 |         jitter.ir_arch.do_stk_segm = True
 89 |         jitter.ir_arch.do_ds_segm = True
 90 |         jitter.ir_arch.do_str_segm = True
 91 |         jitter.ir_arch.do_all_segm = True
 92 | 
 93 |         FS_0_ADDR = 0x7ff70000
 94 |         jitter.cpu.FS = 0x4
 95 |         jitter.cpu.set_segm_base(jitter.cpu.FS, FS_0_ADDR)
 96 |         jitter.vm.add_memory_page(
 97 |             FS_0_ADDR + 0x28, PAGE_READ, "\x42\x42\x42\x42\x42\x42\x42\x42", "Stack canary FS[0x28]")
 98 | 
 99 |         # Init the jitter with the snapshot
100 |         self.use_snapshot(jitter)
101 | 
102 |         # Get the return address for our breakpoint
103 |         return_addr = struct.unpack("P", jitter.vm.get_mem(jitter.cpu.RSP,
104 |                                                            0x8))[0]
105 |         jitter.add_breakpoint(return_addr, self.end_func)
106 | 
107 |         # Run the execution
108 |         jitter.init_run(self.learned_addr)
109 | 
110 |         try:
111 |             jitter.continue_run()
112 |             assert jitter.run == False
113 |         except AssertionError:
114 |             # set the replayexception to the correct error
115 |             if jitter.vm.get_exception() & EXCEPT_ACCESS_VIOL:
116 |                 self.replayexception += ["access violation"]
117 |             elif jitter.vm.get_exception() & EXCEPT_DIV_BY_ZERO:
118 |                 self.replayexception += ["division by zero"]
119 |             elif jitter.vm.get_exception() & EXCEPT_PRIV_INSN:
120 |                 self.replayexception += ["execution of private instruction"]
121 |             else:
122 |                 self.replayexception += ["exception no %i" % (jitter.vm.get_exception())]
123 |             self.isFuncFound = False
124 | 
125 |         return self.isFuncFound
126 | 


--------------------------------------------------------------------------------
/sibyl/learn/trace.py:
--------------------------------------------------------------------------------
  1 | from collections import namedtuple
  2 | import struct
  3 | 
  4 | from sibyl.learn.replay import Replay
  5 | from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE
  6 | from miasm2.core.graph import DiGraph
  7 | from miasm2.analysis.machine import Machine
  8 | 
  9 | 
 10 | class Trace(list):
 11 |     '''List of snapshot'''
 12 | 
 13 |     def __init__(self, *args, **kwargs):
 14 |         super(Trace, self).__init__(*args, **kwargs)
 15 |         # Image name -> symbol name -> in-memory address
 16 |         self.symbols = {}
 17 | 
 18 |     def add_symbol(self, image_name, symbol_name, symbol_addr):
 19 |         """Add the symbol:addr from the image image_name"""
 20 |         self.symbols.setdefault(image_name, dict())[symbol_name] = symbol_addr
 21 | 
 22 |     def symbol_to_address(self, symbol_name, image_name=None):
 23 |         """Get the corresponding in-memory address from a symbol, or None if not found
 24 |         If image_name is set, restrict to the given image only
 25 |         """
 26 |         if image_name is not None:
 27 |             return self.symbols[image_name].get(symbol_name, None)
 28 | 
 29 |         found = None
 30 |         for symbols in self.symbols.itervalues():
 31 |             if symbol_name in symbols:
 32 |                 if found is not None:
 33 |                     raise ValueError("At least two symbols for this symbol")
 34 |                 found = symbols[symbol_name]
 35 |         return found
 36 | 
 37 |     def clean(self):
 38 |         '''Try to remove all implementation dependant elements from the trace'''
 39 | 
 40 |         clean_trace = Trace()
 41 |         for snapshot in self:
 42 |             clean_trace.append(snapshot.clean())
 43 |         clean_trace.symbols = self.symbols.copy()
 44 |         return clean_trace
 45 | 
 46 | 
 47 | class MemoryAccess(object):
 48 |     '''Represent a memory block, read or write by the learned function'''
 49 | 
 50 |     def __init__(self, size, data, access):
 51 | 
 52 |         self.size = size
 53 |         self.data = data
 54 |         self.access = access
 55 | 
 56 |     def __str__(self):
 57 |         str_access = ""
 58 |         if self.access & PAGE_READ:
 59 |             str_access += "READ"
 60 |         if self.access & PAGE_WRITE:
 61 |             if str_access != "":
 62 |                 str_access += " "
 63 |             str_access += "WRITE"
 64 | 
 65 |         return "size: " + str(self.size) + ", data: " + repr(self.data) + ", access: " + str_access
 66 | 
 67 |     def __repr__(self):
 68 |         return "<" + str(self) + ">"
 69 | 
 70 | 
 71 | class Snapshot(object):
 72 | 
 73 |     @classmethod
 74 |     def get_byte(cls, value, byte):
 75 |         '''Return the byte @byte of the value'''
 76 |         return struct.pack('@B', (value & (0xFF << (8 * byte))) >> (8 * byte))
 77 | 
 78 |     @classmethod
 79 |     def unpack_ptr(cls, value):
 80 |         return struct.unpack('@P', value)[0]
 81 | 
 82 |     def __init__(self, abicls, machine):
 83 |         self.abicls = abicls
 84 | 
 85 |         self.input_reg = {}
 86 |         self.output_reg = {}
 87 | 
 88 |         self._previous_addr = 0
 89 |         self._current_addr = 0
 90 |         self._instr_count = 0
 91 |         self._pending_call = []
 92 |         # Function addr -> list of information on calls
 93 |         self.function_calls = {}
 94 |         self.paths = DiGraph()
 95 | 
 96 |         self.in_memory = {}
 97 |         self.out_memory = {}
 98 | 
 99 |         self._ira = Machine(machine).ira()
100 |         self._ptr_size = self._ira.sizeof_pointer()/8
101 |         self.sp = self._ira.sp.name
102 | 
103 |     def add_input_register(self, reg_name, reg_value):
104 |         self.input_reg[reg_name] = reg_value
105 | 
106 |     def add_output_register(self, reg_name, reg_value):
107 |         self.output_reg[reg_name] = reg_value
108 | 
109 |     def add_memory_read(self, address, size, value):
110 |         for i in xrange(size):
111 |             self.out_memory[address + i] = MemoryAccess(1,
112 |                                                         Snapshot.get_byte(value, i),
113 |                                                         0,  # Output access never used
114 |             )
115 | 
116 |             if address + i not in self.in_memory:
117 |                 self.in_memory[address + i] = MemoryAccess(1,
118 |                                                            Snapshot.get_byte(value, i),
119 |                                                            PAGE_READ,
120 |                 )
121 | 
122 |             else:
123 |                 self.in_memory[address + i].access |= PAGE_READ
124 | 
125 |     def add_memory_write(self, address, size, value):
126 |         for i in xrange(size):
127 |             self.out_memory[address + i] = MemoryAccess(1,
128 |                                                         Snapshot.get_byte(value, i),
129 |                                                         0,  # Output access never used
130 |             )
131 | 
132 |             if address + i not in self.in_memory:
133 |                 self.in_memory[address + i] = MemoryAccess(1,
134 |                                                            "\x00",
135 |                                                            # The value is
136 |                                                            # not used by the
137 |                                                            # test
138 |                                                            PAGE_WRITE,
139 |                 )
140 | 
141 |             else:
142 |                 self.in_memory[address + i].access |= PAGE_WRITE
143 | 
144 |     def add_executed_instruction(self, address):
145 |         '''
146 |         Function called to signal that the address has been executed
147 |         This function has to be called in the order of their executed instruction
148 |         Else paths can not be updated correctly
149 |         '''
150 |         self._previous_addr = self._current_addr
151 |         self._current_addr = address
152 |         self.paths.add_uniq_edge(self._previous_addr, self._current_addr)
153 |         self._instr_count += 1
154 | 
155 |         # Resolve call destination
156 |         if (self._pending_call and
157 |             self._previous_addr == self._pending_call[-1]["caller_addr"]):
158 |             info = self._pending_call[-1]
159 |             info["dest"] = address
160 |             info["beg"] = self._instr_count
161 | 
162 | 
163 |     def add_call(self, caller_addr, stack_ptr):
164 |         '''
165 |         Function call, target is not determined yet
166 |         called *before* instruction execution
167 |         '''
168 |         info = {"stack_ptr": stack_ptr,
169 |                 "caller_addr": caller_addr,
170 |         }
171 |         self._pending_call.append(info)
172 | 
173 |     def add_ret(self, ret_addr, stack_ptr, value):
174 |         '''
175 |         Function ret
176 |         called *after* instruction execution
177 |         '''
178 |         # Find corresponding call
179 |         assert self._pending_call
180 |         assert self._pending_call[-1]["stack_ptr"] >= stack_ptr
181 | 
182 |         info = self._pending_call.pop()
183 |         info["end"] = self._instr_count
184 |         info["ret"] = value
185 |         current_interval = self.function_calls.setdefault(info["dest"],
186 |                                                           list()).append(info)
187 | 
188 |     def clean(self):
189 |         """Clean the snapshot for further uses"""
190 | 
191 |         self.agglomerate_memory(self.in_memory)
192 |         self.agglomerate_memory(self.out_memory)
193 | 
194 |     def agglomerate_memory(self, mem):
195 |         '''
196 |         Assuming @mem is only composed of non-overlapping block
197 |         this function agglomerate contiguous blocks having the same access right
198 |         '''
199 |         for addr in sorted(mem.keys()):
200 | 
201 |             # if the addr is not already deleted
202 |             if addr in mem:
203 | 
204 |                 end_addr = addr + mem[addr].size
205 |                 while end_addr in mem:
206 |                     cur_mem = mem[addr]
207 |                     next_mem = mem[end_addr]
208 | 
209 |                     # If access change, do not agglomerate
210 |                     if cur_mem.access != next_mem.access:
211 |                         break
212 | 
213 |                     cur_mem.size += next_mem.size
214 |                     cur_mem.data += next_mem.data
215 |                     del mem[end_addr]
216 |                     end_addr += next_mem.size
217 | 


--------------------------------------------------------------------------------
/sibyl/learn/tracer/__init__.py:
--------------------------------------------------------------------------------
 1 | """This package provides methods for tracing a program and retrieving:
 2 | - executed instructions
 3 | - memory accesses
 4 | """
 5 | 
 6 | from sibyl.learn.tracer.pin import TracerPin
 7 | from sibyl.learn.tracer.miasm import TracerMiasm
 8 | 
# Available tracers: maps a tracer name to the Tracer subclass implementing it
AVAILABLE_TRACER = {
    "pin": TracerPin,
    "miasm": TracerMiasm
}
13 | 
14 | __all__ = ["AVAILABLE_TRACER"]
15 | 


--------------------------------------------------------------------------------
/sibyl/learn/tracer/miasm.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | This module gives a tracer that uses miasm to run the program
  3 | '''
  4 | 
  5 | from sibyl.learn.tracer.tracer import Tracer
  6 | from sibyl.learn.trace import Trace, Snapshot
  7 | 
  8 | from miasm2.jitter.emulatedsymbexec import EmulatedSymbExec
  9 | from miasm2.jitter.csts import PAGE_READ
 10 | from miasm2.analysis.machine import Machine
 11 | from miasm2.jitter.loader.elf import vm_load_elf
 12 | 
 13 | class CustomEmulatedSymbExec(EmulatedSymbExec):
 14 |     '''New emulator that trap all memory read and write which is needed by the miasm tracer'''
 15 | 
 16 |     def __init__(self, *args, **kwargs):
 17 |         super(CustomEmulatedSymbExec, self).__init__(*args, **kwargs)
 18 | 
 19 |         self._read_callback = set()
 20 |         self._write_callback = set()
 21 | 
 22 |     def add_read_call(self, callback):
 23 |         '''Add a new callback used each time a read appended'''
 24 |         self._read_callback.add(callback)
 25 | 
 26 |     def remove_read_callback(self, callback):
 27 |         '''Remove a read callback'''
 28 |         self._read_callback.remove(callback)
 29 | 
 30 |     def add_write_call(self, callback):
 31 |         '''Add a new callback used each time a write appended'''
 32 |         self._write_callback.add(callback)
 33 | 
 34 |     def remove_write_callback(self, callback):
 35 |         '''Remove a write callback'''
 36 |         self._write_callback.remove(callback)
 37 | 
 38 |     def _func_read(self, expr_mem):
 39 |         '''Function call for each read. We overwrite it to intercept the read'''
 40 |         for callback in self._read_callback:
 41 |             callback(self, expr_mem)
 42 | 
 43 |         return super(CustomEmulatedSymbExec, self)._func_read(expr_mem)
 44 | 
 45 |     def _func_write(self, symb_exec, dest, data):
 46 |         '''Function call for each write. We overwrite it to intercept the write'''
 47 |         for callback in self._write_callback:
 48 |             callback(self, dest, data)
 49 | 
 50 |         super(CustomEmulatedSymbExec, self)._func_write(symb_exec, dest, data)
 51 | 
 52 | 
 53 | class TracerMiasm(Tracer):
 54 | 
 55 |     '''Tracer that uses miasm'''
 56 | 
 57 |     def __init__(self, *args, **kwargs):
 58 |         super(TracerMiasm, self).__init__(*args, **kwargs)
 59 | 
 60 |         self.isTracing = False
 61 |         self.trace = None
 62 | 
 63 |     def read_callback(self, symb_exec, expr_mem):
 64 |         '''Read callback that add the read event to the snapshot'''
 65 |         addr = int(expr_mem.ptr)
 66 |         size = expr_mem.size / 8
 67 |         value = int(symb_exec.cpu.get_mem(addr, size)[::-1].encode("hex"), 16)
 68 | 
 69 |         self.current_snapshot.add_memory_read(addr, size, value)
 70 | 
 71 |     def write_callback(self, symb_exec, dest, data):
 72 |         '''Write callback that add the read event to the snapshot'''
 73 |         addr = int(dest.ptr)
 74 |         size = data.size / 8
 75 |         value = int(data.arg.arg)
 76 | 
 77 |         self.current_snapshot.add_memory_write(addr, size, value)
 78 | 
 79 |     def exec_callback(self, jitter):
 80 |         '''Callback called before each bloc execution'''
 81 |         self.current_snapshot.add_executed_instruction(jitter.pc)
 82 |         return True
 83 | 
 84 |     def begin_func(self, jitter):
 85 |         '''
 86 |         Function called by miasm at the begin of every execution of the traced function
 87 |         '''
 88 |         self.old_ret_addr = jitter.pop_uint64_t()
 89 |         jitter.push_uint64_t(0x1337beef)
 90 | 
 91 |         self.isTracing = True
 92 | 
 93 |         self.current_snapshot = Snapshot(self.abicls, self.machine)
 94 | 
 95 |         # Add the breakpoint to watch every memory read and write
 96 |         jitter.jit.symbexec.add_read_call(self.read_callback)
 97 |         jitter.jit.symbexec.add_write_call(self.write_callback)
 98 | 
 99 |         # Called before the execution of each basic bloc
100 |         jitter.exec_cb = self.exec_callback
101 | 
102 |         for reg_name in self.reg_list:
103 |             self.current_snapshot.add_input_register(
104 |                 reg_name, getattr(jitter.cpu, reg_name))
105 | 
106 |         return True
107 | 
108 |     def end_func(self, jitter):
109 |         '''
110 |         Function called by miasm at the end of every execution of the traced function
111 |         '''
112 | 
113 |         jitter.pc = self.old_ret_addr
114 | 
115 |         for reg_name in self.reg_list:
116 |             self.current_snapshot.add_output_register(
117 |                 reg_name, getattr(jitter.cpu, reg_name))
118 | 
119 |         jitter.exec_cb = None
120 | 
121 |         # Remove memory breakpoints
122 |         jitter.jit.symbexec.remove_read_callback(self.read_callback)
123 |         jitter.jit.symbexec.remove_write_callback(self.write_callback)
124 | 
125 |         self.trace.append(self.current_snapshot)
126 | 
127 |         self.isTracing = False
128 | 
129 |         return True
130 | 
131 |     def end_do_trace(self, jitter):
132 |         '''
133 |         Function called by miasm at the end of the program's execution
134 |         '''
135 |         jitter.run = False
136 |         return False
137 | 
138 |     def do_trace(self):
139 |         '''Run miasm and construct the trace'''
140 | 
141 |         self.trace = Trace()
142 | 
143 |         # Retrieve miasm tools
144 |         machine = Machine(self.machine)
145 |         jitter = machine.jitter("python")
146 | 
147 |         # Set the jitter to use our custom emulator
148 |         jitter.jit.symbexec = CustomEmulatedSymbExec(
149 |             jitter.cpu, jitter.vm, jitter.jit.ir_arch, {})
150 |         jitter.jit.symbexec.enable_emulated_simplifications()
151 |         jitter.jit.symbexec.reset_regs()
152 | 
153 |         elf = vm_load_elf(jitter.vm, open(self.program, "rb").read())
154 | 
155 |         # Init segment
156 |         jitter.ir_arch.do_stk_segm = True
157 |         jitter.ir_arch.do_ds_segm = True
158 |         jitter.ir_arch.do_str_segm = True
159 |         jitter.ir_arch.do_all_segm = True
160 | 
161 |         FS_0_ADDR = 0x7ff70000
162 |         jitter.cpu.FS = 0x4
163 |         jitter.cpu.set_segm_base(jitter.cpu.FS, FS_0_ADDR)
164 |         jitter.vm.add_memory_page(
165 |             FS_0_ADDR + 0x28, PAGE_READ, "\x42\x42\x42\x42\x42\x42\x42\x42")
166 | 
167 |         # Init stack and push main args
168 |         jitter.init_stack()
169 |         jitter.push_uint64_t(1)
170 |         jitter.vm.add_memory_page(0x800000, PAGE_READ, self.program)
171 |         jitter.push_uint64_t(0x800000)
172 |         jitter.push_uint64_t(0xDEADDEAD)
173 | 
174 |         jitter.add_breakpoint(0xDEADDEAD, self.end_do_trace)
175 |         jitter.add_breakpoint(0x1337beef, self.end_func)
176 |         jitter.add_breakpoint(self.address, self.begin_func)
177 | 
178 |         # Run the execution
179 |         if self.main_address is None:
180 |             jitter.init_run(elf.Ehdr.entry)
181 |         else:
182 |             jitter.init_run(self.main_address)
183 | 
184 |         jitter.continue_run()
185 |         assert jitter.run == False
186 |         return self.trace
187 | 


--------------------------------------------------------------------------------
/sibyl/learn/tracer/pin.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | This module gives a tracer that uses pin (3.0-76991-gcc-linux) to run the program
  3 | '''
  4 | 
  5 | import tempfile
  6 | import os
  7 | 
  8 | import sibyl
  9 | from sibyl.learn.tracer.tracer import Tracer
 10 | from sibyl.learn.trace import Trace, Snapshot
 11 | from sibyl.config import config
 12 | 
 13 | 
 14 | class TracerPin(Tracer):
 15 | 
 16 |     '''Tracer that uses pin'''
 17 | 
 18 |     def do_trace(self):
 19 |         '''Run the pintool and construct the trace from the pintool output file'''
 20 | 
 21 |         tmpName = self.__run_pin_cmd()
 22 |         return self.__parse_pin_output_file(open(tmpName))
 23 | 
 24 |     def __run_pin_cmd(self):
 25 |         '''Run the pintool'''
 26 | 
 27 |         tmp = tempfile.NamedTemporaryFile(suffix=".trace", delete=False)
 28 |         tmpName = tmp.name
 29 |         tmp.close()
 30 | 
 31 |         pintool = config.pin_tracer
 32 |         if not pintool or not os.path.exists(pintool):
 33 |             raise RuntimeError("Unable to found the PIN-tool at '%s'. Please "\
 34 |                                "update the associated configuration" % pintool)
 35 | 
 36 |         cmd = [os.path.join(config.pin_root, "pin"), "-ifeellucky", "-t",
 37 |                pintool, "-a", "0x%x" % self.address, "-o", tmpName,
 38 |                "--", self.program]
 39 |         self._run_cmd(cmd)
 40 | 
 41 |         return tmpName
 42 | 
 43 |     def __parse_pin_output_file(self, traceFile):
 44 |         '''Parse the file created by the pintool in order to construct the trace'''
 45 | 
 46 |         trace = Trace()
 47 | 
 48 |         # Statefull elements
 49 |         started = False
 50 |         current_image_name = None
 51 | 
 52 |         # State machine for parsing
 53 |         for line in traceFile:
 54 |             infos = line.strip().split(' ')
 55 |             entry_type = infos[0]
 56 | 
 57 | 
 58 |             # Image loaded in memory
 59 |             # IMG <img_name>
 60 |             if entry_type == "IMG":
 61 |                 img_name = infos[1]
 62 |                 current_image_name = img_name
 63 |                 continue
 64 | 
 65 |             # Symbol entry
 66 |             # S <symbol_addr> <symbol_name>
 67 |             elif entry_type == 'S':
 68 |                 assert current_image_name is not None
 69 |                 symbol_name = infos[2]
 70 |                 symbol_addr = int(infos[1], 16)
 71 |                 trace.add_symbol(current_image_name, symbol_name, symbol_addr)
 72 |                 continue
 73 | 
 74 |             values = [int(v, 16) for v in infos[1:]]
 75 | 
 76 |             # Start of the learned function
 77 |             # Fields are registers value
 78 |             if entry_type == 'I':
 79 |                 if not started:
 80 |                     started = True
 81 |                     current_snapshot = Snapshot(self.abicls, self.machine)
 82 | 
 83 |                 for i, reg_name in enumerate(self.reg_list):
 84 |                     current_snapshot.add_input_register(reg_name, values[i])
 85 | 
 86 |             # Executed instructions address
 87 |             elif entry_type == '@':
 88 |                 if started:
 89 |                     current_snapshot.add_executed_instruction(values[0])
 90 | 
 91 |             # Memory read access
 92 |             # Fields are read address, read size and read value
 93 |             elif entry_type == 'R':
 94 |                 if started:
 95 |                     current_snapshot.add_memory_read(
 96 |                         values[0], values[1], values[2])
 97 | 
 98 |             # Memory write access
 99 |             # Fields are writen address, writen size and writen value
100 |             elif entry_type == 'W':
101 |                 if started:
102 |                     current_snapshot.add_memory_write(
103 |                         values[0], values[1], values[2])
104 | 
105 |             # End of the learned function
106 |             # Field are register value
107 |             elif entry_type == 'O':
108 |                 if started:
109 |                     for i, reg_name in enumerate(self.reg_list):
110 |                         current_snapshot.add_output_register(
111 |                             reg_name, values[i])
112 | 
113 |                     # The learned function execution is over
114 |                     # Snapshot can be added to the trace
115 |                     started = False
116 |                     yield current_snapshot
117 | 
118 |             # Call to a function
119 |             # CALL <caller_addr> <stack pointer>
120 |             elif entry_type == "CALL":
121 |                 current_snapshot.add_call(values[0], values[1])
122 | 
123 |             # Return from a function
124 |             # RET <ret_addr> <stack pointer after> <ret value>
125 |             elif entry_type == "RET":
126 |                 current_snapshot.add_ret(values[0], values[1], values[2])
127 | 


--------------------------------------------------------------------------------
/sibyl/learn/tracer/tracer.py:
--------------------------------------------------------------------------------
 1 | import subprocess
 2 | import os
 3 | 
 4 | 
 5 | class Tracer(object):
 6 | 
 7 |     '''
 8 |     Abstract class used to represent a tracer
 9 |     A tracer is a class that run a program and log the executed instruction and the memory read and write and compile all these informations in a trace class
10 |     '''
11 | 
12 |     reg_list = ["RAX", "RBX", "RCX", "RDX", "RSI", "RDI", "RBP", "RSP", "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15"]
13 | 
14 |     def __init__(self, program, address, main_address, abicls, machine):
15 |         '''
16 |         @program: traced program
17 |         @address: address of the traced function
18 |         @main_address: address where the tracer has to begin, if none the tracer begins at the entry point
19 |         @abicls: class of the ABI used by the program
20 |         @machine: machine used by the program
21 |         '''
22 |         self.program = os.path.abspath(program)
23 |         self.address = address
24 |         self.main_address = main_address
25 |         self.abicls = abicls
26 |         self.machine = machine
27 | 
28 |     def do_trace(self):
29 |         '''
30 |         Abstract method.
31 |         Should return the trace of the program
32 |         '''
33 | 
34 |         raise NotImplementedError("Abstract method")
35 | 
36 |     @staticmethod
37 |     def _run_cmd(cmd):
38 |         '''
39 |         Runs the command @cmd
40 |         Return stdout
41 |         raise a RuntimeError if stderr is not empty
42 |         '''
43 |         run = subprocess.Popen(cmd, stdout=subprocess.PIPE,
44 |                                stderr=subprocess.PIPE)
45 |         stdout, stderr = run.communicate()
46 | 
47 |         stdout = stdout.strip()
48 |         stderr = stderr.strip()
49 |         if stdout:
50 |             print stdout
51 | 
52 |         if stderr:
53 |             print "STDERR is not empty"
54 |             print stderr
55 | 
56 |         return stdout
57 | 


--------------------------------------------------------------------------------
/sibyl/test/__init__.py:
--------------------------------------------------------------------------------
 1 | # This file is part of Sibyl.
 2 | # Copyright 2014 Camille MOUGEY <camille.mougey@cea.fr>
 3 | #
 4 | # Sibyl is free software: you can redistribute it and/or modify it
 5 | # under the terms of the GNU General Public License as published by
 6 | # the Free Software Foundation, either version 3 of the License, or
 7 | # (at your option) any later version.
 8 | #
 9 | # Sibyl is distributed in the hope that it will be useful, but WITHOUT
10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 | # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
12 | # License for more details.
13 | #
14 | # You should have received a copy of the GNU General Public License
15 | # along with Sibyl. If not, see <http://www.gnu.org/licenses/>.
16 | "Test / signature sets and related utils"
17 | __all__ = []
18 | 


--------------------------------------------------------------------------------
/sibyl/test/ctype.py:
--------------------------------------------------------------------------------
  1 | # This file is part of Sibyl.
  2 | # Copyright 2014 Camille MOUGEY <camille.mougey@cea.fr>
  3 | #
  4 | # Sibyl is free software: you can redistribute it and/or modify it
  5 | # under the terms of the GNU General Public License as published by
  6 | # the Free Software Foundation, either version 3 of the License, or
  7 | # (at your option) any later version.
  8 | #
  9 | # Sibyl is distributed in the hope that it will be useful, but WITHOUT
 10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 11 | # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
 12 | # License for more details.
 13 | #
 14 | # You should have received a copy of the GNU General Public License
 15 | # along with Sibyl. If not, see <http://www.gnu.org/licenses/>.
 16 | 
 17 | 
 18 | from sibyl.test.test import Test, TestSetGenerator
 19 | 
 20 | 
 21 | class TestIsCharset(Test):
 22 |     """Test for character classification routines:
 23 |     isalnum,  isalpha,  isascii,  isblank,  iscntrl,  isdigit, isgraph, islower,
 24 |     isprint, ispunct, isspace, isupper, isxdigit
 25 | 
 26 |     Decision tree:
 27 |     g
 28 |     |-- 0                                                             (return 1)
 29 |     |   |--
 30 |     |   |   |-- !
 31 |     |   |   |   |-- isalnum
 32 |     |   |   |   `-- isgraph
 33 |     |   |    -- \\x00
 34 |     |   |       |-- isascii
 35 |     |   |       `-- isprint
 36 |     |   `-- A
 37 |     |       |-- isalpha
 38 |     |       `-- islower
 39 |     `-- \\t                                                           (return 0)
 40 |         |--
 41 |         |   |-- iscntrl
 42 |         |   `-- \\n
 43 |         |       |-- isblank
 44 |         |       `-- isspace
 45 |         `-- A
 46 |             |-- 0
 47 |             |   |-- isupper
 48 |             |   `-- isxdigit
 49 |             `-- 0
 50 |                 |-- ispunct
 51 |                 `-- isdigit
 52 |     """
 53 | 
 54 |     def reset_full(self, *args, **kwargs):
 55 |         super(TestIsCharset, self).reset_full(*args, **kwargs)
 56 |         # Reset tests tree
 57 |         self.cur_tree = self.decision_tree
 58 |         self.next_test = self.cur_tree["t"]
 59 |         self.tests = TestSetGenerator(self.test_iter())
 60 | 
 61 |     def reset(self, *args, **kwargs):
 62 |         super(TestIsCharset, self).reset_full(*args, **kwargs)
 63 | 
 64 |     def check_gen(self, result=None):
 65 |         if result == None:
 66 |             result = self._get_result()
 67 | 
 68 |         # Returned values should be 0 or 1
 69 |         if result not in [0, 1]:
 70 |             return False
 71 | 
 72 |         # Browse decision tree
 73 |         key = "g" if result == 1 else "b"
 74 |         next_tree = self.cur_tree[key]
 75 | 
 76 |         if next_tree is False:
 77 |             # No more candidate
 78 |             return False
 79 |         elif isinstance(next_tree, str):
 80 |             # Candidate found
 81 |             self.func = next_tree
 82 |             self.next_test = None
 83 |             return True
 84 |         elif isinstance(next_tree, dict):
 85 |             # Browse next candidates
 86 |             self.cur_tree = next_tree
 87 |             self.next_test = self.cur_tree["t"]
 88 |             return True
 89 |         raise ValueError("Impossible tree value")
 90 | 
 91 |     def test_iter(self):
 92 |         while self.next_test:
 93 |             yield self.next_test
 94 |         raise StopIteration()
 95 | 
 96 |     def init_notascii(self):
 97 |         self._add_arg(0, 255)
 98 | 
 99 |     def init_g(self):
100 |         self._add_arg(0, ord('g'))
101 | 
102 |     def init_0(self):
103 |         self._add_arg(0, ord('0'))
104 | 
105 |     def init_space(self):
106 |         self._add_arg(0, ord(' '))
107 | 
108 |     def init_x00(self):
109 |         self._add_arg(0, 0)
110 | 
111 |     def init_exclam(self):
112 |         self._add_arg(0, ord('!'))
113 | 
114 |     def init_A(self):
115 |         self._add_arg(0, ord('A'))
116 | 
117 |     def init_tab(self):
118 |         self._add_arg(0, ord('\t'))
119 | 
120 |     def init_ret(self):
121 |         self._add_arg(0, ord('\n'))
122 | 
123 |     def init_punct(self):
124 |         self._add_arg(0, ord('.'))
125 | 
126 |     decision_tree = {"t": (init_notascii, check_gen),
127 |                      "g": False, # Increase mean cost
128 |                      "b": {"t": (init_g, check_gen),
129 |                            "g": {"t": (init_0, check_gen),
130 |                                  "g": {"t": (init_space, check_gen),
131 |                                        "g": {"t": (init_x00, check_gen),
132 |                                              "g": "isascii",
133 |                                              "b": "isprint"
134 |                                              },
135 |                                        "b": {"t": (init_exclam, check_gen),
136 |                                              "g": "isgraph",
137 |                                              "b": "isalnum"
138 |                                              },
139 |                                        },
140 |                                  "b": {"t": (init_A, check_gen),
141 |                                        "g": "isalpha",
142 |                                        "b": "islower"
143 |                                        },
144 |                                  },
145 |                            "b": {"t": (init_tab, check_gen),
146 |                                  "g": {"t": (init_space, check_gen),
147 |                                        "g": {"t": (init_ret, check_gen),
148 |                                              "g": "isspace",
149 |                                              "b": "isblank"
150 |                                              },
151 |                                        "b": "iscntrl"
152 |                                        },
153 |                                  "b": {"t": (init_A, check_gen),
154 |                                        "g": {"t": (init_0, check_gen),
155 |                                              "g": "isxdigit",
156 |                                              "b": "isupper"
157 |                                              },
158 |                                        "b": {"t": (init_0, check_gen),
159 |                                              "g": "isdigit",
160 |                                              "b": {"t": (init_punct, check_gen),
161 |                                                    "g": "ispunct",
162 |                                                    "b": False
163 |                                                    # Avoid false positive
164 |                                                    },
165 |                                              },
166 |                                        },
167 |                                  },
168 |                            }
169 |                      }
170 | 
# Test classes provided by this module
TESTS = [TestIsCharset]
172 | 


--------------------------------------------------------------------------------
/sibyl/test/stdlib.py:
--------------------------------------------------------------------------------
  1 | # This file is part of Sibyl.
  2 | # Copyright 2014 Camille MOUGEY <camille.mougey@cea.fr>
  3 | #
  4 | # Sibyl is free software: you can redistribute it and/or modify it
  5 | # under the terms of the GNU General Public License as published by
  6 | # the Free Software Foundation, either version 3 of the License, or
  7 | # (at your option) any later version.
  8 | #
  9 | # Sibyl is distributed in the hope that it will be useful, but WITHOUT
 10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 11 | # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
 12 | # License for more details.
 13 | #
 14 | # You should have received a copy of the GNU General Public License
 15 | # along with Sibyl. If not, see <http://www.gnu.org/licenses/>.
 16 | 
 17 | 
 18 | from sibyl.test.test import Test, TestSetTest
 19 | 
 20 | 
 21 | class TestAbs(Test):
 22 | 
 23 |     value = 42
 24 | 
 25 |     # Test1
 26 |     def init1(self):
 27 |         self._add_arg(0, self.value + 1)
 28 | 
 29 |     def check1(self):
 30 |         result = self._get_result()
 31 |         return result == (self.value + 1)
 32 | 
 33 |     # Test2
 34 |     def init2(self):
 35 |         self._add_arg(0, self._as_int(-1 * self.value))
 36 | 
 37 |     def check2(self):
 38 |         result = self._get_result()
 39 |         return result == self.value
 40 | 
 41 |     # Properties
 42 |     func = "abs"
 43 |     tests = TestSetTest(init1, check1) & TestSetTest(init2, check2)
 44 | 
 45 | 
 46 | class TestA64l(Test):
 47 | 
 48 |     my_string = "v/"
 49 |     value = 123
 50 | 
 51 |     # Test
 52 |     def init(self):
 53 |         self.my_addr = self._alloc_string(self.my_string)
 54 |         self._add_arg(0, self.my_addr)
 55 | 
 56 |     def check(self):
 57 |         result = self._get_result()
 58 |         return all([result == self.value,
 59 |                     self._ensure_mem(self.my_addr, self.my_string)])
 60 | 
 61 |     # Properties
 62 |     func = "a64l"
 63 |     tests = TestSetTest(init, check)
 64 | 
 65 | 
 66 | class TestAtoi(Test):
 67 | 
 68 |     my_string = "44"
 69 |     my_string2 = "127.0.0.1"
 70 | 
 71 |     # Test
 72 |     def my_init(self, string):
 73 |         self.my_addr = self._alloc_string(string)
 74 |         self._add_arg(0, self.my_addr)
 75 | 
 76 |     def my_check(self, string):
 77 |         result = self._get_result()
 78 |         return all([result == int(string.split(".")[0]),
 79 |                     self._ensure_mem(self.my_addr, string)])
 80 | 
 81 |     # Test1
 82 |     def init1(self):
 83 |         return self.my_init(self.my_string)
 84 | 
 85 |     def check1(self):
 86 |         return self.my_check(self.my_string)
 87 | 
 88 |     # Test1
 89 |     def init2(self):
 90 |         return self.my_init(self.my_string2)
 91 | 
 92 |     def check2(self):
 93 |         return self.my_check(self.my_string2)
 94 | 
 95 | 
 96 |     # Properties
 97 |     func = "atoi"
 98 |     tests = TestSetTest(init1, check1) & TestSetTest(init2, check2)
 99 | 
100 | 
# Test classes provided by this module
TESTS = [TestAbs, TestA64l, TestAtoi]
102 | 


--------------------------------------------------------------------------------
/sibyl/test/test.py:
--------------------------------------------------------------------------------
  1 | # This file is part of Sibyl.
  2 | # Copyright 2014 Camille MOUGEY <camille.mougey@cea.fr>
  3 | #
  4 | # Sibyl is free software: you can redistribute it and/or modify it
  5 | # under the terms of the GNU General Public License as published by
  6 | # the Free Software Foundation, either version 3 of the License, or
  7 | # (at your option) any later version.
  8 | #
  9 | # Sibyl is distributed in the hope that it will be useful, but WITHOUT
 10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 11 | # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
 12 | # License for more details.
 13 | #
 14 | # You should have received a copy of the GNU General Public License
 15 | # along with Sibyl. If not, see <http://www.gnu.org/licenses/>.
 16 | 
 17 | 
 18 | import random
 19 | from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE
 20 | from miasm2.expression.modint import mod_size2int
 21 | from miasm2.expression.simplifications import expr_simp
 22 | try:
 23 |     import pycparser
 24 | except ImportError:
 25 |     pycparser = None
 26 | else:
 27 |     from miasm2.core.objc import CTypesManagerNotPacked, CHandler
 28 |     from miasm2.core.ctypesmngr import CAstTypes
 29 |     from miasm2.arch.x86.ctype import CTypeAMD64_unk
 30 | 
 31 | from sibyl.commons import HeaderFile
 32 | 
 33 | 
 34 | class Test(object):
 35 |     "Main class for tests"
 36 | 
 37 |     # Elements to override
 38 | 
 39 |     func = ""   # Possible function if test passes
 40 |     tests = [] # List of tests (init, check) to pass
 41 |     reset_mem = True # Reset memory between tests
 42 | 
 43 |     def init(self):
 44 |         "Called for setting up the test case"
 45 |         pass
 46 | 
 47 |     def check(self):
 48 |         """Called to check test result
 49 |         Return True if all checks are passed"""
 50 |         return True
 51 | 
 52 |     def reset_full(self):
 53 |         """Reset the test case between two functions"""
 54 |         self.alloc_pool = 0x20000000
 55 | 
 56 |     def reset(self):
 57 |         """Reset the test case between two subtests"""
 58 |         self.reset_full()
 59 | 
 60 |     # Utils
 61 | 
 62 |     def __init__(self, jitter, abi):
 63 |         self.jitter = jitter
 64 |         self.alloc_pool = 0x20000000
 65 |         self.abi = abi
 66 | 
 67 |     def _reserv_mem(self, size, read=True, write=False):
 68 |         right = 0
 69 |         if read:
 70 |             right |= PAGE_READ
 71 |         if write:
 72 |             right |= PAGE_WRITE
 73 | 
 74 |         # Memory alignement
 75 |         size += 16 - size % 16
 76 | 
 77 |         to_ret = self.alloc_pool
 78 |         self.alloc_pool += size + 1
 79 | 
 80 |         return to_ret
 81 | 
 82 |     def __alloc_mem(self, mem, read=True, write=False):
 83 |         right = 0
 84 |         if read:
 85 |             right |= PAGE_READ
 86 |         if write:
 87 |             right |= PAGE_WRITE
 88 | 
 89 |         # Memory alignement
 90 |         mem += "".join([chr(random.randint(0, 255)) \
 91 |                             for _ in xrange((16 - len(mem) % 16))])
 92 | 
 93 |         self.jitter.vm.add_memory_page(self.alloc_pool, right, mem)
 94 |         to_ret = self.alloc_pool
 95 |         self.alloc_pool += len(mem) + 1
 96 | 
 97 |         return to_ret
 98 | 
 99 |     def _alloc_mem(self, size, read=True, write=False):
100 |         mem = "".join([chr(random.randint(0, 255)) for _ in xrange(size)])
101 |         return self.__alloc_mem(mem, read=read, write=write)
102 | 
103 |     def _alloc_string(self, string, read=True, write=False):
104 |         return self.__alloc_mem(string + "\x00", read=read, write=write)
105 | 
106 |     def _alloc_pointer(self, pointer, read=True, write=False):
107 |         pointer_size = self.abi.ira.sizeof_pointer()
108 |         return self.__alloc_mem(Test.pack(pointer, pointer_size),
109 |                                 read=read,
110 |                                 write=write)
111 | 
112 |     def _write_mem(self, addr, element):
113 |         self.jitter.vm.set_mem(addr, element)
114 | 
115 |     def _write_string(self, addr, element):
116 |         self._write_mem(addr, element + "\x00")
117 | 
118 |     def _add_arg(self, number, element):
119 |         self.abi.add_arg(number, element)
120 | 
121 |     def _get_result(self):
122 |         return self.abi.get_result()
123 | 
124 |     def _ensure_mem(self, addr, element):
125 |         try:
126 |             return self.jitter.vm.get_mem(addr, len(element)) == element
127 |         except RuntimeError:
128 |             return False
129 | 
130 |     def _ensure_mem_sparse(self, addr, element, offsets):
131 |         """@offsets: offsets to ignore"""
132 |         for i, sub_element in enumerate(element):
133 |             if i in offsets:
134 |                 continue
135 |             if not self._ensure_mem(addr + i, sub_element):
136 |                 return False
137 |         return True
138 | 
139 |     def _as_int(self, element):
140 |         int_size = self.abi.ira.sizeof_int()
141 |         max_val = 2**int_size
142 |         return (element + max_val) % max_val
143 | 
144 |     def _to_int(self, element):
145 |         int_size = self.abi.ira.sizeof_int()
146 |         return mod_size2int[int_size](element)
147 | 
148 |     def _memread_pointer(self, addr):
149 |         pointer_size = self.abi.ira.sizeof_pointer() / 8
150 |         try:
151 |             element = self.jitter.vm.get_mem(addr, pointer_size)
152 |         except RuntimeError:
153 |             return False
154 |         return Test.unpack(element)
155 | 
156 |     @staticmethod
157 |     def pack(element, size):
158 |         out = ""
159 |         while element != 0:
160 |             out += chr(element % 0x100)
161 |             element >>= 8
162 |         if len(out) > size / 8:
163 |             raise ValueError("To big to be packed")
164 |         out = out + "\x00" * ((size / 8) - len(out))
165 |         return out
166 | 
167 |     @staticmethod
168 |     def unpack(element):
169 |         return int(element[::-1].encode("hex"), 16)
170 | 
171 | 
class TestSet(object):
    """Stand for a set of test to run, potentially associated to a logic form

    The logic form is represented as a tree, in which nodes are TestSet children
    instance. Sets are combined with `&` (TestSetAnd) and `|` (TestSetOr).
    """

    def __and__(self, ts):
        return TestSetAnd(self, ts)

    def __or__(self, ts):
        return TestSetOr(self, ts)

    def execute(self, callback):
        """Successive execution of test set (like a visitor on the associated
        tree) through @callback
        @callback: bool func(init, check)
        Must be implemented by children; raise NotImplementedError here
        """
        # Raise (instead of returning the exception object, which is truthy
        # and would make an abstract node look like a passed test)
        raise NotImplementedError("Abstract method")
191 | 
192 | 
class TestSetAnd(TestSet):
    """Conjunction node: TestSet1 & TestSet2

    Evaluation is lazy: TestSet2 is only launched once TestSet1 has passed
    """

    def __init__(self, ts1, ts2):
        super(TestSetAnd, self).__init__()
        # Both operands must be TestSet nodes
        for operand in (ts1, ts2):
            assert isinstance(operand, TestSet)
        self._ts1, self._ts2 = ts1, ts2

    def __repr__(self):
        operands = (self._ts1, self._ts2)
        return "%r TS_AND %r" % operands

    def execute(self, callback):
        """Run the left operand, then the right one if the left passed
        Return False when the left operand fails, otherwise the result of the
        right operand
        """
        if self._ts1.execute(callback):
            # First test is valid, the result is given by the second one
            return self._ts2.execute(callback)
        # Early quit
        return False
216 | 
217 | 
class TestSetOr(TestSet):
    """Disjunction node: TestSet1 | TestSet2

    Evaluation is lazy: TestSet2 is only launched if TestSet1 has failed
    """

    def __init__(self, ts1, ts2):
        super(TestSetOr, self).__init__()
        # Both operands must be TestSet nodes
        for operand in (ts1, ts2):
            assert isinstance(operand, TestSet)
        self._ts1, self._ts2 = ts1, ts2

    def __repr__(self):
        operands = (self._ts1, self._ts2)
        return "%r TS_OR %r" % operands

    def execute(self, callback):
        """Run the left operand, then the right one if the left failed
        Return True when the left operand passes, otherwise the result of the
        right operand
        """
        if not self._ts1.execute(callback):
            # First test failed, the result is given by the second one
            return self._ts2.execute(callback)
        # Early quit
        return True
240 | 
241 | 
class TestSetTest(TestSet):
    """Leaf of a TestSet tree: one single check of a test case

    init: initialization function, called before launching the target address
    check: checking function, verifying the final state
    """

    def __init__(self, init, check):
        super(TestSetTest, self).__init__()
        self._init, self._check = init, check

    def __repr__(self):
        callbacks = (self._init, self._check)
        return "<TST %r,%r>" % callbacks

    def execute(self, callback):
        """Hand the (init, check) couple to @callback and return its result"""
        outcome = callback(self._init, self._check)
        return outcome
261 | 
262 | 
class TestSetGenerator(TestSet):
    """TestSet whose (init, check) couples are pulled from a generator"""

    def __init__(self, generator):
        self._generator = generator

    def execute(self, callback):
        """Feed @callback with each couple, stopping at the first failure
        Return False on the first failing couple, True if none failed
        """
        # `all` is lazy: no couple is pulled once a callback has failed
        return all(callback(init, check)
                   for (init, check) in self._generator)
274 | 
275 | 
class TestHeader(Test):
    """Test extension with support for header parsing, and handling of struct
    offset, size, ...

    Children set `header` (handed to `HeaderFile`) and `func`, the name of the
    prototype to look up in that header.
    """

    header = None

    def __init__(self, *args, **kwargs):
        """Parse `header` and prepare the C handler for the `func` prototype
        Raise ImportError if pycparser is not available
        """
        super(TestHeader, self).__init__(*args, **kwargs)
        # Requirement check
        if pycparser is None:
            raise ImportError("pycparser module is needed to launch tests "
                              "based on header files")

        ctype_manager = CTypesManagerNotPacked(CAstTypes(), CTypeAMD64_unk())

        hdr = HeaderFile(self.header, ctype_manager)
        proto = hdr.functions[self.func]
        # Arguments are exposed to C expressions as 'arg<index>_<name>'
        self.c_handler = CHandler(
            hdr.ctype_manager,
            {'arg%d_%s' % (i, name): set([proto.args[name]])
             for i, name in enumerate(proto.args_order)}
        )
        self.expr_types_from_C = {'arg%d_%s' % (i, name): proto.args[name]
                                  for i, name in enumerate(proto.args_order)}
        # Memoization of the (costly) C handler requests
        self.cache_sizeof = {}
        self.cache_trad = {}
        self.cache_field_addr = {}

    def sizeof(self, Clike):
        """Return the size of the type of the C expression @Clike, multiplied
        by 8 (i.e. `c_to_type(...).size * 8`); the result is cached"""
        ret = self.cache_sizeof.get(Clike, None)
        if ret is None:
            ret = self.c_handler.c_to_type(
                Clike,
                self.expr_types_from_C
            ).size * 8
            self.cache_sizeof[Clike] = ret
        return ret

    def trad(self, Clike):
        """Return the expression associated to the C expression @Clike, as
        given by the C handler; the result is cached"""
        ret = self.cache_trad.get(Clike, None)
        if ret is None:
            ret = self.c_handler.c_to_expr(Clike, self.expr_types_from_C)
            self.cache_trad[Clike] = ret
        return ret

    def field_addr(self, base, Clike, is_ptr=False):
        """Return the integer offset between @Clike and @base
        @base: C expression used as reference
        @Clike: C expression of the accessed element
        @is_ptr: if set, @Clike is used as is; otherwise its address
        ("&(@Clike)") is considered
        The result is cached
        """
        key = (base, Clike, is_ptr)
        ret = self.cache_field_addr.get(key, None)
        if ret is None:
            base_expr = self.trad(base)
            if is_ptr:
                access_expr = self.trad(Clike)
            else:
                access_expr = self.trad("&(%s)" % Clike)
            # The difference is expected to simplify to a constant
            ret = int(expr_simp(access_expr - base_expr))
            self.cache_field_addr[key] = ret
        return ret
335 | 


--------------------------------------------------------------------------------
/sibyl/testlauncher.py:
--------------------------------------------------------------------------------
  1 | # This file is part of Sibyl.
  2 | # Copyright 2014 Camille MOUGEY <camille.mougey@cea.fr>
  3 | #
  4 | # Sibyl is free software: you can redistribute it and/or modify it
  5 | # under the terms of the GNU General Public License as published by
  6 | # the Free Software Foundation, either version 3 of the License, or
  7 | # (at your option) any later version.
  8 | #
  9 | # Sibyl is distributed in the hope that it will be useful, but WITHOUT
 10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 11 | # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
 12 | # License for more details.
 13 | #
 14 | # You should have received a copy of the GNU General Public License
 15 | # along with Sibyl. If not, see <http://www.gnu.org/licenses/>.
 16 | 
 17 | """This module provides a way to prepare and launch Sibyl tests on a binary"""
 18 | 
 19 | 
 20 | import time
 21 | import signal
 22 | import logging
 23 | from miasm2.analysis.binary import Container, ContainerPE, ContainerELF
 24 | 
 25 | from sibyl.commons import init_logger, TimeoutException, END_ADDR
 26 | from sibyl.engine import QEMUEngine, MiasmEngine
 27 | from sibyl.config import config
 28 | 
 29 | class TestLauncher(object):
 30 |     "Launch tests for a function and report matching candidates"
 31 | 
 32 |     def __init__(self, filename, machine, abicls, tests_cls, engine_name,
 33 |                  map_addr=0):
 34 | 
 35 |         # Logging facilities
 36 |         self.logger = init_logger("testlauncher")
 37 | 
 38 |         # Prepare JiT engine
 39 |         self.machine = machine
 40 |         self.init_engine(engine_name)
 41 | 
 42 |         # Init and snapshot VM
 43 |         self.load_vm(filename, map_addr)
 44 |         self.init_stub()
 45 |         self.snapshot = self.engine.take_snapshot()
 46 | 
 47 |         # Init tests
 48 |         self.init_abi(abicls)
 49 |         self.initialize_tests(tests_cls)
 50 | 
 51 |     def init_stub(self):
 52 |         """Initialize stubbing capabilities"""
 53 |         if not isinstance(self.engine, MiasmEngine):
 54 |             # Unsupported capability
 55 |             return
 56 | 
 57 |         # Get stubs' implementation
 58 |         context = {}
 59 |         for fpath in config.stubs:
 60 |             execfile(fpath, context)
 61 |         if not context:
 62 |             return
 63 | 
 64 |         libs = None
 65 |         if isinstance(self.ctr, ContainerPE):
 66 |             from miasm2.jitter.loader.pe import preload_pe, libimp_pe
 67 |             libs = libimp_pe()
 68 |             preload_pe(self.jitter.vm, self.ctr.executable, libs)
 69 | 
 70 |         elif isinstance(self.ctr, ContainerELF):
 71 |             from miasm2.jitter.loader.elf import preload_elf, libimp_elf
 72 |             libs = libimp_elf()
 73 |             preload_elf(self.jitter.vm, self.ctr.executable, libs)
 74 | 
 75 |         else:
 76 |             return
 77 | 
 78 |         # Add associated breakpoints
 79 |         self.jitter.add_lib_handler(libs, context)
 80 | 
 81 |     def initialize_tests(self, tests_cls):
 82 |         tests = []
 83 |         for testcls in tests_cls:
 84 |             tests.append(testcls(self.jitter, self.abi))
 85 |         self.tests = tests
 86 | 
 87 |     def load_vm(self, filename, map_addr):
 88 |         self.ctr = Container.from_stream(open(filename), vm=self.jitter.vm,
 89 |                                          addr=map_addr)
 90 |         self.jitter.cpu.init_regs()
 91 |         self.jitter.init_stack()
 92 | 
 93 |     def init_engine(self, engine_name):
 94 |         if engine_name == "qemu":
 95 |             self.engine = QEMUEngine(self.machine)
 96 |         else:
 97 |             self.engine = MiasmEngine(self.machine, engine_name)
 98 |         self.jitter = self.engine.jitter
 99 | 
100 |     def init_abi(self, abicls):
101 |         ira = self.machine.ira()
102 |         self.abi = abicls(self.jitter, ira)
103 | 
104 |     def launch_tests(self, test, address, timeout_seconds=0):
105 |         # Variables to remind between two "launch_test"
106 |         self._temp_reset_mem = True
107 | 
108 |         # Reset between functions
109 |         test.reset_full()
110 | 
111 |         # Callback to launch
112 |         def launch_test(init, check):
113 |             """Launch a test associated with @init, @check"""
114 | 
115 |             # Reset state
116 |             self.engine.restore_snapshot(memory=self._temp_reset_mem)
117 |             self.abi.reset()
118 |             test.reset()
119 | 
120 |             # Prepare VM
121 |             init(test)
122 |             self.abi.prepare_call(ret_addr=END_ADDR)
123 | 
124 |             # Run code
125 |             status = self.engine.run(address, timeout_seconds)
126 |             if not status:
127 |                 # Early quit
128 |                 self._temp_reset_mem = True
129 |                 return False
130 | 
131 |             # Check result
132 |             to_ret = check(test)
133 | 
134 |             # Update flags
135 |             self._temp_reset_mem = test.reset_mem
136 | 
137 |             return to_ret
138 | 
139 |         # Launch subtests
140 |         status = test.tests.execute(launch_test)
141 |         if status:
142 |             self._possible_funcs.append(test.func)
143 | 
144 |     def run(self, address, *args, **kwargs):
145 |         self._possible_funcs = []
146 | 
147 |         nb_tests = len(self.tests)
148 |         self.logger.info("Launch tests (%d available functions)" % (nb_tests))
149 |         starttime = time.time()
150 | 
151 |         self.engine.prepare_run()
152 |         for test in self.tests:
153 |             self.launch_tests(test, address, *args, **kwargs)
154 | 
155 |         self.logger.info("Total time: %.4f seconds" % (time.time() - starttime))
156 |         return self._possible_funcs
157 | 
158 |     def get_possible_funcs(self):
159 |         return self._possible_funcs
160 |     possible_funcs = property(get_possible_funcs)
161 | 


--------------------------------------------------------------------------------
/test/find/Makefile:
--------------------------------------------------------------------------------
 1 | # This file is part of Sibyl.
 2 | # Copyright 2014 Camille MOUGEY <camille.mougey@cea.fr>
 3 | #
 4 | # Sibyl is free software: you can redistribute it and/or modify it
 5 | # under the terms of the GNU General Public License as published by
 6 | # the Free Software Foundation, either version 3 of the License, or
 7 | # (at your option) any later version.
 8 | #
 9 | # Sibyl is distributed in the hope that it will be useful, but WITHOUT
10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 | # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
12 | # License for more details.
13 | #
14 | # You should have received a copy of the GNU General Public License
15 | # along with Sibyl. If not, see <http://www.gnu.org/licenses/>.
16 | 
CC := gcc
CFLAGS := -m32 -O0 --static
PROGRAMS := test_string test_stdlib test_ctype test_stub

# Targets which are not files
.PHONY: all clean

all: $(PROGRAMS)

# Build a program from the C file of the same name
%: %.c
	$(CC) $(CFLAGS) $< -o $@

test_string: test_string.c
test_stdlib: test_stdlib.c
test_ctype: test_ctype.c
# test_stub has its own recipe: same flags, without --static
test_stub: test_stub.c
	$(CC) -m32 -O0 $< -o $@



clean:
	rm -f $(PROGRAMS)
36 | 


--------------------------------------------------------------------------------
/test/find/__init__.py:
--------------------------------------------------------------------------------
1 | from .run_ctests import test_find
2 | 


--------------------------------------------------------------------------------
/test/find/run_ctests.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | import os
  3 | import re
  4 | import subprocess
  5 | from argparse import ArgumentParser
  6 | from utils.log import log_error, log_success, log_info
  7 | 
  8 | from elfesteem.elf_init import ELF
  9 | from sibyl.heuristics.func import FuncHeuristic
 10 | 
 11 | match_C = re.compile("\w+[ \*]+(\w+)\(.*\)")
 12 | custom_tag = "my_"
 13 | whitelist_funcs = ["main"]
 14 | 
 15 | 
 16 | def get_funcs_exe_source(c_file, filename):
 17 |     """Get function defined in @c_file"""
 18 |     with open(c_file) as fdesc:
 19 |         data = fdesc.read()
 20 |     funcs = []
 21 |     for match in match_C.finditer(data):
 22 |         funcs.append(match.groups()[0])
 23 |     funcs = list(name for name in set(funcs) if name not in whitelist_funcs)
 24 | 
 25 |     # Find corresponding binary offset
 26 |     to_check = []
 27 |     with open(filename) as fdesc:
 28 |         elf = ELF(fdesc.read())
 29 | 
 30 |     symbols = {}
 31 |     for name, symb in elf.getsectionbyname(".symtab").symbols.iteritems():
 32 |         offset = symb.value
 33 |         if name.startswith("__"):
 34 |             name = name[2:]
 35 |         symbols.setdefault(name, set()).add(offset)
 36 |         if name in funcs:
 37 |             if name.startswith(custom_tag):
 38 |                 ## Custom tags can be used to write equivalent functions like
 39 |                 ## 'my_strlen' for a custom strlen
 40 |                 name = name[len(custom_tag):]
 41 |             to_check.append((offset, name))
 42 |     return to_check, symbols
 43 | 
 44 | 
 45 | def get_funcs_heuristics(c_file, filename):
 46 |     """Get function from Sibyl heuristics"""
 47 |     # Force the activation of all heuristics
 48 |     fh = FuncHeuristic(None, None, "")
 49 |     cmd = ["sibyl", "func"]
 50 |     for name in fh.heuristic_names:
 51 |         cmd += ["-e", name]
 52 |     cmd.append(filename)
 53 |     print " ".join(cmd)
 54 |     sibyl = subprocess.Popen(cmd, stdout=subprocess.PIPE,
 55 |                              stderr=subprocess.PIPE)
 56 |     stdout, stderr = sibyl.communicate()
 57 |     if stderr:
 58 |         raise RuntimeError("Something gone wrong...:\n%s" % stderr)
 59 | 
 60 |     # Parse output and merge with symtab (ground truth)
 61 |     to_check_symtab, extra = get_funcs_exe_source(c_file, filename)
 62 |     addr2name = {addr: name for addr, name in to_check_symtab}
 63 |     to_check = []
 64 |     for line in stdout.split("\n"):
 65 |         if not line:
 66 |             continue
 67 |         addr = int(line, 0)
 68 |         if addr in addr2name:
 69 |             to_check.append((addr, name))
 70 | 
 71 |     return to_check, extra
 72 | 
 73 | 
 74 | def test_find(args):
 75 | 
 76 |     if args.func_heuristic:
 77 |         get_funcs = get_funcs_heuristics
 78 |     else:
 79 |         get_funcs = get_funcs_exe_source
 80 | 
 81 |     # Compil tests
 82 |     log_info( "Remove old files" )
 83 |     os.system("make clean")
 84 |     log_info( "Compile C files" )
 85 |     status = os.system("make")
 86 | 
 87 |     # Find test names
 88 |     c_files = []
 89 | 
 90 |     for cur_dir, sub_dir, files in os.walk("."):
 91 |         c_files += [x for x in files if x.endswith(".c")]
 92 | 
 93 |     log_info( "Found:\n\t- " + "\n\t- ".join(c_files) )
 94 | 
 95 |     for c_file in c_files:
 96 |         filename = c_file[:-2]
 97 |         log_info( " %s:" % filename )
 98 |         # to_check: (addr, expected found)
 99 |         # extra: possible extra match
100 |         to_check, extra = get_funcs(c_file, filename)
101 |         print "\n".join("0x%08x: %s" % (addr, funcname)
102 |                         for (addr, funcname) in to_check)
103 | 
104 |         # Launch Sibyl
105 |         log_info( "Launch Sibyl" )
106 |         options = ["-j", "gcc", "-i", "5", "-b", "ABIStdCall_x86_32"]
107 |         if not args.arch_heuristic:
108 |             options += ["-a", "x86_32"]
109 | 
110 |         cmd = ["sibyl", "find"] + options + [filename]
111 |         cmd += [hex(addr) for addr, _ in to_check]
112 |         print " ".join(cmd)
113 |         sibyl = subprocess.Popen(cmd, stdout=subprocess.PIPE,
114 |                                  stderr=subprocess.PIPE)
115 | 
116 |         # Parse result
117 |         found = []
118 |         stdout, stderr = sibyl.communicate()
119 |         for line in stdout.split("\n"):
120 |             if not line or not " : " in line:
121 |                 continue
122 |             addr, func = line.split(" : ")
123 |             found.append((int(addr, 0), func))
124 | 
125 |         if sibyl.returncode:
126 |             log_error("Process exits with a %d code" % sibyl.returncode)
127 |             print stderr
128 |             exit(sibyl.returncode)
129 | 
130 |         log_info( "Evaluate results" )
131 |         i = 0
132 | 
133 |         for element in found:
134 |             if element not in to_check:
135 |                 offset, name = element
136 |                 if offset in extra.get(name, []):
137 |                     # Present in symtab but not in C source file
138 |                     print "[+] Additionnal found: %s (@0x%08x)" % (name, offset)
139 |                 else:
140 |                     alt_names = [aname
141 |                                  for aname, offsets in extra.iteritems()
142 |                                  if offset in offsets]
143 |                     log_error("Bad found: %s (@0x%08x -> '%s')" % (name,
144 |                                                                    offset,
145 |                                                                    ",".join(alt_names)))
146 |             else:
147 |                 i += 1
148 |         for element in to_check:
149 |             if element not in found:
150 |                 log_error("Unable to find: %s (@0x%08x)" % (element[1], element[0]))
151 | 
152 |         log_success("Found %d/%d correct elements" % (i, len(to_check)))
153 | 
154 |     log_info( "Remove old files" )
155 |     os.system("make clean")
156 |     return False
157 | 


--------------------------------------------------------------------------------
/test/find/test_ctype.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is part of Sibyl.
 3 |  * Copyright 2014 Camille MOUGEY <camille.mougey@cea.fr>
 4 |  *
 5 |  * Sibyl is free software: you can redistribute it and/or modify it
 6 |  * under the terms of the GNU General Public License as published by
 7 |  * the Free Software Foundation, either version 3 of the License, or
 8 |  * (at your option) any later version.
 9 |  *
10 |  * Sibyl is distributed in the hope that it will be useful, but WITHOUT
11 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 |  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
13 |  * License for more details.
14 |  *
15 |  * You should have received a copy of the GNU General Public License
16 |  * along with Sibyl. If not, see <http://www.gnu.org/licenses/>.
17 |  */
18 | 
/* Return 1 if c is an ASCII letter or a decimal digit, 0 otherwise */
int isalnum(int c)
{
	if ((c >= 'a') && (c <= 'z'))
		return 1;
	if ((c >= 'A') && (c <= 'Z'))
		return 1;
	return ((c >= '0') && (c <= '9'));
}
23 | 
/* Return 1 if c is an ASCII letter, 0 otherwise */
int isalpha(int c)
{
	if (c >= 'a' && c <= 'z')
		return 1;
	return (c >= 'A' && c <= 'Z');
}
/* Return 1 if c is in the 7-bit range [0, 127], 0 otherwise */
int isascii(int c)
{
	return !(c < 0 || c >= 128);
}
/* Return 1 if c is a decimal digit, 0 otherwise */
int isdigit (int c)
{
	return !((c < '0') || (c > '9'));
}
/* Return 1 if c is a space or a horizontal tab, 0 otherwise */
int isblank(int c)
{
	switch (c) {
	case ' ':
	case '\t':
		return 1;
	default:
		return 0;
	}
}
/* Return 1 if c is 0x7F or lies in [0x00, 0x1F], 0 otherwise */
int iscntrl(int c)
{
	if (c == 0x7F)
		return 1;
	return (c >= 0 && c <= 0x1F);
}
/* Return 1 if c is a lowercase ASCII letter, 0 otherwise */
int islower(int c)
{
	return !((c < 'a') || (c > 'z'));
}
/* Return 1 if c lies in [0x20, 0x7E] (space included), 0 otherwise */
int isprint(int c)
{
	return !(c < 0x20 || c > 0x7E);
}
/* Return 1 if c lies in [0x21, 0x7E] (space excluded), 0 otherwise */
int isgraph(int c)
{
	return (c >= 0x21 && c <= 0x7E);
}
/* Return 1 if c satisfies isgraph() but not isalnum(), 0 otherwise */
int ispunct(int c)
{
	/* isalnum() is only evaluated for graphic characters */
	if (!isgraph(c))
		return 0;
	return !isalnum(c);
}
/* Return 1 if c is a space (0x20) or lies in [0x09, 0x0D], 0 otherwise */
int isspace(int c)
{
	if (c == 0x20)
		return 1;
	return (c >= 0x09 && c <= 0x0D);
}
/* Return 1 if c is an uppercase ASCII letter, 0 otherwise */
int isupper(int c)
{
	return !((c < 'A') || (c > 'Z'));
}
/* Return 1 if c is a hexadecimal digit (0-9, A-F, a-f), 0 otherwise */
int isxdigit (int c)
{
	if ((c >= '0') && (c <= '9'))
		return 1;
	if ((c >= 'A') && (c <= 'F'))
		return 1;
	return ((c >= 'a') && (c <= 'f'));
}
72 | 
73 | 
/* Entry point doing nothing: the functions above are never called here */
int main() {
	return 0;
}
77 | 


--------------------------------------------------------------------------------
/test/find/test_stdlib.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is part of Sibyl.
 3 |  * Copyright 2014 Camille MOUGEY <camille.mougey@cea.fr>
 4 |  *
 5 |  * Sibyl is free software: you can redistribute it and/or modify it
 6 |  * under the terms of the GNU General Public License as published by
 7 |  * the Free Software Foundation, either version 3 of the License, or
 8 |  * (at your option) any later version.
 9 |  *
10 |  * Sibyl is distributed in the hope that it will be useful, but WITHOUT
11 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 |  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
13 |  * License for more details.
14 |  *
15 |  * You should have received a copy of the GNU General Public License
16 |  * along with Sibyl. If not, see <http://www.gnu.org/licenses/>.
17 |  */
18 | 
19 | #include <stdio.h>
20 | #include <stdlib.h>
21 | 
/* Return the absolute value of i */
int abs (int i)
{
  if (i < 0)
    return -i;
  return i;
}
26 | 
/* Character code of the first table entry ('.') */
#define TABLE_BASE 0x2e
/* Number of entries of the table */
#define TABLE_SIZE 0x4d

/* Table value marking a character which is not a valid digit */
#define XX ((char)0x40)


/* Digit value of each character, indexed by (character - TABLE_BASE);
 * each row below starts at the character code given in its comment */
static const char a64l_table[TABLE_SIZE] =
{
  /* 0x2e */                                                           0,  1,
  /* 0x30 */   2,  3,  4,  5,  6,  7,  8,  9, 10, 11, XX, XX, XX, XX, XX, XX,
  /* 0x40 */  XX, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
  /* 0x50 */  27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, XX, XX, XX, XX, XX,
  /* 0x60 */  XX, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52,
  /* 0x70 */  53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63
};
42 | 
43 | 
44 | long int a64l(const char *string)
45 | {
46 |   const char *ptr = string;
47 |   unsigned long int result = 0ul;
48 |   const char *end = ptr + 6;
49 |   int shift = 0;
50 | 
51 |   do
52 |     {
53 |       unsigned index;
54 |       unsigned value;
55 | 
56 |       index = *ptr - TABLE_BASE;
57 |       if ((unsigned int) index >= TABLE_SIZE)
58 | 	break;
59 |       value = (int) a64l_table[index];
60 |       if (value == (int) XX)
61 | 	break;
62 |       ++ptr;
63 |       result |= value << shift;
64 |       shift += 6;
65 |     }
66 |   while (ptr != end);
67 | 
68 |   return (long int) result;
69 | }
70 | 
71 | 
/* Convert the base 10 number at the start of nptr to an int, using strtol.  */
int atoi(const char *nptr)
{
  long parsed = strtol (nptr, (char **) NULL, 10);

  return (int) parsed;
}
77 | 
78 | 
79 | 
/* Entry point doing nothing: the functions above are never called here */
int main() {
	return 0;
}
83 | 


--------------------------------------------------------------------------------
/test/find/test_string.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * This file is part of Sibyl.
  3 |  * Copyright 2014 Camille MOUGEY <camille.mougey@cea.fr>
  4 |  *
  5 |  * Sibyl is free software: you can redistribute it and/or modify it
  6 |  * under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * Sibyl is distributed in the hope that it will be useful, but WITHOUT
 11 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 12 |  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
 13 |  * License for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with Sibyl. If not, see <http://www.gnu.org/licenses/>.
 17 |  */
 18 | 
 19 | #include <stdio.h>
 20 | #include <stdlib.h>
 21 | #include <ctype.h>
 22 | 
 23 | #define STRTEST "Hello"
 24 | 
 25 | char * ___strtok;
 26 | 
 27 | 
 28 | /**
 29 |  * strnicmp - Case insensitive, length-limited string comparison
 30 |  * @s1: One string
 31 |  * @s2: The other string
 32 |  * @len: the maximum number of characters to compare
 33 |  */
 34 | int strnicmp(const char *s1, const char *s2, size_t len)
 35 | {
 36 | 	/* Yes, Virginia, it had better be unsigned */
 37 | 	unsigned char c1, c2;
 38 | 
 39 | 	c1 = 0;	c2 = 0;
 40 | 	if (len) {
 41 | 		do {
 42 | 			c1 = *s1; c2 = *s2;
 43 | 			s1++; s2++;
 44 | 			if (!c1)
 45 | 				break;
 46 | 			if (!c2)
 47 | 				break;
 48 | 			if (c1 == c2)
 49 | 				continue;
 50 | 			c1 = tolower(c1);
 51 | 			c2 = tolower(c2);
 52 | 			if (c1 != c2)
 53 | 				break;
 54 | 		} while (--len);
 55 | 	}
 56 | 	return (int)c1 - (int)c2;
 57 | }
 58 | 
 59 | /**
 60 |  * strcpy - Copy a %NUL terminated string
 61 |  * @dest: Where to copy the string to
 62 |  * @src: Where to copy the string from
 63 |  */
 64 | char * strcpy(char * dest,const char *src)
 65 | {
 66 | 	char *tmp = dest;
 67 | 
 68 | 	while ((*dest++ = *src++) != '\0')
 69 | 		/* nothing */;
 70 | 	return tmp;
 71 | }
 72 | 
 73 | /**
 74 |  * strncpy - Copy a length-limited, %NUL-terminated string
 75 |  * @dest: Where to copy the string to
 76 |  * @src: Where to copy the string from
 77 |  * @count: The maximum number of bytes to copy
 78 |  *
 79 |  * Note that unlike userspace strncpy, this does not %NUL-pad the buffer.
 80 |  * However, the result is not %NUL-terminated if the source exceeds
 81 |  * @count bytes.
 82 |  */
 83 | char * strncpy(char * dest,const char *src,size_t count)
 84 | {
 85 | 	char *tmp = dest;
 86 | 
 87 | 	while (count-- && (*dest++ = *src++) != '\0')
 88 | 		/* nothing */;
 89 | 
 90 | 	return tmp;
 91 | }
 92 | 
 93 | /**
 94 |  * strcat - Append one %NUL-terminated string to another
 95 |  * @dest: The string to be appended to
 96 |  * @src: The string to append to it
 97 |  */
 98 | char * strcat(char * dest, const char * src)
 99 | {
100 | 	char *tmp = dest;
101 | 
102 | 	while (*dest)
103 | 		dest++;
104 | 	while ((*dest++ = *src++) != '\0')
105 | 		;
106 | 
107 | 	return tmp;
108 | }
109 | 
/**
 * strncat - Append at most @count bytes of a string to another one
 * @dest: String to extend
 * @src: String appended to @dest
 * @count: Maximum number of bytes taken from @src
 *
 * Unlike strncpy, the result is always %NUL-terminated (so up to
 * @count + 1 bytes are written). Nothing is done when @count is 0.
 *
 * Returns @dest.
 */
char * strncat(char *dest, const char *src, size_t count)
{
	char *end = dest;

	if (count == 0)
		return dest;

	while (*end != '\0')
		end++;

	for (;;) {
		char ch = *src++;

		*end++ = ch;
		if (ch == '\0')
			break;
		if (--count == 0) {
			/* Budget exhausted: terminate by hand */
			*end = '\0';
			break;
		}
	}

	return dest;
}
136 | 
/**
 * strcmp - Compare two strings
 * @cs: One string
 * @ct: Another string
 *
 * Returns a negative value, 0 or a positive value when @cs is respectively
 * lower than, equal to or greater than @ct.
 *
 * Bytes are compared as unsigned char and the difference is computed on
 * int: squeezing it into a signed char gives the wrong sign when the two
 * bytes differ by 128 or more.
 */
int strcmp(const char * cs,const char * ct)
{
	unsigned char c1, c2;

	for (;;) {
		c1 = (unsigned char)*cs++;
		c2 = (unsigned char)*ct++;
		/* Stop on the first difference or at the common end */
		if (c1 != c2 || c1 == '\0')
			break;
	}

	return (int)c1 - (int)c2;
}
153 | 
/**
 * strchr - Locate the first occurrence of a character in a string
 * @s: String to scan
 * @c: Character looked for (converted to char)
 *
 * Returns a pointer to the match, or NULL when @c is absent. The
 * terminator counts as part of the string, so looking for '\0' succeeds.
 */
char * strchr(const char * s, int c)
{
	const char target = (char) c;

	while (*s != target) {
		if (*s == '\0')
			return NULL;
		s++;
	}
	return (char *) s;
}
166 | 
/**
 * strlen - Compute the length of a string
 * @s: String to measure
 *
 * Returns the number of characters before the terminating %NUL.
 */
size_t strlen(const char * s)
{
	size_t n = 0;

	while (s[n] != '\0')
		n++;
	return n;
}
179 | 
180 | 
/**
 * strrchr - Find the last occurrence of a character in a string
 * @s: The string to be searched
 * @c: The character to search for (converted to char)
 *
 * Returns a pointer to the last match, or NULL when @c is absent. The
 * terminator counts as part of the string, so looking for '\0' returns
 * the end of @s.
 */
char * strrchr(const char * s, int c)
{
	const char *last = NULL;

	/*
	 * Single forward pass remembering the latest match. Walking backwards
	 * down to "one before @s" would form a pointer outside of the string,
	 * which is undefined behaviour.
	 */
	do {
		if (*s == (char)c)
			last = s;
	} while (*s++ != '\0');

	return (char *)last;
}
195 | 
/**
 * strnlen - Compute the length of a string, up to a limit
 * @s: String to measure
 * @count: Maximum number of bytes examined
 *
 * Returns the length of @s, or @count when no terminator is found within
 * the first @count bytes.
 */
size_t strnlen(const char * s, size_t count)
{
	size_t n = 0;

	/* The bound is checked first so that s[count] is never read */
	while (n < count && s[n] != '\0')
		n++;
	return n;
}
209 | 
/**
 * strspn - Length of the leading part of @s made only of characters
 * 	found in @accept
 * @s: String to scan
 * @accept: Set of accepted characters
 */
size_t strspn(const char *s, const char *accept)
{
	size_t n;

	for (n = 0; s[n] != '\0'; n++) {
		const char *a = accept;

		/* Look for s[n] in the accepted set */
		while (*a != '\0' && *a != s[n])
			a++;
		/* Set exhausted without a match: the span ends here */
		if (*a == '\0')
			break;
	}

	return n;
}
234 | 
/**
 * strpbrk - Locate the first character of @cs that belongs to @ct
 * @cs: String to scan
 * @ct: Set of characters looked for
 *
 * Returns a pointer into @cs, or NULL when no character of @ct is found.
 */
char * strpbrk(const char * cs,const char * ct)
{
	for (; *cs != '\0'; cs++) {
		const char *probe = ct;

		while (*probe != '\0') {
			if (*probe == *cs)
				return (char *) cs;
			probe++;
		}
	}
	return NULL;
}
252 | 
253 | 
254 | /**
255 |  * strtok - Split a string into tokens
256 |  * @s: The string to be searched
257 |  * @ct: The characters to search for
258 |  *
259 |  * WARNING: strtok is deprecated, use strsep instead.
260 |  */
261 | char * strtok(char * s,const char * ct)
262 | {
263 | 	char *sbegin, *send;
264 | 
265 | 	sbegin  = s ? s : ___strtok;
266 | 	if (!sbegin) {
267 | 		return NULL;
268 | 	}
269 | 	sbegin += strspn(sbegin,ct);
270 | 	if (*sbegin == '\0') {
271 | 		___strtok = NULL;
272 | 		return( NULL );
273 | 	}
274 | 	send = strpbrk( sbegin, ct);
275 | 	if (send && *send != '\0')
276 | 		*send++ = '\0';
277 | 	___strtok = send;
278 | 	return (sbegin);
279 | }
280 | 
/**
 * strsep - Extract the next token of a string
 * @s: Address of the string being tokenized
 * @ct: Set of delimiter characters
 *
 * The first delimiter found is overwritten with %NUL and *@s is moved
 * right after it, ready for the next call; *@s becomes NULL once no
 * delimiter is left. Empty tokens are returned too.
 *
 * Returns the token, or NULL when *@s is NULL.
 */
char * strsep(char **s, const char *ct)
{
	char *token = *s;
	char *delim;

	if (token == NULL)
		return NULL;

	delim = strpbrk(token, ct);
	if (delim != NULL) {
		*delim = '\0';
		delim++;
	}
	*s = delim;

	return token;
}
306 | 
/**
 * memset - Fill a memory area with a byte value
 * @s: Start of the area
 * @c: Value written to every byte (converted to char)
 * @count: Size of the area, in bytes
 *
 * Returns @s.
 */
void * memset(void * s,int c,size_t count)
{
	char *bytes = (char *) s;
	size_t i;

	for (i = 0; i < count; i++)
		bytes[i] = c;

	return s;
}
324 | 
325 | 
/* Return the number of characters of s before its terminating NUL. */
int my_strlen(const char *s) {
	int len;

	for (len = 0; s[len] != '\0'; len++)
		/* nothing */;
	return len;
}
332 | 
/**
 * memmove - Copy a memory area to another one, overlap allowed
 * @dest: Destination area
 * @src: Source area
 * @count: Number of bytes to copy
 *
 * The copy direction is chosen from the relative position of the two
 * areas, so that overlapping bytes are read before being overwritten.
 *
 * Returns @dest.
 */
void * memmove(void * dest,const void *src,size_t count)
{
	char *to = (char *) dest;
	const char *from = (const char *) src;
	size_t i;

	if (dest <= src) {
		/* Destination is lower: copy from the first byte upwards */
		for (i = 0; i < count; i++)
			to[i] = from[i];
		return dest;
	}

	/* Destination is higher: copy from the last byte downwards */
	for (i = count; i > 0; i--)
		to[i - 1] = from[i - 1];

	return dest;
}
360 | 
/**
 * memcpy - Copy a memory area to another one
 * @dest: Destination area
 * @src: Source area
 * @count: Number of bytes to copy
 *
 * Bytes are copied from the lowest address upwards.
 *
 * Returns @dest.
 */
void * memcpy(void * dest,const void *src,size_t count)
{
	char *to = (char *) dest;
	const char *from = (const char *) src;
	size_t i;

	for (i = 0; i < count; i++)
		to[i] = from[i];

	return dest;
}
379 | 
/**
 * my_memcpy - Copy a memory area to another one, last byte first
 * @dest: Destination area
 * @src: Source area
 * @count: Number of bytes to copy
 *
 * Same result as memcpy(), but the bytes are copied from the highest
 * address down to the lowest one.
 *
 * Returns @dest.
 */
void * my_memcpy(void * dest,const void *src,size_t count)
{
	/*
	 * Work on char pointers: subscripting a `void *` is not valid C, and
	 * computing `count - 1` up front wraps around when @count is 0.
	 */
	char *to = (char *) dest;
	const char *from = (const char *) src;

	/* @count is decremented before being used as the index */
	while (count--)
		to[count] = from[count];

	return dest;
}
389 | 
390 | int main() {
391 | 
392 | 	printf("%s, %d\n", STRTEST, my_strlen(STRTEST));
393 | 	return 0;
394 | }
395 | 


--------------------------------------------------------------------------------
/test/find/test_stub.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is part of Sibyl.
 3 |  * Copyright 2017 Camille MOUGEY <camille.mougey@cea.fr>
 4 |  *
 5 |  * Sibyl is free software: you can redistribute it and/or modify it
 6 |  * under the terms of the GNU General Public License as published by
 7 |  * the Free Software Foundation, either version 3 of the License, or
 8 |  * (at your option) any later version.
 9 |  *
10 |  * Sibyl is distributed in the hope that it will be useful, but WITHOUT
11 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 |  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
13 |  * License for more details.
14 |  *
15 |  * You should have received a copy of the GNU General Public License
16 |  * along with Sibyl. If not, see <http://www.gnu.org/licenses/>.
17 |  */
18 | 
19 | #include <string.h>
20 | 
/*
 * my_strlen - Return the length of s, as computed by the C library strlen().
 *
 * NOTE(review): this wrapper presumably exists so that the test binary
 * contains a function which merely forwards to an imported one -- confirm
 * against the test driver.
 */
size_t my_strlen(const char *s) {
	return strlen(s);
}
24 | 
/* Entry point: does nothing and reports success. */
int main() {
	return 0;
}
28 | 


--------------------------------------------------------------------------------
/test/learn/Makefile:
--------------------------------------------------------------------------------
 1 | CC = gcc
 2 | CFLAGS = -m64
 3 | 
 4 | SRC = $(wildcard *.c)
 5 | PROGRAMS = $(SRC:.c=)
 6 | 
 7 | all: $(PROGRAMS)
 8 | 
 9 | %: %.c
10 | 	$(CC) $(CFLAGS) $< -o $@
11 | 
12 | clean:
13 | 	rm -rf $(PROGRAMS) $(CLASS) $(CLASSCOMP)
14 | 


--------------------------------------------------------------------------------
/test/learn/__init__.py:
--------------------------------------------------------------------------------
1 | from .run_tests import test_learn
2 | 


--------------------------------------------------------------------------------
/test/learn/add.c:
--------------------------------------------------------------------------------
 1 | int add(int a, int b) {
 2 | 	return a+b;
 3 | }
 4 | 
/*
 * With GCC (but not clang), main is declared with the optimize("-O0")
 * attribute.
 * NOTE(review): presumably this keeps the call below from being inlined or
 * folded away, so that it can be traced -- confirm.
 */
#ifdef __GNUC__
#ifndef __clang__
int main(void) __attribute__((optimize("-O0")));
#endif
#endif
/* Call add(42, 42) once and return its result as the exit status. */
int main(void){
	return add(42,42);
}
13 | 


--------------------------------------------------------------------------------
/test/learn/add.h:
--------------------------------------------------------------------------------
1 | int add(int a, int b);
2 | 


--------------------------------------------------------------------------------
/test/learn/copy_struct.c:
--------------------------------------------------------------------------------
 1 | #include "copy_struct.h"
 2 | 
 3 | void* my_memcpy(void *dest,const void *src, size_t n)
 4 | {
 5 | 	size_t i;
 6 | 	void *tmp = dest;
 7 | 
 8 | 	for (i = 0;i < n; i++) {
 9 | 		*(char*)dest++ = *(char*)src++;
10 | 	}
11 | 	return tmp;
12 | }
13 | 
14 | void copy_struct(elem* in, elem* out) {
15 | 	my_memcpy((char*) out, (char *)in, sizeof(elem));
16 | }
17 | 
/*
 * With GCC (but not clang), main is declared with the optimize("-O0")
 * attribute.
 * NOTE(review): presumably this keeps the call below from being optimized
 * away, so that it can be traced -- confirm.
 */
#ifdef __GNUC__
#ifndef __clang__
int main(void) __attribute__((optimize("-O0")));
#endif
#endif
/*
 * Copy e1 into e2 through copy_struct().
 * Only e1.a is given a value; the other fields of e1 are copied as found.
 */
int main(void) {
	elem e1, e2;
	e1.a = 4;
	copy_struct(&e1, &e2);
	return 0;
}
29 | 


--------------------------------------------------------------------------------
/test/learn/copy_struct.h:
--------------------------------------------------------------------------------
1 | typedef struct elem {
2 | 	int a;
3 | 	char* b;
4 | 	int c[10];
5 | } elem;
6 | typedef long unsigned int size_t;
7 | void copy_struct(elem* in, elem* out);
8 | 


--------------------------------------------------------------------------------
/test/learn/deref_struct.c:
--------------------------------------------------------------------------------
 1 | #include "deref_struct.h"
 2 | 
 3 | sub_elem* deref_struct(list* l, unsigned int expected) {
 4 | 	int i;
 5 | 	for (;;) {
 6 | 		for (i = 0; i < 10; i++) {
 7 | 			if (l->elem.c[i].b == expected) {
 8 | 				return &(l->elem.c[i]);
 9 | 			}
10 | 		}
11 | 		l = l->next;
12 | 	}
13 | }
14 | 
/*
 * With GCC (but not clang), main is declared with the optimize("-O0")
 * attribute.
 * NOTE(review): presumably this keeps the call below from being optimized
 * away, so that it can be traced -- confirm.
 */
#ifdef __GNUC__
#ifndef __clang__
int main(void) __attribute__((optimize("-O0")));
#endif
#endif
/*
 * Chain three list nodes and look for the value 0x1337, stored in the fifth
 * sub-element of the last node.
 *
 * NOTE(review): tab is not initialized. tab[2].next and every `b` field other
 * than the one set below hold indeterminate values, which deref_struct()
 * reads before reaching the match -- confirm this is acceptable.
 */
int main(void) {
	list tab[3];
	tab[0].next = &tab[1];
	tab[1].next = &tab[2];

	tab[2].elem.c[4].b = 0x1337;
	deref_struct(&tab[0], 0x1337);
	return 0;
}
29 | 


--------------------------------------------------------------------------------
/test/learn/deref_struct.h:
--------------------------------------------------------------------------------
 1 | typedef struct sub_elem {
 2 | 	int a;
 3 | 	unsigned int b;
 4 | } sub_elem;
 5 | 
 6 | typedef struct elem {
 7 | 	char *a;
 8 | 	sub_elem c[10];
 9 | } elem;
10 | 
11 | typedef struct list {
12 | 	struct list* next;
13 | 	elem elem;
14 | } list;
15 | 
16 | sub_elem* deref_struct(list* l, unsigned int expected);
17 | 


--------------------------------------------------------------------------------
/test/learn/doublePtr.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | int doublePtr(int** x, int nbElem)
 4 | {
 5 | 	int sum = 0;
 6 | 	for(nbElem--;nbElem>=0;nbElem--)
 7 | 		sum += (*x)[nbElem];
 8 | 	return sum;
 9 | }
10 | 
/*
 * With GCC (but not clang), main is declared with the optimize("-O0")
 * attribute.
 * NOTE(review): presumably this keeps the call below from being inlined or
 * folded away, so that it can be traced -- confirm.
 */
#ifdef __GNUC__
#ifndef __clang__
int main(void) __attribute__((optimize("-O0")));
#endif
#endif
/* Sum a 10-element array through a pointer to its base pointer. */
int main(void) {
	int tab[10]={10,1,2,3,4,5,6,7,8,9};
	int* ptr = tab;

	return doublePtr(&ptr, 10);
}
22 | 


--------------------------------------------------------------------------------
/test/learn/doublePtr.h:
--------------------------------------------------------------------------------
1 | int doublePtr(int** x, int nbElem);
2 | 


--------------------------------------------------------------------------------
/test/learn/my_strcpy.c:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * strcpy - Copy a %NUL terminated string
 3 |  * @dest: Where to copy the string to
 4 |  * @src: Where to copy the string from
 5 |  */
 6 | char * my_strcpy(char * dest,const char *src)
 7 | {
 8 | 	char *tmp = dest;
 9 | 
10 | 	while ((*dest++ = *src++) != '\0')
11 | 		/* nothing */;
12 | 	return tmp;
13 | }
14 | 
/*
 * With GCC (but not clang), main is declared with the optimize("-O0")
 * attribute.
 * NOTE(review): presumably this keeps the call below from being optimized
 * away, so that it can be traced -- confirm.
 */
#ifdef __GNUC__
#ifndef __clang__
int main(void) __attribute__((optimize("-O0")));
#endif
#endif
/* Copy tmp2 over tmp1; both are 16-byte buffers holding 15 characters. */
int main(void) {
	char tmp1[16] = "aaaaazzzzzeeeee";
	char tmp2[16] = "---------------";

	my_strcpy( tmp1, tmp2);

	return 0;
}
28 | 


--------------------------------------------------------------------------------
/test/learn/my_strcpy.h:
--------------------------------------------------------------------------------
1 | char * my_strcpy(char * dest,const char *src);
2 | 


--------------------------------------------------------------------------------
/test/learn/my_strlen.c:
--------------------------------------------------------------------------------
 1 | #include <stdlib.h>
 2 | 
 3 | /**
 4 |  * strlen - Find the length of a string
 5 |  * @s: The string to be sized
 6 |  */
 7 | size_t my_strlen(const char * s)
 8 | {
 9 | 	const char *sc;
10 | 
11 | 	for (sc = s; *sc != '\0'; ++sc)
12 | 		/* nothing */;
13 | 	return sc - s;
14 | }
15 | 
/*
 * With GCC (but not clang), main is declared with the optimize("-O0")
 * attribute.
 * NOTE(review): presumably this keeps the call below from being inlined or
 * folded away, so that it can be traced -- confirm.
 */
#ifdef __GNUC__
#ifndef __clang__
int main(void) __attribute__((optimize("-O0")));
#endif
#endif
/* Measure a constant string and return its length as the exit status. */
int main(void){
	return my_strlen("Hello world !");
}
24 | 


--------------------------------------------------------------------------------
/test/learn/my_strlen.h:
--------------------------------------------------------------------------------
1 | typedef long unsigned int size_t;
2 | size_t my_strlen(const char * s);
3 | 


--------------------------------------------------------------------------------
/test/learn/numerous_arguments.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | 
 3 | unsigned int numerous_arguments(unsigned int a, unsigned int b, unsigned int c, unsigned int d, unsigned int e, unsigned int f, unsigned int g, unsigned int h, unsigned int i, unsigned int j, unsigned int k, unsigned int l, unsigned int m, unsigned int n, unsigned int o) {
 4 | 	return a+b+c+d+e+f+g+h+i+j+k+l+m+n+o;
 5 | }
 6 | 
/*
 * With GCC (but not clang), main is declared with the optimize("-O0")
 * attribute.
 * NOTE(review): presumably this keeps the calls below from being inlined or
 * folded away, so that they can be traced -- confirm.
 */
#ifdef __GNUC__
#ifndef __clang__
int main(void) __attribute__((optimize("-O0")));
#endif
#endif
/* Call numerous_arguments() twice, with two different sets of values. */
int main(void){
	return numerous_arguments(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15)+numerous_arguments(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1);
}
15 | 


--------------------------------------------------------------------------------
/test/learn/numerous_arguments.h:
--------------------------------------------------------------------------------
1 | unsigned int numerous_arguments(unsigned int a, unsigned int b, unsigned int c, unsigned int d, unsigned int e, unsigned int f, unsigned int g, unsigned int h, unsigned int i, unsigned int j, unsigned int k, unsigned int l, unsigned int m, unsigned int n, unsigned int o);
2 | 


--------------------------------------------------------------------------------
/test/learn/run_tests.py:
--------------------------------------------------------------------------------
  1 | import subprocess
  2 | import os
  3 | import sys
  4 | import tempfile
  5 | import imp
  6 | from utils.log import log_error, log_success, log_info
  7 | 
  8 | from miasm2.analysis.machine import Machine
  9 | from miasm2.analysis.binary import Container
 10 | 
 11 | from sibyl.testlauncher import TestLauncher
 12 | from sibyl.abi.x86 import ABI_AMD64_SYSTEMV
 13 | from sibyl.config import config
 14 | 
 15 | # Tests to fix
 16 | unsupported = [
 17 | ]
 18 | 
 19 | def invoke_pin(filename, func_name, header_filename, cont):
 20 |     return ["sibyl", "learn", "-t", "pin", func_name, filename, header_filename]
 21 | 
 22 | def invoke_miasm(filename, func_name, header_filename, cont):
 23 |     main_addr = cont.loc_db.get_name_offset("main")
 24 |     return ["sibyl", "learn", "-t", "miasm", "-m", "0x%x" % main_addr,
 25 |             func_name, filename, header_filename]
 26 | 
 27 | def test_learn(args):
 28 |     machine = Machine("x86_64")
 29 | 
 30 |     # Compil tests
 31 |     log_info("Remove old files")
 32 |     os.system("make clean")
 33 |     log_info("Compile C files")
 34 |     status = os.system("make")
 35 |     assert status == 0
 36 | 
 37 |     # Find test names
 38 |     c_files = []
 39 | 
 40 |     for cur_dir, sub_dir, files in os.walk("."):
 41 |         c_files += [x[:-2] for x in files if x.endswith(".c")]
 42 | 
 43 |     # Ways to invoke
 44 |     to_invoke = {
 45 |         "Miasm": invoke_miasm,
 46 |     }
 47 |     if args.pin_tracer:
 48 |         to_invoke["PIN"] = invoke_pin
 49 | 
 50 |     # Learn + test
 51 |     fail = False
 52 |     for filename in c_files:
 53 | 
 54 |         if filename in unsupported:
 55 |             log_error("Skip %s (unsupported)" % filename)
 56 |             continue
 57 | 
 58 |         with open(filename) as fdesc:
 59 |             cont = Container.from_stream(fdesc)
 60 | 
 61 |         func_name = filename
 62 |         func_addr = cont.loc_db.get_name_offset(func_name)
 63 |         header_filename = "%s.h" % filename
 64 | 
 65 |         for name, cb in to_invoke.iteritems():
 66 |             log_info("Learning %s over %s with %s" % (func_name,
 67 |                                                       filename, name))
 68 |             cmdline = cb(filename, func_name, header_filename, cont)
 69 | 
 70 |             print " ".join(cmdline)
 71 |             sibyl = subprocess.Popen(cmdline, env=os.environ,
 72 |                                      stdout=subprocess.PIPE,
 73 |                                      stderr=subprocess.PIPE)
 74 |             stdout, stderr = sibyl.communicate()
 75 |             if sibyl.returncode != 0:
 76 |                 log_error("Failed to learn with error:")
 77 |                 print stderr
 78 |                 fail = True
 79 |                 continue
 80 | 
 81 |             log_info("Testing generated class")
 82 | 
 83 |             mod = imp.new_module("testclass")
 84 |             exec stdout in mod.__dict__
 85 |             classTest = getattr(mod, "TESTS")[0]
 86 |             tl = TestLauncher(filename, machine, ABI_AMD64_SYSTEMV, [classTest],
 87 |                               config.jit_engine)
 88 | 
 89 |             possible_funcs = tl.run(func_addr)
 90 |             if tl.possible_funcs and possible_funcs == [filename]:
 91 |                 log_success("Generated class recognize the function " \
 92 |                             "'%s'" % func_name)
 93 |             else:
 94 |                 log_error("Generated class failed to recognize the function " \
 95 |                           "'%s'" % func_name)
 96 |                 fail = True
 97 | 
 98 |     # Clean
 99 |     log_info( "Remove old files" )
100 |     os.system("make clean")
101 | 
102 |     return fail
103 | 


--------------------------------------------------------------------------------
/test/learn/several_traces.c:
--------------------------------------------------------------------------------
 1 | int several_traces(int a, int b, unsigned char addOrMul){
 2 | 	if( addOrMul )
 3 | 		return a+b;
 4 | 	else
 5 | 		return a*b;
 6 | }
 7 | 
/*
 * With GCC (but not clang), main is declared with the optimize("-O0")
 * attribute.
 * NOTE(review): presumably this keeps the calls below from being inlined or
 * folded away, so that they can be traced -- confirm.
 */
#ifdef __GNUC__
#ifndef __clang__
int main(void) __attribute__((optimize("-O0")));
#endif
#endif
/* Call several_traces() three times: twice as a product, once as a sum. */
int main(void) {
	return several_traces(42,42,0)+several_traces(-42,1337,1)+several_traces(4,2,0);
}
16 | 


--------------------------------------------------------------------------------
/test/learn/several_traces.h:
--------------------------------------------------------------------------------
1 | int several_traces(int a, int b, unsigned char addOrMul);
2 | 


--------------------------------------------------------------------------------
/test/run_all_tests.py:
--------------------------------------------------------------------------------
 1 | from argparse import ArgumentParser
 2 | import os
 3 | 
 4 | from utils.log import log_info
 5 | from find import test_find
 6 | from learn import test_learn
 7 | 
# Test entry points, run in this order; each one is called with the parsed
# command-line arguments
AVAILABLE_TEST = [
    test_find,
    test_learn,
]


# Command-line options; all of them are off by default
parser = ArgumentParser("Regression tester")
parser.add_argument("-f", "--func-heuristic", action="store_true",
                    help="Enable function addresses detection heuristics")
parser.add_argument("-a", "--arch-heuristic", action="store_true",
                    help="Enable architecture detection heuristics")
parser.add_argument("-p", "--pin-tracer", action="store_true",
                    help="Enable PIN tracer")
args = parser.parse_args()
22 | 
def run_test(test_func, args):
    """Run `test_func(args)` from the directory of the package defining it.

    The directory is derived from `test_func.__module__` (for instance
    "learn.run_tests" gives "learn") and is resolved against the current
    working directory. The previous working directory is restored
    afterwards, even if the test raises.

    Returns the value returned by `test_func`.
    """
    log_info("Start test: "+test_func.__module__)

    module_path = os.path.dirname(test_func.__module__.replace('.','/'))
    previous_cwd = os.getcwd()

    os.chdir(os.path.join(previous_cwd, module_path))
    try:
        return test_func(args)
    finally:
        # Do not leave the next test in the wrong directory on error
        os.chdir(previous_cwd)
33 | 
# Run every test, even after a failure, and accumulate the global status
fail = False
for test in AVAILABLE_TEST:
    fail |= run_test(test, args)

# Explicit exit instead of an `assert`, which is stripped by `python -O` and
# would then report success whatever the result. As before, anything other
# than False is a failure.
if fail is not False:
    raise SystemExit("Regression tests failed")
39 | 


--------------------------------------------------------------------------------
/test/utils/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = []
2 | 


--------------------------------------------------------------------------------
/test/utils/log.py:
--------------------------------------------------------------------------------
# ANSI escape sequences, by name; "end" resets the attributes
colors = {"red": "\033[91;1m",
          "end": "\033[0m",
          "green": "\033[92;1m",
          "lightcyan": "\033[96m",
          "blue": "\033[94;1m"}
 6 | 
 7 | def log_error(content):
 8 |     msg = "%(red)s[-] " % colors + content + "%(end)s" % colors
 9 |     print msg
10 | 
def log_success(content):
    """Print `content` prefixed with "[+] ", wrapped in the "green" color."""
    parts = [colors["green"], "[+] ", content, colors["end"]]
    print("".join(parts))
14 | 
def log_info(content):
    """Print `content` prefixed with "[+] ", without any color."""
    line = "[+] " + content
    print(line)
17 | 


--------------------------------------------------------------------------------