├── modelLang
    ├── structures
    │   ├── __init__.py
    │   ├── windows_specs.py
    │   └── headers
    │   │   ├── linux_kernel.h
    │   │   └── reactos.h
    ├── parsers
    │   ├── __init__.py
    │   ├── langlex.py
    │   └── parser.py
    ├── __init__.py
    ├── backends
    │   ├── __init__.py
    │   ├── default_backend.py
    │   ├── python_backend.py
    │   └── z3_backend.py
    ├── utils.py
    └── classes.py
├── tests
    ├── statements
    │   ├── fromfiletest
    │   ├── fromfile.lmod
    │   ├── optimization.lmod
    │   ├── __init__.py
    │   ├── conditional_assignment.lmod
    │   ├── fromfile.py
    │   ├── conditional_assignment.py
    │   └── optimization.py
    ├── functional
    │   ├── mod2.lmod
    │   ├── mod1.lmod
    │   ├── __init__.py
    │   ├── negativecombo.py
    │   └── positivecombo.py
    ├── loops
    │   ├── __init__.py
    │   ├── vloop.lmod
    │   ├── conditionalloop.lmod
    │   ├── conditionalloop.py
    │   └── vloop.py
    ├── operators
    │   ├── strcmp.lmod
    │   ├── bitwise.mod
    │   ├── __init__.py
    │   ├── alignment.lmod
    │   ├── algebra.mod
    │   ├── overflow.lmod
    │   ├── alignment.py
    │   ├── strcmp.py
    │   ├── overflow.py
    │   ├── bitwise.py
    │   └── algebra.py
    ├── emptytest.py
    └── __init__.py
├── testcases
    ├── windows
    │   ├── 7
    │   │   ├── win7_printf.exe
    │   │   ├── win7_ntcreateuserprocess.exe
    │   │   └── win7_printf.sh
    │   ├── xp-7
    │   │   ├── testcase_0.constraints
    │   │   ├── testcase_1.constraints
    │   │   ├── testcase_2.constraints
    │   │   ├── testcase_0
    │   │   ├── testcase_1
    │   │   ├── testcase_2
    │   │   └── differential.sh
    │   └── xp
    │   │   ├── winxp_printf.exe
    │   │   ├── winxp_kernel_user.exe
    │   │   ├── winxp_kernel_user.sh
    │   │   ├── winxp_printf.sh
    │   │   └── winxp_createprocess.exe
    ├── reactos_kernel.pe
    └── linux_kernel32.elf
├── .gitmodules
├── merge_user_kernel.sh
├── tooleval
    ├── ghidraplugin
    │   ├── ghidradumpmem.py
    │   └── ghidraanalysis.sh
    ├── Makefile
    ├── idaplugin
    │   ├── idaanalysis.sh
    │   ├── idadumpmem.py
    │   └── idadumpmem.py.asm
    ├── memdump.proto
    ├── __init__.py
    ├── classes.py
    ├── idaadapter.py
    ├── ghidraadapter.py
    ├── winadapter.py
    ├── common.py
    └── r2adapter.py
├── .gitignore
├── setup.py
├── requirements.txt
├── verify.py
├── verify_dataset.py
├── generate.py
├── explore_conditions.py
├── README.md
├── differential.py
└── SPECIFICATIONS.md


/modelLang/structures/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/modelLang/parsers/__init__.py:
--------------------------------------------------------------------------------
1 | from .parser import *
2 | 


--------------------------------------------------------------------------------
/tests/statements/fromfiletest:
--------------------------------------------------------------------------------
1 | 133713371337133713371337133713371337


--------------------------------------------------------------------------------
/testcases/windows/xp-7/testcase_0.constraints:
--------------------------------------------------------------------------------
1 | MiCreateImageFileMap.lmod_V20
2 | 


--------------------------------------------------------------------------------
/testcases/windows/xp-7/testcase_1.constraints:
--------------------------------------------------------------------------------
1 | MiCreateImageFileMap.lmod_V38
2 | 


--------------------------------------------------------------------------------
/testcases/windows/xp-7/testcase_2.constraints:
--------------------------------------------------------------------------------
1 | MiCreateImageFileMap.lmod_V38
2 | 


--------------------------------------------------------------------------------
/tests/functional/mod2.lmod:
--------------------------------------------------------------------------------
1 | INPUT variable 4
2 | 
3 | V1: NOT ISPOW2 variable term
4 | 


--------------------------------------------------------------------------------
/modelLang/__init__.py:
--------------------------------------------------------------------------------
1 | from .parsers import Parser
2 | from .backends import PythonBackend, Z3Backend
3 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "models"]
2 | 	path = models
3 | 	url = git@github.com:eurecom-s3/loaders-models.git
4 | 


--------------------------------------------------------------------------------
/tests/loops/__init__.py:
--------------------------------------------------------------------------------
1 | from .vloop import VLoopTest
2 | from .conditionalloop import ConditionalLoopTest
3 | 


--------------------------------------------------------------------------------
/merge_user_kernel.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | cat $1 $2 | grep -v "Succe" | awk -F ' ' '{print $1}' | sort | uniq > $3
4 | 


--------------------------------------------------------------------------------
/modelLang/backends/__init__.py:
--------------------------------------------------------------------------------
1 | from .z3_backend import Z3Backend
2 | from .python_backend import PythonBackend
3 | 


--------------------------------------------------------------------------------
/testcases/reactos_kernel.pe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eurecom-s3/loaders_modeling/HEAD/testcases/reactos_kernel.pe


--------------------------------------------------------------------------------
/testcases/linux_kernel32.elf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eurecom-s3/loaders_modeling/HEAD/testcases/linux_kernel32.elf


--------------------------------------------------------------------------------
/tests/functional/mod1.lmod:
--------------------------------------------------------------------------------
1 | INPUT variable 4
2 | 
3 | V1: UGT variable 15 term
4 | V2: EQ BITAND variable 0x0000ffff 0 term


--------------------------------------------------------------------------------
/tests/statements/fromfile.lmod:
--------------------------------------------------------------------------------
1 | INPUT file 15
2 | 
3 | P: start <- INT 5 4
4 | 
5 | FROMFILE fromfiletest file start 0 10
6 | 


--------------------------------------------------------------------------------
/testcases/windows/7/win7_printf.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eurecom-s3/loaders_modeling/HEAD/testcases/windows/7/win7_printf.exe


--------------------------------------------------------------------------------
/testcases/windows/xp-7/testcase_0:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eurecom-s3/loaders_modeling/HEAD/testcases/windows/xp-7/testcase_0


--------------------------------------------------------------------------------
/testcases/windows/xp-7/testcase_1:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eurecom-s3/loaders_modeling/HEAD/testcases/windows/xp-7/testcase_1


--------------------------------------------------------------------------------
/testcases/windows/xp-7/testcase_2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eurecom-s3/loaders_modeling/HEAD/testcases/windows/xp-7/testcase_2


--------------------------------------------------------------------------------
/testcases/windows/xp/winxp_printf.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eurecom-s3/loaders_modeling/HEAD/testcases/windows/xp/winxp_printf.exe


--------------------------------------------------------------------------------
/tests/functional/__init__.py:
--------------------------------------------------------------------------------
1 | from .positivecombo import PositiveCombinationTest
2 | from .negativecombo import NegativeCombinationTest
3 | 


--------------------------------------------------------------------------------
/tests/operators/strcmp.lmod:
--------------------------------------------------------------------------------
1 | INPUT inp 10
2 | 
3 | P: start <- INT 2 4
4 | P: out <- inp[2, 7]
5 | V1: STRCMP inp start '\x00FOOBAR' term


--------------------------------------------------------------------------------
/testcases/windows/xp/winxp_kernel_user.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eurecom-s3/loaders_modeling/HEAD/testcases/windows/xp/winxp_kernel_user.exe


--------------------------------------------------------------------------------
/tests/emptytest.py:
--------------------------------------------------------------------------------
1 | class Test():
2 |     testfile = ""
3 | 
4 |     @staticmethod
5 |     def run():
6 |         raise NotImplementedError
7 | 
8 | 


--------------------------------------------------------------------------------
/testcases/windows/7/win7_ntcreateuserprocess.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eurecom-s3/loaders_modeling/HEAD/testcases/windows/7/win7_ntcreateuserprocess.exe


--------------------------------------------------------------------------------
/tests/statements/optimization.lmod:
--------------------------------------------------------------------------------
1 | INPUT var1 4
2 | INPUT var2 4
3 | 
4 | V1: UGE var1 10 term
5 | V2: ULE var2 10 term
6 | 
7 | MINIMIZE var1
8 | MAXIMIZE var2
9 | 


--------------------------------------------------------------------------------
/tests/operators/bitwise.mod:
--------------------------------------------------------------------------------
1 | INPUT VARIABLE 4
2 | 
3 | P: VARA <- BITAND VARIABLE 0x01011001
4 | P: VARB <- BITOR  VARIABLE 0x01011001
5 | P: VARC <- BITNOT VARIABLE 
6 | 


--------------------------------------------------------------------------------
/tests/statements/__init__.py:
--------------------------------------------------------------------------------
1 | from .fromfile import FromFileTest
2 | from .optimization import OptimizationTest
3 | from .conditional_assignment import ConditionalAssignmentTest
4 | 


--------------------------------------------------------------------------------
/tooleval/ghidraplugin/ghidradumpmem.py:
--------------------------------------------------------------------------------
1 | prog = getCurrentProgram()
2 | name = prog.getName()
3 | 
4 | print(name)
5 | 
6 | fname = "/tmp/ghidradumps/" + name
7 | 
8 | dumpMemory(fname)
9 | 


--------------------------------------------------------------------------------
/tests/statements/conditional_assignment.lmod:
--------------------------------------------------------------------------------
1 | INPUT inputvar 4
2 | 
3 | V1: EQ inputvar 0x1234 term
4 | P: outvar <- inputvar
5 | 
6 | V2: ULE inputvar 0x1
7 | 
8 | P(V2): outvar <- ADD inputvar 1


--------------------------------------------------------------------------------
/tests/operators/__init__.py:
--------------------------------------------------------------------------------
1 | from .bitwise import BitwiseTest
2 | from .algebra import AlgebraTest
3 | from .alignment import AlignmentTest
4 | from .strcmp import StringCompareTest
5 | from .overflow import OverflowTest
6 | 


--------------------------------------------------------------------------------
/tests/operators/alignment.lmod:
--------------------------------------------------------------------------------
1 | INPUT VARIABLE 4
2 | 
3 | P: alup <- ALIGNUP VARIABLE 0x1000
4 | P: aldown <- ALIGNDOWN VARIABLE 0x1000
5 | V1: EQ VARIABLE 4095 term
6 | V2: EQ alup 4096 term
7 | V3: EQ aldown 0 term
8 | 


--------------------------------------------------------------------------------
/tests/operators/algebra.mod:
--------------------------------------------------------------------------------
1 | INPUT VARIABLE 4
2 | 
3 | P: VARA <- Add VARIABLE 5
4 | P: VARB <- Sub VARIABLE 5
5 | P: VARC <- Mul VARIABLE 5
6 | P: VARD <- DIV VARIABLE 5
7 | P: VARE <- UDiv VARIABLE 5
8 | P: VARF <- MOD VARIABLE 5
9 | 


--------------------------------------------------------------------------------
/modelLang/utils.py:
--------------------------------------------------------------------------------
 1 | from collections import defaultdict
 2 | 
 3 | class customdefdict(defaultdict):
 4 |     def __missing__(self, key):
 5 |         if self.default_factory is None:
 6 |             raise KeyError(key)
 7 |         item = self.default_factory(key)
 8 |         self[key] = item
 9 |         return item
10 | 


--------------------------------------------------------------------------------
/tooleval/Makefile:
--------------------------------------------------------------------------------
 1 | # Generate the protobuf bindings for memdump.proto in C++, Python and Java.
 2 | all: memdump.pb.cc memdump_pb2.py ghidra/program/flatapi/Memdump.java
 3 | 
 4 | # C++ bindings.
 5 | memdump.pb.cc: memdump.proto
 6 | 	protoc $< --cpp_out=.
 7 | 
 8 | # Python bindings (imported by the tooleval package).
 9 | memdump_pb2.py: memdump.proto
10 | 	protoc $< --python_out=.
11 | 
12 | # Java bindings; the path mirrors the java_package option of the .proto file.
13 | ghidra/program/flatapi/Memdump.java: memdump.proto
14 | 	protoc $< --java_out=.
15 | 
16 | .PHONY: all
17 | 


--------------------------------------------------------------------------------
/testcases/windows/xp/winxp_kernel_user.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Presumably the recipe behind winxp_kernel_user.exe, which sits next to
 3 | # this script. The relative paths assume it is run from the repository root.
 4 | 
 5 | # Pin the repository to a fixed commit and sync the submodules to match.
 6 | git checkout 566e3b2b89e5d63e6e15e26ce2a79c271005a270
 7 | git submodule update --recursive
 8 | 
 9 | # Feed the listed model files to the generator.
10 | python3 ./generate.py -A models/windows/xp/MiCreateImageFileMap.lmod models/windows/xp/LdrpInitializeProcess.lmod models/windows/generic/return0.lmod models/windows/generic/reasonable_stack.lmod
11 | 


--------------------------------------------------------------------------------
/testcases/windows/7/win7_printf.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Presumably the recipe behind win7_printf.exe, which sits next to this
 3 | # script. The relative paths assume it is run from the repository root.
 4 | 
 5 | # Pin the repository to a fixed commit and sync the submodules to match.
 6 | git checkout 5e61eef93ee6031e3854b5c3bede96891b292ec9
 7 | git submodule update --recursive
 8 | 
 9 | # Run the generator under IPython; -i leaves the session open afterwards.
10 | ipython -i ./generate.py -- -A models/windows/7/MiCreateImageFileMap.lmod models/windows/7/LdrpInitializeProcess.lmod  models/windows/generic/reasonable_stack.lmod models/windows/generic/printf_import.lmod
11 | 


--------------------------------------------------------------------------------
/testcases/windows/xp/winxp_printf.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Presumably the recipe behind winxp_printf.exe, which sits next to this
 3 | # script. The relative paths assume it is run from the repository root.
 4 | 
 5 | # Pin the repository to a fixed commit and sync the submodules to match.
 6 | git checkout 44790e0ac5de2569bb583361ba5559a3a89d397c
 7 | git submodule update --recursive
 8 | 
 9 | # Run the generator under IPython; -i leaves the session open afterwards.
10 | ipython -i ./generate.py -- -A models/windows/xp/MiCreateImageFileMap.lmod models/windows/xp/LdrpInitializeProcess.lmod models/windows/generic/printf.lmod models/windows/generic/not_managed.lmod models/windows/generic/reasonable_stack.lmod
11 | 


--------------------------------------------------------------------------------
/tests/operators/overflow.lmod:
--------------------------------------------------------------------------------
 1 | INPUT VAR1 4
 2 | INPUT VAR2 4
 3 | INPUT VAR3 4
 4 | 
 5 | INPUT VAR4 4
 6 | INPUT VAR5 4
 7 | 
 8 | V1: EQ VAR1 0x80000000 term
 9 | V2: EQ VAR2 0x80000000 term
10 | V3: EQ VAR3 0x1000 term
11 | 
12 | V4: OVFLADD VAR1 VAR2 term
13 | V5: OVFLADD VAR2 VAR1 term
14 | V6: NOT OVFLADD VAR1 VAR3 term
15 | V7: NOT OVFLADD VAR2 VAR3 term
16 | 
17 | V8: OVFLADD VAR4 VAR5 term


--------------------------------------------------------------------------------
/tooleval/idaplugin/idaanalysis.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Run IDA on every file of a directory, executing idadumpmem.py on each.
 4 | #
 5 | # Usage: idaanalysis.sh [IDAPATH] [INDIR] [OUTDIR]
 6 | # Every argument is optional and falls back to the default below.
 7 | 
 8 | IDAPATH=${1:-/home/dario/ida-7.1/idat}
 9 | INDIR=${2:-/home/dario/phd/loaders_modeling/lang_parser/allcombo}
10 | OUTDIR=${3:-/tmp/idaalldumps}
11 | 
12 | mkdir -p "$OUTDIR"
13 | 
14 | # Glob instead of parsing `ls`, so names with whitespace stay intact.
15 | for path in "$INDIR"/*
16 | do
17 |     # An empty directory leaves the pattern unexpanded; skip that entry.
18 |     [ -e "$path" ] || continue
19 |     f=${path##*/}
20 |     # Files with "cond" in their name are not analysed.
21 |     if [[ $f == *"cond"* ]]; then
22 |         continue
23 |     fi
24 |     # -S carries the script name followed by its argument (the output dir).
25 |     "$IDAPATH" -A -B -c "-Sidadumpmem.py $OUTDIR" "$path"
26 | done
27 | 


--------------------------------------------------------------------------------
/tooleval/memdump.proto:
--------------------------------------------------------------------------------
 1 | // Serialization format of a memory dump: a list of memory regions.
 2 | syntax = "proto2";
 3 | 
 4 | option java_package = "ghidra.program.flatapi";
 5 | 
 6 | // One region: its bytes plus address/size metadata.
 7 | // Only vaddr, vsize and content are mandatory.
 8 | // NOTE(review): the v*/f* prefixes presumably mean "virtual" and "file";
 9 | // confirm against the code that fills these fields.
10 | message MemoryRegion {
11 |   required uint32 vaddr = 1;
12 |   required uint32 vsize = 2;
13 |   required bytes content = 3;
14 |   optional string name = 4;
15 |   optional string permission = 5;
16 |   optional uint32 faddr = 6;
17 |   optional uint32 fsize = 7;
18 | }
19 | 
20 | // Top-level message: zero or more regions.
21 | message MemoryDump {
22 |   repeated MemoryRegion regions = 1;
23 | }


--------------------------------------------------------------------------------
/testcases/windows/xp-7/differential.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Presumably the recipe behind the testcase_* files that sit next to this
 3 | # script. The relative paths assume it is run from the repository root.
 4 | 
 5 | # Pin the repository to a fixed commit and sync the submodules to match.
 6 | git checkout e13f7b46e1a9b1c1f0c5f5648f0d2910904d3b7d
 7 | git submodule update --recursive
 8 | 
 9 | # Run differential.py under IPython; -i leaves the session open afterwards.
10 | ipython -i ./differential.py -- -A models/windows/xp/MiCreateImageFileMap.lmod models/windows/xp/LdrpInitializeProcess.lmod models/windows/generic/not_managed.lmod models/windows/generic/return0.lmod models/windows/generic/reasonable_stack.lmod -N models/windows/7/MiCreateImageFileMap.lmod -O xp-7/testcase
11 | 


--------------------------------------------------------------------------------
/tooleval/__init__.py:
--------------------------------------------------------------------------------
 1 | from .common import byteat, NOTDUMPED, NOTFOUND, coalesceregions, FailedRelocExcetion
 2 | # Keep this import ahead of the adapter modules: idaadapter, ghidraadapter
 3 | # and winadapter do `from tooleval import MemoryRegion, MemoryDump`, so the
 4 | # two names must already be bound on the package when they are imported.
 5 | from .memdump_pb2 import MemoryRegion, MemoryDump
 6 | from .r2adapter import Radare2Adapter
 7 | from .winadapter import WindowsAdapter
 8 | from .ghidraadapter import GhidraAdapter
 9 | from .idaadapter import IDAAdapter
10 | # Adapter classes indexed by tool name. WindowsAdapter is exported by the
11 | # package but is not registered in this table.
12 | TOOLADAPTERS = {'radare2': Radare2Adapter,
13 |                 'ghidra': GhidraAdapter,
14 |                 'ida': IDAAdapter}
15 | 


--------------------------------------------------------------------------------
/tests/loops/vloop.lmod:
--------------------------------------------------------------------------------
 1 | DEFINE SIZE 20
 2 | INPUT HEADER SIZE
 3 | 
 4 | P: startingOffset1 <- INT 0 4
 5 | V1: ULT startingOffset1 1
 6 | L1(V1): currentOffset <- VLOOP(startingOffset1, next, V99, 20)
 7 |     V1: Eq HEADER[currentOffset] currentOffset term
 8 |     P: next <- Add currentOffset 1
 9 |     V99: ULE currentOffset 0xf
10 | END L1
11 | 
12 | P: startingOffset2 <- INT 0 4
13 | L2(!V1): currentOffset2 <- VLOOP(startingOffset2, next, V98, 20)
14 |     V1: Eq HEADER[currentOffset2] ADD currentOffset2 1 term
15 |     P: next <- Add currentOffset2 1
16 |     V98: ULE currentOffset2 0xf
17 | END L2
18 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.pyc
 2 | *.o
 3 | *.so
 4 | *.a
 5 | .gdb_history
 6 | *.i64
 7 | *.idb
 8 | *.id0
 9 | *.id1
10 | *.id2
11 | *.nam
12 | *.til
13 | *.swp
14 | *.dll
15 | *.obj
16 | *.lib
17 | *.exp
18 | *.pdb
19 | *.ilk
20 | angr/tests/*.png
21 | screenlog.0
22 | angr/tests/screenlog.0
23 | angr/screenlog.0
24 | .idea
25 | *.egg-info
26 | /build
27 | /tags
28 | MANIFEST
29 | dist
30 | .eggs
31 | .vscode/
32 | *~
33 | *.db
34 | *.out
35 | parsetab.py
36 | calclex.py
37 | example.py
38 | test
39 | testcase
40 | Memdump.java
41 | memdump_pb2.py
42 | memdump.pb.h
43 | memdump.pb.cc
44 | ghidra/
45 | *.tex
46 | *.pdf
47 | *.log
48 | *.aux
49 | *#


--------------------------------------------------------------------------------
/tests/loops/conditionalloop.lmod:
--------------------------------------------------------------------------------
 1 | DEFINE SIZE 15
 2 | DEFINE STRUCTSIZE 1
 3 | DEFINE MAXITERATION 10
 4 | 
 5 | INPUT HEADER SIZE
 6 | P: enableLoop1 <- HEADER[0]
 7 | V1: NEQ enableLoop1 0
 8 | V99: EQ enableLoop1 1 term
 9 | 
10 | 
11 | ## This loop will be executed
12 | P: start <- INT 4 4
13 | P: count <- INT 4 4
14 | L1(V1): var1 <- LOOP(HEADER, start, 1, count, 4)
15 |     V3: Eq var1 1 term
16 | END L1
17 | 
18 | 
19 | ## This loop won't be executed
20 | P: enableLoop2 <- HEADER[1]
21 | V2: NEQ enableLoop2 0
22 | V98: EQ enableLoop2 0 term
23 | L2(V2): var2 <- LOOP(HEADER, start, 1, count, 4)
24 |     V4: Eq var2 2 term
25 | END L2
26 | 
27 | 


--------------------------------------------------------------------------------
/tooleval/ghidraplugin/ghidraanalysis.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Import every file of a directory into a fresh Ghidra project with the
 4 | # headless analyzer, running ghidradumpmem.py as post-script on each one.
 5 | #
 6 | # Usage: ghidraanalysis.sh [GHIDRAPATH] [GHIDRAPROJ] [INDIR] [OUTDIR]
 7 | # Every argument is optional and falls back to the default below.
 8 | 
 9 | GHIDRAPATH=${1:-/home/dario/tools/ghidra/build/dist/ghidra_9.2_DEV/support/analyzeHeadless}
10 | GHIDRAPROJ=${2:-/home/dario/phd/loaders_modeling/ghidraproj}
11 | INDIR=${3:-/home/dario/phd/loaders_modeling/lang_parser/allcombo}
12 | OUTDIR=${4:-/tmp/ghidraalldumps}
13 | 
14 | # NOTE(review): OUTDIR is created but never handed to the post-script, and
15 | # ghidradumpmem.py writes under /tmp/ghidradumps/ -- confirm what is intended.
16 | mkdir -p "$OUTDIR"
17 | 
18 | # Start from an empty project directory. -f keeps a missing directory from
19 | # being an error; the quoting keeps the removal confined to this one path.
20 | rm -rf -- "$GHIDRAPROJ"
21 | mkdir -p "$GHIDRAPROJ"
22 | 
23 | # Glob instead of parsing `ls`, so names with whitespace stay intact.
24 | for path in "$INDIR"/*
25 | do
26 |     # An empty directory leaves the pattern unexpanded; skip that entry.
27 |     [ -e "$path" ] || continue
28 |     f=${path##*/}
29 |     # Files with "cond" in their name are not analysed.
30 |     if [[ $f == *"cond"* ]]; then
31 |         continue
32 |     fi
33 |     "$GHIDRAPATH" "$GHIDRAPROJ" ghidratest -loader PeLoader -postscript ghidradumpmem.py -import "$path"
34 | done
35 | 


--------------------------------------------------------------------------------
/tooleval/classes.py:
--------------------------------------------------------------------------------
 1 | from collections import namedtuple
 2 | 
 3 | Entry = namedtuple('Entry', ['name', 'size', 'vsize',
 4 |                              'perm', 'paddr', 'vaddr'])
 5 | class MemoryMap(list):
 6 |     def __init__(self):
 7 |         super().__init__(self)
 8 | 
 9 |     def append(self, obj):
10 |         if type(obj) != Entry:
11 |             raise TypeError
12 |         super().append(obj)
13 | 
14 | class MemoryDump(dict):
15 |     def __init__(self):
16 |         super().__init__(self)
17 | 
18 |     def __setitem__(self, key, value):
19 |         if type(key) != Entry:
20 |             raise TypeError
21 |         super().__setitem__(key, value)
22 | 


--------------------------------------------------------------------------------
/tests/operators/alignment.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | import z3
 3 | import logging
 4 | 
 5 | from modelLang import parsers, backends
 6 | 
 7 | from modelLang.parsers import Parser
 8 | from modelLang.backends import Z3Backend
 9 | 
10 | class AlignmentTest():
11 |     testfile = "tests/operators/alignment.lmod"
12 | 
13 |     @staticmethod
14 |     def run():
15 |         parser = Parser()
16 |         parser.parse_file(AlignmentTest.testfile)
17 |         backend = Z3Backend()
18 |         backend.log.setLevel(logging.ERROR)
19 |         backend.exec_statements(parser.statements)
20 |         solver = backend.generate_solver()
21 |         assert(solver.check() == z3.sat)
22 | 
23 | if __name__ == "__main__":
24 |     AlignmentTest.run()
25 | 


--------------------------------------------------------------------------------
/tooleval/idaadapter.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | from tooleval import MemoryRegion, MemoryDump
 4 | 
 5 | class IDAAdapter(object):
 6 |     uninbyte = 0x0
 7 |     def __init__(self, path):
 8 |         self._file = open(path, 'rb')
 9 |         self._memdump = None
10 | 
11 |     def close(self):
12 |         self._file.close()
13 | 
14 |     def load(self):
15 |         self._memdump = MemoryDump()
16 |         self._memdump.ParseFromString(self._file.read())
17 | 
18 |     @property
19 |     def memdump(self):
20 |         if not self._memdump:
21 |             self.load()
22 |         return self._memdump
23 | 
24 | if __name__ == "__main__":
25 |     adapter = IDAAdapter("/tmp/idadumps/testcase_35")
26 |     dump = adapter.memdump
27 |     print(dump)
28 | 


--------------------------------------------------------------------------------
/tooleval/ghidraadapter.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | from tooleval import MemoryRegion, MemoryDump
 4 | 
 5 | class GhidraAdapter(object):
 6 |     uninbyte = 0x00
 7 |     def __init__(self, path):
 8 |         self._file = open(path, 'rb')
 9 |         self._memdump = None
10 | 
11 |     def close(self):
12 |         self._file.close()
13 | 
14 |     def load(self):
15 |         self._memdump = MemoryDump()
16 |         self._memdump.ParseFromString(self._file.read())
17 | 
18 |     @property
19 |     def memdump(self):
20 |         if not self._memdump:
21 |             self.load()
22 |         return self._memdump
23 | 
24 | if __name__ == "__main__":
25 |     adapter = GhidraAdapter("/tmp/ghidradumps/testcase_35")
26 |     dump = adapter.memdump
27 |     print(dump)
28 | 


--------------------------------------------------------------------------------
/tooleval/winadapter.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | from tooleval import MemoryRegion, MemoryDump
 4 | 
 5 | class WindowsAdapter(object):
 6 |     def __init__(self, path):
 7 |         self._file = open(path, 'rb')
 8 |         self._memdump = None
 9 | 
10 |     def close(self):
11 |         self._file.close()
12 | 
13 |     def load(self):
14 |         self._memdump = MemoryDump()
15 |         self._memdump.ParseFromString(self._file.read())
16 | 
17 |     @property
18 |     def memdump(self):
19 |         if not self._memdump:
20 |             self.load()
21 |         return self._memdump
22 | 
23 | if __name__ == "__main__":
24 |     adapter = WindowsAdapter("/home/dario/VirtualBox VMs/winxp/shared/mydumps/testcase_35.exe.dump")
25 |     dump = adapter.memdump
26 |     print(dump)
27 | 
28 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | 
 3 | setup(
 4 |     name='modelLang',
 5 |     version='0.1',
 6 |     packages=['modelLang',
 7 |               'modelLang/backends',
 8 |               'modelLang/parsers',
 9 |               'modelLang/structures',
10 |               ],
11 |     install_requires=[
12 |         'coloredlogs==10.0',
13 |         'ply==3.11',
14 |         'pwntools==4.0.1',
15 |         'pycparser==2.19',
16 |         'z3==0.2.0',
17 |         'z3-solver==4.8.7.0',
18 |         'pefile==2019.4.18',
19 |     ],
20 |     maintainer='Dario Nisi',
21 |     maintainer_email='dario.nisi@eurecom.fr'
22 | )
23 | 
24 | setup(
25 |     name='tooleval',
26 |     version='0.1',
27 |     packages=['tooleval',
28 |               ],
29 |     maintainer='Dario Nisi',
30 |     maintainer_email='dario.nisi@eurecom.fr'
31 | )
32 | 


--------------------------------------------------------------------------------
/tests/operators/strcmp.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | import z3
 3 | import logging
 4 | 
 5 | from modelLang import parsers, backends
 6 | 
 7 | from modelLang.parsers import Parser
 8 | from modelLang.backends import Z3Backend
 9 | 
10 | class StringCompareTest():
11 |     testfile = "tests/operators/strcmp.lmod"
12 | 
13 |     @staticmethod
14 |     def run():
15 |         parser = Parser()
16 |         parser.parse_file(StringCompareTest.testfile)
17 |         backend = Z3Backend()
18 |         backend.log.setLevel(logging.ERROR)
19 |         backend.exec_statements(parser.statements)
20 |         solver = backend.generate_solver()
21 |         assert(solver.check() == z3.sat)
22 |         model = solver.model()
23 |         outvar = backend.variables['out']
24 |         assert(model.eval(outvar).as_long() == 0x5241424f4f4600)
25 | 
26 | if __name__ == "__main__":
27 |     StringCompareTest.run()
28 | 


--------------------------------------------------------------------------------
/tests/loops/conditionalloop.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | import logging
 3 | import z3
 4 | 
 5 | from modelLang import parsers, backends
 6 | 
 7 | from modelLang.parsers import Parser
 8 | from modelLang.backends import Z3Backend
 9 | 
10 | class ConditionalLoopTest():
11 |     testfile = "tests/loops/conditionalloop.lmod"
12 | 
13 |     @staticmethod
14 |     def run():
15 |         parser = Parser()
16 |         parser.parse_file(ConditionalLoopTest.testfile)
17 | 
18 |         backend = Z3Backend()
19 |         backend.log.setLevel(logging.ERROR)
20 |         backend.exec_statements(parser.statements)
21 |         solver = backend.solver
22 |         model = backend.model
23 | 
24 |         assert model, "Model unsat. Test failed"
25 | 
26 |         testcase = backend.generate_testcase()
27 |         expected = b'\x01' * 4
28 |         assert(testcase[4:8] == expected)
29 | 
30 | if __name__ == "__main__":
31 |     ConditionalLoopTest.run()
32 | 


--------------------------------------------------------------------------------
/tests/operators/overflow.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import logging
 4 | import z3
 5 | 
 6 | from modelLang import parsers, backends
 7 | 
 8 | from modelLang.parsers import Parser
 9 | from modelLang.backends import Z3Backend
10 | 
11 | class OverflowTest():
12 |     testfile = "tests/operators/overflow.lmod"
13 | 
14 |     @staticmethod
15 |     def run():
16 |         parser = Parser()
17 |         parser.parse_file(OverflowTest.testfile)
18 |         backend = Z3Backend()
19 |         backend.log.setLevel(logging.ERROR)
20 |         backend.exec_statements(parser.statements)
21 |         solver = backend.generate_solver()
22 |         variables = backend.variables
23 | 
24 |         ### Check sat
25 |         assert(backend.model)
26 | 
27 |         model = backend.model
28 |         var4 = model.eval(variables['VAR4']).as_long()
29 |         var5 = model.eval(variables['VAR5']).as_long()
30 |         assert(var4 + var5 >= 0x100000000)
31 | 


--------------------------------------------------------------------------------
/tests/statements/fromfile.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import logging
 4 | import os.path
 5 | 
 6 | from modelLang import parsers, backends
 7 | 
 8 | from modelLang.parsers import Parser
 9 | from modelLang.backends import Z3Backend
class FromFileTest:
    """Check the bytes generated for the ``file`` variable of the model."""

    testfile = "tests/statements/fromfile.lmod"

    @staticmethod
    def run():
        """Execute the model and inspect bytes 5..14 of the ``file`` testcase.

        Returns:
            True when every assertion holds.

        Raises:
            AssertionError: if no model is found or the generated bytes
                differ from ``b"1337133713"``.
        """
        # The parser is told to resolve paths relative to this test's folder.
        here = os.path.dirname(os.path.realpath(__file__))
        lmod_parser = Parser(pwd=here)
        lmod_parser.parse_file(FromFileTest.testfile)

        z3_backend = Z3Backend()
        z3_backend.log.setLevel(logging.ERROR)
        z3_backend.exec_statements(lmod_parser.statements)
        # ``solver`` is read before ``model`` exactly as before; the value
        # itself is not needed afterwards.
        solver = z3_backend.solver
        solution = z3_backend.model

        assert solution, "Model unsat. Test failed"

        generated = z3_backend.generate_testcase(varname="file")
        window = generated[5:15]
        assert window == b"1337133713"

        return True


if __name__ == "__main__":
    FromFileTest.run()
33 | 


--------------------------------------------------------------------------------
/tests/loops/vloop.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | import logging
 3 | import z3
 4 | 
 5 | from modelLang import parsers, backends
 6 | 
 7 | from modelLang.parsers import Parser
 8 | from modelLang.backends import Z3Backend
 9 | 
class VLoopTest:
    """Check the testcase produced by the ``vloop`` model."""

    testfile = "tests/loops/vloop.lmod"

    @staticmethod
    def run():
        """Execute the model and compare the generated testcase.

        The first 0x10 bytes must be 0x00..0x0f in order and every
        following byte must be zero.

        Returns:
            True when every assertion holds.

        Raises:
            AssertionError: if no model is found or the bytes differ.
        """
        lmod_parser = Parser()
        lmod_parser.parse_file(VLoopTest.testfile)

        z3_backend = Z3Backend()
        z3_backend.log.setLevel(logging.ERROR)
        z3_backend.exec_statements(lmod_parser.statements)
        solver = z3_backend.solver
        solution = z3_backend.model

        assert solution, "Model unsat. Test failed"

        generated = z3_backend.generate_testcase()
        head, tail = generated[:0x10], generated[0x10:]
        # bytes(range(0x10)) is the byte string 00 01 02 ... 0f.
        assert head == bytes(range(0x10)), "First part of the testcase not as expected"
        assert all(byte == 0 for byte in tail), "Second part of the testcase not as expected"
        return True


if __name__ == "__main__":
    VLoopTest.run()
34 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | asttokens==2.2.1
 2 | backcall==0.2.0
 3 | bcrypt==4.0.1
 4 | boto==2.49.0
 5 | capstone==5.0.0rc2
 6 | certifi==2022.12.7
 7 | cffi==1.15.1
 8 | charset-normalizer==3.0.1
 9 | colored-traceback==0.3.0
10 | coloredlogs==15.0.1
11 | cryptography==39.0.1
12 | decorator==5.1.1
13 | executing==1.2.0
14 | humanfriendly==10.0
15 | idna==3.4
16 | intervaltree==3.1.0
17 | ipython==8.9.0
18 | jedi==0.18.2
19 | Mako==1.2.4
20 | MarkupSafe==2.1.2
21 | matplotlib-inline==0.1.6
22 | packaging==23.0
23 | paramiko==3.0.0
24 | parso==0.8.3
25 | pefile==2023.2.7
26 | pexpect==4.8.0
27 | pickleshare==0.7.5
28 | plumbum==1.8.1
29 | ply==3.11
30 | prompt-toolkit==3.0.36
31 | psutil==5.9.4
32 | ptyprocess==0.7.0
33 | pure-eval==0.2.2
34 | pwntools==4.9.0
35 | pycparser==2.21
36 | pyelftools==0.29
37 | Pygments==2.14.0
38 | PyNaCl==1.5.0
39 | pyserial==3.5
40 | PySocks==1.7.1
41 | python-dateutil==2.8.2
42 | requests==2.28.2
43 | ROPGadget==7.2
44 | rpyc==5.3.0
45 | six==1.16.0
46 | sortedcontainers==2.4.0
47 | stack-data==0.6.2
48 | traitlets==5.9.0
49 | unicorn==2.0.1.post1
50 | urllib3==1.26.14
51 | wcwidth==0.2.6
52 | z3==0.2.0
53 | z3-solver==4.12.1.0
54 | 


--------------------------------------------------------------------------------
/tooleval/common.py:
--------------------------------------------------------------------------------
 1 | NOTDUMPED = "NOTDUMPED"
 2 | NOTFOUND = "NOTFOUND"
 3 | 
 4 | class FailedRelocExcetion(Exception):
 5 |     pass
 6 | 
 7 | def byteat(memdump, addr):
 8 |     for region in memdump.regions:
 9 |         if addr >= region.vaddr and addr < region.vaddr + region.vsize:
10 |             if addr >= region.vaddr + len(region.content):
11 |                 return NOTDUMPED
12 |             return region.content[addr - region.vaddr]
13 |     return NOTFOUND
14 | 
def permissionsat(memdump, addr):
    """Return the permission of the first region containing *addr*.

    Returns:
        ``region.permission`` of the first region whose
        ``[vaddr, vaddr + vsize)`` range contains *addr*, or ``NOTFOUND``
        when no region does.
    """
    for region in memdump.regions:
        if not (region.vaddr <= addr < region.vaddr + region.vsize):
            continue
        return region.permission
    return NOTFOUND
20 | 
def coalesceregions(memdump):
    """Merge back-to-back regions into ``(vaddr, vsize)`` spans.

    A region is folded into the previous span when its ``vaddr`` equals the
    end address of the region visited just before it; otherwise it starts a
    new span. Regions are processed in the order they appear in
    ``memdump.regions``; no sorting is performed.

    Returns:
        A list of ``(start_address, total_size)`` tuples.
    """
    spans = []
    next_addr = -1  # end address of the previously visited region
    for region in memdump.regions:
        if region.vaddr == next_addr:
            start, size = spans.pop()
            spans.append((start, size + region.vsize))
        else:
            spans.append((region.vaddr, region.vsize))
        next_addr = region.vaddr + region.vsize
    return spans
32 | 


--------------------------------------------------------------------------------
/tests/statements/conditional_assignment.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import logging
 4 | import os.path
 5 | 
 6 | from modelLang import parsers, backends
 7 | 
 8 | from modelLang.parsers import Parser
 9 | from modelLang.backends import Z3Backend
10 | 
11 | from pwnlib.util.packing import pack, unpack
12 | 
class ConditionalAssignmentTest:
    """Check the value generated for ``outvar`` by the model."""

    testfile = "tests/statements/conditional_assignment.lmod"

    @staticmethod
    def run():
        """Execute the model and assert ``outvar`` decodes to 0x1234.

        Returns:
            True when every assertion holds.

        Raises:
            AssertionError: if no model is found or the little-endian
                value of ``outvar`` is not 0x1234.
        """
        here = os.path.dirname(os.path.realpath(__file__))
        lmod_parser = Parser(pwd=here)
        lmod_parser.parse_file(ConditionalAssignmentTest.testfile)

        z3_backend = Z3Backend()
        z3_backend.log.setLevel(logging.ERROR)
        z3_backend.exec_statements(lmod_parser.statements)
        solver = z3_backend.solver
        solution = z3_backend.model

        assert solution, "Model unsat. Test failed"

        raw = z3_backend.generate_testcase(varname="outvar")
        # Decode the whole byte string as one little-endian integer.
        decoded = unpack(raw, 'all', endianness="little")
        assert decoded == 0x1234

        return True


if __name__ == "__main__":
    ConditionalAssignmentTest.run()
37 | 


--------------------------------------------------------------------------------
/tests/operators/bitwise.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import logging
 4 | import z3
 5 | 
 6 | from modelLang import parsers, backends
 7 | 
 8 | from modelLang.parsers import Parser
 9 | from modelLang.backends import Z3Backend
10 | 
11 | 
class BitwiseTest:
    """Evaluate the bitwise-operator model for one fixed input value."""

    testfile = "tests/operators/bitwise.mod"

    @staticmethod
    def run():
        """Pin VARIABLE to 0xdeadbeef and check VARA, VARB and VARC.

        Raises:
            AssertionError: if the constrained solver is not sat or any
                of the three variables evaluates to an unexpected value.
        """
        lmod_parser = Parser()
        lmod_parser.parse_file(BitwiseTest.testfile)
        z3_backend = Z3Backend()
        z3_backend.log.setLevel(logging.ERROR)
        z3_backend.exec_statements(lmod_parser.statements)
        solver = z3_backend.generate_solver()

        source = z3_backend.variables['VARIABLE']
        derived = [z3_backend.variables[name]
                   for name in ('VARA', 'VARB', 'VARC')]

        solver.add(source == 0xdeadbeef)
        assert solver.check() == z3.sat
        solution = solver.model()

        expected = (0x11001, 0xdfadbeef, 0x21524110)
        for symbol, value in zip(derived, expected):
            assert solution.eval(symbol).as_long() == value


if __name__ == "__main__":
    BitwiseTest.run()
37 | 


--------------------------------------------------------------------------------
/tests/statements/optimization.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import logging
 4 | import os.path
 5 | 
 6 | from modelLang import parsers, backends
 7 | 
 8 | from modelLang.parsers import Parser
 9 | from modelLang.backends import Z3Backend
10 | 
11 | from pwnlib.util.packing import pack, unpack
12 | 
class OptimizationTest:
    """Check the values generated for ``var1`` and ``var2`` by the model."""

    testfile = "tests/statements/optimization.lmod"

    @staticmethod
    def run():
        """Execute the model and assert ``var1`` and ``var2`` both equal 10.

        Returns:
            True when every assertion holds.

        Raises:
            AssertionError: if no model is found or either little-endian
                value differs from 10.
        """
        here = os.path.dirname(os.path.realpath(__file__))
        lmod_parser = Parser(pwd=here)
        lmod_parser.parse_file(OptimizationTest.testfile)

        z3_backend = Z3Backend()
        z3_backend.log.setLevel(logging.ERROR)
        z3_backend.exec_statements(lmod_parser.statements)
        solver = z3_backend.solver
        solution = z3_backend.model

        assert solution, "Model unsat. Test failed"

        # Both testcases are generated first, then decoded, in that order.
        raw_first = z3_backend.generate_testcase(varname="var1")
        raw_second = z3_backend.generate_testcase(varname="var2")
        first = unpack(raw_first, 'all', endianness="little")
        second = unpack(raw_second, 'all', endianness="little")
        assert first == second == 10

        return True


if __name__ == "__main__":
    OptimizationTest.run()
39 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | import logging
 3 | import coloredlogs
 4 | 
 5 | if __name__ == "__main__":
 6 |     def test(c):
 7 |         name = c.__name__
 8 |         log.info(f"Running {name}")
 9 |         try:
10 |             c.run()
11 |         except AssertionError as e:
12 |             log.error(f"{name} failed with error {e}")
13 |         else:
14 |             log.info(f"{name} succeded")
15 |         
16 |     log = logging.getLogger(__name__)
17 |     coloredlogs.install(level="INFO", logger=log)
18 |     from loops import VLoopTest, ConditionalLoopTest
19 |     from operators import (BitwiseTest, AlgebraTest, AlignmentTest,
20 |                            StringCompareTest, OverflowTest)
21 |     from statements import (FromFileTest, OptimizationTest,
22 |                             ConditionalAssignmentTest)
23 |     from functional import PositiveCombinationTest, NegativeCombinationTest
24 | 
25 |     test(BitwiseTest)
26 |     test(AlgebraTest)
27 |     test(AlignmentTest)
28 |     test(StringCompareTest)
29 |     test(OverflowTest)
30 |     test(VLoopTest)
31 |     test(ConditionalLoopTest)
32 |     test(FromFileTest)
33 |     test(OptimizationTest)
34 |     test(ConditionalAssignmentTest)
35 |     test(PositiveCombinationTest)
36 |     test(NegativeCombinationTest)
37 | 


--------------------------------------------------------------------------------
/tests/operators/algebra.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import logging
 4 | import z3
 5 | 
 6 | from modelLang import parsers, backends
 7 | 
 8 | from modelLang.parsers import Parser
 9 | from modelLang.backends import Z3Backend
10 | 
class AlgebraTest:
    """Evaluate the arithmetic-operator model for one fixed input value."""

    testfile = "tests/operators/algebra.mod"

    @staticmethod
    def run():
        """Pin VARIABLE to 10 and check VARA through VARF.

        Raises:
            AssertionError: if the constrained solver is not sat or any
                of the six variables evaluates to an unexpected value.
        """
        lmod_parser = Parser()
        lmod_parser.parse_file(AlgebraTest.testfile)
        z3_backend = Z3Backend()
        z3_backend.log.setLevel(logging.ERROR)
        z3_backend.exec_statements(lmod_parser.statements)
        solver = z3_backend.generate_solver()

        symbols = z3_backend.variables
        source = symbols['VARIABLE']
        derived = [symbols[name]
                   for name in ('VARA', 'VARB', 'VARC', 'VARD', 'VARE', 'VARF')]

        solver.add(source == 10)
        assert solver.check() == z3.sat
        solution = solver.model()

        expected = (15, 5, 50, 2, 2, 0)
        for symbol, value in zip(derived, expected):
            assert solution.eval(symbol).as_long() == value
40 | 


--------------------------------------------------------------------------------
/modelLang/backends/default_backend.py:
--------------------------------------------------------------------------------
 1 | from ..classes import Expression
 2 | 
 3 | class DefaultBackend(object):
 4 |     def __init__(self):
 5 |         self.variables = {}
 6 |         self.conditions = {}
 7 |         self.terminal_conditions = {}
 8 |         self._statements = None
 9 | 
10 |     def _eval_expression(self, expr):
11 |         opcode = expr.opcode
12 |         operands = expr.operands
13 |         operands_new = []
14 |         for op in operands:
15 |             if isinstance(op, Expression):
16 |                 operands_new.append(self._eval_expression(op))
17 |             else:
18 |                 operands_new.append(op)
19 |         self.log.debug(f"\n{expr.pprint()}")
20 |         return self.dispatch(opcode, *operands_new)
21 | 
22 |     def _exec_statement(self, stmt, **kwargs):
23 |         t = type(stmt)
24 |         self.log.debug(f"Executing: {stmt}")
25 |         self._exec_table[t](self, stmt, **kwargs)
26 | 
27 |     def exec_statements(self, statements, **kwargs):
28 |         for stmt in statements:
29 |             try:
30 |                 self._exec_statement(stmt, **kwargs)
31 |             except Exception:
32 |                 self.log.error("Error occurred while processing statement " +
33 |                                f"at line {stmt.lineno}")
34 |                 raise
35 | 
36 |     def load_statements(self, statements):
37 |         self._statements = statements
38 | 
class VerificationError(Exception):
    """Exception carrying a ``name`` attribute.

    NOTE(review): presumably ``name`` identifies the check that failed
    verification; confirm against the code that raises this.
    """

    def __init__(self, name):
        # Only the attribute is recorded; the base initializer is not
        # invoked explicitly.
        self.name = name
42 | 


--------------------------------------------------------------------------------
/tooleval/r2adapter.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import r2pipe
 4 | 
 5 | from tooleval import MemoryRegion, MemoryDump, FailedRelocExcetion
 6 | 
 7 | class Radare2Adapter(object):
 8 |     uninbyte = 0xff
 9 |     def __init__(self, path):
10 |         self._instance = Radare2Adapter.createR2Instance(path)
11 |         self._memdump = None
12 | 
13 |     def close(self):
14 |         self._instance.quit()
15 | 
16 |     @staticmethod
17 |     def createR2Instance(path):
18 |         return r2pipe.open(path)
19 | 
20 |     @property
21 |     def memdump(self):
22 |         if self._memdump:
23 |             return self._memdump
24 | 
25 |         self._memdump = MemoryDump()
26 |         mmap = self._instance.cmdj("iSj")
27 |         for region in mmap:
28 |             mregion = self._memdump.regions.add()
29 |             mregion.name = region['name']
30 |             mregion.fsize = region['size']
31 |             try:
32 |                 mregion.vsize = region['vsize']
33 |                 mregion.vaddr = region['vaddr']
34 |             except:
35 |                 raise FailedRelocExcetion
36 |             mregion.permission = region['perm']
37 |             mregion.faddr = region['paddr']
38 |             self._instance.cmd(f"s {hex(region['vaddr'])}")
39 |             contentsize = mregion.vsize if mregion.vsize < 0x10000 else 0x10000
40 |             content = bytes(self._instance.cmdj(f"pxj {hex(contentsize)}"))
41 |             mregion.content = content
42 |         return self._memdump
43 | 
44 | if __name__ == "__main__":
45 |     adapter = Radare2Adapter("/home/dario/phd/loaders_modeling/lang_parser/prova/testcase_41")
46 |     dump = adapter.memdump
47 | 


--------------------------------------------------------------------------------
/tests/functional/negativecombo.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | import logging
 3 | import z3
 4 | 
 5 | from modelLang import parsers, backends
 6 | 
 7 | from modelLang.parsers import Parser
 8 | from modelLang.backends import Z3Backend
 9 | 
10 | from pwnlib.util.packing import unpack
11 | 
class NegativeCombinationTest:
    """Solve ``mod1 & ~mod2`` and check the generated value."""

    testfile1 = "tests/functional/mod1.lmod"
    testfile2 = "tests/functional/mod2.lmod"

    @staticmethod
    def run():
        """Combine the first model with the negation of the second.

        The generated ``variable`` must be greater than 15, have its low
        16 bits clear and be a power of two.

        Returns:
            True when every assertion holds.

        Raises:
            AssertionError: if the combination has no model or the value
                violates one of the checks.
        """
        built = []
        for path in (NegativeCombinationTest.testfile1,
                     NegativeCombinationTest.testfile2):
            lmod_parser = Parser()
            lmod_parser.parse_file(path)

            single = Z3Backend(name=path, voi="variable")
            single.log.setLevel(logging.ERROR)
            single.exec_statements(lmod_parser.statements)
            built.append(single)

        accepted, rejected = built
        combined = accepted & ~rejected
        combined.log.setLevel(logging.ERROR)
        solver = combined.solver
        solution = combined.model

        assert solution, "Model unsat. Test failed"

        raw = combined.generate_testcase("variable")
        value = unpack(raw, 'all')
        assert value > 15
        assert (value & 0xffff) == 0
        # x & (x - 1) clears the lowest set bit: zero means one bit set.
        assert (value & (value - 1)) == 0
        return True


if __name__ == "__main__":
    NegativeCombinationTest.run()
50 | 


--------------------------------------------------------------------------------
/tests/functional/positivecombo.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | import logging
 3 | import z3
 4 | 
 5 | from modelLang import parsers, backends
 6 | 
 7 | from modelLang.parsers import Parser
 8 | from modelLang.backends import Z3Backend
 9 | 
10 | from pwnlib.util.packing import unpack
11 | 
class PositiveCombinationTest:
    """Solve ``mod1 & mod2`` and check the generated value."""

    testfile1 = "tests/functional/mod1.lmod"
    testfile2 = "tests/functional/mod2.lmod"

    @staticmethod
    def run():
        """Combine both models and inspect the generated ``variable``.

        The value must be greater than 15, have its low 16 bits clear and
        have more than one bit set.

        Returns:
            True when every assertion holds.

        Raises:
            AssertionError: if the combination has no model or the value
                violates one of the checks.
        """
        built = []
        for path in (PositiveCombinationTest.testfile1,
                     PositiveCombinationTest.testfile2):
            lmod_parser = Parser()
            lmod_parser.parse_file(path)

            single = Z3Backend(name=path, voi="variable")
            single.log.setLevel(logging.ERROR)
            single.exec_statements(lmod_parser.statements)
            built.append(single)

        left, right = built
        combined = left & right
        combined.log.setLevel(logging.ERROR)
        solver = combined.solver
        solution = combined.model

        assert solution, "Model unsat. Test failed"

        raw = combined.generate_testcase("variable")
        value = unpack(raw, 'all')
        assert value > 15
        assert (value & 0xffff) == 0
        # x & (x - 1) clears the lowest set bit: non-zero means 2+ bits set.
        assert (value & (value - 1)) != 0
        return True


if __name__ == "__main__":
    PositiveCombinationTest.run()
50 | 


--------------------------------------------------------------------------------
/tooleval/idaplugin/idadumpmem.py:
--------------------------------------------------------------------------------
 1 | from idc import *
 2 | from idaapi import *
 3 | from idautils import *
 4 | import sys
 5 | from os.path import join
 6 | 
 7 | sys.path.append("..")
 8 | sys.path.append(".")
 9 | 
class ToFileStdOut(object):
    """Minimal file-like object that sends writes to /tmp/idaout.txt."""

    def __init__(self):
        # Opened in text write mode: an existing file is truncated.
        self.outfile = open("/tmp/idaout.txt", "w")

    def write(self, text):
        """Append *text* to the log file."""
        self.outfile.write(text)

    def flush(self):
        """Flush the log file's buffer."""
        self.outfile.flush()

    def isatty(self):
        """Report that this stream is not a terminal."""
        return False

    def __del__(self):
        # Close the log file when the object is collected.
        self.outfile.close()
# Send everything printed from here on to the log file.
sys.stdout = sys.stderr = ToFileStdOut()
try:
    from memdump_pb2 import MemoryDump, MemoryRegion

    # Output directory: second script argument when given, /tmp otherwise.
    dumpdir = ARGV[1] if len(ARGV) >= 2 else "/tmp"

    memdump = MemoryDump()
    for vaddr in Segments():
        memregion = memdump.regions.add()
        memregion.vaddr = vaddr
        memregion.vsize = SegEnd(vaddr) - SegStart(vaddr)

        # Permission string: a leading "-" followed by r/w/x or "-".
        perm_bits = get_segm_attr(vaddr, SEGATTR_PERM)
        flags = "".join(
            letter if (perm_bits & mask) != 0 else "-"
            for letter, mask in (("r", SEGPERM_READ),
                                 ("w", SEGPERM_WRITE),
                                 ("x", SEGPERM_EXEC)))
        memregion.permission = "-" + flags

        # At most 0x10000 bytes per segment; stop at the first address
        # that is not loaded.
        memregion.fsize = min(memregion.vsize, 0x10000)
        memregion.content = bytes()
        for address in xrange(vaddr, vaddr + memregion.fsize):
            if not is_loaded(address):
                break
            memregion.content += get_bytes(address, 1)

    # The dump is named after the input file and written in serialized form.
    progname = get_root_filename()
    with open(join(dumpdir, progname), "wb") as out:
        out.write(memdump.SerializeToString())
except Exception as e:
    print(e)
    idc.Exit(1)

idc.Exit(0)
55 | 


--------------------------------------------------------------------------------
/verify.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | import sys
 3 | import logging
 4 | import coloredlogs
 5 | import z3
 6 | import pefile
 7 | 
 8 | from argparse import ArgumentParser
 9 | 
10 | log = logging.getLogger(__name__)
11 | coloredlogs.install(level="NOTSET", logger=log)
12 | 
13 | from modelLang import Parser, Z3Backend, PythonBackend
14 | 
if __name__ == "__main__":
    # Command line: <model> <executable> [-l LEVEL] [-D] [-Z]
    argpar = ArgumentParser(description='Evaluate model precision')
    argpar.add_argument('model', type=str, help='Loader model')
    argpar.add_argument('executable', type=str, help='File to verify')
    argpar.add_argument('--logLevel', '-l', type=str, default=None,
                        help="Log verbosity")
    argpar.add_argument('--disable-log', '-D', default=False,
                        action='store_true', help="Disable logging")
    argpar.add_argument('--z3-backend', '-Z', default=False,
                        action="store_true", help="Enable z3 backend")
    args = argpar.parse_args()

    # The Python backend is the default; -Z switches to z3.
    engine = Z3Backend if args.z3_backend else PythonBackend

    if args.logLevel:
        logging.getLogger().setLevel(args.logLevel)

    with open(args.executable, "rb") as fp:
        content = fp.read()

    # The file size is handed to the model as the FILESIZE constant.
    parser = Parser(ptype=Parser.ParserType.VALIDATOR,
                    custom_defs={"FILESIZE": len(content)})
    parser.parse_file(args.model)

    backend = engine()
    if args.disable_log:
        backend.log.setLevel(100)
    backend.load_statements(parser.statements)

    # Exit status 0 on PASS, 1 on FAIL.
    if not backend.verify(content):
        log.info("FAIL")
        sys.exit(1)
    log.info("PASS")
    sys.exit(0)
52 | 


--------------------------------------------------------------------------------
/verify_dataset.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | import sys
 3 | import logging
 4 | import coloredlogs
 5 | import z3
 6 | import pefile
 7 | 
 8 | from argparse import ArgumentParser
 9 | from pathlib import Path
10 | from functools import partial
11 | from multiprocessing import Pool
12 | from progressbar import progressbar
13 | 
14 | log = logging.getLogger(__name__)
15 | coloredlogs.install(level="NOTSET", logger=log)
16 | 
17 | from modelLang import Parser, Z3Backend, PythonBackend
18 | 
19 | 
def verify(modelfile, executable, engine=None, disable_log=False):
    """Verify one sample against the model.

    Everything the function needs is passed explicitly, so it also works
    in worker processes that do not inherit this script's globals (for
    example with the ``spawn`` start method).

    Args:
        modelfile: path of the loader model to parse.
        executable: ``pathlib.Path`` of the sample to verify.
        engine: backend class to instantiate; ``PythonBackend`` when None.
        disable_log: when true, raise the backend log level to 100.

    Returns:
        ``(executable, None)`` when the sample passes, otherwise
        ``(executable, backend._last_fail)``.
    """
    if engine is None:
        engine = PythonBackend

    with executable.open("rb") as fp:
        content = fp.read()

    filesize = len(content)
    parser = Parser(ptype=Parser.ParserType.VALIDATOR,
                    custom_defs={"FILESIZE" : filesize})
    parser.parse_file(modelfile)
    backend = engine()

    if disable_log:
        backend.log.setLevel(100)
    backend.load_statements(parser.statements)
    if backend.verify(content):
        return (executable, None)
    return (executable, backend._last_fail)


if __name__ == "__main__":
    argpar = ArgumentParser(description='Evaluate model precision')
    argpar.add_argument('model', type=str, help='Loader model')
    argpar.add_argument('directory', type=str,
                        help='Path to dataset to verify')
    argpar.add_argument('output', type=str,
                        help='Path to output')
    argpar.add_argument('--logLevel', '-l', type=str, default=None,
                        help="Log verbosity")
    argpar.add_argument('--disable-log', '-D', default=False,
                        action='store_true', help="Disable logging")
    argpar.add_argument('--z3-backend', '-Z', default=False,
                        action="store_true", help="Enable z3 backend")

    args = argpar.parse_args()
    engine = PythonBackend
    if args.z3_backend:
        engine = Z3Backend

    modelfile = args.model
    directory = Path(args.directory)

    if not directory.is_dir():
        log.error("<directory> must be a directory")
        sys.exit(-1)

    # Sub-directories cannot be opened as samples; keep regular files only.
    samples = [entry for entry in directory.iterdir() if entry.is_file()]

    if args.logLevel:
        logging.getLogger().setLevel(args.logLevel)

    worker = partial(verify, modelfile, engine=engine,
                     disable_log=args.disable_log)
    # The context manager terminates the pool even if a worker raises.
    with Pool() as pool:
        results = {sample.name: failure
                   for sample, failure in progressbar(
                       pool.imap(worker, samples),
                       max_value=len(samples))}

    success = sum(1 for failure in results.values() if not failure)
    with open(args.output, "w") as fp:
        fp.write(f"Success: {success}\n")
        # Only failing samples are listed, one "<name> <failure>" per line.
        for name, failure in results.items():
            if not failure:
                continue
            fp.write(f"{name} {failure}\n")
83 | 


--------------------------------------------------------------------------------
/modelLang/structures/windows_specs.py:
--------------------------------------------------------------------------------
 1 | from .cparser import *
 2 | 
 3 | DEFAULT_SIZES = {
 4 |     'char'      : 8,
 5 |     'short'     : 16,
 6 |     'int'       : 32,
 7 |     'long'      : 32,
 8 |     'long long' : 64
 9 | }
10 | 
11 | BASIC_TYPES = {
12 |     'char': SimTypeNum(DEFAULT_SIZES['char'], True),
13 |     'signed char': SimTypeNum(DEFAULT_SIZES['char'], True),
14 |     'unsigned char': SimTypeNum(DEFAULT_SIZES['char'], False),
15 | 
16 |     'short': SimTypeNum(DEFAULT_SIZES['short'], True),
17 |     'signed short': SimTypeNum(DEFAULT_SIZES['short'], True),
18 |     'unsigned short': SimTypeNum(DEFAULT_SIZES['short'], False),
19 |     'short int': SimTypeNum(DEFAULT_SIZES['short'], True),
20 |     'signed short int': SimTypeNum(DEFAULT_SIZES['short'], True),
21 |     'unsigned short int': SimTypeNum(DEFAULT_SIZES['short'], False),
22 | 
23 |     'int': SimTypeNum(DEFAULT_SIZES['int'], True),
24 |     'signed int': SimTypeNum(DEFAULT_SIZES['int'], True),
25 |     'unsigned int': SimTypeNum(DEFAULT_SIZES['int'], False),
26 | 
27 |     'long': SimTypeNum(DEFAULT_SIZES['long'], True),
28 |     'signed long': SimTypeNum(DEFAULT_SIZES['long'], True),
29 |     'unsigned long': SimTypeNum(DEFAULT_SIZES['long'], False),
30 |     'long int': SimTypeNum(DEFAULT_SIZES['long'], True),
31 |     'signed long int': SimTypeNum(DEFAULT_SIZES['long'], True),
32 |     'unsigned long int': SimTypeNum(DEFAULT_SIZES['long'], False),
33 | 
34 |     'long long' : SimTypeNum(DEFAULT_SIZES['long long'], True),
35 |     'signed long long': SimTypeNum(DEFAULT_SIZES['long long'], True),
36 |     'unsigned long long': SimTypeNum(DEFAULT_SIZES['long long'], False),
37 |     'long long int': SimTypeNum(DEFAULT_SIZES['long long'], True),
38 |     'signed long long int': SimTypeNum(DEFAULT_SIZES['long long'], True),
39 |     'unsigned long long int': SimTypeNum(DEFAULT_SIZES['long long'], False),
40 | 
41 |     'float': SimTypeFloat(),
42 |     'double': SimTypeDouble(),
43 |     'void': SimTypeBottom(),
44 | }
45 | 
46 | OTHER_TYPES = {
47 |     '__int64'   : BASIC_TYPES['long long'],
48 |     'BYTE'      : BASIC_TYPES['unsigned char'],
49 |     'CHAR'      : BASIC_TYPES['char'],
50 |     'DWORD'     : BASIC_TYPES['long'],
51 |     'DWORD32'   : BASIC_TYPES['int'],
52 |     'DWORD64'   : BASIC_TYPES['long'],
53 |     'INT'       : BASIC_TYPES['int'],
54 |     'INT8'      : BASIC_TYPES['int'],
55 |     'INT16'     : BASIC_TYPES['short'],
56 |     'INT32'     : BASIC_TYPES['int'],
57 |     'INT64'     : BASIC_TYPES['long long'],
58 |     'LONG'      : BASIC_TYPES['long'],
59 |     'LONGLONG'  : BASIC_TYPES['long long'],
60 |     'UCHAR'     : BASIC_TYPES['unsigned char'],
61 |     'UINT'      : BASIC_TYPES['unsigned int'],
62 |     'UINT8'     : BASIC_TYPES['unsigned int'],
63 |     'UINT16'    : BASIC_TYPES['unsigned short'],
64 |     'UINT32'    : BASIC_TYPES['unsigned int'],
65 |     'UINT64'    : BASIC_TYPES['unsigned long long'],
66 |     'ULONG'     : BASIC_TYPES['unsigned long'],
67 |     'ULONGLONG' : BASIC_TYPES['unsigned long long'],
68 |     'USHORT'    : BASIC_TYPES['unsigned short'],
69 |     'WORD'      : BASIC_TYPES['unsigned short']
70 | }
71 | 
72 | update_types({**BASIC_TYPES, **OTHER_TYPES})
73 | 


--------------------------------------------------------------------------------
/generate.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | import argparse
 3 | import sys
 4 | import os.path as path
 5 | import logging
 6 | from functools import reduce
 7 | 
 8 | import coloredlogs
 9 | import z3
10 | import pefile
11 | 
12 | log = logging.getLogger(__name__)
13 | coloredlogs.install(level="INFO", logger=log)
14 | 
15 | from modelLang import Parser
16 | from modelLang import Z3Backend
17 | 
def write_testcase(testcase, fout):
    """Write the bytes *testcase* to the file at path *fout*.

    The file is opened in binary write mode, so existing content is
    replaced.
    """
    with open(fout, "wb") as sink:
        sink.write(testcase)
21 | 
if __name__ == "__main__":
    argparser = argparse.ArgumentParser(description="Interpret models and generate testcases")
    argparser.add_argument('--asserts', '-A', action="append",
                           metavar="model", type=str, nargs="+",
                           default=[],
                           help="List of models to assert")
    argparser.add_argument('--negates', '-N', action="append",
                           metavar="model", type=str, nargs="*",
                           default=[],
                           help="List of models to negate")
    # No ``nargs=1`` on --out / --var: with it argparse stores a one-item
    # list when the option is given but a plain string for the default,
    # so a user-supplied value reached open() / Z3Backend as a list.
    argparser.add_argument('--out', '-O', action="store",
                           metavar="outfile", type=str,
                           default="testcase",
                           help="Output file for testcase (default = 'testcase')")
    argparser.add_argument('--var', '-V', action="store",
                           metavar="variable", type=str,
                           default="HEADER",
                           help="Name of the variable in the model representing the entire file (default 'HEADER')")
    argparser.add_argument('--define', '-D', action="store", metavar="define",
                           type=lambda x: (x.split(":")[0],
                                           int(x.split(":")[1])),
                           nargs="*",
                           help="List of constants in the model to overwrite. Syntax <constant name>:<new value>. E.g., FILESIZE:1024")
    argparser.add_argument('--size', '-B', action="store", metavar="bytes",
                          type=int, default=None,
                          help="Size in bytes of the testcase to generate")

    args = argparser.parse_args()
    if not args.asserts:
        argparser.print_help()
        sys.exit(0)

    # Each -A / -N occurrence yields its own list; flatten them.
    asserts = [name for group in args.asserts for name in group]
    negates = [name for group in args.negates for name in group]
    outfile = args.out
    voi = args.var
    size = args.size
    defs = dict(args.define) if args.define else {}

    def build_backend(model):
        """Parse *model* and return a Z3Backend that has executed it."""
        parser = Parser(ptype=Parser.ParserType.GENERATOR, input_size=size,
                        custom_defs=defs)
        parser.parse_file(model)
        backend = Z3Backend(name=path.basename(model), voi=voi)
        backend.exec_statements(parser.statements)
        return backend

    # Built from their own lists, so a model named in both -A and -N is
    # asserted and negated instead of being asserted only.
    z3_models_assert = [build_backend(model) for model in asserts]
    z3_models_negate = [build_backend(model) for model in negates]

    backend = z3_models_assert[0]
    for b in z3_models_assert[1:]:
        backend &= b

    for b in z3_models_negate:
        backend &= ~b

    # ``solver`` is read before ``model`` as before; its value is unused.
    solver = backend.solver
    model = backend.model
    if not model:
        # Nothing was written, so there is no output file to parse.
        log.error("Model unsat: no testcase generated")
        sys.exit(1)

    testcase = backend.generate_testcase()
    write_testcase(testcase, outfile)

    pef = pefile.PE(outfile)
    print(pef)
90 | 


--------------------------------------------------------------------------------
/modelLang/structures/headers/linux_kernel.h:
--------------------------------------------------------------------------------
/* Short fixed-width integer aliases (s8..u64) for int8_t..uint64_t. */
typedef int8_t s8;
typedef uint8_t u8;
typedef int16_t s16;
typedef uint16_t u16;
typedef int32_t s32;
typedef uint32_t u32;
typedef int64_t s64;
typedef uint64_t u64;

/* Double-underscore spellings of the same fixed-width aliases. */
typedef int8_t __s8;
typedef uint8_t __u8;
typedef int16_t __s16;
typedef uint16_t __u16;
typedef int32_t __s32;
typedef uint32_t __u32;
typedef int64_t __s64;
typedef uint64_t __u64;

/* ELF32 scalar types, expressed through the __u16/__u32/__s32 aliases. */
typedef __u32	Elf32_Addr;
typedef __u16	Elf32_Half;
typedef __u32	Elf32_Off;
typedef __s32	Elf32_Sword;
typedef __u32	Elf32_Word;
 24 | 
 25 | #define ET_NONE   0
 26 | #define ET_REL    1
 27 | #define ET_EXEC   2
 28 | #define ET_DYN    3
 29 | #define ET_CORE   4
 30 | #define ET_LOPROC 0xff00
 31 | #define ET_HIPROC 0xffff
 32 | 
 33 | #define EI_NIDENT 16
 34 | 
/*
 * ELF32 header record (Elf32_Ehdr). e_ident is a byte array of EI_NIDENT
 * (16) entries; every other field uses one of the Elf32_* scalar typedefs.
 * The type is also available under the alias Elf_Hdr.
 */
typedef struct elf32_hdr{
  unsigned char	e_ident[EI_NIDENT];
  Elf32_Half	e_type;
  Elf32_Half	e_machine;
  Elf32_Word	e_version;
  Elf32_Addr	e_entry;
  Elf32_Off	e_phoff;
  Elf32_Off	e_shoff;
  Elf32_Word	e_flags;
  Elf32_Half	e_ehsize;
  Elf32_Half	e_phentsize;
  Elf32_Half	e_phnum;
  Elf32_Half	e_shentsize;
  Elf32_Half	e_shnum;
  Elf32_Half	e_shstrndx;
} Elf32_Ehdr;
typedef Elf32_Ehdr Elf_Hdr;
 52 | 
/*
 * ELF32 program header record (Elf32_Phdr): eight fields, all of types that
 * resolve to __u32. Also available under the alias Elf_Phdr.
 * NOTE(review): p_type presumably takes the PT_* values defined further down
 * in this header - confirm against the models that use it.
 */
typedef struct elf32_phdr{
  Elf32_Word	p_type;
  Elf32_Off	p_offset;
  Elf32_Addr	p_vaddr;
  Elf32_Addr	p_paddr;
  Elf32_Word	p_filesz;
  Elf32_Word	p_memsz;
  Elf32_Word	p_flags;
  Elf32_Word	p_align;
} Elf32_Phdr;
typedef Elf32_Phdr Elf_Phdr;
 64 | 
 65 | /* Machine types */
 66 | #define EM_NONE		0
 67 | #define EM_M32		1
 68 | #define EM_SPARC	2
 69 | #define EM_386		3
 70 | #define EM_68K		4
 71 | #define EM_88K		5
 72 | #define EM_486		6	/* Perhaps disused */
 73 | #define EM_860		7
 74 | #define EM_MIPS		8	/* MIPS R3000 (officially, big-endian only) */
 75 | 				/* Next two are historical and binaries and
 76 | 				   modules of these types will be rejected by
 77 | 				   Linux.  */
 78 | #define EM_MIPS_RS3_LE	10	/* MIPS R3000 little-endian */
 79 | #define EM_MIPS_RS4_BE	10	/* MIPS R4000 big-endian */
 80 | 
 81 | #define EM_PARISC	15	/* HPPA */
 82 | #define EM_SPARC32PLUS	18	/* Sun's "v8plus" */
 83 | #define EM_PPC		20	/* PowerPC */
 84 | #define EM_PPC64	21	 /* PowerPC64 */
 85 | #define EM_SPU		23	/* Cell BE SPU */
 86 | #define EM_ARM		40	/* ARM 32 bit */
 87 | #define EM_SH		42	/* SuperH */
 88 | #define EM_SPARCV9	43	/* SPARC v9 64-bit */
 89 | #define EM_H8_300	46	/* Renesas H8/300 */
 90 | #define EM_IA_64	50	/* HP/Intel IA-64 */
 91 | #define EM_X86_64	62	/* AMD x86-64 */
 92 | #define EM_S390		22	/* IBM S/390 */
 93 | #define EM_CRIS		76	/* Axis Communications 32-bit embedded processor */
 94 | #define EM_M32R		88	/* Renesas M32R */
 95 | #define EM_MN10300	89	/* Panasonic/MEI MN10300, AM33 */
 96 | #define EM_OPENRISC     92     /* OpenRISC 32-bit embedded processor */
 97 | #define EM_ARCOMPACT	93	/* ARCompact processor */
 98 | #define EM_XTENSA	94	/* Tensilica Xtensa Architecture */
 99 | #define EM_BLACKFIN     106     /* ADI Blackfin Processor */
100 | #define EM_UNICORE	110	/* UniCore-32 */
101 | #define EM_ALTERA_NIOS2	113	/* Altera Nios II soft-core processor */
102 | #define EM_TI_C6000	140	/* TI C6X DSPs */
103 | #define EM_HEXAGON	164	/* QUALCOMM Hexagon */
104 | #define EM_NDS32	167	/* Andes Technology compact code size
105 | 				   embedded RISC processor family */
106 | #define EM_AARCH64	183	/* ARM 64 bit */
107 | #define EM_TILEPRO	188	/* Tilera TILEPro */
108 | #define EM_MICROBLAZE	189	/* Xilinx MicroBlaze */
109 | #define EM_TILEGX	191	/* Tilera TILE-Gx */
110 | #define EM_ARCV2	195	/* ARCv2 Cores */
111 | #define EM_RISCV	243	/* RISC-V */
112 | #define EM_BPF		247	/* Linux BPF - in-kernel virtual machine */
113 | #define EM_CSKY		252	/* C-SKY */
114 | #define EM_FRV		0x5441	/* Fujitsu FR-V */
115 | 
116 | /*
117 |  * This is an interim value that we will use until the committee comes
118 |  * up with a final number.
119 |  */
120 | #define EM_ALPHA	0x9026
121 | 
122 | /* Bogus old m32r magic number, used by old tools. */
123 | #define EM_CYGNUS_M32R	0x9041
124 | /* This is the old interim value for S/390 architecture */
125 | #define EM_S390_OLD	0xA390
126 | /* Also Panasonic/MEI MN10300, AM33 */
127 | #define EM_CYGNUS_MN10300 0xbeef
128 | 
129 | 
130 | #define PT_NULL    0
131 | #define PT_LOAD    1
132 | #define PT_DYNAMIC 2
133 | #define PT_INTERP  3
134 | #define PT_NOTE    4
135 | #define PT_SHLIB   5
136 | #define PT_PHDR    6
137 | #define PT_TLS     7               /* Thread local storage segment */
138 | #define PT_LOOS    0x60000000      /* OS-specific */
139 | #define PT_HIOS    0x6fffffff      /* OS-specific */
140 | #define PT_LOPROC  0x70000000
141 | #define PT_HIPROC  0x7fffffff
142 | #define PT_GNU_EH_FRAME		0x6474e550
143 | 
144 | #define PT_GNU_STACK	(PT_LOOS + 0x474e551)
145 | 
146 | #define EI_OSABI	7
147 | #define ELFOSABI_ARM_FDPIC  65
148 | 


--------------------------------------------------------------------------------
/explore_conditions.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | import argparse
  3 | import sys
  4 | import os.path as path
  5 | import logging
  6 | from functools import reduce
  7 | from itertools import product
  8 | 
  9 | import coloredlogs
 10 | import z3
 11 | import pefile
 12 | 
 13 | import progressbar
 14 | 
 15 | log = logging.getLogger(__name__)
 16 | coloredlogs.install(level="CRITICAL", logger=log)
 17 | alllog = logging.getLogger("")
 18 | alllog.setLevel(logging.CRITICAL)
 19 | 
 20 | from modelLang import Parser, Z3Backend
 21 | 
 22 | def write_testcase(testcase, outdir, cs, n):
 23 |     testcasename = path.join(outdir, f"testcase_{n}")
 24 |     conditionsname = path.join(outdir, f"testcase_{n}.cond")
 25 |     with open(testcasename, "wb") as fp:
 26 |         fp.write(testcase)
 27 |     with open(conditionsname, "w") as fp:
 28 |         for c in cs:
 29 |             fp.write(f"{c[0][0]} {c[1]}\n")
 30 | 
 31 | def update_blacklist(unsat, cs, blacklist):
 32 |     entry = dict()
 33 |     for cname in unsat:
 34 |         for c in cs:
 35 |             if c[0][0] == str(cname):
 36 |                 entry[c[0][0]] = c[1]
 37 |                 break
 38 |         else:
 39 |             log.warning(f"Could not find {cname} among the constraints")
 40 |             return
 41 |     blacklist.append(entry)
 42 | 
 43 | def isblacklisted(cs, blacklist):
 44 |     for b in blacklist:
 45 |         tmp = {cname: False for cname in b}
 46 |         for cname, value in b.items():
 47 |             for ((cname2, _), value2) in cs:
 48 |                 if (cname, value) == (cname2, value2):
 49 |                     tmp[cname] = True
 50 |         if all(tmp.values()):
 51 |             return True
 52 |     return False
 53 | 
 54 | if __name__ == "__main__":
 55 |     argparser = argparse.ArgumentParser(description="Explore all combinations of non-terminal conditions to generate different testcases.")
 56 |     argparser.add_argument('--models', '-M', action="append",
 57 |                            metavar="model", type=str, nargs="+",
 58 |                            help="Models to explore")
 59 |     argparser.add_argument('--supports', '-S', action="append",
 60 |                            metavar="model", type=str, nargs="*", default=[],
 61 |                            help="Other models to assert")
 62 |     argparser.add_argument('--outdir', '-O', action="store",
 63 |                            metavar="outfile", type=str,
 64 |                            default="testcase",
 65 |                            help="Output directory file for testcases")
 66 |     argparser.add_argument('--size', '-B', action="store", metavar="bytes",
 67 |                           type=int, default=None,
 68 |                           help="Size in bytes of the testcase to generate")
 69 |     argparser.add_argument('--define', '-D', action="store", metavar="define",
 70 |                            type=lambda x: (x.split(":")[0],
 71 |                                            int(x.split(":")[1])),
 72 |                            nargs="*",
 73 |                            help="Overwrite constant definition")
 74 |     argparser.add_argument('--var', '-V', action="store",
 75 |                            metavar="variable", type=str, nargs=1,
 76 |                            default="HEADER",
 77 |                            help="Variable in the model to use for the testcase")
 78 | 
 79 |     args = argparser.parse_args()
 80 |     inputs = reduce(lambda x,y: x | {*y}, args.models, set())
 81 |     supports = reduce(lambda x,y: x | {*y}, args.supports, set())
 82 |     outdir = args.outdir
 83 |     size = args.size
 84 |     voi = args.var
 85 |     defs = dict(args.define) if args.define else {}
 86 | 
 87 |     Z3Backend.print_unsat = False
 88 |     z3models = []
 89 |     for model in inputs:
 90 |         modelname = path.basename(model)
 91 |         parser = Parser(ptype=Parser.ParserType.DIFFERENTIAL_ASSERT,
 92 |                         input_size=size,
 93 |                         custom_defs=defs)
 94 |         parser.parse_file(model)
 95 |         backend = Z3Backend(name=modelname, voi=voi)
 96 |         backend.exec_statements(parser.statements)
 97 |         z3models.append(backend)
 98 | 
 99 |     backend = z3models[0]
100 |     for m in z3models[1:]:
101 |         backend &= m
102 | 
103 |     z3supports = []
104 |     for model in supports:
105 |         modelname = path.basename(model)
106 |         parser = Parser(ptype=Parser.ParserType.DIFFERENTIAL_ASSERT,
107 |                         input_size=size,
108 |                         custom_defs=defs)
109 |         parser.parse_file(model)
110 |         tmp = Z3Backend(name=modelname, voi=voi)
111 |         tmp.exec_statements(parser.statements)
112 |         z3supports.append(tmp)
113 | 
114 |     for m in z3supports:
115 |         backend &= m
116 | 
117 |     nterminal_conds = list(reduce(lambda x, y: x+y,
118 |                                   [[(x, m.conditions[x])
119 |                                     for x in (m.conditions.keys()
120 |                                               - m.terminal_conditions.keys())
121 |                                   ] for m in z3models],
122 |                                   []))
123 |     nconds = len(nterminal_conds)
124 |     log.info(f"{nconds} found. Generating {2**nconds} testcases")
125 |     alltf = product([True, False], repeat=nconds)
126 | 
127 |     n = 0
128 |     blacklist = []
129 |     for tfs in progressbar.progressbar(alltf, max_value=2**nconds):
130 |         progressbar.streams.flush()
131 |         # cs = [((name, z3cond), bool), ... ]
132 |         cs = list(zip(nterminal_conds, tfs))
133 |         if isblacklisted(cs, blacklist):
134 |             log.warning("Combination is known to be unsat")
135 |             continue
136 | 
137 |         support = Z3Backend()
138 |         for c in cs:
139 |             if c[1]:
140 |                 support.terminal_conditions[c[0][0]] = c[0][1]
141 |             else:
142 |                 support.terminal_conditions[c[0][0]] = z3.Not(c[0][1])
143 |         if not support.model:
144 |             log.warning("Support model is unsat. Checking the unsat core and discarinding conflitting constraints.")
145 |             unsat = support.solver.unsat_core()
146 |             update_blacklist(unsat, cs, blacklist)
147 |             continue
148 | 
149 |         tmpbackend = backend & support
150 |         tmpmodel = tmpbackend.model
151 |         if tmpmodel:
152 |             n += 1
153 |             testcase = tmpbackend.generate_testcase()
154 |             write_testcase(testcase, outdir, cs, n)
155 | 


--------------------------------------------------------------------------------
/modelLang/structures/headers/reactos.h:
--------------------------------------------------------------------------------
  1 | #define IMAGE_NUMBEROF_DIRECTORY_ENTRIES    16
  2 | #define IMAGE_SIZEOF_SHORT_NAME              8
  3 | #define IMAGE_SIZEOF_SECTION_HEADER          40
  4 | #define IMAGE_SIZEOF_FILE_HEADER             20
  5 | 
/*
 * DOS header record: a run of USHORT fields (including the e_res[4] and
 * e_res2[10] arrays) followed by the LONG e_lfanew.
 * PIMAGE_DOS_HEADER is the matching pointer typedef.
 */
typedef struct _IMAGE_DOS_HEADER {
  USHORT e_magic;
  USHORT e_cblp;
  USHORT e_cp;
  USHORT e_crlc;
  USHORT e_cparhdr;
  USHORT e_minalloc;
  USHORT e_maxalloc;
  USHORT e_ss;
  USHORT e_sp;
  USHORT e_csum;
  USHORT e_ip;
  USHORT e_cs;
  USHORT e_lfarlc;
  USHORT e_ovno;
  USHORT e_res[4];
  USHORT e_oemid;
  USHORT e_oeminfo;
  USHORT e_res2[10];
  LONG e_lfanew;
} IMAGE_DOS_HEADER, *PIMAGE_DOS_HEADER;
 27 | 
/*
 * Export directory record: ULONG fields plus a USHORT major/minor version
 * pair. PIMAGE_EXPORT_DIRECTORY is the matching pointer typedef.
 */
typedef struct _IMAGE_EXPORT_DIRECTORY {
  ULONG Characteristics;
  ULONG TimeDateStamp;
  USHORT MajorVersion;
  USHORT MinorVersion;
  ULONG Name;
  ULONG Base;
  ULONG NumberOfFunctions;
  ULONG NumberOfNames;
  ULONG AddressOfFunctions;
  ULONG AddressOfNames;
  ULONG AddressOfNameOrdinals;
} IMAGE_EXPORT_DIRECTORY, *PIMAGE_EXPORT_DIRECTORY;
 41 | 
/*
 * Resource data entry record: four ULONG fields.
 * PIMAGE_RESOURCE_DATA_ENTRY is the matching pointer typedef.
 */
typedef struct _IMAGE_RESOURCE_DATA_ENTRY {
  ULONG OffsetToData;
  ULONG Size;
  ULONG CodePage;
  ULONG Reserved;
} IMAGE_RESOURCE_DATA_ENTRY, *PIMAGE_RESOURCE_DATA_ENTRY;
 48 | 
/*
 * Load configuration directory, 32-bit variant (anonymous struct tag).
 * All threshold, table and cookie fields are ULONG here; the 64-bit
 * variant in this header widens them to ULONGLONG.
 */
typedef struct {
  ULONG Size;
  ULONG TimeDateStamp;
  USHORT MajorVersion;
  USHORT MinorVersion;
  ULONG GlobalFlagsClear;
  ULONG GlobalFlagsSet;
  ULONG CriticalSectionDefaultTimeout;
  ULONG DeCommitFreeBlockThreshold;
  ULONG DeCommitTotalFreeThreshold;
  ULONG LockPrefixTable;
  ULONG MaximumAllocationSize;
  ULONG VirtualMemoryThreshold;
  ULONG ProcessHeapFlags;
  ULONG ProcessAffinityMask;
  USHORT CSDVersion;
  USHORT Reserved1;
  ULONG EditList;
  ULONG SecurityCookie;
  ULONG SEHandlerTable;
  ULONG SEHandlerCount;
} IMAGE_LOAD_CONFIG_DIRECTORY32, *PIMAGE_LOAD_CONFIG_DIRECTORY32;
 71 | 
/*
 * Load configuration directory, 64-bit variant (anonymous struct tag).
 * Compared with the 32-bit variant in this header, the threshold, table,
 * affinity-mask and cookie fields are ULONGLONG, and ProcessAffinityMask
 * comes before ProcessHeapFlags instead of after it.
 */
typedef struct {
  ULONG Size;
  ULONG TimeDateStamp;
  USHORT MajorVersion;
  USHORT MinorVersion;
  ULONG GlobalFlagsClear;
  ULONG GlobalFlagsSet;
  ULONG CriticalSectionDefaultTimeout;
  ULONGLONG DeCommitFreeBlockThreshold;
  ULONGLONG DeCommitTotalFreeThreshold;
  ULONGLONG LockPrefixTable;
  ULONGLONG MaximumAllocationSize;
  ULONGLONG VirtualMemoryThreshold;
  ULONGLONG ProcessAffinityMask;
  ULONG ProcessHeapFlags;
  USHORT CSDVersion;
  USHORT Reserved1;
  ULONGLONG EditList;
  ULONGLONG SecurityCookie;
  ULONGLONG SEHandlerTable;
  ULONGLONG SEHandlerCount;
} IMAGE_LOAD_CONFIG_DIRECTORY64, *PIMAGE_LOAD_CONFIG_DIRECTORY64;
 94 | 
/*
 * Section header record. Name is a UCHAR array of IMAGE_SIZEOF_SHORT_NAME
 * (8) entries. Misc is a union of two ULONG members, so PhysicalAddress
 * and VirtualSize share the same storage.
 */
typedef struct _IMAGE_SECTION_HEADER {
  UCHAR Name[IMAGE_SIZEOF_SHORT_NAME];
  union {
    ULONG PhysicalAddress;
    ULONG VirtualSize;
  } Misc;
  ULONG VirtualAddress;
  ULONG SizeOfRawData;
  ULONG PointerToRawData;
  ULONG PointerToRelocations;
  ULONG PointerToLinenumbers;
  USHORT NumberOfRelocations;
  USHORT NumberOfLinenumbers;
  ULONG Characteristics;
} IMAGE_SECTION_HEADER, *PIMAGE_SECTION_HEADER;
110 | 
/*
 * File header record: four USHORT and three ULONG fields.
 * PIMAGE_FILE_HEADER is the matching pointer typedef.
 */
typedef struct _IMAGE_FILE_HEADER {
  USHORT Machine;
  USHORT NumberOfSections;
  ULONG TimeDateStamp;
  ULONG PointerToSymbolTable;
  ULONG NumberOfSymbols;
  USHORT SizeOfOptionalHeader;
  USHORT Characteristics;
} IMAGE_FILE_HEADER, *PIMAGE_FILE_HEADER;
120 | 
/*
 * Data directory entry: a VirtualAddress/Size pair of ULONGs. Used as the
 * element type of the DataDirectory arrays in the optional headers below.
 */
typedef struct _IMAGE_DATA_DIRECTORY {
  ULONG VirtualAddress;
  ULONG Size;
} IMAGE_DATA_DIRECTORY, *PIMAGE_DATA_DIRECTORY;
125 | 
/*
 * Optional header, 32-bit variant (struct tag _IMAGE_OPTIONAL_HEADER,
 * typedef IMAGE_OPTIONAL_HEADER32). ImageBase and the stack/heap
 * reserve/commit sizes are ULONG. The record ends with a DataDirectory
 * array of IMAGE_NUMBEROF_DIRECTORY_ENTRIES (16) entries.
 */
typedef struct _IMAGE_OPTIONAL_HEADER {
  USHORT Magic;
  UCHAR MajorLinkerVersion;
  UCHAR MinorLinkerVersion;
  ULONG SizeOfCode;
  ULONG SizeOfInitializedData;
  ULONG SizeOfUninitializedData;
  ULONG AddressOfEntryPoint;
  ULONG BaseOfCode;
  ULONG BaseOfData;
  ULONG ImageBase;
  ULONG SectionAlignment;
  ULONG FileAlignment;
  USHORT MajorOperatingSystemVersion;
  USHORT MinorOperatingSystemVersion;
  USHORT MajorImageVersion;
  USHORT MinorImageVersion;
  USHORT MajorSubsystemVersion;
  USHORT MinorSubsystemVersion;
  ULONG Win32VersionValue;
  ULONG SizeOfImage;
  ULONG SizeOfHeaders;
  ULONG CheckSum;
  USHORT Subsystem;
  USHORT DllCharacteristics;
  ULONG SizeOfStackReserve;
  ULONG SizeOfStackCommit;
  ULONG SizeOfHeapReserve;
  ULONG SizeOfHeapCommit;
  ULONG LoaderFlags;
  ULONG NumberOfRvaAndSizes;
  IMAGE_DATA_DIRECTORY DataDirectory[IMAGE_NUMBEROF_DIRECTORY_ENTRIES];
} IMAGE_OPTIONAL_HEADER32, *PIMAGE_OPTIONAL_HEADER32;
159 | 
/*
 * ROM optional header record. It starts with the same fields as
 * IMAGE_OPTIONAL_HEADER32 up to BaseOfData, then continues with BaseOfBss,
 * GprMask, the four-entry CprMask array and GpValue. It has no
 * DataDirectory array.
 */
typedef struct _IMAGE_ROM_OPTIONAL_HEADER {
  USHORT Magic;
  UCHAR MajorLinkerVersion;
  UCHAR MinorLinkerVersion;
  ULONG SizeOfCode;
  ULONG SizeOfInitializedData;
  ULONG SizeOfUninitializedData;
  ULONG AddressOfEntryPoint;
  ULONG BaseOfCode;
  ULONG BaseOfData;
  ULONG BaseOfBss;
  ULONG GprMask;
  ULONG CprMask[4];
  ULONG GpValue;
} IMAGE_ROM_OPTIONAL_HEADER, *PIMAGE_ROM_OPTIONAL_HEADER;
175 | 
/*
 * Optional header, 64-bit variant. Compared with IMAGE_OPTIONAL_HEADER32
 * in this header: there is no BaseOfData field, and ImageBase plus the
 * stack/heap reserve/commit sizes are ULONGLONG. The record ends with a
 * DataDirectory array of IMAGE_NUMBEROF_DIRECTORY_ENTRIES (16) entries.
 */
typedef struct _IMAGE_OPTIONAL_HEADER64 {
  USHORT Magic;
  UCHAR MajorLinkerVersion;
  UCHAR MinorLinkerVersion;
  ULONG SizeOfCode;
  ULONG SizeOfInitializedData;
  ULONG SizeOfUninitializedData;
  ULONG AddressOfEntryPoint;
  ULONG BaseOfCode;
  ULONGLONG ImageBase;
  ULONG SectionAlignment;
  ULONG FileAlignment;
  USHORT MajorOperatingSystemVersion;
  USHORT MinorOperatingSystemVersion;
  USHORT MajorImageVersion;
  USHORT MinorImageVersion;
  USHORT MajorSubsystemVersion;
  USHORT MinorSubsystemVersion;
  ULONG Win32VersionValue;
  ULONG SizeOfImage;
  ULONG SizeOfHeaders;
  ULONG CheckSum;
  USHORT Subsystem;
  USHORT DllCharacteristics;
  ULONGLONG SizeOfStackReserve;
  ULONGLONG SizeOfStackCommit;
  ULONGLONG SizeOfHeapReserve;
  ULONGLONG SizeOfHeapCommit;
  ULONG LoaderFlags;
  ULONG NumberOfRvaAndSizes;
  IMAGE_DATA_DIRECTORY DataDirectory[IMAGE_NUMBEROF_DIRECTORY_ENTRIES];
} IMAGE_OPTIONAL_HEADER64, *PIMAGE_OPTIONAL_HEADER64;
208 | 


--------------------------------------------------------------------------------
/modelLang/parsers/langlex.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import ply.lex as lex
  3 | import re
  4 | from enum import Enum, auto, unique
  5 | 
  6 | from ..classes import Base, Optimizations
  7 | 
  8 | log = logging.getLogger(__name__)
  9 | log.setLevel(logging.NOTSET)
 10 | 
 11 | 
 12 | class Lexer:
 13 |     tokens = (
 14 |         'NEWLINE',
 15 | 
 16 |         # these translate to z3 functions
 17 |         'OPERATOR1',
 18 |         'OPERATOR2',
 19 | 
 20 |         # string comparison - a syntactic sugar
 21 |         'STRCMP',
 22 | 
 23 |         'ASSIGNSTART',
 24 |         'CONDITIONNAME',
 25 |         'GENCONDITIONNAME',
 26 |         'LOOPSTART',
 27 |         'LOOPEND',
 28 |         'LOOP',
 29 |         'VLOOP',
 30 |         'DBG',
 31 |         'COMMA',
 32 |         'COLON',
 33 |         'SEMICOLON',
 34 |         'EXCLAMATION',
 35 |         'DOT',
 36 |         'TERMINATOR',
 37 | 
 38 |         # slicing
 39 |         'LBRACKETS',
 40 |         'RBRACKETS',
 41 | 
 42 |         # parentheses
 43 |         'LPAREN',
 44 |         'RPAREN',
 45 | 
 46 |         # ->
 47 |         'ARROW',
 48 | 
 49 |         # #
 50 |         'COMMENT',
 51 | 
 52 |         'NUMBER',
 53 |         'CHAR',
 54 |         'STR',
 55 |         'BOOL',
 56 |         'VARIABLE',
 57 |         'INPUT',
 58 |         'OUTPUT',
 59 | 
 60 |         'LOADTYPES',
 61 |         'TYPE',
 62 |         'SIZEOF',
 63 |         'DEFINE',
 64 | 
 65 |         'FROMFILE',
 66 |         'OPTIMIZE',
 67 |     )
 68 | 
 69 |     def t_OPERATOR1(self, t):
 70 |         r'(NOT|Not|BITNOT|BITNot|BitNot|ISPOW2|IsPow2|isPow2|Setc|SECT|NSect|NSECT|OptHdr|OPTHDR)'
 71 |         t.value = t.value.upper()
 72 |         log.debug("OPERATOR1 token")
 73 |         return t
 74 | 
 75 |     def t_OPERATOR2(self, t):
 76 |         r"(ADD|SUB|DIV|UDIV|AND|OR|ULE|UGE|ULT|UGT|Add|Sub|Div|UDiv|And|Or|ULe|UGe|ULt|UGt|BITAND|BITAnd|BitAnd|BITOR|BITOr|BitOr|LE|Le|GE|Ge|NEQ|NEq|Neq|EQ|Eq|LT|Lt|GT|Gt|INT|Int|MOD|Mod|MUL|Mul|ALIGNUP|ALIGNDOWN|ISALIGNED|SHR|ShR|SHL|ShL|OVFLADD|OVFLAdd|OvflAdd)\s"
 77 |         log.debug("OPERATOR2 token")
 78 |         t.value = t.value[:-1].upper()
 79 |         return t
 80 | 
 81 |     def t_STRCMP(self, t):
 82 |         r"(STRCMP|STRCmp|StrCmp)"
 83 |         t.value = t.value[:-1].upper()
 84 |         return t
 85 | 
 86 |     def t_CHAR(self, t):
 87 |         r'"[^"]"'
 88 |         t.value = ord(t.value[1])
 89 |         log.debug("A single char value token")
 90 |         return t
 91 | 
 92 |     def t_STR(self, t):
 93 |         r"'[^']+'"
 94 |         t.value = eval('"' + t.value[1:-1] + '"')
 95 |         return t
 96 | 
 97 |     def t_BOOL(self, t):
 98 |         r"(TRUE|True|true|FALSE|False|false)"
 99 |         val = t.value.upper()
100 |         t.value = True if val == "TRUE" else False
101 |         log.debug(f"Found immediate boolean value {val}")
102 |         return t
103 | 
104 |     def t_TERMINATOR(self, t):
105 |         r"term"
106 |         log.debug("Terminal condition token")
107 |         return t
108 | 
109 |     t_LBRACKETS   = r'\['
110 |     t_RBRACKETS   = r'\]'
111 |     t_LPAREN      = r'\('
112 |     t_RPAREN      = r'\)'
113 |     t_ARROW       = r'<-'
114 |     t_SEMICOLON   = r';'
115 |     t_EXCLAMATION = r'!'
116 |     t_DOT         = r'\.'
117 |     t_COMMA       = r','
118 |     t_NEWLINE     = r'\n'
119 | 
120 |     def t_COLON(self, t):
121 |         r':'
122 |         return t
123 | 
124 |     def t_INPUT(self, t):
125 |         r'^(INPUT|input)'
126 |         log.debug("Input variable token")
127 |         return t
128 | 
129 |     def t_OUTPUT(self, t):
130 |         r'^(OUTPUT|output)'
131 |         log.debug("Output variable token")
132 |         return t
133 | 
134 |     def t_ASSIGNSTART(self, t):
135 |         r'(P|p)(?=(:|\())'
136 |         log.debug("Assignement start token")
137 |         t.value = t.value.lstrip()
138 |         return t
139 | 
140 |     def t_LOOPSTART(self, t):
141 |         r'(L|l)\d+(?=(:|\())'
142 |         log.debug("Loop start token")
143 |         v = t.value.lstrip()
144 |         v = int(v[1:])
145 |         t.value = v
146 |         return t
147 | 
148 |     def t_DBG(self, t):
149 |         r'(D|d)(?=(:|\())'
150 |         log.debug("Debug token")
151 |         t.value = t.value.lstrip()
152 |         return t
153 | 
154 |     def t_LOOPEND(self, t):
155 |         r'(END|End|end)\s(L|l)\d+'
156 |         log.debug("Loop end token")
157 |         v = t.value.lstrip()
158 |         v = int(v[5:])
159 |         t.value = v
160 |         return t
161 | 
162 |     def t_LOOP(self, t):
163 |         r'LOOP'
164 |         return t
165 | 
166 |     def t_VLOOP(self, t):
167 |         r'VLOOP'
168 |         return t
169 | 
170 |     def t_CONDITIONNAME(self, t):
171 |         r'(V|v)\d+'
172 |         log.debug("Condition name token")
173 |         return t
174 | 
175 |     def t_GENCONDITIONNAME(self, t):
176 |         r'(G|g)\d+'
177 |         log.debug("Condition name token")
178 |         return t
179 | 
180 |     def t_LOADTYPES(self, t):
181 |         r'(LOAD|Load|load)(REL|Rel|rel)?\s'
182 |         if 'rel' in t.value.lower():
183 |             t.value = True
184 |         else:
185 |             t.value = False
186 |         return t
187 | 
188 |     def t_TYPE(self, t):
189 |         r'(AS|As|as)\s'
190 |         return t
191 | 
192 |     def t_SIZEOF(self, t):
193 |         r'(SIZEOF|SizeOf|sizeof)\s'
194 |         return t
195 | 
196 |     def t_DEFINE(self, t):
197 |         r'(DEFINE|Define|define)\s'
198 |         return t
199 | 
200 |     def t_FROMFILE(self, t):
201 |         r'FROMFILE\s'
202 |         return t
203 | 
204 |     def t_OPTIMIZE(self, t):
205 |         r'(MAXIMIZE|MINIMIZE)'
206 |         if 'MAX' in t.value:
207 |             t.value = Optimizations.MAXIMIZE
208 |         else:
209 |             t.value = Optimizations.MINIMIZE
210 |         return t
211 | 
212 |     def t_VARIABLE(self, t):
213 |         r"[a-zA-Z_][a-zA-Z_0-9]+"
214 |         return t
215 | 
216 |     # A regular expression rule with some action code
217 |     def t_NUMBER(self, t):
218 |         r'(0(x|X)[a-fA-F0-9]+|\d+)'
219 |         log.debug("Number token")
220 |         try:
221 |             t.value = int(t.value)
222 |         except ValueError:
223 |             t.value = int(t.value, 16)
224 |         return t
225 | 
226 |     t_ignore_comments = r'\#.*'
227 | 
228 |     # Define a rule so we can track line numbers
229 |     def t_newline(self, t):
230 |         r'\n+'
231 |         log.debug("New line found")
232 |         t.lexer.lineno += len(t.value)
233 | 
234 |     # A string containing ignored characters (spaces and tabs)
235 |     t_ignore  = ' \t'
236 | 
237 |     # Error handling rule
238 |     def t_error(self, t):
239 |         print("Illegal character '%s'" % t.value[0])
240 |         t.lexer.skip(1)
241 | 
242 |     def __init__(self):
243 |         # Build the lexer
244 |         lexer = lex.lex(module=self)
245 | 


--------------------------------------------------------------------------------
/tooleval/idaplugin/idadumpmem.py.asm:
--------------------------------------------------------------------------------
  1 | ;
  2 | ; +-------------------------------------------------------------------------+
  3 | ; |   This file has been generated by The Interactive Disassembler (IDA)    |
  4 | ; |           Copyright (c) 2018 Hex-Rays, <support@hex-rays.com>           |
  5 | ; |                      License info: 48-B237-7154-E0                      |
  6 | ; |                            Institut EURECOM                             |
  7 | ; +-------------------------------------------------------------------------+
  8 | ;
  9 | ; Input SHA256 : 0F18A2FFC56339EB0E4B1DBBD260CFBA380AF0846F3DFDE29A00296EADAEEEE4
 10 | ; Input MD5    : CEE979E03605052C37ECF7348A2C7D35
 11 | ; Input CRC32  : 375248FB
 12 | 
 13 | ; File Name   : /home/dario/phd/loaders_modeling/lang_parser/tooleval/idaplugin/idadumpmem.py
 14 | ; Format      : Binary file
 15 | ; Base Address: 0000h Range: 0000h - 0595h Loaded length: 0595h
 16 | 
 17 |                 .686p
 18 |                 .mmx
 19 |                 .model flat
 20 | 
 21 | ; ===========================================================================
 22 | 
 23 | ; Segment type: Pure code
 24 | seg000          segment byte public 'CODE' use32
 25 |                 assume cs:seg000
 26 |                 assume es:nothing, ss:nothing, ds:nothing, fs:nothing, gs:nothing
 27 |                 dd 6D6F7266h, 63646920h, 706D6920h, 2074726Fh, 72660A2Ah
 28 |                 dd 69206D6Fh, 70616164h, 6D692069h, 74726F70h, 660A2A20h
 29 |                 dd 206D6F72h, 75616469h, 736C6974h, 706D6920h, 2074726Fh
 30 |                 dd 6D690A2Ah, 74726F70h, 73797320h, 6F72660Ah, 736F206Dh
 31 |                 dd 7461702Eh, 6D692068h, 74726F70h, 696F6A20h, 79730A6Eh
 32 |                 dd 61702E73h, 612E6874h, 6E657070h, 2E222864h, 0A29222Eh
 33 |                 dd 2E737973h, 68746170h, 7070612Eh, 28646E65h, 29222E22h
 34 |                 dd 6C630A0Ah, 20737361h, 69466F54h, 7453656Ch, 74754F64h
 35 |                 dd 6A626F28h, 29746365h, 20200A3Ah, 65642020h, 5F5F2066h
 36 |                 dd 74696E69h, 73285F5Fh, 29666C65h, 20200A3Ah, 20202020h
 37 |                 dd 65732020h, 6F2E666Ch, 69667475h, 3D20656Ch, 65706F20h
 38 |                 dd 2F22286Eh, 2F706D74h, 6F616469h, 742E7475h, 2C227478h
 39 |                 dd 22772220h, 20200A29h, 65642020h, 72772066h, 28657469h
 40 |                 dd 666C6573h, 6574202Ch, 3A297478h, 2020200Ah, 20202020h
 41 |                 dd 6C657320h, 756F2E66h, 6C696674h, 72772E65h, 28657469h
 42 |                 dd 74786574h, 20200A29h, 65642020h, 6C662066h, 28687375h
 43 |                 dd 666C6573h, 200A3A29h, 20202020h, 73202020h, 2E666C65h
 44 |                 dd 6674756Fh, 2E656C69h, 73756C66h, 0A292868h, 20202020h
 45 |                 dd 20666564h, 74617369h, 73287974h, 29666C65h, 20200A3Ah
 46 |                 dd 20202020h, 65722020h, 6E727574h, 6C614620h, 200A6573h
 47 |                 dd 64202020h, 5F206665h, 6C65645Fh, 73285F5Fh, 29666C65h
 48 |                 dd 20200A3Ah, 20202020h, 65732020h, 6F2E666Ch, 69667475h
 49 |                 dd 632E656Ch, 65736F6Ch, 730A2928h, 732E7379h, 756F6474h
 50 |                 dd 203D2074h, 2E737973h, 65647473h, 3D207272h, 466F5420h
 51 |                 dd 53656C69h, 754F6474h, 0A292874h, 3A797274h, 20200A0Ah
 52 |                 dd 72662020h, 6D206D6Fh, 75646D65h, 705F706Dh, 69203262h
 53 |                 dd 726F706Dh, 654D2074h, 79726F6Dh, 706D7544h, 654D202Ch
 54 |                 dd 79726F6Dh, 69676552h, 0A0A6E6Fh, 20202020h, 6C206669h
 55 |                 dd 41286E65h, 29564752h, 32203C20h, 20200A3Ah, 20202020h
 56 |                 dd 75642020h, 6964706Dh, 203D2072h, 6D742F22h, 200A2270h
 57 |                 dd 65202020h, 3A65736Ch, 2020200Ah, 20202020h, 6D756420h
 58 |                 dd 72696470h, 41203D20h, 5B564752h, 0A0A5D31h, 20202020h
 59 |                 dd 646D656Dh, 20706D75h, 654D203Dh, 79726F6Dh, 706D7544h
 60 |                 dd 200A2928h, 66202020h, 7620726Fh, 72646461h, 206E6920h
 61 |                 dd 6D676553h, 73746E65h, 0A3A2928h, 2 dup(20202020h), 726D656Dh
 62 |                 dd 6F696765h, 203D206Eh, 646D656Dh, 2E706D75h, 69676572h
 63 |                 dd 2E736E6Fh, 28646461h, 20200A29h, 20202020h, 656D2020h
 64 |                 dd 6765726Dh, 2E6E6F69h, 64646176h, 203D2072h, 64646176h
 65 |                 dd 20200A72h, 20202020h, 656D2020h, 6765726Dh, 2E6E6F69h
 66 |                 dd 7A697376h, 203D2065h, 45676553h, 7628646Eh, 72646461h
 67 |                 dd 202D2029h, 53676553h, 74726174h, 64617628h, 0A297264h
 68 |                 dd 2 dup(20202020h), 72747461h, 67203D20h, 735F7465h, 5F6D6765h
 69 |                 dd 72747461h, 64617628h, 202C7264h, 41474553h, 5F525454h
 70 |                 dd 4D524550h, 20200A29h, 20202020h, 65722020h, 3D206461h
 71 |                 dd 74746120h, 20262072h, 50474553h, 5F4D5245h, 44414552h
 72 |                 dd 203D2120h, 20200A30h, 20202020h, 72772020h, 20657469h
 73 |                 dd 7461203Dh, 26207274h, 47455320h, 4D524550h, 4952575Fh
 74 |                 dd 21204554h, 0A30203Dh, 2 dup(20202020h), 20637865h, 7461203Dh
 75 |                 dd 26207274h, 47455320h, 4D524550h, 4558455Fh, 3D212043h
 76 |                 dd 200A3020h, 20202020h, 6D202020h, 65726D65h, 6E6F6967h
 77 |                 dd 7265702Eh, 7373696Dh, 206E6F69h, 2D22203Dh, 202B2022h
 78 |                 dd 22722228h, 20666920h, 64616572h, 736C6520h, 2D222065h
 79 |                 dd 2B202922h, 77222820h, 66692022h, 69727720h, 65206574h
 80 |                 dd 2065736Ch, 29222D22h, 28202B20h, 20227822h, 65206669h
 81 |                 dd 65206378h, 2065736Ch, 29222D22h, 2020200Ah, 20202020h
 82 |                 dd 6D656D20h, 69676572h, 662E6E6Fh, 657A6973h, 6D203D20h
 83 |                 dd 6D286E69h, 65726D65h, 6E6F6967h, 6973762Eh, 202C657Ah
 84 |                 dd 30317830h, 29303030h, 2020200Ah, 20202020h, 6D656D20h
 85 |                 dd 69676572h, 632E6E6Fh, 65746E6Fh, 3D20746Eh, 74656720h
 86 |                 dd 7479625Fh, 76287365h, 72646461h, 656D202Ch, 6765726Dh
 87 |                 dd 2E6E6F69h, 7A697366h, 200A2965h, 70202020h, 6E676F72h
 88 |                 dd 20656D61h, 6567203Dh, 6F725F74h, 665F746Fh, 6E656C69h
 89 |                 dd 28656D61h, 20200A29h, 69772020h, 6F206874h, 286E6570h
 90 |                 dd 6E696F6Ah, 6D756428h, 72696470h, 7270202Ch, 616E676Fh
 91 |                 dd 222B656Dh, 6D75642Eh, 2C292270h, 62772220h, 61202922h
 92 |                 dd 70662073h, 20200A3Ah, 20202020h, 70662020h, 6972772Eh
 93 |                 dd 6D286574h, 75646D65h, 532E706Dh, 61697265h, 657A696Ch
 94 |                 dd 74536F54h, 676E6972h, 0A292928h, 65637865h, 45207470h
 95 |                 dd 70656378h, 6E6F6974h, 20736120h, 200A3A65h, 70202020h
 96 |                 dd 746E6972h, 0A296528h, 20202020h, 2E636469h, 74697845h
 97 |                 dd 0A293128h, 6364690Ah, 6978452Eh, 29302874h
 98 |                 db 0Ah
 99 | seg000          ends
100 | 
101 | 
102 |                 end
103 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # What is this about?
  2 | 
  3 | This project provides a framework for modeling and analyzing the behavior of parsers for executable file formats, like the ones we can find in operating system loaders and reverse engineering tools.  
  4 | 
  5 | # Why? What's the goal of all this?
  6 | 
  7 | The key problem we deal with: there is *no reference implementation* for parsing PE files, and there are *no comprehensive specifications* for the PE file format. Reimplementation is the de facto rule, and there is a lot of room for implementation differences. This, in turn, leads to *discrepancies* between software that actually needs to load PE executables (e.g., Windows OS) and reverse engineering / malware analysis tools. This is a problem: these discrepancies can be used to mislead malware analysis and reverse engineering tools.
  8 | 
  9 | Key contributions:
 10 | - We developed and released an *analysis framework* to *systematically* explore this problem and *enumerate* discrepancies among different software, especially OS loaders vs. reverse engineering / malware analysis tools.
 11 | - We developed and released models for various versions of Windows (XP, 7, 10) and reverse engineering tools (ClamAV, Yara, radare2).
 12 | - We can automatically *validate* and *generate* PE samples "exploiting" these discrepancies, thus tricking reverse engineering tools into extracting misleading information.
 13 | 
 14 | 
 15 | # What do I find in this repo?
 16 | 
 17 | This project ships some ready-to-use models as well as the code of the analysis framework.  
 18 | The models can be found in the dedicated [submodule](https://github.com/eurecom-s3/loaders-models), while the interpreter's code for the custom language is in the [modelLang](modelLang) directory.  
 19 | 
 20 | # Modeling Language
 21 | 
 22 | The first step in the analysis consists in writing a "model" of the parser using the custom language supported by the framework.  
 23 | Here follows an example extracted from the models of the Windows loader.
 24 | ```
 25 | INPUT HEADER 2048 as DOSHeader
 26 | 
 27 | ## Check the MZ magic number
 28 | V1: AND EQ HEADER.magic[0] "M" EQ HEADER.magic[1] "Z" term
 29 | V2: ULE (ADD HEADER.e_lfanew 0xf8) FILESIZE term
 30 | 
 31 | P: NT_HEADER <- HEADER[HEADER.e_lfanew, sizeof _IMAGE_NT_HEADERS] as _IMAGE_NT_HEADERS
 32 | ## Check the PE magic number
 33 | V3: EQ NT_HEADER.Signature 0x4550  term
 34 | 
 35 | ```
 36 | For more information about the modeling language, check out [SPECIFICATIONS.md](SPECIFICATIONS.md).  
 37 | 
 38 | # Analysis Tasks & Examples
 39 | 
 40 | ## Sample validation
 41 | Given an executable and the model of a parser as input, the framework can determine whether the first meets the constraints of the second, in other words, whether the modeled software considers the input file as a valid executable.
 42 | To check whether an executable meets the constraints of a model, you can run:  
 43 | ```
 44 | python3 verify.py <model file> <sample>
 45 | ```
 46 | For example, if you want to check whether an unknown sample can run under Windows 10 by using the ready-to-use models in this project, you can launch:  
 47 | ```
 48 | python3 verify.py models/windows/10/MiCreateImageFile.lmod path/to/the/sample
 49 | ```
 50 | The script returns 0 if the executable is valid, or 1 otherwise (in this case, the script also prints one line pointing to the broken constraint in the model).
 51 | 
 52 | ## Sample generation
 53 | The framework can create program headers that are valid according to one or more models.
 54 | The logic for generating valid samples is implemented in the `generate.py` script, which can be invoked as follows:
 55 | ```
 56 | python3 generate.py -A <model 1> [<model 2> [<model 3> ... ]]
 57 | ```
 58 | For example, to generate a valid test case for Windows 7, you can run the following command that combines the models of both the kernel-space and user-space portions of its loader:
 59 | ```
 60 | python3 generate.py -A models/windows/7/MiCreateImageFile.lmod models/windows/7/LdrpInitializeProcess.lmod
 61 | ```
 62 | The output file can be specified with the `-O` flag (default: `testcase`).
 63 | 
 64 | ## Differential test case generation
 65 | Given two or more models, the framework can create program headers that are valid according to a subset of them but invalid for the others.
 66 | `generate.py` also implements the differential test case generation and can be invoked with:
 67 | ```
 68 | python3 generate.py -A <model 1> [<model 2> [...]] -N <model 3> [<model 4> [...]]
 69 | ```
 70 | For example, to generate a sample that runs in Windows 10 but not in Windows 7 (the `-A` models are asserted, the `-N` models are negated), you can execute:  
 71 | ```
 72 | ./generate.py -A models/windows/10/MiCreateImageFileMap.lmod models/windows/10/LdrpInitializeProcess.lmod -N models/windows/7/MiCreateImageFileMap.lmod models/windows/7/LdrpInitializeProcess.lmod
 73 | ```
 74 | 
 75 | ## Differences enumeration
 76 | Given two models, the framework is able to create many different differential test cases, exploiting different discrepancies among the two models.  
 77 | 
 78 | The `differential.py` script implements the logic for the differences enumeration technique and can be invoked the same way as `generate.py`.
 79 | 
 80 | ## Corner cases generation
 81 | Given one or more models, the framework creates many test cases that cover all the possible configurations that the set of models considers valid.  
 82 | This technique is implemented in the `explore_conditions.py` script, which can be run with the following command:  
 83 | ```
 84 | python3 explore_conditions.py -M <model 1> [<model 2> [...]]
 85 | ```
 86 | 
 87 | # Setup/Installation
 88 | 
 89 | The best way to start using this project is by creating a virtual environment.  
 90 | ```
 91 | mkvirtualenv --python=python3 models
 92 | ```
 93 | This project uses python3-specific features and, as such, is unlikely to work with python2.  
 94 | Most of the project dependencies can be installed using pip:
 95 | ```
 96 | pip install -r requirements.txt
 97 | ```
 98 | The `z3` solver needs to be installed separately. On Ubuntu 20.04, you can do that with:
 99 | ```
100 | sudo apt install z3
101 | ```
102 | 
103 | # Publications and Conference Talks
104 | 
105 | This work was published at the [24th International Symposium on Research in Attacks, Intrusions and Defenses (RAID 2021)](https://raid2021.org/).  
106 | You can read the paper [here](https://www.eurecom.fr/publication/6603/download/sec-publi-6603.pdf).  
107 | If you want to cite this work in your academic paper, you can use this:
108 | 
109 | ```
110 | @inproceedings{10.1145/3471621.3471848,
111 |   author = {Nisi, Dario and Graziano, Mariano and Fratantonio, Yanick and Balzarotti, Davide},
112 |   title = {Lost in the Loader:The Many Faces of the Windows PE File Format},
113 |   year = {2021},
114 |   publisher = {Association for Computing Machinery},
115 |   booktitle = {24th International Symposium on Research in Attacks, Intrusions and Defenses},
116 |   location = {San Sebastian, Spain},
117 |   series = {RAID '21}
118 | }
119 | ```
120 | We will also present this project at [Black Hat Europe 2021](https://www.blackhat.com/eu-21/).
121 | 


--------------------------------------------------------------------------------
/differential.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | import argparse
  3 | import sys
  4 | import os.path as path
  5 | import logging
  6 | from functools import reduce
  7 | from itertools import product, combinations
  8 | 
  9 | import coloredlogs
 10 | import z3
 11 | import pefile
 12 | 
 13 | log = logging.getLogger(__name__)
 14 | coloredlogs.install(level="INFO", logger=log)
 15 | 
 16 | from modelLang import Parser, Z3Backend
 17 | 
 18 | def gen_constraint_name(model, cond):
 19 |     return f"{model.name}_{cond}"
 20 | 
 21 | def create_constraints_db(models):
 22 |     ret = {}
 23 |     for model in models:
 24 |         for name, cond in model.terminal_conditions.items():
 25 |             cname = gen_constraint_name(model, name)
 26 |             ret[cname] = cond
 27 |     return ret
 28 | 
 29 | def write_testcase(testcase, constraints, fout):
 30 |     with open(fout, "wb") as fp:
 31 |         fp.write(testcase)
 32 |     with open(f"{fout}.constraints", "w") as fp:
 33 |         for name in constraints:
 34 |             fp.write(f"{name}\n")
 35 | 
 36 | def generate(z3_models_assert, z3_models_negate, z3_model_support=None):
 37 |     backend = z3_models_assert[0]
 38 |     for b in z3_models_assert[1:]:
 39 |         backend &= b
 40 | 
 41 |     if z3_model_support:
 42 |         backend &= z3_model_support
 43 | 
 44 |     for b in z3_models_negate:
 45 |         backend &= ~b
 46 | 
 47 |     solver = backend.solver
 48 |     model = backend.model
 49 |     testcase = backend.generate_testcase() if model else None
 50 |     return model, testcase
 51 | 
 52 | def find_violations(model, z3_models_negate):
 53 |     ret = set()
 54 |     for mn in z3_models_negate:
 55 |         for name, cond in mn.terminal_conditions.items():
 56 |             if not model.eval(cond):
 57 |                 model_name = gen_constraint_name(mn, name)
 58 |                 ret.add(model_name)
 59 |     return ret
 60 | 
 61 | def next_iteration(violated_constraints, violated_once, processed, to_process,
 62 |                    constraints_db):
 63 |     if len(violated_once) != len(violated_once | violated_constraints):
 64 |         log.critical(f"New Constraints found! {violated_constraints - violated_once}")
 65 |         violated_once |= violated_constraints
 66 |         all_subsets = set()
 67 |         violated_once_sorted = sorted(violated_once)
 68 |         for i in range(1, len(violated_once)+1):
 69 |             all_subsets |= set(combinations(violated_once_sorted, i))
 70 | 
 71 |         new_subsets = all_subsets - processed
 72 |         to_process.extend(new_subsets)
 73 | 
 74 |     while len(to_process) > 0:
 75 |         candidate = to_process.pop(0)
 76 |         if candidate not in processed:
 77 |             break
 78 |     else:
 79 |         return None
 80 | 
 81 |     processed.add(candidate)
 82 |     log.critical(f"{candidate} chosen")
 83 |     z3_model_support = Z3Backend(name="suppport")
 84 |     for constr in candidate:
 85 |         z3constr = constraints_db[constr]
 86 |         z3_model_support.terminal_conditions[constr] = z3constr
 87 |         z3_model_support.conditions[constr] = z3constr
 88 | 
 89 |     return z3_model_support
 90 | 
 91 | if __name__ == "__main__":
 92 |     argparser = argparse.ArgumentParser(description="Interpret models and generate testcases")
 93 |     argparser.add_argument('--asserts', '-A', action="append",
 94 |                            metavar="model", type=str, nargs="+",
 95 |                            help="Model to assert")
 96 |     argparser.add_argument('--negates', '-N', action="append",
 97 |                            metavar="model", type=str, nargs="*",
 98 |                            default=[],
 99 |                            help="Model to negate")
100 |     argparser.add_argument('--out', '-O', action="store",
101 |                            metavar="outfile", type=str,
102 |                            default="testcase",
103 |                            help="Output file for testcase")
104 |     argparser.add_argument('--var', '-V', action="store",
105 |                            metavar="variable", type=str, nargs=1,
106 |                            default="HEADER",
107 |                            help="Variable in the model to use for the testcase")
108 |     argparser.add_argument('--size', '-B', action="store", metavar="bytes",
109 |                           type=int, default=None,
110 |                           help="Size in bytes of the testcase to generate")
111 |     argparser.add_argument('--define', '-D', action="store", metavar="define",
112 |                            type=lambda x: (x.split(":")[0],
113 |                                            int(x.split(":")[1])),
114 |                            nargs="*",
115 |                            help="Overwrite constant definition")
116 | 
117 |     args = argparser.parse_args()
118 |     asserts = reduce(lambda x,y: x | {*y}, args.asserts, set())
119 |     negates = reduce(lambda x,y: x | {*y}, args.negates, set())
120 |     outfile = args.out
121 |     voi = args.var
122 |     size = args.size
123 |     defs = dict(args.define) if args.define else {}
124 | 
125 |     Z3Backend.print_unsat = False
126 |     z3_models_assert = []
127 |     z3_models_negate = []
128 |     for model in asserts:
129 |         modelname = path.basename(model)
130 |         parser = Parser(ptype=Parser.ParserType.DIFFERENTIAL_ASSERT,
131 |                         input_size=size,
132 |                         custom_defs=defs)
133 |         parser.parse_file(model)
134 |         backend = Z3Backend(name=modelname, voi=voi)
135 |         backend.exec_statements(parser.statements)
136 |         z3_models_assert.append(backend)
137 |     for model in negates:
138 |         modelname = path.basename(model)
139 |         parser = Parser(ptype=Parser.ParserType.DIFFERENTIAL_NEGATE,
140 |                         input_size=size,
141 |                         custom_defs=defs)
142 |         parser.parse_file(model)
143 |         backend = Z3Backend(name=modelname, voi=voi)
144 |         backend.exec_statements(parser.statements)
145 |         z3_models_negate.append(backend)
146 | 
147 |     constraints_db = create_constraints_db((*z3_models_assert,
148 |                                             *z3_models_negate))
149 |     to_process = []
150 |     processed = set()
151 |     violated_once = set()
152 |     current_constraints = ()
153 |     z3_model_support = None
154 |     iteration = 0
155 |     models = []
156 |     while True:
157 |         violated_constraints = set()
158 |         model, testcase = generate(z3_models_assert, z3_models_negate,
159 |                                    z3_model_support)
160 |         if model:
161 |             models.append(model)
162 |             #### Find violated constraints
163 |             violated_constraints = find_violations(model, z3_models_negate)
164 |             log.critical(f"Violated Constraints: {violated_constraints}")
165 |             #### Write testcase
166 |             write_testcase(testcase, violated_constraints,
167 |                            f"{outfile}_{iteration}")
168 | 
169 |         z3_model_support = next_iteration(violated_constraints,
170 |                                           violated_once,
171 |                                           processed,
172 |                                           to_process,
173 |                                           constraints_db)
174 |         if not z3_model_support:
175 |             break
176 |         iteration += 1
177 | 


--------------------------------------------------------------------------------
/SPECIFICATIONS.md:
--------------------------------------------------------------------------------
  1 | # Language Specifications
  2 | 
  3 | ## Models
  4 | A model is a file (compliant with these specifications) that describes the loading phase of a program, that precedes its launch.  
  5 | In general, this process can be divided in a series of stages of two types: parsing stages and validation stages.  
  6 | The first stages produce a set of intermediate values starting from the original program.  
  7 | These values are then used during the validation stages to enforce soft or hard constraints.
  8 | Soft constraints are used to determine which are the following steps of the loading process, while hard constraints are those that abort the entire process tout court in case they are not met.  
  9 | Models can be used for two purposes: testcase generation and program validation.  
 10 | Testcase generation consists in interpreting the model as a set of SMT constraints. By means of an SMT solver, it is possible to produce sequences of bytes (testcases) that meet all these constraints. If the model is consistent with the loader's behavior, feeding the loader with a testcase will result in the loading phase succeeding.  
 11 | Program validation, instead, consists in checking whether a given program respects the constraints enforced by a loader. In other words, the validation process ultimately forecasts whether a program would be successfully loaded by a specific loader or not.
 12 | 
 13 | ## Core Concepts of the Language
 14 | ### Structures
 15 | Loaders usually cast part of the program headers to well-known data structures, often declared in the C language.  
 16 | Our language provides support to C types that can be imported in a model by parsing C header files.
 17 | ### Immediate Values
 18 | Immediate values are either integers (both base-10 and base-16 numbers are allowed) or single characters. Internally, they are all parsed as integers.
 19 | ### Variables
 20 | Symbolic names given to expressions.
 21 | ### Expressions
 22 | Recursive structures that combine variables, immediate values and other expressions by means of operators.
 23 | ### Operators Semantics and Arity
 24 | | Operator | Arity | Signed | Sized | Meaning | Syntax |
 25 | |:--------:|:-----:|:------:|:-----:|:---------------------------------------------:|:--------------------:|
 26 | | ADD | 2 | Y | Y | Integer addition |  |
 27 | | SUB | 2 | Y | Y | Integer difference |  |
 28 | | MUL | 2 | Y | Y | Integer product |  |
 29 | | DIV | 2 | Y | Y | Integer division |  |
 30 | | UDIV | 2 | N | Y | Integer unsigned division |  |
 31 | | MOD | 2 | Y | Y | Integer Modulo |  |
 32 | | BITOR | 2 | N | Y | Bitwise OR |  |
 33 | | BITAND | 2 | N | Y | Bitwise AND |  |
 34 | | BITNOT | 1 | N | Y | Bitwise NOT |  |
 35 | | OR | 2 | - | - | Logic OR |  |
 36 | | AND | 2 | - | - | Logic AND |  |
 37 | | NOT | 1 | - | - | Logic NOT |  |
 38 | | EQ | 2 | - | Y | Integer Equality test |  |
 39 | | NEQ | 2 | - | Y | Integer Inequality test |  |
 40 | | GT/GE | 2 | Y | Y | Integer greater [or equal] comparison |  |
 41 | | LT/LE | 2 | Y | Y | Integer less [or equal] comparison |  |
 42 | | UGT/UGE | 2 | N | Y | Unsigned greater [or equal] comparison |  |
 43 | | ULT/ULE | 2 | N | Y | Unsigned less [or equal] comparison |  |
 44 | | ISPOW2 | 1 | N | N | True if &lt;arg&gt; is a power of 2 |  |
 45 | | SHL/SHR | 2 | N | Y | Left/Right logic bit-shift | |
 46 | | OVFLADD | 2 | - | Y | True if the sum of the two operands produce an overflow | |
 47 | | Indexing | 2 | N | Y | Single byte extraction | &lt;var&gt;[byteindex] |
 48 | | Slice | 3 | N | Y | Bytevector extraction | &lt;var&gt;[start, nbytes] |
 49 | 
 50 | ### Syntactic Sugars
 51 | |Expression | Meaning | Use case |
 52 | |:---------------:|:---------------------------------------:|----------------------------------------------------|
 53 | |STRCMP V1 I 'ME' | AND (EQ V1[I] "M") (EQ V1[ADD I 1] "E") | Comparison/Constraints involving printable strings |
 54 | 
 55 | ## Valid Statements
 56 | ### INPUT Statements
 57 | #### Syntax
 58 | `INPUT <variable name> (<size in bytes> | AS <type>)`  
 59 | #### Description
 60 | Declares an input variable. For testcase generation, this variable will be completely symbolic, meaning that it could assume any possible value. For program validation, input variables are fed into the process from outside.  
 61 | An example of an input variable is the file to produce or validate.  
 62 | Input variables must have a fixed size, which can be declared explicitly, by adding the size in bytes after the variable name, or implicitly, by adding a type declaration by means of the `AS` keyword.
 63 | 
 64 | ### DEFINE Statements
 65 | #### Syntax
 66 | `DEFINE <constant name> <immediate value>`
 67 | #### Description
 68 | Syntactic sugar to give names to immediate values. Useful for those that repeat often in a model.  
 69 | Interpreted by the frontend parser/preprocessor.
 70 | 
 71 | ### Soft-Constraints Statements
 72 | #### Syntax
 73 | `V<number>: <expression>`
 74 | #### Description
 75 | Introduce a condition that can be later used for conditional statements.  
 76 | The value of the condition is defined by the expression on the right side of the statement.
 77 | 
 78 | ### Validation Statements
 79 | #### Syntax (unconditional)
 80 | `V<number>: <expression> TERM`
 81 | #### Syntax (conditional)
 82 | `V<number>(Vn[, Vm[, Vo[...]]]): <expression> TERM`
 83 | #### Description
 84 | Introduce a hard constraint.  
 85 | During program validation, if the constraint is not met, the entire process fails.  
 86 | During testcase generation, the list of hard constraints is translated into SMT constraints and then fed to the backend to produce the testcase.
 87 | #### Semantics of Conditional Validation
 88 | A conditional validation is a hard constraint that behaves in the following way:
 89 | 1. if at least one of its prior conditions is not met, its value is TRUE (meaning that the hard constraint is met)  
 90 | 2. if all its prior constraints are met, its value is defined by its expression  
 91 | Conditional hard-constraints
 92 | In other words, conditional hard-constraints influence the loading process _only_ if their prior constraints are met.
 93 | 
 94 | ### Parsing Statements
 95 | #### Syntax (unconditional)
 96 | `P: <variable name> <- <expression> [AS <type>]`
 97 | #### Syntax (conditional)
 98 | `P(Vn[, Vm[, Vo[...]]]): <variable name> <- <expression> [AS <type>]`
 99 | #### Description
100 | Introduce a parsing stage in the model.  
101 | This roughly corresponds to a variable assignment in procedural languages.  
102 | #### Semantics of Conditional Parsing
103 | A conditional parsing statement roughly corresponds to a variable assignment in a _if-then-else_ statement in procedural languages, with a few caveats.
104 | In the first place, a variable introduced within a conditional statement will have a value of 0 if its conditions are not met. The language parser will also produce a warning when this happens, since it could lead to unwanted behaviors.  
105 | If, instead, the output variable of the statement was already defined (i.e., in a previous unconditional parsing statement), and if its conditions are not all met, its value is the one it held before the conditional parsing statement.
106 | 
107 | ### Fixed-increment Loop Statements
108 | #### Syntax (start)
109 | `L<number>: <output> <- LOOP(<input>, <startingoffset>, <structsize>, <count>, <maxunroll>) [AS <type>]`  
110 | `<output>`: variable name  
111 | `<input>`: expression  
112 | `<startingoffset>`: expression  
113 | `<structsize>`: immediate  
114 | `<count>`: expression  
115 | `<maxunroll>`: integer  
116 | #### Syntax (end)
117 | `END L<number>`
118 | #### Description
119 | This statement declares a loop iterating over an array of structures.  
120 | An iteration is made up of all the statements between the start of the loop and its end.  
121 | At the n-th iteration of the loop the output value takes the n-th element of the array.  
122 | The `input` argument is the expression on which to slice upon to extract the elements of the array.  
123 | The `startingoffset` is an expression that indicates the offset (in bytes) at which the array starts within the `input` variable.  
124 | `structsize` represents the size (in bytes) of each element of the array. It must be an immediate.  
125 | `count` is an expression representing the number of iterations of the loop.  
126 | `maxunroll` is an integer used _only during testcase generation_ as an upper bound for `count`. Its role is fundamental due to the lack of loop support in SMT solvers. In fact, loops in our language are unrolled to overcome this limitation.
127 | 
128 | ### Generic Loop Statements
129 | #### Syntax (start)
130 | `L<number>: <output> <- VLOOP(<start>, <next>, <condition>, <maxunroll>)`
131 | `<output>`: variable name  
132 | `<start>`: expression  
133 | `<next>`: variable name  
134 | `<condition>`: condition name  
135 | `<maxunroll>`: integer  
136 | #### Syntax (end)
137 | `END L<number>`
138 | #### Description
139 | Declares a generic loop to execute the same set of statements multiple times up until a certain condition is met.  
140 | At each iteration the `output` variable takes a different value, according to the following scheme:
141 | 1. During the first iteration, its value is set to that of the `start` expression  
142 | 2. In the following iterations, its value is set to that of the `next` variable  
143 | The `next` variable must be set inside the body of the loop, by means of a `P` statement.  
144 | `condition` is the identifier of a soft-constraint declared within the body of the loop.  
145 | The semantics of `maxunroll` is the same as for the `fixed-increment loop` statements.  
146 | 


--------------------------------------------------------------------------------
/testcases/windows/xp/winxp_createprocess.exe:
--------------------------------------------------------------------------------
1 | MZ  PE  L             z                                                       @                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           


--------------------------------------------------------------------------------
/modelLang/classes.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import coloredlogs
  3 | from enum import Enum, unique, auto
  4 | 
  5 | log = logging.getLogger(__name__)
  6 | coloredlogs.install(level="NOTSET", logger=log)
  7 | 
  8 | @unique
  9 | class Optimizations(Enum):
 10 |     MAXIMIZE = auto()
 11 |     MINIMIZE = auto()
 12 | 
 13 | class Statement(object):
 14 |     def __init__(self):
 15 |         self.lineno = 0
 16 | 
 17 | class Base(object):
 18 |     def __sub__(self, other):
 19 |         return self.symb - other
 20 |     def __rsub__(self, other):
 21 |         return other - self.symb
 22 | 
 23 | class Expression(Base):
 24 |     OPCODES = {'VAR'        : 1,
 25 |                'IMM'        : 1,
 26 |                'ADD'        : 2,
 27 |                'SUB'        : 2,
 28 |                'MUL'        : 2,
 29 |                'DIV'        : 2,
 30 |                'UDIV'       : 2,
 31 |                'MOD'        : 2,
 32 |                'AND'        : 2,
 33 |                'OR'         : 2,
 34 |                'NOT'        : 1,
 35 |                'ULE'        : 2,
 36 |                'UGE'        : 2,
 37 |                'ULT'        : 2,
 38 |                'UGT'        : 2,
 39 |                'EQ'         : 2,
 40 |                'NEQ'        : 2,
 41 |                'GE'         : 2,
 42 |                'LE'         : 2,
 43 |                'GT'         : 2,
 44 |                'LT'         : 2,
 45 |                'BITOR'      : 2,
 46 |                'BITAND'     : 2,
 47 |                'BITNOT'     : 1,
 48 |                'SHR'        : 2,
 49 |                'SHL'        : 2,
 50 |                'Slice'      : 3,
 51 |                'Index'      : 2,
 52 |                'ISPOW2'     : 1,
 53 |                'ALIGNUP'    : 2,
 54 |                'ALIGNDOWN'  : 2,
 55 |                'ISALIGNED'  : 2,
 56 |                'SECT'       : 1,
 57 |                'NSECT'      : 1,
 58 |                'OPTHDR'     : 1,
 59 |                'OVFLADD'    : 2,
 60 |                'INT'        : 2,
 61 |     }
 62 | 
 63 |     def __init__(self, opcode, *operands):
 64 |         if opcode not in self.OPCODES:
 65 |             raise ValueError(f"Unknown opcode {opcode}")
 66 |         self.opcode = opcode
 67 |         if len(operands) != self.OPCODES[opcode]:
 68 |             raise ValueError(f"Opcode {opcode} expects {self.OPCODES[opcode]}."
 69 |                              f"{len(operands)} provided instead.")
 70 |         if any([type(op) not in (Variable, Immediate, Expression) for op in operands]):
 71 |             raise TypeError("Operands of not supported types")
 72 | 
 73 |         self.operands = operands
 74 | 
 75 |     def __repr__(self):
 76 |         tmp = f"<Expression ({self.opcode} {self.operands})>"
 77 |         return tmp
 78 | 
 79 |     def pprint(self, spacing="    "):
 80 |         if self.opcode in ("VAR", "IMM"):
 81 |             return self.operands[0].pprint()
 82 |         ret = ""
 83 |         ret += f"{self.opcode}(\n"
 84 |         args = ",\n".join(
 85 |             ["\n".join([(spacing + line) for line in x.pprint().split("\n")])
 86 |              for x in self.operands])
 87 |         ret += args
 88 |         ret += "\n)"
 89 |         return ret
 90 | 
 91 | class Immediate(object):
 92 |     def __init__(self, value):
 93 |         self.value = value
 94 | 
 95 |     def __repr__(self):
 96 |         return f"<Immediate {self.value}>"
 97 | 
 98 |     def pprint(self):
 99 |         return str(self.value)
100 | 
class BoolImmediate(Immediate):
    """An ``Immediate`` whose value is required to be a ``bool``."""

    def __init__(self, value):
        """Store ``value`` after checking it is a bool.

        Raises:
            TypeError: ``value`` is not a ``bool``.
        """
        if not isinstance(value, bool):
            # The original read an undefined name here, so the intended
            # TypeError surfaced as a NameError instead.
            t = type(value)
            raise TypeError(f"value must be of type bool. {t} found instead")
        super().__init__(value)
107 | 
108 | 
class Variable:
    """A named variable with an optional type tag."""

    def __init__(self, name, type=None):
        self.name = name
        self.type = type

    def __repr__(self):
        # The type is only mentioned when it is truthy.
        suffix = f" of type {self.type}" if self.type else ""
        return f"<Variable {self.name}{suffix}>"

    def pprint(self):
        """Return the variable name unchanged."""
        return self.name
120 | 
class Input(Statement):
    """Statement pairing a variable with a size (shown in bytes by repr)."""

    def __init__(self, var, size):
        super().__init__()
        self.var, self.size = var, size

    def __repr__(self):
        return f"<Input {self.var} {self.size} bytes>"
130 | 
class Output(Statement):
    """Statement pairing a variable with a size (shown in bytes by repr)."""

    def __init__(self, var, size):
        super().__init__()
        self.var, self.size = var, size

    def __repr__(self):
        return f"<Output {self.var} {self.size} bytes>"
140 | 
class Assignment(Statement):
    """Assignment of an expression to a variable, optionally conditional.

    ``left`` must be a ``Variable`` and ``right`` an ``Expression``.
    ``conditions`` is a list of ``Condition`` objects; ``None`` means no
    conditions.
    """

    def __init__(self, left, right, conditions=None):
        """Validate and store the operands and the optional conditions.

        Raises:
            TypeError: ``left`` is not a Variable, ``right`` is not an
                Expression, or a non-empty ``conditions`` is not a list of
                Condition objects.
        """
        super(Assignment, self).__init__()
        if not isinstance(left, Variable):
            t = type(left)
            raise TypeError(f"Left operand of an "
                            f"assignment must be a variable. "
                            f"It is {t} instead")
        self.left = left

        if not isinstance(right, Expression):
            t = type(right)
            raise TypeError(f"Right operand of an "
                            f"assignment must be an expression. "
                            f"It is {t} instead")
        self.right = right

        # Only truthy values are validated, as before: None and empty
        # containers are stored without a type check.
        if conditions and not isinstance(conditions, list):
            t = type(conditions)
            raise TypeError(f"Conditions must be a list. "
                            f"It is {t} instead")
        if conditions and not all(isinstance(x, Condition) for x in conditions):
            raise TypeError("Conditions must be a list of Condition object")
        self._conditions = [] if conditions is None else conditions

    def __repr__(self):
        s = f"<Assignment {self.left} <- {self.right}"
        if self._conditions:
            s += f" if {self._conditions}"
        # The closing bracket is emitted unconditionally; previously it was
        # missing for assignments without conditions.
        return s + ">"

    @property
    def conditions(self):
        """The list of conditions guarding this assignment."""
        return self._conditions

    @conditions.setter
    def conditions(self, new):
        if not isinstance(new, list):
            t = type(new)
            raise TypeError(f"Conditions must be a list. "
                            f"It is {t} instead")
        if not all(isinstance(x, Condition) for x in new):
            raise TypeError("Conditions must be a list of Condition object")
        self._conditions = new

    @property
    def conditional(self):
        """True when at least one condition is attached."""
        return len(self._conditions) != 0
187 | 
class Loop(Statement):
    """Loop statement holding a list of nested statements.

    ``input_var``, ``startpos`` and ``count`` must be ``Expression``
    instances; the remaining arguments are stored without validation.
    """

    def __init__(self, loop_name, output_name, input_var, startpos, structsize, count, maxunroll, vtype=None, conditions=None):
        """Validate the expression arguments and store everything.

        Raises:
            TypeError: ``input_var``, ``startpos`` or ``count`` is not an
                Expression.
        """
        super(Loop, self).__init__()
        self._loop_name = loop_name
        self.output_name = output_name
        if not isinstance(input_var, Expression):
            t = type(input_var)
            raise TypeError(f"Expected Expression for input_var. "
                            f"Found {t}")
        self.input_var = input_var

        if not isinstance(startpos, Expression):
            # The original read the undefined name ``start_pos`` here and
            # therefore raised NameError instead of TypeError.
            t = type(startpos)
            raise TypeError(f"Expected Expression for startpos. "
                            f"Found {t}")
        self.startpos = startpos

        if not isinstance(count, Expression):
            t = type(count)
            raise TypeError(f"Expected Expression for count. "
                            f"Found {t}")
        self.count = count

        self.maxunroll = maxunroll
        self.structsize = structsize
        self._statements = []
        self.vtype = vtype
        self._conditions = [] if conditions is None else conditions

    def add_statement(self, stmt):
        """Append ``stmt`` to the loop body.

        Raises:
            TypeError: ``stmt`` is not a Statement.
        """
        if not isinstance(stmt, Statement):
            t = type(stmt)
            raise TypeError(f"Expected Statement for stmt. "
                            f"Found {t} instead")
        self._statements.append(stmt)

    def __repr__(self):
        return f"<Loop {self._loop_name}: {self.output_name} in {self.input_var}>"
227 | 
class VLoop(Loop):
    """Loop variant described by a start expression, a "next" variable and
    a continuation condition name.

    ``start`` must be an ``Expression``, ``nextname`` a ``Variable`` and
    ``contcondition`` a ``str``; the other arguments are stored unchecked.
    """

    def __init__(self, loop_name, output_name, start, nextname, contcondition, maxunroll, vtype=None, conditions=None):
        """Validate the typed arguments and store everything.

        Raises:
            TypeError: ``start``, ``nextname`` or ``contcondition`` has the
                wrong type.
        """
        # Loop.__init__ takes a different argument set, so only the
        # Statement initialiser is run.
        Statement.__init__(self)
        self._loop_name = loop_name
        self.output_name = output_name
        if not isinstance(start, Expression):
            t = type(start)
            raise TypeError(f"Expected Expression for start. "
                            f"Found {t}")
        self.start = start

        if not isinstance(nextname, Variable):
            t = type(nextname)
            raise TypeError(f"Expected Variable for nextname. "
                            f"Found {t}")
        self.nextname = nextname

        if not isinstance(contcondition, str):
            t = type(contcondition)
            raise TypeError(f"Expected str for contcondition. "
                            f"Found {t}")
        self.contcondition = contcondition

        self.maxunroll = maxunroll
        self._statements = []
        self.vtype = vtype
        self._conditions = [] if conditions is None else conditions

    def __repr__(self):
        return (f"<VLoop {self._loop_name}: {self.output_name}, "
                f"starting as {self.start}, updated by {self.nextname}, "
                f"until {self.contcondition}>")
259 | 
class Condition(Statement):
    """A boolean expression, optionally terminal, named, and itself guarded
    by other conditions.

    ``expr`` is either an ``Expression`` or a plain ``bool``; a bool is
    wrapped into an ``IMM`` expression.
    """

    def __init__(self, expr, isterminal, conditions=None, name=None):
        """Validate and store the condition.

        Raises:
            TypeError: ``expr`` is neither an Expression nor a bool, or
                ``conditions`` is not a list of Condition objects.
        """
        super(Condition, self).__init__()
        if isinstance(expr, Expression):
            self.expr = expr
        elif isinstance(expr, bool):
            self.expr = Expression("IMM", Immediate(expr))
        else:
            t = type(expr)
            raise TypeError(f"expr must be an Expression or a bool. "
                            f"It is {t} instead")
        self.isterminal = bool(isterminal)

        if conditions is None:
            conditions = []

        if not isinstance(conditions, list):
            t = type(conditions)
            raise TypeError(f"Conditions must be a list. "
                            f"It is {t} instead")
        if not all(isinstance(x, Condition) for x in conditions):
            raise TypeError("Conditions must be a list of Condition object")
        self._conditions = conditions
        self.name = name

    @property
    def conditions(self):
        """The list of conditions guarding this condition."""
        return self._conditions

    @conditions.setter
    def conditions(self, new):
        if not isinstance(new, list):
            t = type(new)
            raise TypeError(f"Conditions must be a list. "
                            f"It is {t} instead")
        if not all(isinstance(x, Condition) for x in new):
            raise TypeError("Conditions must be a list of Condition object")
        self._conditions = new

    @property
    def conditional(self):
        """True when at least one guarding condition is attached."""
        return len(self._conditions) != 0

    def __repr__(self):
        s = "<"
        s += "Terminal " if self.isterminal else ""
        s += f"Condition {self.expr}"
        if self._conditions:
            s += f" if {self._conditions}"
        # Always close the bracket; it used to be dropped when there were
        # no guarding conditions.
        return s + ">"

    def __invert__(self):
        """Return a new Condition wrapping ``NOT`` of this expression.

        The result shares this object's conditions list and has no name.
        """
        return Condition(Expression("NOT", self.expr), self.isterminal,
                         self._conditions)

    def add_prefix(self, prefix):
        """Prepend ``prefix`` to the name; an unnamed condition is treated
        as having an empty name (with a warning)."""
        if self.name is None:
            log.warning("Adding prefix to unnamed condition")
            self.name = ""
        self.name = prefix + self.name

    def clone(self):
        """Return a copy with the same expression, terminal flag and name,
        and a shallow copy of the conditions list."""
        conditions = list(self._conditions)
        new = Condition(self.expr, self.isterminal, conditions)
        new.name = self.name
        return new
322 | 
class Define(Statement):
    """Binding of a name to an immediate (``IMM``) expression."""

    def __init__(self, name, value):
        """Store ``name`` and ``value``.

        Raises:
            TypeError: ``value`` is not an Expression, or its opcode is not
                ``'IMM'``. The reason is logged before raising.
        """
        super().__init__()

        problem = None
        if not isinstance(value, Expression):
            found = type(value)
            problem = f"value expected to be of type Expression. {found} found instead"
        elif value.opcode != 'IMM':
            problem = f"Value must be an immediate expression. {value.opcode} found instead"

        if problem is not None:
            log.error(problem)
            raise TypeError

        self.name, self.value = name, value
335 | 
class Optimization(Statement):
    """Statement asking to maximize or minimize an expression."""

    def __init__(self, strategy, expression):
        """Store the strategy and the target expression.

        Raises:
            TypeError: ``strategy`` is not a member of ``Optimizations``.
        """
        super(Optimization, self).__init__()
        if not isinstance(strategy, Optimizations):
            t = type(strategy)
            message = (f"strategy expected to be one of the supported "
                       f"Optimizations. {t} found instead")
            log.error(message)
            # Was ``raise TypeErrorx``, which itself failed with NameError.
            raise TypeError(message)
        self.strategy = strategy
        self.expression = expression
345 | 
class Debug(Statement):
    """Statement that simply carries an expression in ``expr``."""

    def __init__(self, expr):
        super().__init__()
        self.expr = expr
350 | 
class ConditionListEntry(Base):
    """A condition referenced by name, with a flag recording negation."""

    def __init__(self, name, negated=False):
        self.name = name
        self.negated = negated

    def __add__(self, other):
        """Return a new ``ConditionList`` starting with this entry.

        Raises:
            TypeError: ``other`` is neither a ConditionList nor a
                ConditionListEntry.
        """
        if isinstance(other, ConditionList):
            return ConditionList([self, *other.l])
        if isinstance(other, ConditionListEntry):
            return ConditionList([self, other])
        t = type(other)
        raise TypeError(f"other must be either a ConditionList or a ConditionListEntry. "
                        f"It is {t} instead")

    def __repr__(self):
        # Only the name is shown; the negation flag is not part of the repr.
        return self.name
367 | 
class ConditionList(Base):
    """A list of ``ConditionListEntry`` objects supporting ``+`` and ``+=``."""

    def __init__(self, l):
        """Wrap ``l`` (stored by reference, not copied).

        Raises:
            TypeError: ``l`` is not a list of ConditionListEntry objects.
        """
        if not isinstance(l, list):
            t = type(l)
            raise TypeError(f"l must be a list. It is {t} instead")
        if not all(isinstance(x, ConditionListEntry) for x in l):
            raise TypeError("All elements of l must be of type ConditionListEntry")

        self.l = l

    @staticmethod
    def _entries_of(other):
        """Return the entries contributed by ``other`` as a list.

        Raises:
            TypeError: ``other`` is neither a ConditionList nor a
                ConditionListEntry.
        """
        if isinstance(other, ConditionList):
            return other.l
        if isinstance(other, ConditionListEntry):
            return [other]
        t = type(other)
        raise TypeError(f"other must be either a ConditionList or a ConditionListEntry. "
                        f"It is {t} instead")

    @property
    def names(self):
        """Names of the entries, in order."""
        return [x.name for x in self.l]

    def __iadd__(self, other):
        # Extends the wrapped list in place.
        self.l += self._entries_of(other)
        return self

    def __add__(self, other):
        # Builds a new list; neither operand is modified.
        return ConditionList(self.l + self._entries_of(other))

    def __repr__(self):
        return "[" + ', '.join(str(x) for x in self.l) + ']'

    def __iter__(self):
        return iter(self.l)
409 | 


--------------------------------------------------------------------------------
/modelLang/backends/python_backend.py:
--------------------------------------------------------------------------------
  1 | from math import log2
  2 | import logging
  3 | import coloredlogs
  4 | from collections import deque
  5 | 
  6 | from pwnlib.util.packing import pack, unpack
  7 | 
  8 | from .default_backend import DefaultBackend, VerificationError
  9 | from ..classes import (Base, Immediate, Variable, Expression, Input,
 10 |                        Assignment, Condition, Loop, VLoop, Debug)
 11 | 
 12 | def extend(value, n, signed):
 13 |     if not signed:
 14 |         return value + b'\x00'*n
 15 |     trail = b'\x00' if (value[-1] & (0x80)) == 0 else b'\xff'
 16 |     return value + trail*n
 17 | 
 18 | def sized(skipargs=(), skipret=False, sign=False):
 19 |     def sized_outer(func):
 20 |         def sized_inner(*args):
 21 |             targs = [x for n, x in enumerate(args) if n not in skipargs]
 22 |             max_size = max(len(x) for x in targs)
 23 |             args = [extend(x, max_size - len(x), sign) for x in targs]
 24 |             ret = func(*args)
 25 |             if not skipret: # pack output
 26 |                 lendiff = len(ret) - max_size
 27 |                 if lendiff >= 0: # if output is longer than input, cut it
 28 |                     return ret[:max_size]
 29 |                 else: # if smaller, extend it (trail, depends on signness)
 30 |                     return extend(ret, -lendiff, sign)
 31 |             else:
 32 |                 return ret
 33 |         return sized_inner
 34 |     return sized_outer
 35 | 
 36 | def unsigned(skipargs=(), skipret=False):
 37 |     def unsigned_outer(func, *skip):
 38 |         def unsigned_inner(*args):
 39 |             # unpack the argumts as unsigned (unless they are ignored)
 40 |             args = [unpack(x, 'all', endianness='little', sign=False)
 41 |                     if n not in skipargs else x for n, x in enumerate(args)]
 42 |             ret = func(*args)
 43 |             return ret if skipret else pack(ret, 'all',
 44 |                                             endianness='little')
 45 |         return unsigned_inner
 46 |     return unsigned_outer
 47 | 
 48 | def signed(skipargs=(), skipret=False):
 49 |     def signed_outer(func, *skip):
 50 |         def signed_inner(*args):
 51 |             args = [unpack(x, 'all', endianness='little', sign=True)
 52 |                     if n not in skipargs else x for n, x in enumerate(args)]
 53 |             ret = func(*args)
 54 |             return ret if skipret else pack(ret, 'all',
 55 |                                             endianness='little', sign=True)
 56 |         return signed_inner
 57 |     return signed_outer
 58 | 
 59 | 
 60 | class PythonBackend(DefaultBackend):
 61 |     def __init__(self):
 62 |         super().__init__()
 63 |         self.funcs = { 'ADD'         : self.ADD,
 64 |                        'SUB'         : self.SUB,
 65 |                        'MUL'         : self.MUL,
 66 |                        'DIV'         : self.DIV,
 67 |                        'UDIV'        : self.UDIV,
 68 |                        'MOD'         : self.MOD,
 69 |                        'AND'         : self.And,
 70 |                        'OR'          : self.Or,
 71 |                        'NOT'         : self.Not,
 72 |                        'ULE'         : self.ULE,
 73 |                        'UGE'         : self.UGE,
 74 |                        'ULT'         : self.ULT,
 75 |                        'UGT'         : self.UGT,
 76 |                        'EQ'          : self.EQ,
 77 |                        'NEQ'         : self.NEQ,
 78 |                        'GE'          : self.GE,
 79 |                        'LE'          : self.LE,
 80 |                        'GT'          : self.GT,
 81 |                        'LT'          : self.LT,
 82 |                        'BITOR'       : self.BITOR,
 83 |                        'BITAND'      : self.BITAND,
 84 |                        'BITNOT'      : self.BITNOT,
 85 |                        'Slice'       : self.Slice,
 86 |                        'Index'       : self.Slice,
 87 |                        'ISPOW2'      : self.ISPOW2,
 88 |                        'INT'         : self.INT,
 89 |                        'VAR'         : self.VAR,
 90 |                        'IMM'         : self.IMM,
 91 |                        'SHR'         : self.SHR,
 92 |                        'SHL'         : self.SHL,
 93 |                        'ALIGNUP'     : self.ALIGNUP,
 94 |                        'ALIGNDOWN'   : self.ALIGNDOWN,
 95 |                        'ISALIGNED'   : self.ISALIGNED,
 96 |                        'OVFLADD'     : self.OVFLADD
 97 |         }
 98 |         self.log = logging.getLogger(__name__)
 99 |         self.log.setLevel(logging.CRITICAL)
100 |         coloredlogs.install(level="CRITICAL", logger=self.log)
101 |         self._last_fail = None
102 | 
103 | 
104 |     @staticmethod
105 |     @sized(sign=False)
106 |     @unsigned()
107 |     def ADD(a, b):
108 |         return a + b
109 | 
110 |     @staticmethod
111 |     @sized(sign=False)
112 |     @unsigned()
113 |     def SUB(a, b):
114 |         return a - b
115 | 
116 |     @staticmethod
117 |     @sized()
118 |     @signed()
119 |     def MUL(a, b):
120 |         return a * b
121 | 
122 |     @staticmethod
123 |     @sized()
124 |     @signed()
125 |     def MOD(a, b):
126 |         return a % b
127 | 
128 |     @staticmethod
129 |     @sized()
130 |     @signed()
131 |     def DIV(a, b):
132 |         return a // b
133 | 
134 |     @staticmethod
135 |     @sized()
136 |     @unsigned()
137 |     def UDIV(a, b):
138 |         return a // b
139 | 
140 |     @staticmethod
141 |     @sized(skipret=True)
142 |     @unsigned(skipret=True)
143 |     def EQ(a, b):
144 |         return a == b
145 | 
146 |     @staticmethod
147 |     @sized(skipret=True)
148 |     @unsigned(skipret=True)
149 |     def NEQ(a, b):
150 |         return a != b
151 | 
152 |     @staticmethod
153 |     @sized(sign=False)
154 |     @unsigned()
155 |     def BITOR(a, b):
156 |         return a | b
157 | 
158 |     @staticmethod
159 |     @sized(sign=False)
160 |     @unsigned()
161 |     def BITAND(a, b):
162 |         return a & b
163 | 
164 |     @staticmethod
165 |     @sized(sign=True)
166 |     @signed()
167 |     def BITNOT(a):
168 |         return ~a
169 | 
170 |     @staticmethod
171 |     @unsigned()
172 |     def ISPOW2(a):
173 |         return (a == 0) or (a & (a - 1)) == 0
174 | 
175 |     @staticmethod
176 |     @unsigned()
177 |     def ISALIGNED(a, b):
178 |         return (a & (b -1)) == 0
179 | 
180 |     @staticmethod
181 |     def And(a, b):
182 |         return a and b
183 | 
184 |     @staticmethod
185 |     def Or(a, b):
186 |         return a or b
187 | 
188 |     @staticmethod
189 |     def Not(a):
190 |         return not a
191 | 
192 |     @staticmethod
193 |     @sized(skipret=True)
194 |     @unsigned(skipret=True)
195 |     def ULE(a, b):
196 |         return a <= b
197 | 
198 |     @staticmethod
199 |     @sized(skipret=True)
200 |     @unsigned(skipret=True)
201 |     def UGE(a, b):
202 |         return a >= b
203 | 
204 |     @staticmethod
205 |     @sized(skipret=True)
206 |     @unsigned(skipret=True)
207 |     def ULT(a, b):
208 |         return a < b
209 | 
210 |     @staticmethod
211 |     @sized(skipret=True)
212 |     @unsigned(skipret=True)
213 |     def UGT(a, b):
214 |         return a > b
215 | 
216 |     @staticmethod
217 |     @sized(skipret=True)
218 |     @signed(skipret=True)
219 |     def GE(a, b):
220 |         return a >= b
221 | 
222 |     @staticmethod
223 |     @sized(skipret=True)
224 |     @signed(skipret=True)
225 |     def LE(a, b):
226 |         return a <= b
227 | 
228 |     @staticmethod
229 |     @sized(skipret=True)
230 |     @signed(skipret=True)
231 |     def LT(a, b):
232 |         return a < b
233 | 
234 |     @staticmethod
235 |     @sized(skipret=True)
236 |     @signed(skipret=True)
237 |     def GT(a, b):
238 |         return a > b
239 | 
240 |     @staticmethod
241 |     @sized(skipret=True)
242 |     @unsigned(skipret=True)
243 |     def INT(a, b):
244 |         return pack(a, b*8, endianness="little")
245 | 
246 |     @staticmethod
247 |     @unsigned(skipargs=(0, ), skipret=True)
248 |     def Slice(var, start, cnt=1):
249 |         if cnt == 1:
250 |             # Indexing a b-string in python returns an int...
251 |             return pack(var[start], 'all')
252 |         else:
253 |             return var[start:start+cnt]
254 | 
255 |     @staticmethod
256 |     def IMM(imm):
257 |         val = imm.value if isinstance(imm, Immediate) else imm
258 |         if type(val) == bool:
259 |             return val
260 |         return pack(val, 'all', endianness='little')
261 | 
262 |     @staticmethod
263 |     @sized()
264 |     @unsigned()
265 |     def SHR(a, b):
266 |         return a >> b
267 | 
268 |     @staticmethod
269 |     @sized()
270 |     @unsigned()
271 |     def SHL(a, b):
272 |         return a << b
273 | 
274 |     @staticmethod
275 |     @sized()
276 |     @unsigned()
277 |     def ALIGNUP(a, b):
278 |         return (a + b - 1) & -b
279 | 
280 |     @staticmethod
281 |     @sized()
282 |     @unsigned()
283 |     def ALIGNDOWN(a, b):
284 |         return a & -b
285 | 
286 |     @staticmethod
287 |     @sized(skipret=True)
288 |     def OVFLADD(a, b):
289 |         size = len(a)
290 |         assert size == len(b)
291 |         maxint = 2**(size*8) - 1
292 |         a = unpack(a, 'all', endianness='little', sign=False)
293 |         b = unpack(b, 'all', endianness='little', sign=False)
294 |         return (maxint - a) < b
295 | 
296 |     def VAR(self, var):
297 |         return self.variables[var.name]
298 | 
299 |     funcs_bool  = {'OR', 'AND', 'NOT'}
300 |     funcs_unsigned = {'BITOR', 'BITAND', 'ULE', 'ULT', 'UGT', 'UGE', 'EQ', 'NEQ'}
301 | 
302 |     def dispatch(self, func, *args):
303 |         if not 0 < len(args) < 4:
304 |             self.log.critical(f"Trying to dispatch function with {len(args)}"
305 |                          " arguments")
306 |             raise TypeError
307 |         ret = self.funcs[func](*args)
308 |         return ret
309 | 
310 |     def _exec_input(self, stmt):
311 |         pass
312 | 
313 |     def _exec_unconditional_assignment(self, stmt):
314 |         left = stmt.left.name
315 |         rigth = stmt.right
316 |         self.variables[left] = self._eval_expression(rigth)
317 | 
318 |     def _exec_conditional_assignment(self, stmt):
319 |         left = stmt.left.name
320 |         rigth = stmt.right
321 |         conditions = stmt.conditions
322 |         if left not in self.variables:
323 |             self.log.warning(f"Variable {left} initialized in conditional statement. Defaulting it to 0.")
324 |             self.variables[left] = pack(0, "all")
325 | 
326 |         if all(self._eval_condition(x) for x in conditions):
327 |             self.variables[left] = self._eval_expression(rigth)
328 | 
329 |     def _exec_assignment(self, stmt):
330 |         if stmt.conditional:
331 |             return self._exec_conditional_assignment(stmt)
332 |         else:
333 |             return self._exec_unconditional_assignment(stmt)
334 | 
335 |     def _eval_condition(self, condition, overwrite=False):
336 |         if not overwrite and condition.name and condition.name in self.conditions:
337 |             return self.conditions[condition.name]
338 |         expr = lambda: self._eval_expression(condition.expr)
339 |         conds = all(self._eval_condition(x)
340 |                     for x in condition.conditions)
341 |         if condition.isterminal:
342 |             if conds:
343 |                 return expr()
344 |             else:
345 |                 return True
346 |         return conds and expr()
347 | 
348 |     def _exec_condition(self, stmt):
349 |         name = stmt.name
350 |         if name is None:
351 |             self.log.warning("Executing unnamed condition... Not sure this is intended.")
352 |         res = self._eval_condition(stmt, overwrite=True)
353 |         self.conditions[name] = res
354 | 
355 |         if not res and stmt.isterminal:
356 |             self.log.critical(f"Terminal condition {name} not met. Verification failed")
357 |             raise VerificationError(name)
358 | 
359 |     def _exec_loop(self, stmt):
360 |         if not all(self._eval_condition(x) for x in stmt._conditions):
361 |             return
362 |         name = f"L{stmt._loop_name}"
363 |         varname = Variable(stmt.output_name)
364 |         inputvar = stmt.input_var
365 |         startpos = stmt.startpos
366 |         count = unpack(self._eval_expression(stmt.count), 'all',
367 |                        endianness='little')
368 |         structsize = Expression("INT",
369 |                                 Expression("IMM",
370 |                                            Immediate(stmt.structsize)),
371 |                                 Expression("IMM", Immediate(4)))
372 | 
373 |         self.log.debug(f"Executing loop {name} {count} times")
374 |         for iteration in range(count):
375 |             conditionpref = f"{name}_{iteration}_"
376 |             iterationexpr = Expression("IMM", Immediate(iteration))
377 |             nstartpos = Expression("ADD", startpos,
378 |                                   Expression("MUL", structsize, iterationexpr))
379 |             sliceexpr = Expression("Slice", inputvar, nstartpos, structsize)
380 |             assignment = Assignment(varname, sliceexpr)
381 |             self._exec_assignment(assignment)
382 |             for s in stmt._statements:
383 |                 if isinstance(s, Condition):
384 |                     s = s.clone()
385 |                     s.add_prefix(conditionpref)
386 |                 self._exec_statement(s)
387 | 
388 |     def _exec_vloop(self, stmt):
389 |         if not all(self._eval_condition(x) for x in stmt._conditions):
390 |             return
391 |         name = f"L{stmt._loop_name}"
392 |         varname = Variable(stmt.output_name)
393 |         start = stmt.start
394 |         nextname = stmt.nextname
395 |         contcondition = stmt.contcondition
396 |         if not all((self._eval_condition(x) for x in stmt._conditions)):
397 |             return
398 |         first_assignment = Assignment(varname, start)
399 |         self._exec_assignment(first_assignment)
400 |         initial_condition = Condition(True, False, name=contcondition)
401 |         self._exec_condition(initial_condition)
402 |         i = 0
403 |         while self.conditions[contcondition]:
404 |             i += 1
405 |             for s in stmt._statements:
406 |                 # try:
407 |                 self._exec_statement(s)
408 |                 # except Exception as e:
409 |                 #     print(self.variables[stmt.output_name])
410 |             next_assignment = Assignment(varname, Expression("VAR", nextname))
411 | 
412 |     def _exec_debug(self, stmt):
413 |         self.log.critical(hex(unpack(self._eval_expression(stmt.expr), 'all')))
414 | 
415 |     _exec_table = {Input: _exec_input,
416 |                    Assignment: _exec_assignment,
417 |                    Condition: _exec_condition,
418 |                    Loop: _exec_loop,
419 |                    VLoop: _exec_vloop,
420 |                    Debug: _exec_debug
421 |     }
422 | 
423 |     def verify(self, test, variable="HEADER"):
424 |         self._last_fail = None
425 |         if not self._statements:
426 |             self.log.error("Load statements before call verify()")
427 |             raise ValueError
428 | 
429 |         self.variables[variable] = test
430 |         for stmt in self._statements:
431 |             try:
432 |                 self._exec_statement(stmt)
433 |             except VerificationError as e:
434 |                 self._last_fail = e.name
435 |                 self.log.error(f"Condition {e.name} not satisfied. "
436 |                           "Verification failed.")
437 |                 return False
438 |         return True
439 | 
if __name__ == "__main__":
    # Manual smoke run: build EQ(ADD(VAR input, IMM 8), IMM 8) and evaluate
    # it on a fresh backend.
    inp = Input(Variable("input"), 64)
    bcknd = PythonBackend()
    bcknd._exec_input(inp)
    input_plus_eight = Expression("ADD",
                                  Expression("VAR", Variable("input")),
                                  Expression("IMM", 8))
    expr = Expression("EQ", input_plus_eight, Expression("IMM", 8))
    res1 = bcknd._eval_expression(expr)
450 | 


--------------------------------------------------------------------------------
/modelLang/backends/z3_backend.py:
--------------------------------------------------------------------------------
  1 | from math import log2
  2 | import logging
  3 | import coloredlogs
  4 | from collections import deque
  5 | 
  6 | import z3
  7 | 
  8 | from .default_backend import DefaultBackend
  9 | from ..classes import (Base, Immediate, Variable, Expression, Input, Output,
 10 |                        Assignment, Condition, Loop, VLoop, Optimization,
 11 |                        Optimizations, Debug)
 12 | 
 13 | class Z3Backend(DefaultBackend):
 14 |     print_unsat = True
 15 |     def __init__(self, name="", voi=None, enable_optimizations=False):
 16 |         super().__init__()
 17 |         self.name = name
 18 |         self.voi = voi
 19 |         self._solver = None
 20 |         self._model = None
 21 |         self.z3_funcs = { 'ADD'       : z3.Sum,
 22 |                           'SUB'       : self.SUB,
 23 |                           'MUL'       : self.MUL,
 24 |                           'DIV'       : self.DIV,
 25 |                           'UDIV'      : z3.UDiv,
 26 |                           'MOD'       : self.MOD,
 27 |                           'AND'       : z3.And,
 28 |                           'OR'        : z3.Or,
 29 |                           'NOT'       : z3.Not,
 30 |                           'ULE'       : z3.ULE,
 31 |                           'UGE'       : z3.UGE,
 32 |                           'ULT'       : z3.ULT,
 33 |                           'UGT'       : z3.UGT,
 34 |                           'EQ'        : self.EQ,
 35 |                           'NEQ'       : self.NEQ,
 36 |                           'GE'        : self.GE,
 37 |                           'LE'        : self.LE,
 38 |                           'GT'        : self.GT,
 39 |                           'LT'        : self.LT,
 40 |                           'BITOR'     : self.BITOR,
 41 |                           'BITAND'    : self.BITAND,
 42 |                           'BITNOT'    : self.BITNOT,
 43 |                           'SHR'       : self.SHR,
 44 |                           'SHL'       : self.SHL,
 45 |                           'Slice'     : self.Slice,
 46 |                           'Index'     : self.Slice,
 47 |                           'ISPOW2'    : self.ISPOW2,
 48 |                           'ALIGNUP'   : self.ALIGNUP,
 49 |                           'ALIGNDOWN' : self.ALIGNDOWN,
 50 |                           'ISALIGNED' : self.ISALIGNED,
 51 |                           'OVFLADD'   : self.OVFLWADD,
 52 |                           'SECT'      : self.SECT,
 53 |                           'NSECT'     : self.NSECT,
 54 |                           'OPTHDR'    : self.OPTHDR,
 55 |                           'INT'       : self.INT,
 56 |                           'VAR'       : self.VAR,
 57 |                           'IMM'       : self.IMM
 58 |         }
 59 |         self.enable_optimizations = enable_optimizations
 60 |         self.optimizations = []
 61 |         self.log = logging.getLogger(__name__)
 62 |         self.log.setLevel(logging.NOTSET)
 63 |         coloredlogs.install(level="NOTSET", logger=self.log)
 64 | 
 65 | 
 66 |     @staticmethod
 67 |     def SUB(a, b):
 68 |         return a - b
 69 | 
 70 |     @staticmethod
 71 |     def MUL(a, b):
 72 |         return a * b
 73 | 
 74 |     @staticmethod
 75 |     def MOD(a, b):
 76 |         return a % b
 77 | 
 78 |     @staticmethod
 79 |     def DIV(a, b):
 80 |         return a / b
 81 | 
 82 |     @staticmethod
 83 |     def EQ(a, b):
 84 |         return a == b
 85 | 
 86 |     @staticmethod
 87 |     def NEQ(a, b):
 88 |         return a != b
 89 | 
 90 |     @staticmethod
 91 |     def BITOR(a, b):
 92 |         return a | b
 93 | 
 94 |     @staticmethod
 95 |     def BITAND(a, b):
 96 |         return a & b
 97 | 
 98 |     @staticmethod
 99 |     def BITNOT(a):
100 |         return ~a
101 | 
102 |     @staticmethod
103 |     def SHR(a, b):
104 |         return a >> b
105 | 
106 |     @staticmethod
107 |     def SHL(a, b):
108 |         return a << b
109 | 
110 |     @staticmethod
111 |     def ISPOW2(a):
112 |         size = a.size()
113 |         one = z3.BitVecVal(1, size)
114 |         zero = z3.BitVecVal(0, size)
115 |         return (a & (a - one) == zero)
116 | 
117 |     @staticmethod
118 |     def GE(a, b):
119 |         return a >= b
120 | 
121 |     @staticmethod
122 |     def LE(a, b):
123 |         return a <= b
124 | 
125 |     @staticmethod
126 |     def LT(a, b):
127 |         return a < b
128 | 
129 |     @staticmethod
130 |     def GT(a, b):
131 |         return a > b
132 | 
133 |     @staticmethod
134 |     def ALIGNUP(a, b):
135 |         return (a + b - 1) & -b
136 | 
137 |     @staticmethod
138 |     def ALIGNDOWN(a, b):
139 |         return a & -b
140 | 
141 |     @staticmethod
142 |     def ISALIGNED(a, b):
143 |         return (a & (b -1)) == 0
144 | 
145 |     @staticmethod
146 |     def OVFLWADD(a, b):
147 |         maxint = z3.BitVecVal(-1, a.size())
148 |         ### True is there is an overflow
149 |         return z3.ULT(maxint - a, b)
150 | 
151 |     @staticmethod
152 |     def INT(a, b):
153 |         a = a if isinstance(a, int) else a.as_long()
154 |         b = b if isinstance(b, int) else b.as_long()
155 |         return z3.BitVecVal(a, b*8)
156 | 
157 |     @staticmethod
158 |     def Slice(var, start, cnt=1):
159 |         if isinstance(start, z3.BitVecRef):
160 |             zeroext = z3.ZeroExt(var.size() - start.size(), start)
161 |             shifted = z3.LShR(var, zeroext*8)
162 |             var = shifted
163 |         else:
164 |             shifted = z3.LShR(var, start*8)
165 |             var = shifted
166 | 
167 |         if isinstance(cnt, z3.BitVecRef):
168 |             cnt = cnt.as_long()
169 |         return z3.Extract((cnt * 8) - 1, 0, var)
170 | 
171 |     @staticmethod
172 |     def IMM(imm):
173 |         return imm.value if isinstance(imm, Immediate) else imm
174 | 
175 |     def VAR(self, var):
176 |         return self.variables[var.name]
177 | 
178 |     #### This opcode returns the offset of the section table
179 |     def SECT(self, header):
180 |         ntHdrOff = self.Slice(header, 0x3c, 4)
181 |         ntHdr = self.Slice(header, ntHdrOff, 24)
182 |         sizeOptHdr = self.Slice(ntHdr, 20, 2)
183 |         return z3.Sum(ntHdrOff, z3.ZeroExt(16, sizeOptHdr)) + 24
184 | 
185 |     ### Default way to get the number of section from an header
186 |     def NSECT(self, header):
187 |         ntHdrOff = self.Slice(header, 0x3c, 4)
188 |         ntHdr = self.Slice(header, ntHdrOff, 24)
189 |         return self.Slice(ntHdr, 6, 2)
190 | 
191 |     def OPTHDR(self, header):
192 |         ntHdrOff = self.Slice(header, 0x3c, 4)
193 |         return self.Slice(header, ntHdrOff + 24, 224)
194 | 
195 |     z3_funcs_sized = {'ADD', 'SUB', 'MUL', 'UDIV', 'MOD', 'EQ', 'NEQ', 'GE', 'LE', 'GT', 'LT', 'ULE', 'UGE', 'UGT', 'ULT', 'BITOR', 'BITAND', 'ALIGNUP', 'ALIGNDOWN', 'ISALIGNED', 'OVFLWADD', 'SHR', 'SHL'}
196 |     z3_funcs_bool  = {'OR', 'AND', 'NOT'}
197 |     z3_funcs_unsigned = {'ADD', 'SUB', 'BITOR', 'BITAND', 'ULE', 'ULT', 'UGT', 'UGE', 'EQ', 'NEQ', 'OVFLWADD', 'SHR', 'SHL', 'ALIGNUP', 'ALIGNDOWN', 'ISALIGNED'}
198 | 
199 |     def dispatch_z3_1(self, func, arg):
200 |         return self.z3_funcs[func](arg)
201 | 
202 |     def dispatch_z3_2(self, func, arg1, arg2):
203 |         if func not in self.z3_funcs:
204 |             self.log.critical(f"Function {func} not recognized")
205 |             raise NameError
206 |         if (func in self.z3_funcs_sized):
207 |             if isinstance(arg1, int):
208 |                 arg1 = z3.BitVecVal(arg1, int(log2(2**(arg1.bit_length()+1))))
209 |             if isinstance(arg2, int):
210 |                 arg2 = z3.BitVecVal(arg2, int(log2(2**(arg2.bit_length()+1))))
211 |             s1 = arg1.size()
212 |             s2 = arg2.size()
213 |             max_size = max(s1, s2)
214 |             extension_mechanism = (z3.ZeroExt if func in self.z3_funcs_unsigned
215 |                                    else z3.SignExt)
216 |             if s1 != max_size:
217 |                 arg1 = extension_mechanism(max_size - s1,
218 |                                            arg1)
219 |             if s2 != max_size:
220 |                 arg2 = extension_mechanism(max_size - s2,
221 |                                            arg2)
222 |         return self.z3_funcs[func](arg1, arg2)
223 | 
224 |     def dispatch_z3_3(self, func, *args):
225 |         if func != "Slice":
226 |             self.log.CRITICAL(f"{func} not recognized as a 3-arguments function")
227 |             raise ValueError
228 |         return self.z3_funcs[func](*args)
229 | 
230 |     def dispatch_z3(self, func, *args):
231 |         if not 0 < len(args) < 4:
232 |             self.log.critical(f"Trying to dispatch function with {len(args)}"
233 |                          " arguments")
234 |             raise TypeError
235 |         if len(args) == 1:
236 |             return self.dispatch_z3_1(func, *args)
237 |         elif len(args) == 2:
238 |             return self.dispatch_z3_2(func, *args)
239 |         elif len(args) == 3:
240 |             return self.dispatch_z3_3(func, *args)
241 | 
    # Expose the z3 dispatcher under the generic name ``dispatch``.
    dispatch = dispatch_z3
243 | 
244 |     def _exec_input(self, stmt):
245 |         variable = stmt.var
246 |         self.log.debug(f"Creating variable {variable} of size {stmt.size}")
247 |         symb = z3.BitVec(variable.name, stmt.size * 8)
248 |         self.variables[variable.name] = symb
249 | 
250 |     def _exec_output(self, stmt):
251 |         variable = stmt.var
252 |         self.log.debug(f"Creating output {variable} of size {stmt.size}")
253 |         symb = z3.BitVec(f"{self.name}_{variable.name}", stmt.size * 8)
254 |         self.variables[variable.name] = symb
255 | 
256 |     def _exec_unconditional_assignment(self, stmt):
257 |         self.log.debug(f"Executing unconditional assignemnt {stmt}")
258 |         var = stmt.left
259 |         expr = stmt.right
260 |         self.variables[var.name] = self._eval_expression(expr)
261 | 
262 |     def _exec_conditional_assignment(self, stmt):
263 |         self.log.debug(f"Executing unconditional assignemnt {stmt}")
264 |         var = stmt.left
265 |         expr = stmt.right
266 |         z3expr = self._eval_expression(expr)
267 |         size = z3expr.size()
268 | 
269 |         if var.name not in self.variables:
270 |             self.log.warning(f"Variable {var.name} declared in a conditional assignement. Its value in case the condition is not satisfied defaults to 0")
271 |             self.variables[var.name] = z3.BitVecVal(0, size)
272 | 
273 |         self.variables[var.name] = z3.If(
274 |             self._eval_condition_list(stmt._conditions),
275 |             z3expr,
276 |             self.variables[var.name])
277 | 
278 |     def _exec_assignment(self, stmt, **kwargs):
279 |         if stmt.conditional:
280 |             return self._exec_conditional_assignment(stmt)
281 |         else:
282 |             return self._exec_unconditional_assignment(stmt)
283 | 
284 |     def _eval_condition(self, condition):
285 |         if not condition.conditional:
286 |             return self._eval_expression(condition.expr)
287 |         if condition.isterminal:
288 |             return z3.If(
289 |                 self._eval_condition_list(condition.conditions),
290 |                 self._eval_expression(condition.expr),
291 |                 z3.BoolVal(True))
292 | 
293 |         return z3.And(self._eval_expression(condition.expr),
294 |                       self._eval_condition_list(condition.conditions))
295 | 
296 |     def _eval_condition_list(self, conditions):
297 |         return z3.And(*[self._eval_condition(x) for x in conditions])
298 | 
299 |     def _exec_condition(self, stmt, **kwargs):
300 |         condname = f"{self.name}_{stmt.name}"
301 |         self.conditions[condname] = self._eval_condition(stmt)
302 |         if stmt.isterminal:
303 |             self.terminal_conditions[condname] = self.conditions[condname]
304 | 
305 |     @staticmethod
306 |     def _build_loop_unrool_condition(loop):
307 |         count = loop.count
308 |         maxunroll = loop.maxunroll
309 |         expr = Expression("ULE",
310 |                           count,
311 |                           Expression("IMM", Immediate(maxunroll)))
312 |         condition = Condition(expr, True, conditions=loop._conditions,
313 |                               name=f"L{loop._loop_name}_unroll")
314 |         return condition
315 | 
    def _exec_loop(self, stmt, prev_prefix=""):
        """Unroll a counted loop ``stmt.maxunroll`` times.

        A terminal condition bounding the count by ``maxunroll`` is executed
        first.  For every index the output variable is conditionally assigned
        the ``structsize``-byte slice at ``startpos + index * structsize``,
        and each body statement is executed under the loop's conditions plus
        ``count > index``.
        """
        cond_prefix = f"{prev_prefix}_L{stmt._loop_name}_"
        statements = stmt._statements
        ovar = Variable(stmt.output_name)
        ivar = stmt.input_var
        structsize = stmt.structsize
        startpos = stmt.startpos
        count = stmt.count
        conditions = stmt._conditions
        self._exec_statement(self._build_loop_unrool_condition(stmt))
        for index in range(stmt.maxunroll):
            pref = cond_prefix + f"{index}_"
            self.log.debug(f"Unrolling loop {stmt}. Index {index}")
            # Guard for this iteration: it only applies when count > index.
            lcond = Condition(Expression("UGT", count, Expression("IMM", Immediate(index))), False)
            var_assignement = Assignment(ovar,
                                         Expression("Slice", ivar,
                                                    Expression("ADD", startpos,
                                                               Expression("IMM", Immediate(index*structsize))),
                                                    Expression("IMM", Immediate(structsize))),
                                         [*conditions, lcond])
            self._exec_statement(var_assignement)
            for s in statements:
                if isinstance(s, Condition):
                    # Conditions are cloned so each iteration gets its own
                    # prefixed name.
                    s = s.clone()
                    s.add_prefix(pref)
                # NOTE(review): statements that are not Conditions are not
                # cloned, so these two calls grow the original statement's
                # _conditions list on every iteration - confirm intended.
                s._conditions.extend(conditions)
                s._conditions.append(lcond)
                self._exec_statement(s, prev_prefix=pref)
344 | 
    def _exec_vloop(self, stmt, prev_prefix=""):
        """Unroll a condition-driven loop ``stmt.maxunroll`` times.

        The output variable starts at ``stmt.start``.  After each unrolled
        body it is conditionally re-assigned from the variable
        ``stmt.nextname``, guarded by the body's condition named
        ``stmt.contcondition``.
        """
        cond_prefix = f"{prev_prefix}_L{stmt._loop_name}_"
        statements = stmt._statements
        ovar = Variable(stmt.output_name)
        start = stmt.start
        nextvar = stmt.nextname
        condname = stmt.contcondition
        maxunroll = stmt.maxunroll
        conditions = stmt._conditions

        # NOTE(review): in the first branch ``cond`` is whatever is stored in
        # self.conditions, in the second it is a Condition object - confirm
        # both are valid members of a statement's _conditions list.
        if self.prefix(condname) in self.conditions:
            cond = self.conditions[self.prefix(condname)]
        else:
            cond = Condition(True, isterminal=False, name=condname)
            self._exec_condition(cond)

        # Assign the first value
        initial_assignement = Assignment(ovar, start, [*conditions])
        self._exec_assignment(initial_assignement)
        # Unroll
        for index in range(stmt.maxunroll):
            # Prefix for the conditions
            pref = cond_prefix + f"{index}_"
            self.log.debug(f"Unrolling loop {stmt}. Index {index}")

            # For each statement in the loop
            for s in statements:
                # if the statement is a condition...
                if isinstance(s, Condition):
                    # ... clone it
                    s = s.clone()
                    # if it changes the loop condition...
                    if s.name == condname:
                        # keep it in mind for later
                        nextcond = s
                    # change its name, adding the prefix
                    s.add_prefix(pref)
                # NOTE(review): statements that are not Conditions are not
                # cloned, so this grows the original statement's _conditions
                # list on every iteration - confirm intended.
                s._conditions.extend([*conditions, cond])
                self._exec_statement(s, prev_prefix=pref)

            # NOTE(review): ``nextcond`` is only bound when the body contains
            # a condition named ``condname``; otherwise this line fails on
            # the first iteration and yields None on later ones.
            cond = nextcond
            nextcond = None
            nextassignment = Assignment(ovar, Expression("VAR", nextvar),
                                        conditions=[*conditions, cond])
            self._exec_assignment(nextassignment)
        # delete the first conditions, since we don't need it
        del self.conditions[self.prefix(condname)]
392 | 
393 |     def _exec_optimization(self, stmt):
394 |         strategy = stmt.strategy
395 |         expression = stmt.expression
396 |         if strategy in (Optimizations.MAXIMIZE, Optimizations.MINIMIZE):
397 |             self.enable_optimizations = True
398 |             self.optimizations.append((strategy,
399 |                                        self._eval_expression(expression)))
400 |         else:
401 |             log.error(f"Strategy {stmt.strategy} not implemented")
402 |             raise NotImplementedError
403 | 
404 |     def _exec_debug(self, stmt):
405 |         pass
406 | 
    # Statement class -> handler defined above for statements of that class.
    # The values are the plain functions from the class body, not bound
    # methods.
    _exec_table = {Input: _exec_input,
                   Output: _exec_output,
                   Assignment: _exec_assignment,
                   Condition: _exec_condition,
                   Loop: _exec_loop,
                   VLoop: _exec_vloop,
                   Optimization: _exec_optimization,
                   Debug: _exec_debug
    }
416 | 
417 |     def generate_solver(self):
418 |         if self.enable_optimizations:
419 |             return self.generate_optimizer()
420 |         self.log.info("Generating solver")
421 |         solver = z3.Solver()
422 |         for name, condition in self.terminal_conditions.items():
423 |             solver.assert_and_track(condition, name)
424 |         self._solver = solver
425 |         return solver
426 | 
427 |     def generate_optimizer(self):
428 |         self.log.info("Generating optimizer")
429 |         solver = z3.Optimize()
430 |         for name, condition in self.terminal_conditions.items():
431 |             solver.assert_and_track(condition, name)
432 |         for strategy, expression in self.optimizations:
433 |             if strategy == Optimizations.MAXIMIZE:
434 |                 solver.maximize(expression)
435 |             elif strategy == Optimizations.MINIMIZE:
436 |                 solver.minimize(expression)
437 |         self._solver = solver
438 |         return solver
439 | 
440 |     @property
441 |     def solver(self):
442 |         if self._solver is None:
443 |             self.generate_solver()
444 |         return self._solver
445 | 
446 |     def check_sat(self):
447 |         solver = self.solver
448 |         self.log.info("Checking satisfiability")
449 |         if solver.check().r != 1:
450 |             self.log.critical("Model unsatisfiable")
451 |             if self.print_unsat:
452 |                 unsat_core = solver.unsat_core()
453 |                 self.log.critical(f"Unsat core: {unsat_core}")
454 |                 for cname in unsat_core:
455 |                     self.log.critical(self.conditions[str(cname)])
456 |             return None
457 |         else:
458 |             self.log.info("Model satisfiable")
459 |             model = solver.model()
460 |             self._model = model
461 |             return model
462 | 
463 |     @property
464 |     def model(self):
465 |         if self._model is None:
466 |             self.check_sat()
467 |         return self._model
468 | 
469 |     # this routine... if it works it's miracle
470 |     def generate_testcase(self, varname="HEADER"):
471 |         model = self.model
472 |         self.log.info("Generating testcase")
473 |         header = self.variables[varname]
474 |         bitvec = model.eval(header)
475 |         string_hex_rev = hex(bitvec.as_long())[2:]
476 |         string_hex_rev = ('0' if (len(string_hex_rev) % 2 == 1) else "") + string_hex_rev
477 |         string_hex = ''.join([string_hex_rev[i:i+2]
478 |                               for i in range(len(string_hex_rev)-2, -2, -2)])
479 |         test = bytes.fromhex(string_hex)
480 |         test += b'\x00' * (int(header.size()/8) - len(test))
481 |         return test
482 | 
    def verify(self, test, variable="HEADER"):
        """Check whether the byte string *test* satisfies the loaded model.

        The loaded statements are executed, *test* is zero-padded and turned
        into a bit-vector constant stored as variable ``TEST__``, and a
        terminal condition ``VTEST`` equating *variable* with it is added
        before solving.

        Returns whatever check_sat() returns (a model, or None).
        Raises ValueError when no statements are loaded or *test* is too long.
        """
        if not self._statements:
            self.log.error("Load statements before call verify()")
            raise ValueError
        self.exec_statements(self._statements)

        var = self.variables[variable]
        # NOTE(review): generate_testcase() divides size() by 8, i.e. treats it
        # as a bit count, while here it is compared with len(test), used as the
        # padding length in bytes and multiplied by 8 again. Confirm the
        # intended unit.
        size = var.size()
        if len(test) > size:
            self.log.critical("The file to verify is bigger than the input of the model. Aborting.")
            raise ValueError
        test += b'\x00' * (size - len(test))
        # Little-endian: the first byte of the file is the least significant.
        testvec = z3.BitVecVal(int.from_bytes(test, "little"), size*8)
        self.variables['TEST__'] = testvec
        expr = Expression("EQ", Expression("VAR", Variable(variable)), Expression("VAR", Variable("TEST__")))
        constraint = Condition(expr, True, name='VTEST')
        self._exec_statement(constraint)
        # Rebuild the solver so the new VTEST constraint is part of it.
        self.generate_solver()
        return self.check_sat()
502 | 
503 |     def __and__(self, other):
504 |         ret = Z3Backend(name=f"{self.name}&{other.name}", voi=self.voi)
505 | 
506 |         for condname, cond in self.terminal_conditions.items():
507 |             ret.terminal_conditions[condname] = cond
508 |         for condname, cond in other.terminal_conditions.items():
509 |             ret.terminal_conditions[condname] = cond
510 | 
511 |         ret.variables[f'{self.name}_{ret.voi}'] = self.variables[ret.voi]
512 |         ret.variables[f'{ret.voi}'] = self.variables[ret.voi]
513 | 
514 |         ### HACK alert! This avoid to add useless constraints
515 |         ### from z3_model_support in differential.py
516 |         if other.voi != self.voi:
517 |             self.log.warning(f"Variable of interest (voi) differs in the two models. Only adding the constraints of the second model, without enforcinf VOI equality. This can cause troubles.")
518 |             return ret
519 | 
520 |         ret.variables[f'{other.name}_{ret.voi}'] = other.variables[ret.voi]
521 |         voicond = Condition(
522 |             Expression("EQ",
523 |                        Expression("VAR", Variable(f'{self.name}_{ret.voi}')),
524 |                        Expression("VAR", Variable(f'{other.name}_{ret.voi}')),
525 |             ),
526 |             isterminal=True, name="voicond")
527 |         ret._exec_condition(voicond)
528 |         return ret
529 | 
530 |     def __invert__(self):
531 |         ret = Z3Backend(name=f"~{self.name}", voi=self.voi)
532 |         ret.variables[f'{ret.voi}'] = self.variables[ret.voi]
533 |         conditions = []
534 |         for condname, cond in self.terminal_conditions.items():
535 |             ncond = z3.Not(cond)
536 |             conditions.append(ncond)
537 |         ret.terminal_conditions['negated'] = z3.Or(conditions)
538 |         return ret
539 | 
540 |     def prefix(self, conditionname):
541 |         return f"{self.name}_{conditionname}"
542 | 
543 |     def add_inequality(self, var1, var2):
544 |         self.terminal_conditions['ineq'] = var1 != var2
545 | 


--------------------------------------------------------------------------------
/modelLang/parsers/parser.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import os.path
  3 | import logging
  4 | import pickle
  5 | from collections import deque, defaultdict
  6 | from enum import Enum, auto
  7 | 
  8 | import coloredlogs
  9 | 
 10 | log = logging.getLogger(__name__)
 11 | coloredlogs.install(level="NOTSET", logger=log)
 12 | 
 13 | import ply.yacc as yacc
 14 | 
 15 | # Get the token map from the lexer.  This is required.
 16 | from .langlex import Lexer
 17 | from ..utils import customdefdict
 18 | from ..classes import Variable, Assignment, Expression, Condition, Immediate, BoolImmediate, ConditionList, ConditionListEntry, Loop, VLoop, Input, Output, Define, Optimization, Optimizations, Debug
 19 | 
 20 | def read_file(filename):
 21 |     with open(filename, "rb") as fp:
 22 |         buf = fp.read()
 23 |         return buf
 24 | 
 25 | class Parser:
    class ParserType(Enum):
        # Parsing mode, stored as self._type from the ptype argument of
        # Parser.__init__ (VALIDATOR is the default there).
        # GENERATOR and DIFFERENTIAL_ASSERT keep gencondition statements
        # (p_statement_gencond drops them in the other modes); both
        # DIFFERENTIAL_* modes force loop unroll counts to 1.
        GENERATOR = auto()
        VALIDATOR = auto()
        DIFFERENTIAL_ASSERT = auto()
        DIFFERENTIAL_NEGATE = auto()
 31 | 
 32 |     tokens = Lexer.tokens
 33 |     def parse_file(self, fname):
 34 |         self._fname = fname
 35 |         self._cwd = os.path.dirname(fname)
 36 |         with open(fname, "r") as f:
 37 |             lines = f.readlines()
 38 |             cnt = 0
 39 |             for s in lines:
 40 |                 cnt += 1
 41 |                 if not s: continue
 42 |                 log.debug(f"Line {cnt}: {s}")
 43 |                 result = self.parser.parse(s)
 44 |                 if result:
 45 |                     print(result)
 46 |             # Check that there are no un-ended loops
 47 |             if len(self._block_stack) != 0:
 48 |                 log.error(f"Un-ended statement: {self._block_stack[0]}")
 49 |                 raise ValueError
 50 | 
    @property
    def variables(self):
        """Mapping of variable name to Variable (the dict created in __init__)."""
        return self._variables
 54 | 
    @property
    def statements(self):
        """List of top-level statements collected so far, in parse order."""
        return self._statements
 58 | 
    @property
    def conditions(self):
        """Mapping of condition name to the Condition registered under it."""
        return self._conditions
 62 | 
    @property
    def defines(self):
        """Mapping of constant name to the expression it was defined as."""
        return self._defines
 66 | 
    def p_input(self, p):
        'input : input NEWLINE'
        # A trailing NEWLINE adds nothing: forward the value of the inner input.
        p[0] = p[1]
 70 | 
 71 |     def p_input_1(self, p):
 72 |         'input : statement'
 73 |         if p[1] and not p[1].lineno:
 74 |             lineno = p.lexer.lineno
 75 |             p[1].lineno = lineno
 76 | 
 77 |     def p_input_fromfile(self, p):
 78 |         'input : FROMFILE VARIABLE expression expression NUMBER NUMBER'
 79 |         filename = os.path.join(self._cwd, p[2])
 80 |         symbol = p[3]
 81 |         start = p[4]
 82 |         foffset = p[5]
 83 |         nbytes = p[6]
 84 |         buf = read_file(filename)
 85 |         for n, b in enumerate(buf[foffset:foffset+nbytes]):
 86 |             curroffset = Expression("ADD", start,
 87 |                                     Expression("IMM", Immediate(n)))
 88 |             nb = Expression("Index", symbol, curroffset)
 89 |             expr = Expression("EQ", nb, Expression("IMM", Immediate(b)))
 90 |             cond = Condition(expr, isterminal=True,
 91 |                              name=f"FROM_{filename}_{n}")
 92 |             self.statements.append(cond)
 93 |             self.conditions[cond.name] = cond
 94 | 
 95 |     def p_input_load(self, p):
 96 |         'input : load_stmt'
 97 |         use_cwd = p[1][2]
 98 |         os = p[1][1]
 99 |         header = p[1][0]
100 |         module_name = ('modelLang.structures.'
101 |                        + (os if os != "DEFAULT" else "cparser"))
102 |         module = __import__(module_name, globals(), locals(), ['parse_file'])
103 |         dirpath = self._cwd if use_cwd else "modelLang/structures/headers"
104 |         header_file = dirpath + f"/{header}.h"
105 |         with open(header_file, "r") as fp:
106 |             fcontent = fp.read()
107 | 
108 |         new_types = module.parse_file(fcontent)
109 |         new_defs = module.preprocess_defs(fcontent)
110 |         self.loaded_types.update(new_types[1])
111 |         new_defs = {x: Expression("IMM", Immediate(y))
112 |                     for x, y in new_defs.items()}
113 |         self.defines.update(new_defs)
114 | 
115 |     def p_statement_ass(self, p):
116 |         'statement : assignment_stmt'
117 |         lineno = p.lineno(0)
118 |         log.debug("Assignment: " + str(p[1]))
119 |         if len(self._block_stack) == 0:
120 |             self.statements.append(p[1])
121 |         else:
122 |             block = self._block_stack.pop()
123 |             block.add_statement(p[1])
124 |             self._block_stack.append(block)
125 |         p[0] = p[1]
126 | 
127 |     def p_statement_cond(self, p):
128 |         'statement : condition_stmt'
129 |         log.debug("Condition " + str(p[1]))
130 |         name, condition = p[1]
131 |         self.conditions[name.upper()] = condition
132 |         condition.name = name.upper()
133 |         if len(self._block_stack) == 0:
134 |             self.statements.append(condition)
135 |         else:
136 |             block = self._block_stack.pop()
137 |             block.add_statement(condition)
138 |             self._block_stack.append(block)
139 |         p[0] = condition
140 | 
141 |     def p_statement_gencond(self, p):
142 |         'statement : gencondition_stmt'
143 |         if self._type in (self.ParserType.GENERATOR,
144 |                           self.ParserType.DIFFERENTIAL_ASSERT):
145 |             self.p_statement_cond(p)
146 |         else:
147 |             p[0] = None
148 | 
149 |     def p_statement_input(self, p):
150 |         'statement : input_stmt'
151 |         log.debug("Input " + str(p[1]))
152 |         size = self._input_size if self._input_size else p[1][1]
153 |         stmt = Input(p[1][0], size)
154 |         self.statements.append(stmt)
155 |         self.variables[p[1][0].name] = p[1][0]
156 |         p[0] = stmt
157 | 
158 |     def p_statement_output(self, p):
159 |         'statement : output_stmt'
160 |         log.debug("Output " + str(p[1]))
161 |         size = p[1][1]
162 |         stmt = Output(p[1][0], size)
163 |         self.statements.append(stmt)
164 |         self.variables[p[1][0].name] = p[1][0]
165 |         p[0] = stmt
166 | 
    def p_statement_loopstart(self, p):
        'statement : loopstart_stmt'
        # p[1] is (loop name, loop object). The loop stays on the block stack
        # until its loop end is parsed; statements parsed in between are added
        # to it by the statement rules.
        log.debug("Loop start " + str(p[1]))
        loop = p[1][1]
        self._block_stack.append(loop)
        # The loop's output variable takes the loop's type. The lookup goes
        # through the customdefdict created in __init__.
        var = self.variables[loop.output_name]
        var.type = loop.vtype
        p[0] = loop
175 | 
176 |     def p_statement_loopend(self, p):
177 |         'statement : loopend_stmt'
178 |         loop = self._block_stack.pop()
179 |         if loop._loop_name != p[1][0]:
180 |             log.critical("Loop end does not match current loop name")
181 |             raise ValueError
182 |         log.debug("Loop end " + str(p[1][0]))
183 |         if len(self._block_stack) == 0:
184 |             self.statements.append(loop)
185 |         else:
186 |             block = self._block_stack.pop()
187 |             block.add_statement(loop)
188 |             self._block_stack.append(block)
189 |         p[0] = loop
190 | 
191 |     def p_statement_define(self, p):
192 |         'statement : define_stmt'
193 |         stmt = p[1]
194 |         if stmt.name in self.variables:
195 |             log.warning(f"Defining constant {stmt.name}, but a variable with the same name already declared. Skipping")
196 |         else:
197 |             self.defines[stmt.name] = stmt.value
198 |         p[0] = stmt
199 | 
200 |     def p_statement_optimize(self, p):
201 |         'statement : OPTIMIZE expression'
202 |         strategy = p[1]
203 |         expression = p[2]
204 |         opt = Optimization(strategy, expression)
205 |         self.statements.append(opt)
206 |         p[0] = opt
207 | 
208 |     def p_statement_debug(self, p):
209 |         'statement : dbgstatement'
210 |         dbg = p[1]
211 |         if len(self._block_stack) == 0:
212 |             self.statements.append(dbg)
213 |         else:
214 |             block = self._block_stack.pop()
215 |             block.add_statement(dbg)
216 |             self._block_stack.append(block)
217 |         p[0] = p[1]
218 | 
    def p_dbgstatement(self, p):
        'dbgstatement : DBG COLON expression'
        # Wrap the expression to be inspected in a Debug statement.
        p[0] = Debug(p[3])
222 | 
223 |     def p_define_stmt(self, p):
224 |         'define_stmt : DEFINE VARIABLE expression'
225 |         value = p[3]
226 |         if p[2] in self._custom_defs:
227 |             value = self._custom_defs[p[2]]
228 | 
229 |         p[0] = Define(p[2], value)
230 | 
231 |     def p_load_stmt(self, p):
232 |         'load_stmt : LOADTYPES VARIABLE VARIABLE'
233 |         if p[3] == 'linux':
234 |             os = 'DEFAULT'
235 |         else:
236 |             os = p[3]
237 |         p[0] = (p[2], os, p[1])
238 | 
    def p_load_stmt_2(self, p):
        'load_stmt : LOADTYPES VARIABLE'
        # No OS given: same tuple layout as p_load_stmt, with "DEFAULT" as OS.
        p[0] = (p[2], "DEFAULT", p[1])
242 | 
243 |     def p_input_stmt_type(self, p):
244 |         'input_stmt : INPUT VARIABLE constant TYPE VARIABLE'
245 |         log.debug("Input statement")
246 |         t = p[5]
247 |         if t not in self.loaded_types:
248 |             log.warning(f"Unknown type {t}. Defaulting to untyped variable")
249 |             var = (Variable(p[2]), p[3])
250 |         else:
251 |             var = (Variable(p[2], self.loaded_types[t]), p[3])
252 |         p[0] = var
253 | 
254 |     def p_input_stmt(self, p):
255 |         'input_stmt : INPUT VARIABLE constant'
256 |         log.debug("Input statement")
257 |         var = (Variable(p[2]), p[3])
258 |         p[0] = var
259 | 
260 |     def p_output_stmt(self, p):
261 |         'output_stmt : OUTPUT VARIABLE constant'
262 |         log.debug("Output statement")
263 |         var = (Variable(p[2]), p[3])
264 |         p[0] = var
265 | 
266 |     def p_output_stmt_type(self, p):
267 |         'output_stmt : OUTPUT VARIABLE constant TYPE VARIABLE'
268 |         log.debug("Output statement")
269 |         t = p[5]
270 |         if t not in self.loaded_types:
271 |             log.warning(f"Unknown type {t}. Defaulting to untyped variable")
272 |             var = (Variable(p[2]), p[3])
273 |         else:
274 |             var = (Variable(p[2], self.loaded_types[t]), p[3])
275 |         p[0] = var
276 | 
    def p_constant_number(self, p):
        'constant : NUMBER'
        # A literal number is used as-is (the raw token value, not an Expression).
        p[0] = p[1]
280 | 
    def p_constant_define(self, p):
        'constant : VARIABLE'
        # A name used where a constant is required must be a known define.
        name = p[1]
        if name not in self.defines:
            log.error(f"{name} not defined as a constant")
            raise ValueError
        # Unwraps the value of the define's first operand.
        # NOTE(review): assumes the define is an IMM expression, as the ones
        # built from custom_defs and loaded headers are; confirm for defines
        # written as arbitrary expressions.
        p[0] = self.defines[name].operands[0].value
288 | 
    def p_assignment_stmt_uncond(self, p):
        'assignment_stmt : ASSIGNSTART COLON assignment'
        # Unconditional assignment: pass the Assignment built by the
        # 'assignment' rule through unchanged.
        assignment = p[3]
        p[0] = assignment
293 | 
294 |     def p_assignment_stmt_cond(self, p):
295 |         'assignment_stmt : ASSIGNSTART conditionlist COLON assignment'
296 |         assignement = p[4]
297 |         assignement.left.symb = assignement.right
298 |         conditionslist = p[2]
299 |         conds = [~self.conditions[c.name] if c.negated else
300 |                  self.conditions[c.name]
301 |                  for c in conditionslist]
302 |         assignement.conditions = conds
303 |         p[0] = assignement
304 | 
    def p_condition_stmt_uncond(self, p):
        'condition_stmt : CONDITIONNAME COLON conditionexpr'
        # Name the condition after its label and return (name, condition).
        p[3].name = p[1]
        p[0] = (p[1], p[3])
309 | 
310 |     def p_condition_stmt_cond(self, p):
311 |         'condition_stmt : CONDITIONNAME conditionlist COLON conditionexpr'
312 |         cond = p[4]
313 |         cond.name = p[1]
314 |         conditionslist = p[2]
315 |         conds = [~self.conditions[c.name] if c.negated else
316 |                  self.conditions[c.name]
317 |                  for c in conditionslist]
318 |         cond.conditions = conds
319 |         p[0] = (p[1], cond)
320 | 
    def p_condition_stmt_noexpr(self, p):
        'condition_stmt : CONDITIONNAME conditionlist SEMICOLON'
        # Condition with no expression of its own, built only from the listed
        # conditions. Its name is assigned later by p_statement_cond.
        # NOTE(review): unlike the COLON forms, this looks entries up through
        # conditionslist.names and never consults the entries' negated flag;
        # confirm that negated entries are meant to be ignored here.
        conditionslist = p[2]
        conds = [self.conditions[c] for c in conditionslist.names]
        cond = Condition(True, False, conds)
        p[0] = (p[1], cond)
327 | 
    def p_gcondition_stmt_uncond(self, p):
        'gencondition_stmt : GENCONDITIONNAME COLON conditionexpr'
        # Generation-only counterpart of p_condition_stmt_uncond: name the
        # condition after its label and return (name, condition).
        p[3].name = p[1]
        p[0] = (p[1], p[3])
332 | 
333 |     def p_gcondition_stmt_cond(self, p):
334 |         'gencondition_stmt : GENCONDITIONNAME conditionlist COLON conditionexpr'
335 |         cond = p[4]
336 |         cond.name = p[1]
337 |         conditionslist = p[2]
338 |         conds = [~self.conditions[c.name] if c.negated else
339 |                  self.conditions[c.name]
340 |                  for c in conditionslist]
341 |         cond.conditions = conds
342 |         p[0] = (p[1], cond)
343 | 
    def p_gcondition_stmt_noexpr(self, p):
        'gencondition_stmt : GENCONDITIONNAME conditionlist SEMICOLON'
        # Generation-only condition with no expression of its own, built only
        # from the listed conditions.
        # NOTE(review): entries are looked up through conditionslist.names and
        # the negated flag is never consulted; confirm this is intended.
        conditionslist = p[2]
        conds = [self.conditions[c] for c in conditionslist.names]
        cond = Condition(True, False, conds)
        p[0] = (p[1], cond)
350 | 
    def p_loopstart_stmt_typed(self, p):
        '''loopstart_stmt : loopstart TYPE VARIABLE
                            | vloopstart TYPE VARIABLE'''
        # Typed loop header: p[1] is the (name, loop) tuple from the loopstart
        # or vloopstart rule, and the named type is stored on the loop object.
        # Raises TypeError for a type that has not been loaded.
        t = p[3]
        if t not in self.loaded_types:
            raise TypeError(f"Unknown type {t}")
        loop = p[1]
        loop[1].vtype = self.loaded_types[t]
        p[0] = loop
360 | 
    def p_loopstart_stmt_untyped(self, p):
        '''loopstart_stmt : loopstart
                            | vloopstart'''
        # Untyped loop header: forward the (name, loop) tuple unchanged.
        p[0] = p[1]
365 | 
    def p_loopstart_stmt(self, p):
        '''loopstart : LOOPSTART COLON VARIABLE ARROW LOOP LPAREN expression COMMA expression COMMA NUMBER COMMA expression COMMA NUMBER RPAREN
                     | LOOPSTART conditionlist  COLON VARIABLE ARROW LOOP LPAREN expression COMMA expression COMMA NUMBER COMMA expression COMMA NUMBER RPAREN
'''
        # Builds a Loop and returns (loop name, Loop).
        loopindex = p[1]
        # The conditional alternative has 17 symbols, so len(p) is 18 (p[0]
        # included); every later index is shifted by one in that case.
        isconditional = len(p) == 18
        unroll_count = None
        # Differential modes always unroll exactly once, whatever the model says.
        if self._type in (self.ParserType.DIFFERENTIAL_ASSERT,
                          self.ParserType.DIFFERENTIAL_NEGATE):
            unroll_count = 1
        if not isconditional:
            # Otherwise the last NUMBER of the rule is the unroll count.
            unroll_count = unroll_count if unroll_count else p[15]
            loop = Loop(p[1], p[3], p[7], p[9], p[11], p[13], unroll_count)
        else:
            unroll_count = unroll_count if unroll_count else p[16]
            # Conditions are looked up by name; negation flags are not consulted.
            conds = [self.conditions[c] for c in p[2].names]
            loop = Loop(p[1], p[4], p[8], p[10], p[12], p[14], unroll_count,
                        conditions=conds)
        p[0] = (loopindex, loop)
385 | 
    def p_loopstart_stmt_2(self, p):
        'loopstart : LOOPSTART COLON VARIABLE ARROW LOOP LPAREN expression COMMA expression COMMA expression COMMA expression COMMA NUMBER RPAREN'
        # Same shape as the unconditional alternative of p_loopstart_stmt, but
        # the third argument is an expression that must reduce to an immediate
        # (for example a define or a sizeof) instead of a literal NUMBER.
        loopindex = p[1]
        structsize = p[11]
        if structsize.opcode != "IMM":
            raise ValueError("Struct size must be a number")
        # Unwrap the immediate so Loop receives a plain value, as it does in
        # p_loopstart_stmt.
        structsize = structsize.operands[0].value
        # Differential modes always unroll exactly once.
        if self._type in (self.ParserType.DIFFERENTIAL_ASSERT,
                          self.ParserType.DIFFERENTIAL_NEGATE):
            unroll_count = 1
        else:
            unroll_count = p[15]
        loop = Loop(p[1], p[3], p[7], p[9], structsize, p[13], unroll_count)
        p[0] = (loopindex, loop)
400 | 
    def p_vloopstart_stmt_variable(self, p):
        '''vloopstart : LOOPSTART COLON VARIABLE ARROW VLOOP LPAREN expression COMMA VARIABLE COMMA CONDITIONNAME COMMA NUMBER RPAREN
                      | LOOPSTART conditionlist COLON VARIABLE ARROW VLOOP LPAREN expression COMMA VARIABLE COMMA CONDITIONNAME COMMA NUMBER RPAREN
'''
        # Builds a VLoop and returns (loop name, VLoop).
        # Copy the production into a list so the optional conditionlist can be
        # removed and both alternatives share the same indices afterwards.
        t = [p[x] for x in range(len(p))]
        loopindex = t[1]
        if len(p) == 16: # if it's conditional
            conditionlist = [self.conditions[c] for c in t[2].names]
            del t[2]
        else:
            conditionlist = None
        newvar = t[3]
        start = t[7]
        nextname = Variable(t[9])
        condition = t[11]
        maxunroll = t[13]
        # Differential modes always unroll exactly once.
        if self._type in (self.ParserType.DIFFERENTIAL_ASSERT,
                          self.ParserType.DIFFERENTIAL_NEGATE):
            maxunroll = 1
        loop = VLoop(loopindex, newvar, start, nextname, condition, maxunroll, conditions=conditionlist)
        p[0] = (loopindex, loop)
422 | 
    def p_loopend_stmt(self, p):
        'loopend_stmt : LOOPEND'
        # One-element tuple holding the LOOPEND token value, which
        # p_statement_loopend compares with the open loop's name.
        p[0] = (p[1], )
426 | 
427 |     def p_assignment_typed(self, p):
428 |         'assignment : VARIABLE ARROW expression TYPE VARIABLE'
429 |         var = None
430 |         t = p[5]
431 |         if t not in self.loaded_types:
432 |             log.warning(f"Unknown type {t}. Defaulting to untyped assignement")
433 |             return p_assignment_untyped(self, p)
434 | 
435 |         t = self.loaded_types[t]
436 |         if p[1] not in self.variables:
437 |             log.debug(f"New variable found {p[1]} of type {t}")
438 |             var = Variable(p[1], t)
439 |             self.variables[var.name] = var
440 |         else:
441 |             var = self.variables[p[1]]
442 |             if t != var.type:
443 |                 log.warning(f"Variable {var.name} already declared as {var.type}. Cannot convert it as {t}. Leaving it typed as {var.type}.")
444 |         p[0] = Assignment(var, p[3])
445 | 
446 |     def p_assignment_untyped(self, p):
447 |         'assignment : VARIABLE ARROW expression'
448 |         var = None
449 |         if p[1] not in self.variables:
450 |             log.debug(f"New variable found {p[1]}")
451 |             var = Variable(p[1])
452 |             self.variables[var.name] = var
453 |         else:
454 |             var = self.variables[p[1]]
455 |         p[0] = Assignment(var, p[3])
456 | 
    def p_conditionlist(self, p):
        '''conditionlist : LPAREN conditionlistint RPAREN'''
        # Drop the parentheses and keep the inner list.
        p[0] = p[2]
460 | 
    def p_conditionlistint_1(self, p):
        'conditionlistint : conditionlistentry'
        # First entry: start a new ConditionList.
        p[0] = ConditionList([p[1]])
464 | 
    def p_conditionlistint_2(self, p):
        'conditionlistint : conditionlistint COMMA conditionlistentry'
        # Further entries are appended through the list's + operator.
        p[0] = p[1] + p[3]
468 | 
    def p_conditionlistentry_negcondition(self, p):
        'conditionlistentry : EXCLAMATION CONDITIONNAME'
        # '!NAME': entry for condition NAME with the negated flag set.
        p[0] = ConditionListEntry(p[2], True)
472 | 
    def p_conditionlistentry_condition(self, p):
        'conditionlistentry : CONDITIONNAME'
        # Plain entry: only the condition name is passed.
        p[0] = ConditionListEntry(p[1])
476 | 
    def p_condition_terminal(self, p):
        'conditionexpr : expression TERMINATOR'
        # A trailing TERMINATOR marks the condition as terminal (second
        # argument True).
        p[0] = Condition(p[1], True)
480 | 
    def p_condition_normal(self, p):
        'conditionexpr : expression'
        # No TERMINATOR: a non-terminal condition (second argument False).
        p[0] = Condition(p[1], False)
484 | 
485 |     def p_expression_z3operator1(self, p):
486 |         'expression : OPERATOR1 expression'
487 |         p2 = p[2]
488 |         p[0] = Expression(p[1], p2)
489 | 
490 |     def p_expression_z3operator2(self, p):
491 |         'expression : OPERATOR2 expression expression'
492 |         p2 = p[2]
493 |         p3 = p[3]
494 |         p[0] = Expression(p[1], p2, p3)
495 | 
    def p_expression_parens(self, p):
        'expression : LPAREN expression RPAREN'
        # Parentheses only group: forward the inner expression.
        p[0] = p[2]
499 | 
500 |     def p_expression_slice(self, p):
501 |         'expression : expression LBRACKETS expression COMMA expression RBRACKETS'
502 |         p1 = p[1]
503 |         p3 = p[3]
504 |         p5 = p[5]
505 |         p[0] = Expression('Slice', p1, p3, p5)
506 | 
507 |     def p_expression_indexing(self, p):
508 |         'expression : expression LBRACKETS expression RBRACKETS'
509 |         p1 = p[1]
510 |         p3 = p[3]
511 |         p[0] = Expression('Index', p1, p3)
512 | 
    def p_expression_struct_access(self, p):
        'expression : VARIABLE DOT VARIABLE'
        # var.field on a typed variable is rewritten as
        # Slice(var, field offset, field size). Raises ValueError for an
        # unknown variable, an untyped variable or an unknown field.
        varname = p[1]
        if varname not in self.variables:
            log.error(f"Unknown varaible {varname}.")
            raise ValueError
        var = self.variables[p[1]]
        if var.type is None:
            log.error(f"Variable {varname} is untyped. Cannot access sub-field {p[3]}.")
            raise ValueError
        field = p[3]
        if field not in var.type.fields:
            log.error(f"Variable of type {var.type} does not have any field named {field}")
            raise ValueError
        field_off = var.type.offsets[field]
        # The field's size is divided by 8 before being used as slice length.
        # NOTE(review): presumably bits -> bytes, and the offset is used
        # undivided; confirm the units against the type definitions.
        field_size = var.type.fields[field].size // 8
        log.debug(f"Struct access: {var}.{field} --> Slice({var}, {field_off}, {field_size}).")
        p[0] = Expression('Slice', Expression("VAR", var),
                          Expression("IMM", Immediate(field_off)),
                          Expression("IMM", Immediate(field_size)))
533 | 
534 |     def p_expression_sizeof(self, p):
535 |         'expression : SIZEOF VARIABLE'
536 |         typename = p[2]
537 |         if typename not in self.loaded_types:
538 |             raise TypeError(f"Unknown type {typename}")
539 |         size = self.loaded_types[typename].size // 8
540 |         p[0] = Expression("IMM", Immediate(size))
541 | 
542 |     def p_expression_strcmp(self, p):
543 |         'expression : STRCMP expression expression STR'
544 |         s = p[4]
545 |         if len(s) < 2:
546 |             log.error(f"Use STRCMP only for strings longer than 1 character")
547 |             raise ValueError
548 |         inp = p[2]
549 |         start = p[3]
550 |         current = start
551 |         exprs = [Expression("EQ",
552 |                             Expression("Index",
553 |                                        inp,
554 |                                        Expression("ADD",
555 |                                                   start,
556 |                                                   Expression("IMM",
557 |                                                              Immediate(
558 |                                                                  index)))),
559 |                             Expression("IMM", Immediate(ord(char))))
560 |                  for index, char in enumerate(s)]
561 |         ret = Expression("AND", exprs[0], exprs[1])
562 |         for expr in exprs[2:]:
563 |             ret = Expression("AND", ret, expr)
564 |         p[0] = ret
565 | 
566 |     def p_expression_variable(self, p):
567 |         'expression : VARIABLE'
568 |         log.debug("Found variable " + p[1])
569 |         varname = p[1]
570 |         if varname not in self.variables and varname not in self.defines:
571 |             log.critical("Using variable %s before assignement" % varname)
572 |             raise NameError
573 | 
574 |         if varname in self.variables:
575 |             p[0] = Expression("VAR", self.variables[varname])
576 |         else:
577 |             p[0] = self.defines[varname]
578 | 
    def p_expression_number(self, p):
        'expression : NUMBER'
        # Numeric literal wrapped as an immediate expression.
        log.debug("Found NUMBER " + str(p[1]))
        p[0] = Expression("IMM", Immediate(p[1]))
583 | 
    def p_expression_string(self, p):
        'expression : CHAR'
        # Despite the method name, this rule matches a CHAR token; its value
        # is wrapped as an immediate expression.
        p[0] = Expression("IMM", Immediate(p[1]))
587 | 
    def p_expression_bool(self, p):
        'expression : BOOL'
        # Boolean literal wrapped in a BoolImmediate.
        p[0] = Expression("IMM", BoolImmediate(p[1]))
591 | 
592 |     # Error rule for syntax errors
593 |     def p_error(self, p):
594 |         if p is None:
595 |             return
596 |         log.critical("Syntax error in input! %s" % p)
597 |         raise Exception(p)
598 | 
599 |     def __init__(self, pwd="", ptype=ParserType.VALIDATOR, input_size=None,
600 |                  custom_defs=None):
601 |         self.lexer = Lexer()
602 |         self.loaded_types = {}
603 |         self._variables = customdefdict(lambda x: Variable(x))
604 |         self._conditions = {}
605 |         self._defines = {}
606 |         self._block_stack = deque()
607 |         self._statements = []
608 |         self.pwd = pwd
609 |         self._type = ptype
610 |         self._input_size = input_size
611 | 
612 |         if not custom_defs:
613 |             custom_defs = {}
614 |         self._custom_defs = {}
615 |         for var, val in custom_defs.items():
616 |             self._custom_defs[var] = Expression("IMM", Immediate(val))
617 | 
618 |         try:
619 |             self.parser = yacc.yacc(module=self)
620 |         except yacc.YaccError as e:
621 |             log.exception(e)
622 |             sys.exit(1)
623 | 


--------------------------------------------------------------------------------