├── modelLang ├── structures │ ├── __init__.py │ ├── windows_specs.py │ └── headers │ │ ├── linux_kernel.h │ │ └── reactos.h ├── parsers │ ├── __init__.py │ ├── langlex.py │ └── parser.py ├── __init__.py ├── backends │ ├── __init__.py │ ├── default_backend.py │ ├── python_backend.py │ └── z3_backend.py ├── utils.py └── classes.py ├── tests ├── statements │ ├── fromfiletest │ ├── fromfile.lmod │ ├── optimization.lmod │ ├── __init__.py │ ├── conditional_assignment.lmod │ ├── fromfile.py │ ├── conditional_assignment.py │ └── optimization.py ├── functional │ ├── mod2.lmod │ ├── mod1.lmod │ ├── __init__.py │ ├── negativecombo.py │ └── positivecombo.py ├── loops │ ├── __init__.py │ ├── vloop.lmod │ ├── conditionalloop.lmod │ ├── conditionalloop.py │ └── vloop.py ├── operators │ ├── strcmp.lmod │ ├── bitwise.mod │ ├── __init__.py │ ├── alignment.lmod │ ├── algebra.mod │ ├── overflow.lmod │ ├── alignment.py │ ├── strcmp.py │ ├── overflow.py │ ├── bitwise.py │ └── algebra.py ├── emptytest.py └── __init__.py ├── testcases ├── windows │ ├── 7 │ │ ├── win7_printf.exe │ │ ├── win7_ntcreateuserprocess.exe │ │ └── win7_printf.sh │ ├── xp-7 │ │ ├── testcase_0.constraints │ │ ├── testcase_1.constraints │ │ ├── testcase_2.constraints │ │ ├── testcase_0 │ │ ├── testcase_1 │ │ ├── testcase_2 │ │ └── differential.sh │ └── xp │ │ ├── winxp_printf.exe │ │ ├── winxp_kernel_user.exe │ │ ├── winxp_kernel_user.sh │ │ ├── winxp_printf.sh │ │ └── winxp_createprocess.exe ├── reactos_kernel.pe └── linux_kernel32.elf ├── .gitmodules ├── merge_user_kernel.sh ├── tooleval ├── ghidraplugin │ ├── ghidradumpmem.py │ └── ghidraanalysis.sh ├── Makefile ├── idaplugin │ ├── idaanalysis.sh │ ├── idadumpmem.py │ └── idadumpmem.py.asm ├── memdump.proto ├── __init__.py ├── classes.py ├── idaadapter.py ├── ghidraadapter.py ├── winadapter.py ├── common.py └── r2adapter.py ├── .gitignore ├── setup.py ├── requirements.txt ├── verify.py ├── verify_dataset.py ├── generate.py ├── explore_conditions.py ├── README.md 
├── differential.py └── SPECIFICATIONS.md /modelLang/structures/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /modelLang/parsers/__init__.py: -------------------------------------------------------------------------------- 1 | from .parser import * 2 | -------------------------------------------------------------------------------- /tests/statements/fromfiletest: -------------------------------------------------------------------------------- 1 | 133713371337133713371337133713371337 -------------------------------------------------------------------------------- /testcases/windows/xp-7/testcase_0.constraints: -------------------------------------------------------------------------------- 1 | MiCreateImageFileMap.lmod_V20 2 | -------------------------------------------------------------------------------- /testcases/windows/xp-7/testcase_1.constraints: -------------------------------------------------------------------------------- 1 | MiCreateImageFileMap.lmod_V38 2 | -------------------------------------------------------------------------------- /testcases/windows/xp-7/testcase_2.constraints: -------------------------------------------------------------------------------- 1 | MiCreateImageFileMap.lmod_V38 2 | -------------------------------------------------------------------------------- /tests/functional/mod2.lmod: -------------------------------------------------------------------------------- 1 | INPUT variable 4 2 | 3 | V1: NOT ISPOW2 variable term 4 | -------------------------------------------------------------------------------- /modelLang/__init__.py: -------------------------------------------------------------------------------- 1 | from .parsers import Parser 2 | from .backends import PythonBackend, Z3Backend 3 | -------------------------------------------------------------------------------- /.gitmodules: 
-------------------------------------------------------------------------------- 1 | [submodule "models"] 2 | path = models 3 | url = git@github.com:eurecom-s3/loaders-models.git 4 | -------------------------------------------------------------------------------- /tests/loops/__init__.py: -------------------------------------------------------------------------------- 1 | from .vloop import VLoopTest 2 | from .conditionalloop import ConditionalLoopTest 3 | -------------------------------------------------------------------------------- /merge_user_kernel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cat $1 $2 | grep -v "Succe" | awk -F ' ' '{print $1}' | sort | uniq > $3 4 | -------------------------------------------------------------------------------- /modelLang/backends/__init__.py: -------------------------------------------------------------------------------- 1 | from .z3_backend import Z3Backend 2 | from .python_backend import PythonBackend 3 | -------------------------------------------------------------------------------- /testcases/reactos_kernel.pe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eurecom-s3/loaders_modeling/HEAD/testcases/reactos_kernel.pe -------------------------------------------------------------------------------- /testcases/linux_kernel32.elf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eurecom-s3/loaders_modeling/HEAD/testcases/linux_kernel32.elf -------------------------------------------------------------------------------- /tests/functional/mod1.lmod: -------------------------------------------------------------------------------- 1 | INPUT variable 4 2 | 3 | V1: UGT variable 15 term 4 | V2: EQ BITAND variable 0x0000ffff 0 term -------------------------------------------------------------------------------- 
/tests/statements/fromfile.lmod: -------------------------------------------------------------------------------- 1 | INPUT file 15 2 | 3 | P: start <- INT 5 4 4 | 5 | FROMFILE fromfiletest file start 0 10 6 | -------------------------------------------------------------------------------- /testcases/windows/7/win7_printf.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eurecom-s3/loaders_modeling/HEAD/testcases/windows/7/win7_printf.exe -------------------------------------------------------------------------------- /testcases/windows/xp-7/testcase_0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eurecom-s3/loaders_modeling/HEAD/testcases/windows/xp-7/testcase_0 -------------------------------------------------------------------------------- /testcases/windows/xp-7/testcase_1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eurecom-s3/loaders_modeling/HEAD/testcases/windows/xp-7/testcase_1 -------------------------------------------------------------------------------- /testcases/windows/xp-7/testcase_2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eurecom-s3/loaders_modeling/HEAD/testcases/windows/xp-7/testcase_2 -------------------------------------------------------------------------------- /testcases/windows/xp/winxp_printf.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eurecom-s3/loaders_modeling/HEAD/testcases/windows/xp/winxp_printf.exe -------------------------------------------------------------------------------- /tests/functional/__init__.py: -------------------------------------------------------------------------------- 1 | from .positivecombo import PositiveCombinationTest 2 | from .negativecombo import 
NegativeCombinationTest 3 | -------------------------------------------------------------------------------- /tests/operators/strcmp.lmod: -------------------------------------------------------------------------------- 1 | INPUT inp 10 2 | 3 | P: start <- INT 2 4 4 | P: out <- inp[2, 7] 5 | V1: STRCMP inp start '\x00FOOBAR' term -------------------------------------------------------------------------------- /testcases/windows/xp/winxp_kernel_user.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eurecom-s3/loaders_modeling/HEAD/testcases/windows/xp/winxp_kernel_user.exe -------------------------------------------------------------------------------- /tests/emptytest.py: -------------------------------------------------------------------------------- 1 | class Test(): 2 | testfile = "" 3 | 4 | @staticmethod 5 | def run(): 6 | raise NotImplementedError 7 | 8 | -------------------------------------------------------------------------------- /testcases/windows/7/win7_ntcreateuserprocess.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eurecom-s3/loaders_modeling/HEAD/testcases/windows/7/win7_ntcreateuserprocess.exe -------------------------------------------------------------------------------- /tests/statements/optimization.lmod: -------------------------------------------------------------------------------- 1 | INPUT var1 4 2 | INPUT var2 4 3 | 4 | V1: UGE var1 10 term 5 | V2: ULE var2 10 term 6 | 7 | MINIMIZE var1 8 | MAXIMIZE var2 9 | -------------------------------------------------------------------------------- /tests/operators/bitwise.mod: -------------------------------------------------------------------------------- 1 | INPUT VARIABLE 4 2 | 3 | P: VARA <- BITAND VARIABLE 0x01011001 4 | P: VARB <- BITOR VARIABLE 0x01011001 5 | P: VARC <- BITNOT VARIABLE 6 | 
-------------------------------------------------------------------------------- /tests/statements/__init__.py: -------------------------------------------------------------------------------- 1 | from .fromfile import FromFileTest 2 | from .optimization import OptimizationTest 3 | from .conditional_assignment import ConditionalAssignmentTest 4 | -------------------------------------------------------------------------------- /tooleval/ghidraplugin/ghidradumpmem.py: -------------------------------------------------------------------------------- 1 | prog = getCurrentProgram() 2 | name = prog.getName() 3 | 4 | print(name) 5 | 6 | fname = "/tmp/ghidradumps/" + name 7 | 8 | dumpMemory(fname) 9 | -------------------------------------------------------------------------------- /tests/statements/conditional_assignment.lmod: -------------------------------------------------------------------------------- 1 | INPUT inputvar 4 2 | 3 | V1: EQ inputvar 0x1234 term 4 | P: outvar <- inputvar 5 | 6 | V2: ULE inputvar 0x1 7 | 8 | P(V2): outvar <- ADD inputvar 1 -------------------------------------------------------------------------------- /tests/operators/__init__.py: -------------------------------------------------------------------------------- 1 | from .bitwise import BitwiseTest 2 | from .algebra import AlgebraTest 3 | from .alignment import AlignmentTest 4 | from .strcmp import StringCompareTest 5 | from .overflow import OverflowTest 6 | -------------------------------------------------------------------------------- /tests/operators/alignment.lmod: -------------------------------------------------------------------------------- 1 | INPUT VARIABLE 4 2 | 3 | P: alup <- ALIGNUP VARIABLE 0x1000 4 | P: aldown <- ALIGNDOWN VARIABLE 0x1000 5 | V1: EQ VARIABLE 4095 term 6 | V2: EQ alup 4096 term 7 | V3: EQ aldown 0 term 8 | -------------------------------------------------------------------------------- /tests/operators/algebra.mod: 
-------------------------------------------------------------------------------- 1 | INPUT VARIABLE 4 2 | 3 | P: VARA <- Add VARIABLE 5 4 | P: VARB <- Sub VARIABLE 5 5 | P: VARC <- Mul VARIABLE 5 6 | P: VARD <- DIV VARIABLE 5 7 | P: VARE <- UDiv VARIABLE 5 8 | P: VARF <- MOD VARIABLE 5 9 | -------------------------------------------------------------------------------- /modelLang/utils.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | 3 | class customdefdict(defaultdict): 4 | def __missing__(self, key): 5 | if self.default_factory is None: 6 | raise KeyError(key) 7 | item = self.default_factory(key) 8 | self[key] = item 9 | return item 10 | -------------------------------------------------------------------------------- /tooleval/Makefile: -------------------------------------------------------------------------------- 1 | all: memdump.pb.cc memdump_pb2.py ghidra/program/flatapi/Memdump.java 2 | 3 | memdump.pb.cc: memdump.proto 4 | protoc $< --cpp_out=. 5 | 6 | memdump_pb2.py: memdump.proto 7 | protoc $< --python_out=. 8 | 9 | ghidra/program/flatapi/Memdump.java: memdump.proto 10 | protoc $< --java_out=. 
11 | 12 | .PHONY: all 13 | -------------------------------------------------------------------------------- /testcases/windows/xp/winxp_kernel_user.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | git checkout 566e3b2b89e5d63e6e15e26ce2a79c271005a270 4 | git submodule update --recursive 5 | 6 | python3 ./generate.py -A models/windows/xp/MiCreateImageFileMap.lmod models/windows/xp/LdrpInitializeProcess.lmod models/windows/generic/return0.lmod models/windows/generic/reasonable_stack.lmod 7 | -------------------------------------------------------------------------------- /testcases/windows/7/win7_printf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | git checkout 5e61eef93ee6031e3854b5c3bede96891b292ec9 4 | git submodule update --recursive 5 | 6 | ipython -i ./generate.py -- -A models/windows/7/MiCreateImageFileMap.lmod models/windows/7/LdrpInitializeProcess.lmod models/windows/generic/reasonable_stack.lmod models/windows/generic/printf_import.lmod 7 | -------------------------------------------------------------------------------- /testcases/windows/xp/winxp_printf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | git checkout 44790e0ac5de2569bb583361ba5559a3a89d397c 4 | git submodule update --recursive 5 | 6 | ipython -i ./generate.py -- -A models/windows/xp/MiCreateImageFileMap.lmod models/windows/xp/LdrpInitializeProcess.lmod models/windows/generic/printf.lmod models/windows/generic/not_managed.lmod models/windows/generic/reasonable_stack.lmod 7 | -------------------------------------------------------------------------------- /tests/operators/overflow.lmod: -------------------------------------------------------------------------------- 1 | INPUT VAR1 4 2 | INPUT VAR2 4 3 | INPUT VAR3 4 4 | 5 | INPUT VAR4 4 6 | INPUT VAR5 4 7 | 8 | V1: EQ VAR1 0x80000000 term 9 | V2: EQ VAR2 0x80000000 
term 10 | V3: EQ VAR3 0x1000 term 11 | 12 | V4: OVFLADD VAR1 VAR2 term 13 | V5: OVFLADD VAR2 VAR1 term 14 | V6: NOT OVFLADD VAR1 VAR3 term 15 | V7: NOT OVFLADD VAR2 VAR3 term 16 | 17 | V8: OVFLADD VAR4 VAR5 term -------------------------------------------------------------------------------- /tooleval/idaplugin/idaanalysis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | IDAPATH=${1:-/home/dario/ida-7.1/idat} 4 | INDIR=${2:-/home/dario/phd/loaders_modeling/lang_parser/allcombo} 5 | OUTDIR=${3:-/tmp/idaalldumps} 6 | 7 | mkdir -p $OUTDIR 8 | 9 | TESTCASES=$(ls $INDIR) 10 | for f in $TESTCASES 11 | do 12 | if [[ $f == *"cond"* ]]; then 13 | continue 14 | fi; 15 | ARG="idadumpmem.py $OUTDIR" 16 | $IDAPATH -A -B -c "-S$ARG" $INDIR/$f 17 | done; 18 | -------------------------------------------------------------------------------- /tooleval/memdump.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | option java_package = "ghidra.program.flatapi"; 4 | 5 | message MemoryRegion { 6 | required uint32 vaddr = 1; 7 | required uint32 vsize = 2; 8 | required bytes content = 3; 9 | optional string name = 4; 10 | optional string permission = 5; 11 | optional uint32 faddr = 6; 12 | optional uint32 fsize = 7; 13 | } 14 | 15 | message MemoryDump { 16 | repeated MemoryRegion regions = 1; 17 | } -------------------------------------------------------------------------------- /testcases/windows/xp-7/differential.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | git checkout e13f7b46e1a9b1c1f0c5f5648f0d2910904d3b7d 4 | git submodule update --recursive 5 | 6 | ipython -i ./differential.py -- -A models/windows/xp/MiCreateImageFileMap.lmod models/windows/xp/LdrpInitializeProcess.lmod models/windows/generic/not_managed.lmod models/windows/generic/return0.lmod models/windows/generic/reasonable_stack.lmod -N 
models/windows/7/MiCreateImageFileMap.lmod -O xp-7/testcase 7 | -------------------------------------------------------------------------------- /tooleval/__init__.py: -------------------------------------------------------------------------------- 1 | from .common import byteat, NOTDUMPED, NOTFOUND, coalesceregions, FailedRelocExcetion 2 | from .memdump_pb2 import MemoryRegion, MemoryDump 3 | from .r2adapter import Radare2Adapter 4 | from .winadapter import WindowsAdapter 5 | from .ghidraadapter import GhidraAdapter 6 | from .idaadapter import IDAAdapter 7 | TOOLADAPTERS = {'radare2': Radare2Adapter, 8 | 'ghidra': GhidraAdapter, 9 | 'ida': IDAAdapter} 10 | -------------------------------------------------------------------------------- /tests/loops/vloop.lmod: -------------------------------------------------------------------------------- 1 | DEFINE SIZE 20 2 | INPUT HEADER SIZE 3 | 4 | P: startingOffset1 <- INT 0 4 5 | V1: ULT startingOffset1 1 6 | L1(V1): currentOffset <- VLOOP(startingOffset1, next, V99, 20) 7 | V1: Eq HEADER[currentOffset] currentOffset term 8 | P: next <- Add currentOffset 1 9 | V99: ULE currentOffset 0xf 10 | END L1 11 | 12 | P: startingOffset2 <- INT 0 4 13 | L2(!V1): currentOffset2 <- VLOOP(startingOffset2, next, V98, 20) 14 | V1: Eq HEADER[currentOffset2] ADD currentOffset2 1 term 15 | P: next <- Add currentOffset2 1 16 | V98: ULE currentOffset2 0xf 17 | END L2 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.o 3 | *.so 4 | *.a 5 | .gdb_history 6 | *.i64 7 | *.idb 8 | *.id0 9 | *.id1 10 | *.id2 11 | *.nam 12 | *.til 13 | *.swp 14 | *.dll 15 | *.obj 16 | *.lib 17 | *.exp 18 | *.pdb 19 | *.ilk 20 | angr/tests/*.png 21 | screenlog.0 22 | angr/tests/screenlog.0 23 | angr/screenlog.0 24 | .idea 25 | *.egg-info 26 | /build 27 | /tags 28 | MANIFEST 29 | dist 30 | .eggs 31 | .vscode/ 32 | *~ 33 | *.db 
34 | *.out 35 | parsetab.py 36 | calclex.py 37 | example.py 38 | test 39 | testcase 40 | Memdump.java 41 | memdump_pb2.py 42 | memdump.pb.h 43 | memdump.pb.cc 44 | ghidra/ 45 | *.tex 46 | *.pdf 47 | *.log 48 | *.aux 49 | *# -------------------------------------------------------------------------------- /tests/loops/conditionalloop.lmod: -------------------------------------------------------------------------------- 1 | DEFINE SIZE 15 2 | DEFINE STRUCTSIZE 1 3 | DEFINE MAXITERATION 10 4 | 5 | INPUT HEADER SIZE 6 | P: enableLoop1 <- HEADER[0] 7 | V1: NEQ enableLoop1 0 8 | V99: EQ enableLoop1 1 term 9 | 10 | 11 | ## This loop will be executed 12 | P: start <- INT 4 4 13 | P: count <- INT 4 4 14 | L1(V1): var1 <- LOOP(HEADER, start, 1, count, 4) 15 | V3: Eq var1 1 term 16 | END L1 17 | 18 | 19 | ## This loop won't be executed 20 | P: enableLoop2 <- HEADER[1] 21 | V2: NEQ enableLoop2 0 22 | V98: EQ enableLoop2 0 term 23 | L2(V2): var2 <- LOOP(HEADER, start, 1, count, 4) 24 | V4: Eq var2 2 term 25 | END L2 26 | 27 | -------------------------------------------------------------------------------- /tooleval/ghidraplugin/ghidraanalysis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | GHIDRAPATH=${1:-/home/dario/tools/ghidra/build/dist/ghidra_9.2_DEV/support/analyzeHeadless} 4 | GHIDRAPROJ=${2:-/home/dario/phd/loaders_modeling/ghidraproj} 5 | INDIR=${3:-/home/dario/phd/loaders_modeling/lang_parser/allcombo} 6 | OUTDIR=${4:-/tmp/ghidraalldumps} 7 | 8 | mkdir -p $OUTDIR 9 | 10 | rm -r $GHIDRAPROJ 11 | mkdir $GHIDRAPROJ 12 | 13 | TESTCASES=$(ls $INDIR) 14 | for f in $TESTCASES 15 | do 16 | if [[ $f == *"cond"* ]]; then 17 | continue 18 | fi; 19 | $GHIDRAPATH $GHIDRAPROJ ghidratest -loader PeLoader -postscript ghidradumpmem.py -import $INDIR/$f 20 | done; 21 | -------------------------------------------------------------------------------- /tooleval/classes.py: 
-------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | 3 | Entry = namedtuple('Entry', ['name', 'size', 'vsize', 4 | 'perm', 'paddr', 'vaddr']) 5 | class MemoryMap(list): 6 | def __init__(self): 7 | super().__init__(self) 8 | 9 | def append(self, obj): 10 | if type(obj) != Entry: 11 | raise TypeError 12 | super().append(obj) 13 | 14 | class MemoryDump(dict): 15 | def __init__(self): 16 | super().__init__(self) 17 | 18 | def __setitem__(self, key, value): 19 | if type(key) != Entry: 20 | raise TypeError 21 | super().__setitem__(key, value) 22 | -------------------------------------------------------------------------------- /tests/operators/alignment.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import z3 3 | import logging 4 | 5 | from modelLang import parsers, backends 6 | 7 | from modelLang.parsers import Parser 8 | from modelLang.backends import Z3Backend 9 | 10 | class AlignmentTest(): 11 | testfile = "tests/operators/alignment.lmod" 12 | 13 | @staticmethod 14 | def run(): 15 | parser = Parser() 16 | parser.parse_file(AlignmentTest.testfile) 17 | backend = Z3Backend() 18 | backend.log.setLevel(logging.ERROR) 19 | backend.exec_statements(parser.statements) 20 | solver = backend.generate_solver() 21 | assert(solver.check() == z3.sat) 22 | 23 | if __name__ == "__main__": 24 | AlignmentTest.run() 25 | -------------------------------------------------------------------------------- /tooleval/idaadapter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from tooleval import MemoryRegion, MemoryDump 4 | 5 | class IDAAdapter(object): 6 | uninbyte = 0x0 7 | def __init__(self, path): 8 | self._file = open(path, 'rb') 9 | self._memdump = None 10 | 11 | def close(self): 12 | self._file.close() 13 | 14 | def load(self): 15 | self._memdump = MemoryDump() 16 | 
self._memdump.ParseFromString(self._file.read()) 17 | 18 | @property 19 | def memdump(self): 20 | if not self._memdump: 21 | self.load() 22 | return self._memdump 23 | 24 | if __name__ == "__main__": 25 | adapter = IDAAdapter("/tmp/idadumps/testcase_35") 26 | dump = adapter.memdump 27 | print(dump) 28 | -------------------------------------------------------------------------------- /tooleval/ghidraadapter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from tooleval import MemoryRegion, MemoryDump 4 | 5 | class GhidraAdapter(object): 6 | uninbyte = 0x00 7 | def __init__(self, path): 8 | self._file = open(path, 'rb') 9 | self._memdump = None 10 | 11 | def close(self): 12 | self._file.close() 13 | 14 | def load(self): 15 | self._memdump = MemoryDump() 16 | self._memdump.ParseFromString(self._file.read()) 17 | 18 | @property 19 | def memdump(self): 20 | if not self._memdump: 21 | self.load() 22 | return self._memdump 23 | 24 | if __name__ == "__main__": 25 | adapter = GhidraAdapter("/tmp/ghidradumps/testcase_35") 26 | dump = adapter.memdump 27 | print(dump) 28 | -------------------------------------------------------------------------------- /tooleval/winadapter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from tooleval import MemoryRegion, MemoryDump 4 | 5 | class WindowsAdapter(object): 6 | def __init__(self, path): 7 | self._file = open(path, 'rb') 8 | self._memdump = None 9 | 10 | def close(self): 11 | self._file.close() 12 | 13 | def load(self): 14 | self._memdump = MemoryDump() 15 | self._memdump.ParseFromString(self._file.read()) 16 | 17 | @property 18 | def memdump(self): 19 | if not self._memdump: 20 | self.load() 21 | return self._memdump 22 | 23 | if __name__ == "__main__": 24 | adapter = WindowsAdapter("/home/dario/VirtualBox VMs/winxp/shared/mydumps/testcase_35.exe.dump") 25 | dump = adapter.memdump 26 | 
print(dump) 27 | 28 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='modelLang', 5 | version='0.1', 6 | packages=['modelLang', 7 | 'modelLang/backends', 8 | 'modelLang/parsers', 9 | 'modelLang/structures', 10 | ], 11 | install_requires=[ 12 | 'coloredlogs==10.0', 13 | 'ply==3.11', 14 | 'pwntools==4.0.1', 15 | 'pycparser==2.19', 16 | 'z3==0.2.0', 17 | 'z3-solver==4.8.7.0', 18 | 'pefile==2019.4.18', 19 | ], 20 | maintainer='Dario Nisi', 21 | maintainer_email='dario.nisi@eurecom.fr' 22 | ) 23 | 24 | setup( 25 | name='tooleval', 26 | version='0.1', 27 | packages=['tooleval', 28 | ], 29 | maintainer='Dario Nisi', 30 | maintainer_email='dario.nisi@eurecom.fr' 31 | ) 32 | -------------------------------------------------------------------------------- /tests/operators/strcmp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import z3 3 | import logging 4 | 5 | from modelLang import parsers, backends 6 | 7 | from modelLang.parsers import Parser 8 | from modelLang.backends import Z3Backend 9 | 10 | class StringCompareTest(): 11 | testfile = "tests/operators/strcmp.lmod" 12 | 13 | @staticmethod 14 | def run(): 15 | parser = Parser() 16 | parser.parse_file(StringCompareTest.testfile) 17 | backend = Z3Backend() 18 | backend.log.setLevel(logging.ERROR) 19 | backend.exec_statements(parser.statements) 20 | solver = backend.generate_solver() 21 | assert(solver.check() == z3.sat) 22 | model = solver.model() 23 | outvar = backend.variables['out'] 24 | assert(model.eval(outvar).as_long() == 0x5241424f4f4600) 25 | 26 | if __name__ == "__main__": 27 | StringCompareTest.run() 28 | -------------------------------------------------------------------------------- /tests/loops/conditionalloop.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import logging 3 | import z3 4 | 5 | from modelLang import parsers, backends 6 | 7 | from modelLang.parsers import Parser 8 | from modelLang.backends import Z3Backend 9 | 10 | class ConditionalLoopTest(): 11 | testfile = "tests/loops/conditionalloop.lmod" 12 | 13 | @staticmethod 14 | def run(): 15 | parser = Parser() 16 | parser.parse_file(ConditionalLoopTest.testfile) 17 | 18 | backend = Z3Backend() 19 | backend.log.setLevel(logging.ERROR) 20 | backend.exec_statements(parser.statements) 21 | solver = backend.solver 22 | model = backend.model 23 | 24 | assert model, "Model unsat. Test failed" 25 | 26 | testcase = backend.generate_testcase() 27 | expected = b'\x01' * 4 28 | assert(testcase[4:8] == expected) 29 | 30 | if __name__ == "__main__": 31 | ConditionalLoopTest.run() 32 | -------------------------------------------------------------------------------- /tests/operators/overflow.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import logging 4 | import z3 5 | 6 | from modelLang import parsers, backends 7 | 8 | from modelLang.parsers import Parser 9 | from modelLang.backends import Z3Backend 10 | 11 | class OverflowTest(): 12 | testfile = "tests/operators/overflow.lmod" 13 | 14 | @staticmethod 15 | def run(): 16 | parser = Parser() 17 | parser.parse_file(OverflowTest.testfile) 18 | backend = Z3Backend() 19 | backend.log.setLevel(logging.ERROR) 20 | backend.exec_statements(parser.statements) 21 | solver = backend.generate_solver() 22 | variables = backend.variables 23 | 24 | ### Check sat 25 | assert(backend.model) 26 | 27 | model = backend.model 28 | var4 = model.eval(variables['VAR4']).as_long() 29 | var5 = model.eval(variables['VAR5']).as_long() 30 | assert(var4 + var5 >= 0x100000000) 31 | -------------------------------------------------------------------------------- 
/tests/statements/fromfile.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import logging 4 | import os.path 5 | 6 | from modelLang import parsers, backends 7 | 8 | from modelLang.parsers import Parser 9 | from modelLang.backends import Z3Backend 10 | class FromFileTest(): 11 | testfile = "tests/statements/fromfile.lmod" 12 | 13 | @staticmethod 14 | def run(): 15 | parser = Parser(pwd=os.path.dirname(os.path.realpath(__file__))) 16 | parser.parse_file(FromFileTest.testfile) 17 | 18 | backend = Z3Backend() 19 | backend.log.setLevel(logging.ERROR) 20 | backend.exec_statements(parser.statements) 21 | solver = backend.solver 22 | model = backend.model 23 | 24 | assert model, "Model unsat. Test failed" 25 | 26 | testcase = backend.generate_testcase(varname="file") 27 | assert(testcase[5:5+10] == b"1337133713") 28 | 29 | return True 30 | 31 | if __name__ == "__main__": 32 | FromFileTest.run() 33 | -------------------------------------------------------------------------------- /tests/loops/vloop.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import logging 3 | import z3 4 | 5 | from modelLang import parsers, backends 6 | 7 | from modelLang.parsers import Parser 8 | from modelLang.backends import Z3Backend 9 | 10 | class VLoopTest(): 11 | testfile = "tests/loops/vloop.lmod" 12 | 13 | @staticmethod 14 | def run(): 15 | parser = Parser() 16 | parser.parse_file(VLoopTest.testfile) 17 | 18 | backend = Z3Backend() 19 | backend.log.setLevel(logging.ERROR) 20 | backend.exec_statements(parser.statements) 21 | solver = backend.solver 22 | model = backend.model 23 | 24 | assert model, "Model unsat. 
# Sentinels returned by the lookup helpers below instead of a real value.
NOTDUMPED = "NOTDUMPED"  # address is inside a region, but past its dumped content
NOTFOUND = "NOTFOUND"    # address is not inside any region


class FailedRelocExcetion(Exception):
    """Raised by the adapters when a region lacks virtual address information.

    The misspelled name is part of the public interface and is kept as is.
    """


def byteat(memdump, addr):
    """Return the dumped byte stored at virtual address ``addr``.

    The first region of ``memdump.regions`` whose virtual range
    ``[vaddr, vaddr + vsize)`` contains ``addr`` decides the result: the
    content byte at that offset, or NOTDUMPED when the region content is
    shorter than the offset. NOTFOUND is returned when no region matches.
    """
    for region in memdump.regions:
        offset = addr - region.vaddr
        if not 0 <= offset < region.vsize:
            continue
        if offset >= len(region.content):
            return NOTDUMPED
        return region.content[offset]
    return NOTFOUND


def permissionsat(memdump, addr):
    """Return the permission of the first region containing ``addr``.

    NOTFOUND is returned when ``addr`` lies outside every region.
    """
    matching = (region.permission
                for region in memdump.regions
                if region.vaddr <= addr < region.vaddr + region.vsize)
    return next(matching, NOTFOUND)


def coalesceregions(memdump):
    """Merge consecutive regions that are adjacent in virtual memory.

    Regions are visited in the order they appear in ``memdump.regions``; a
    region is folded into the previous entry when it starts exactly where
    the previous region ended. Returns a list of ``(vaddr, vsize)`` tuples.
    """
    merged = []
    next_addr = -1  # end address of the previously visited region
    for region in memdump.regions:
        start, size = region.vaddr, region.vsize
        if start == next_addr:
            base, length = merged[-1]
            merged[-1] = (base, length + size)
        else:
            merged.append((start, size))
        next_addr = start + size
    return merged
class OptimizationTest():
    """Regression test for the model stored in ``optimization.lmod``.

    After executing the model on the Z3 backend, the values generated for
    ``var1`` and ``var2`` must both decode (little endian) to 10.
    """
    testfile = "tests/statements/optimization.lmod"

    @staticmethod
    def run():
        """Run the test; return True on success, raise AssertionError otherwise."""
        here = os.path.dirname(os.path.realpath(__file__))
        parser = Parser(pwd=here)
        parser.parse_file(OptimizationTest.testfile)

        backend = Z3Backend()
        backend.log.setLevel(logging.ERROR)
        backend.exec_statements(parser.statements)
        # NOTE(review): solver is read before model as in the sibling tests --
        # presumably required by the backend; confirm against Z3Backend.
        solver = backend.solver
        model = backend.model

        assert model, "Model unsat. Test failed"

        # Generate both testcases first, then decode them, in that order.
        blobs = [backend.generate_testcase(varname=name)
                 for name in ("var1", "var2")]
        var1, var2 = [unpack(blob, 'all', endianness="little")
                      for blob in blobs]
        assert var1 == var2 == 10

        return True


if __name__ == "__main__":
    OptimizationTest.run()
class AlgebraTest():
    """Regression test for the arithmetic model in ``algebra.mod``.

    The model input ``VARIABLE`` is pinned to 10 and the six variables
    ``VARA`` .. ``VARF`` are checked against their expected values.
    """
    testfile = "tests/operators/algebra.mod"

    @staticmethod
    def run():
        """Run the test; raise AssertionError when a value does not match."""
        parser = Parser()
        parser.parse_file(AlgebraTest.testfile)
        backend = Z3Backend()
        backend.log.setLevel(logging.ERROR)
        backend.exec_statements(parser.statements)
        solver = backend.generate_solver()
        symbols = backend.variables
        source = symbols['VARIABLE']

        # Variable name -> value expected in the model once VARIABLE == 10.
        expected = (
            ('VARA', 15),
            ('VARB', 5),
            ('VARC', 50),
            ('VARD', 2),
            ('VARE', 2),
            ('VARF', 0),
        )
        # Resolve every variable up front so a missing one fails before
        # the solver is touched.
        checks = [(symbols[name], value) for name, value in expected]

        solver.add(source == 10)
        assert solver.check() == z3.sat
        model = solver.model()
        for symbol, value in checks:
            assert model.eval(symbol).as_long() == value
class Radare2Adapter(object):
    """Build a ``MemoryDump`` of a binary as seen by radare2 (via r2pipe)."""

    uninbyte = 0xff
    # Upper bound on the number of content bytes dumped for each region.
    maxcontent = 0x10000

    def __init__(self, path):
        """Open ``path`` in a new radare2 instance; the dump is built lazily."""
        self._instance = Radare2Adapter.createR2Instance(path)
        self._memdump = None

    def close(self):
        """Terminate the underlying radare2 instance."""
        self._instance.quit()

    @staticmethod
    def createR2Instance(path):
        """Return an r2pipe handle opened on ``path``."""
        return r2pipe.open(path)

    @property
    def memdump(self):
        """Memory dump of the opened binary, computed once and then cached.

        One region is added per entry returned by the ``iSj`` command
        (presumably the section list as JSON -- confirm against the radare2
        documentation). At most ``maxcontent`` bytes of content are read for
        each region.

        Raises:
            FailedRelocExcetion: when the virtual size/address of an entry
                cannot be stored in the region. The original error is
                chained as the cause.
        """
        if self._memdump is not None:
            return self._memdump

        # Build into a local object: a failure halfway through must not
        # leave a truncated dump cached for later accesses.
        dump = MemoryDump()
        for region in self._instance.cmdj("iSj"):
            mregion = dump.regions.add()
            mregion.name = region['name']
            mregion.fsize = region['size']
            try:
                mregion.vsize = region['vsize']
                mregion.vaddr = region['vaddr']
            except Exception as err:
                # Exception rather than a bare except, so KeyboardInterrupt
                # and SystemExit are no longer turned into a reloc failure.
                raise FailedRelocExcetion from err
            mregion.permission = region['perm']
            mregion.faddr = region['paddr']
            # Seek to the start of the region, then read its bytes.
            self._instance.cmd(f"s {hex(region['vaddr'])}")
            contentsize = min(mregion.vsize, self.maxcontent)
            mregion.content = bytes(
                self._instance.cmdj(f"pxj {hex(contentsize)}"))

        self._memdump = dump
        return dump
class PositiveCombinationTest():
    """Regression test for the conjunction of two models.

    ``mod1.lmod`` and ``mod2.lmod`` are executed on separate Z3 backends,
    the backends are combined with ``&`` and the value generated for
    ``variable`` is checked against three arithmetic properties.
    """
    testfile1 = "tests/functional/mod1.lmod"
    testfile2 = "tests/functional/mod2.lmod"

    @staticmethod
    def run():
        """Run the test; return True on success, raise AssertionError otherwise."""

        def load(modelpath):
            # Parse one model and execute it on its own quiet backend.
            parser = Parser()
            parser.parse_file(modelpath)
            loaded = Z3Backend(name=modelpath, voi="variable")
            loaded.log.setLevel(logging.ERROR)
            loaded.exec_statements(parser.statements)
            return loaded

        first = load(PositiveCombinationTest.testfile1)
        second = load(PositiveCombinationTest.testfile2)

        backend = first & second
        backend.log.setLevel(logging.ERROR)
        # NOTE(review): solver is read before model as in the sibling tests --
        # presumably required by the backend; confirm against Z3Backend.
        solver = backend.solver
        model = backend.model

        assert model, "Model unsat. Test failed"

        value = unpack(backend.generate_testcase("variable"), 'all')
        assert value > 15
        assert value & 0xffff == 0
        # Not a power of two: more than one bit must be set.
        assert value & (value - 1) != 0
        return True


if __name__ == "__main__":
    PositiveCombinationTest.run()
if __name__ == "__main__":
    # Command line entry point: check one executable against one loader
    # model. Logs PASS and exits 0 when the backend verifies the file,
    # logs FAIL and exits 1 otherwise.
    cli = ArgumentParser(description='Evaluate model precision')
    cli.add_argument('model', type=str, help='Loader model')
    cli.add_argument('executable', type=str, help='File to verify')
    cli.add_argument('--logLevel', '-l', type=str, default=None,
                     help="Log verbosity")
    cli.add_argument('--disable-log', '-D', default=False,
                     action='store_true', help="Disable logging")
    cli.add_argument('--z3-backend', '-Z', default=False,
                     action="store_true", help="Enable z3 backend")

    args = cli.parse_args()
    # The Python backend is the default; -Z switches to the Z3 backend.
    engine = Z3Backend if args.z3_backend else PythonBackend

    if args.logLevel:
        logging.getLogger().setLevel(args.logLevel)

    with open(args.executable, "rb") as stream:
        content = stream.read()

    # The model can refer to the size of the file through FILESIZE.
    parser = Parser(ptype=Parser.ParserType.VALIDATOR,
                    custom_defs={"FILESIZE" : len(content)})
    parser.parse_file(args.model)

    backend = engine()
    if args.disable_log:
        # 100 is above every standard logging level.
        backend.log.setLevel(100)
    backend.load_statements(parser.statements)

    passed = backend.verify(content)
    log.info("PASS" if passed else "FAIL")
    sys.exit(0 if passed else 1)
def verify(modelfile, executable, backend_cls=None, disable_log=None):
    """Check one sample against a loader model.

    Args:
        modelfile: path of the model, passed to ``Parser.parse_file``.
        executable: ``pathlib.Path`` of the sample to verify.
        backend_cls: backend class to instantiate. When omitted, the
            module-level ``engine`` is used (original behaviour).
        disable_log: silence the backend logger when true. When omitted,
            the module-level ``args.disable_log`` is used (original
            behaviour).

    Returns:
        ``(executable, None)`` when the sample passes, otherwise
        ``(executable, backend._last_fail)``.
    """
    # Passing these explicitly keeps worker processes independent from
    # globals that only exist in the parent's ``__main__`` block (they are
    # not defined in workers started with the "spawn" method).
    if backend_cls is None:
        backend_cls = engine
    if disable_log is None:
        disable_log = args.disable_log

    with executable.open("rb") as fp:
        content = fp.read()

    # The model can refer to the size of the file through FILESIZE.
    parser = Parser(ptype=Parser.ParserType.VALIDATOR,
                    custom_defs={"FILESIZE" : len(content)})
    parser.parse_file(modelfile)
    backend = backend_cls()

    if disable_log:
        # 100 is above every standard logging level.
        backend.log.setLevel(100)
    backend.load_statements(parser.statements)
    if backend.verify(content):
        return (executable, None)
    return (executable, backend._last_fail)


if __name__ == "__main__":
    # Command line entry point: verify every file of a directory in
    # parallel and write a summary to the output file.
    argpar = ArgumentParser(description='Evaluate model precision')
    argpar.add_argument('model', type=str, help='Loader model')
    argpar.add_argument('directory', type=str,
                        help='Path to dataset to verify')
    argpar.add_argument('output', type=str,
                        help='Path to output')
    argpar.add_argument('--logLevel', '-l', type=str, default=None,
                        help="Log verbosity")
    argpar.add_argument('--disable-log', '-D', default=False,
                        action='store_true', help="Disable logging")
    argpar.add_argument('--z3-backend', '-Z', default=False,
                        action="store_true", help="Enable z3 backend")

    args = argpar.parse_args()
    engine = PythonBackend
    if args.z3_backend:
        engine = Z3Backend

    modelfile = args.model
    directory = Path(args.directory)

    if not directory.is_dir():
        # The message used to lack its subject and misspelled "directory".
        log.error(f"{directory} must be a directory")
        sys.exit(-1)

    samples = list(directory.iterdir())

    if args.logLevel:
        logging.getLogger().setLevel(args.logLevel)

    worker = partial(verify, modelfile,
                     backend_cls=engine, disable_log=args.disable_log)
    # The context manager terminates the pool on exit, also on errors; all
    # results are consumed inside the block.
    with Pool() as pool:
        results = {x.name: y
                   for x, y in progressbar(pool.imap(worker, samples),
                                           max_value=len(samples))}

    # A sample succeeded when no failure was recorded for it.
    success = sum(1 for x in results.values() if not x)
    with open(args.output, "w") as fp:
        fp.write(f"Success: {success}\n")
        for n, c in results.items():
            if not c:
                continue
            fp.write(f"{n} {c}\n")
# Windows type aliases expressed through the basic C types declared above.
# Widths and signedness follow the Windows data type definitions; `long` is
# 32 bits wide in DEFAULT_SIZES.
OTHER_TYPES = {
    '__int64' : BASIC_TYPES['long long'],
    'BYTE' : BASIC_TYPES['unsigned char'],
    'CHAR' : BASIC_TYPES['char'],
    # DWORD is an unsigned 32-bit integer (was mapped to signed `long`).
    'DWORD' : BASIC_TYPES['unsigned long'],
    # Unsigned 32-bit integer (was mapped to signed `int`).
    'DWORD32' : BASIC_TYPES['unsigned int'],
    # Unsigned 64-bit integer (was mapped to the 32-bit signed `long`).
    'DWORD64' : BASIC_TYPES['unsigned long long'],
    'INT' : BASIC_TYPES['int'],
    # 8-bit signed integer (was mapped to the 32-bit `int`).
    'INT8' : BASIC_TYPES['signed char'],
    'INT16' : BASIC_TYPES['short'],
    'INT32' : BASIC_TYPES['int'],
    'INT64' : BASIC_TYPES['long long'],
    'LONG' : BASIC_TYPES['long'],
    'LONGLONG' : BASIC_TYPES['long long'],
    'UCHAR' : BASIC_TYPES['unsigned char'],
    'UINT' : BASIC_TYPES['unsigned int'],
    # 8-bit unsigned integer (was mapped to the 32-bit `unsigned int`).
    'UINT8' : BASIC_TYPES['unsigned char'],
    'UINT16' : BASIC_TYPES['unsigned short'],
    'UINT32' : BASIC_TYPES['unsigned int'],
    'UINT64' : BASIC_TYPES['unsigned long long'],
    'ULONG' : BASIC_TYPES['unsigned long'],
    'ULONGLONG' : BASIC_TYPES['unsigned long long'],
    'USHORT' : BASIC_TYPES['unsigned short'],
    'WORD' : BASIC_TYPES['unsigned short']
}

# Hand both tables to update_types (imported from .cparser).
update_types({**BASIC_TYPES, **OTHER_TYPES})
def write_testcase(testcase, fout):
    """Write the bytes of ``testcase`` to the file at path ``fout``."""
    with open(fout, "wb") as fp:
        fp.write(testcase)


def _parse_define(text):
    """Convert a ``NAME:VALUE`` command line token into ``(name, int)``."""
    fields = text.split(":")
    if len(fields) < 2:
        # Reported by argparse as a usage error instead of an IndexError.
        raise argparse.ArgumentTypeError(
            f"invalid definition {text!r}, expected NAME:VALUE")
    return fields[0], int(fields[1])


def _load_backend(model, voi, size, defs):
    """Parse the model file ``model`` and execute it on a new Z3Backend."""
    parser = Parser(ptype=Parser.ParserType.GENERATOR, input_size=size,
                    custom_defs=defs)
    parser.parse_file(model)
    backend = Z3Backend(name=path.basename(model), voi=voi)
    backend.exec_statements(parser.statements)
    return backend


if __name__ == "__main__":
    # Command line entry point: combine asserted and negated models and
    # write one testcase that satisfies the combination.
    argparser = argparse.ArgumentParser(description="Interpret models and generate testcases")
    argparser.add_argument('--asserts', '-A', action="append",
                           metavar="model", type=str, nargs="+",
                           default=[],
                           help="List of models to assert")
    argparser.add_argument('--negates', '-N', action="append",
                           metavar="model", type=str, nargs="*",
                           default=[],
                           help="List of models to negate")
    # No nargs=1 here: it would turn the value into a one-element list,
    # which can be used neither as a file name nor as a variable name.
    argparser.add_argument('--out', '-O', action="store",
                           metavar="outfile", type=str,
                           default="testcase",
                           help="Output file for testcase (default = 'testcase')")
    argparser.add_argument('--var', '-V', action="store",
                           metavar="variable", type=str,
                           default="HEADER",
                           help="Name of the variable in the model representing the entire file (default 'HEADER')")
    argparser.add_argument('--define', '-D', action="store", metavar="define",
                           type=_parse_define,
                           nargs="*",
                           help="List of constants in the model to overwrite. Syntax NAME:VALUE. E.g., FILESIZE:1024")
    argparser.add_argument('--size', '-B', action="store", metavar="bytes",
                           type=int, default=None,
                           help="Size in bytes of the testcase to generate")

    args = argparser.parse_args()
    if not args.asserts:
        argparser.print_help()
        sys.exit(0)

    # Each -A/-N occurrence contributes one list of models: flatten them.
    asserts = [m for group in args.asserts for m in group]
    negates = [m for group in args.negates for m in group]
    outfile = args.out
    voi = args.var
    size = args.size
    defs = dict(args.define) if args.define else {}

    # The two lists are loaded separately, so a model named in both is
    # asserted once and negated once instead of being asserted twice.
    z3_models_assert = [_load_backend(m, voi, size, defs) for m in asserts]
    z3_models_negate = [_load_backend(m, voi, size, defs) for m in negates]

    backend = z3_models_assert[0]
    for b in z3_models_assert[1:]:
        backend &= b

    for b in z3_models_negate:
        backend &= ~b

    # NOTE(review): solver is read before model as in the original --
    # presumably the solver has to be built first; confirm in Z3Backend.
    solver = backend.solver
    model = backend.model
    if not model:
        # Nothing was written: stop here instead of parsing a missing or
        # stale output file below.
        log.error("The combined model is unsat: no testcase generated")
        sys.exit(1)

    testcase = backend.generate_testcase()
    write_testcase(testcase, outfile)

    # Dump the generated file as parsed by pefile.
    pef = pefile.PE(outfile)
    print(pef)
__s64; 17 | typedef uint64_t __u64; 18 | 19 | typedef __u32 Elf32_Addr; 20 | typedef __u16 Elf32_Half; 21 | typedef __u32 Elf32_Off; 22 | typedef __s32 Elf32_Sword; 23 | typedef __u32 Elf32_Word; 24 | 25 | #define ET_NONE 0 26 | #define ET_REL 1 27 | #define ET_EXEC 2 28 | #define ET_DYN 3 29 | #define ET_CORE 4 30 | #define ET_LOPROC 0xff00 31 | #define ET_HIPROC 0xffff 32 | 33 | #define EI_NIDENT 16 34 | 35 | typedef struct elf32_hdr{ 36 | unsigned char e_ident[EI_NIDENT]; 37 | Elf32_Half e_type; 38 | Elf32_Half e_machine; 39 | Elf32_Word e_version; 40 | Elf32_Addr e_entry; 41 | Elf32_Off e_phoff; 42 | Elf32_Off e_shoff; 43 | Elf32_Word e_flags; 44 | Elf32_Half e_ehsize; 45 | Elf32_Half e_phentsize; 46 | Elf32_Half e_phnum; 47 | Elf32_Half e_shentsize; 48 | Elf32_Half e_shnum; 49 | Elf32_Half e_shstrndx; 50 | } Elf32_Ehdr; 51 | typedef Elf32_Ehdr Elf_Hdr; 52 | 53 | typedef struct elf32_phdr{ 54 | Elf32_Word p_type; 55 | Elf32_Off p_offset; 56 | Elf32_Addr p_vaddr; 57 | Elf32_Addr p_paddr; 58 | Elf32_Word p_filesz; 59 | Elf32_Word p_memsz; 60 | Elf32_Word p_flags; 61 | Elf32_Word p_align; 62 | } Elf32_Phdr; 63 | typedef Elf32_Phdr Elf_Phdr; 64 | 65 | /* Machine types */ 66 | #define EM_NONE 0 67 | #define EM_M32 1 68 | #define EM_SPARC 2 69 | #define EM_386 3 70 | #define EM_68K 4 71 | #define EM_88K 5 72 | #define EM_486 6 /* Perhaps disused */ 73 | #define EM_860 7 74 | #define EM_MIPS 8 /* MIPS R3000 (officially, big-endian only) */ 75 | /* Next two are historical and binaries and 76 | modules of these types will be rejected by 77 | Linux. 
*/ 78 | #define EM_MIPS_RS3_LE 10 /* MIPS R3000 little-endian */ 79 | #define EM_MIPS_RS4_BE 10 /* MIPS R4000 big-endian */ 80 | 81 | #define EM_PARISC 15 /* HPPA */ 82 | #define EM_SPARC32PLUS 18 /* Sun's "v8plus" */ 83 | #define EM_PPC 20 /* PowerPC */ 84 | #define EM_PPC64 21 /* PowerPC64 */ 85 | #define EM_SPU 23 /* Cell BE SPU */ 86 | #define EM_ARM 40 /* ARM 32 bit */ 87 | #define EM_SH 42 /* SuperH */ 88 | #define EM_SPARCV9 43 /* SPARC v9 64-bit */ 89 | #define EM_H8_300 46 /* Renesas H8/300 */ 90 | #define EM_IA_64 50 /* HP/Intel IA-64 */ 91 | #define EM_X86_64 62 /* AMD x86-64 */ 92 | #define EM_S390 22 /* IBM S/390 */ 93 | #define EM_CRIS 76 /* Axis Communications 32-bit embedded processor */ 94 | #define EM_M32R 88 /* Renesas M32R */ 95 | #define EM_MN10300 89 /* Panasonic/MEI MN10300, AM33 */ 96 | #define EM_OPENRISC 92 /* OpenRISC 32-bit embedded processor */ 97 | #define EM_ARCOMPACT 93 /* ARCompact processor */ 98 | #define EM_XTENSA 94 /* Tensilica Xtensa Architecture */ 99 | #define EM_BLACKFIN 106 /* ADI Blackfin Processor */ 100 | #define EM_UNICORE 110 /* UniCore-32 */ 101 | #define EM_ALTERA_NIOS2 113 /* Altera Nios II soft-core processor */ 102 | #define EM_TI_C6000 140 /* TI C6X DSPs */ 103 | #define EM_HEXAGON 164 /* QUALCOMM Hexagon */ 104 | #define EM_NDS32 167 /* Andes Technology compact code size 105 | embedded RISC processor family */ 106 | #define EM_AARCH64 183 /* ARM 64 bit */ 107 | #define EM_TILEPRO 188 /* Tilera TILEPro */ 108 | #define EM_MICROBLAZE 189 /* Xilinx MicroBlaze */ 109 | #define EM_TILEGX 191 /* Tilera TILE-Gx */ 110 | #define EM_ARCV2 195 /* ARCv2 Cores */ 111 | #define EM_RISCV 243 /* RISC-V */ 112 | #define EM_BPF 247 /* Linux BPF - in-kernel virtual machine */ 113 | #define EM_CSKY 252 /* C-SKY */ 114 | #define EM_FRV 0x5441 /* Fujitsu FR-V */ 115 | 116 | /* 117 | * This is an interim value that we will use until the committee comes 118 | * up with a final number. 
def write_testcase(testcase, outdir, cs, n):
    """Write testcase number ``n`` and its condition assignment to ``outdir``.

    The bytes of ``testcase`` go to ``testcase_<n>``; ``testcase_<n>.cond``
    gets one ``<name> <value>`` line for each ``((name, cond), value)``
    element of ``cs``.
    """
    testcasename = path.join(outdir, f"testcase_{n}")
    conditionsname = path.join(outdir, f"testcase_{n}.cond")
    with open(testcasename, "wb") as fp:
        fp.write(testcase)
    with open(conditionsname, "w") as fp:
        fp.writelines(f"{c[0][0]} {c[1]}\n" for c in cs)


def update_blacklist(unsat, cs, blacklist):
    """Record the assignment of the conditions named in ``unsat``.

    ``cs`` is a list of ``((name, cond), value)`` elements. For every item
    of ``unsat`` the value assigned to the condition whose name equals
    ``str(item)`` is looked up, and the resulting ``{name: value}`` dict is
    appended to ``blacklist``.

    Nothing is appended when an item cannot be found in ``cs`` (a warning
    is logged) or when ``unsat`` is empty: an empty entry would match every
    combination in ``isblacklisted``.
    """
    # The first occurrence of a name wins, like a linear search would.
    values = {}
    for c in cs:
        values.setdefault(c[0][0], c[1])

    entry = {}
    for cname in unsat:
        key = str(cname)
        if key not in values:
            log.warning(f"Could not find {cname} among the constraints")
            return
        entry[key] = values[key]

    if entry:
        blacklist.append(entry)


def isblacklisted(cs, blacklist):
    """Tell whether the assignment ``cs`` contains a blacklisted entry.

    An entry of ``blacklist`` matches when each of its ``name: value``
    pairs is assigned identically by some ``((name, cond), value)`` element
    of ``cs``. An empty entry matches any assignment.
    """
    assigned = {(cname, value) for ((cname, _), value) in cs}
    return any(all(item in assigned for item in entry.items())
               for entry in blacklist)
if args.define else {} 86 | 87 | Z3Backend.print_unsat = False 88 | z3models = [] 89 | for model in inputs: 90 | modelname = path.basename(model) 91 | parser = Parser(ptype=Parser.ParserType.DIFFERENTIAL_ASSERT, 92 | input_size=size, 93 | custom_defs=defs) 94 | parser.parse_file(model) 95 | backend = Z3Backend(name=modelname, voi=voi) 96 | backend.exec_statements(parser.statements) 97 | z3models.append(backend) 98 | 99 | backend = z3models[0] 100 | for m in z3models[1:]: 101 | backend &= m 102 | 103 | z3supports = [] 104 | for model in supports: 105 | modelname = path.basename(model) 106 | parser = Parser(ptype=Parser.ParserType.DIFFERENTIAL_ASSERT, 107 | input_size=size, 108 | custom_defs=defs) 109 | parser.parse_file(model) 110 | tmp = Z3Backend(name=modelname, voi=voi) 111 | tmp.exec_statements(parser.statements) 112 | z3supports.append(tmp) 113 | 114 | for m in z3supports: 115 | backend &= m 116 | 117 | nterminal_conds = list(reduce(lambda x, y: x+y, 118 | [[(x, m.conditions[x]) 119 | for x in (m.conditions.keys() 120 | - m.terminal_conditions.keys()) 121 | ] for m in z3models], 122 | [])) 123 | nconds = len(nterminal_conds) 124 | log.info(f"{nconds} found. Generating {2**nconds} testcases") 125 | alltf = product([True, False], repeat=nconds) 126 | 127 | n = 0 128 | blacklist = [] 129 | for tfs in progressbar.progressbar(alltf, max_value=2**nconds): 130 | progressbar.streams.flush() 131 | # cs = [((name, z3cond), bool), ... ] 132 | cs = list(zip(nterminal_conds, tfs)) 133 | if isblacklisted(cs, blacklist): 134 | log.warning("Combination is known to be unsat") 135 | continue 136 | 137 | support = Z3Backend() 138 | for c in cs: 139 | if c[1]: 140 | support.terminal_conditions[c[0][0]] = c[0][1] 141 | else: 142 | support.terminal_conditions[c[0][0]] = z3.Not(c[0][1]) 143 | if not support.model: 144 | log.warning("Support model is unsat. 
Checking the unsat core and discarinding conflitting constraints.") 145 | unsat = support.solver.unsat_core() 146 | update_blacklist(unsat, cs, blacklist) 147 | continue 148 | 149 | tmpbackend = backend & support 150 | tmpmodel = tmpbackend.model 151 | if tmpmodel: 152 | n += 1 153 | testcase = tmpbackend.generate_testcase() 154 | write_testcase(testcase, outdir, cs, n) 155 | -------------------------------------------------------------------------------- /modelLang/structures/headers/reactos.h: -------------------------------------------------------------------------------- 1 | #define IMAGE_NUMBEROF_DIRECTORY_ENTRIES 16 2 | #define IMAGE_SIZEOF_SHORT_NAME 8 3 | #define IMAGE_SIZEOF_SECTION_HEADER 40 4 | #define IMAGE_SIZEOF_FILE_HEADER 20 5 | 6 | typedef struct _IMAGE_DOS_HEADER { 7 | USHORT e_magic; 8 | USHORT e_cblp; 9 | USHORT e_cp; 10 | USHORT e_crlc; 11 | USHORT e_cparhdr; 12 | USHORT e_minalloc; 13 | USHORT e_maxalloc; 14 | USHORT e_ss; 15 | USHORT e_sp; 16 | USHORT e_csum; 17 | USHORT e_ip; 18 | USHORT e_cs; 19 | USHORT e_lfarlc; 20 | USHORT e_ovno; 21 | USHORT e_res[4]; 22 | USHORT e_oemid; 23 | USHORT e_oeminfo; 24 | USHORT e_res2[10]; 25 | LONG e_lfanew; 26 | } IMAGE_DOS_HEADER, *PIMAGE_DOS_HEADER; 27 | 28 | typedef struct _IMAGE_EXPORT_DIRECTORY { 29 | ULONG Characteristics; 30 | ULONG TimeDateStamp; 31 | USHORT MajorVersion; 32 | USHORT MinorVersion; 33 | ULONG Name; 34 | ULONG Base; 35 | ULONG NumberOfFunctions; 36 | ULONG NumberOfNames; 37 | ULONG AddressOfFunctions; 38 | ULONG AddressOfNames; 39 | ULONG AddressOfNameOrdinals; 40 | } IMAGE_EXPORT_DIRECTORY, *PIMAGE_EXPORT_DIRECTORY; 41 | 42 | typedef struct _IMAGE_RESOURCE_DATA_ENTRY { 43 | ULONG OffsetToData; 44 | ULONG Size; 45 | ULONG CodePage; 46 | ULONG Reserved; 47 | } IMAGE_RESOURCE_DATA_ENTRY, *PIMAGE_RESOURCE_DATA_ENTRY; 48 | 49 | typedef struct { 50 | ULONG Size; 51 | ULONG TimeDateStamp; 52 | USHORT MajorVersion; 53 | USHORT MinorVersion; 54 | ULONG GlobalFlagsClear; 55 | ULONG 
GlobalFlagsSet; 56 | ULONG CriticalSectionDefaultTimeout; 57 | ULONG DeCommitFreeBlockThreshold; 58 | ULONG DeCommitTotalFreeThreshold; 59 | ULONG LockPrefixTable; 60 | ULONG MaximumAllocationSize; 61 | ULONG VirtualMemoryThreshold; 62 | ULONG ProcessHeapFlags; 63 | ULONG ProcessAffinityMask; 64 | USHORT CSDVersion; 65 | USHORT Reserved1; 66 | ULONG EditList; 67 | ULONG SecurityCookie; 68 | ULONG SEHandlerTable; 69 | ULONG SEHandlerCount; 70 | } IMAGE_LOAD_CONFIG_DIRECTORY32, *PIMAGE_LOAD_CONFIG_DIRECTORY32; 71 | 72 | typedef struct { 73 | ULONG Size; 74 | ULONG TimeDateStamp; 75 | USHORT MajorVersion; 76 | USHORT MinorVersion; 77 | ULONG GlobalFlagsClear; 78 | ULONG GlobalFlagsSet; 79 | ULONG CriticalSectionDefaultTimeout; 80 | ULONGLONG DeCommitFreeBlockThreshold; 81 | ULONGLONG DeCommitTotalFreeThreshold; 82 | ULONGLONG LockPrefixTable; 83 | ULONGLONG MaximumAllocationSize; 84 | ULONGLONG VirtualMemoryThreshold; 85 | ULONGLONG ProcessAffinityMask; 86 | ULONG ProcessHeapFlags; 87 | USHORT CSDVersion; 88 | USHORT Reserved1; 89 | ULONGLONG EditList; 90 | ULONGLONG SecurityCookie; 91 | ULONGLONG SEHandlerTable; 92 | ULONGLONG SEHandlerCount; 93 | } IMAGE_LOAD_CONFIG_DIRECTORY64, *PIMAGE_LOAD_CONFIG_DIRECTORY64; 94 | 95 | typedef struct _IMAGE_SECTION_HEADER { 96 | UCHAR Name[IMAGE_SIZEOF_SHORT_NAME]; 97 | union { 98 | ULONG PhysicalAddress; 99 | ULONG VirtualSize; 100 | } Misc; 101 | ULONG VirtualAddress; 102 | ULONG SizeOfRawData; 103 | ULONG PointerToRawData; 104 | ULONG PointerToRelocations; 105 | ULONG PointerToLinenumbers; 106 | USHORT NumberOfRelocations; 107 | USHORT NumberOfLinenumbers; 108 | ULONG Characteristics; 109 | } IMAGE_SECTION_HEADER, *PIMAGE_SECTION_HEADER; 110 | 111 | typedef struct _IMAGE_FILE_HEADER { 112 | USHORT Machine; 113 | USHORT NumberOfSections; 114 | ULONG TimeDateStamp; 115 | ULONG PointerToSymbolTable; 116 | ULONG NumberOfSymbols; 117 | USHORT SizeOfOptionalHeader; 118 | USHORT Characteristics; 119 | } IMAGE_FILE_HEADER, 
*PIMAGE_FILE_HEADER; 120 | 121 | typedef struct _IMAGE_DATA_DIRECTORY { 122 | ULONG VirtualAddress; 123 | ULONG Size; 124 | } IMAGE_DATA_DIRECTORY, *PIMAGE_DATA_DIRECTORY; 125 | 126 | typedef struct _IMAGE_OPTIONAL_HEADER { 127 | USHORT Magic; 128 | UCHAR MajorLinkerVersion; 129 | UCHAR MinorLinkerVersion; 130 | ULONG SizeOfCode; 131 | ULONG SizeOfInitializedData; 132 | ULONG SizeOfUninitializedData; 133 | ULONG AddressOfEntryPoint; 134 | ULONG BaseOfCode; 135 | ULONG BaseOfData; 136 | ULONG ImageBase; 137 | ULONG SectionAlignment; 138 | ULONG FileAlignment; 139 | USHORT MajorOperatingSystemVersion; 140 | USHORT MinorOperatingSystemVersion; 141 | USHORT MajorImageVersion; 142 | USHORT MinorImageVersion; 143 | USHORT MajorSubsystemVersion; 144 | USHORT MinorSubsystemVersion; 145 | ULONG Win32VersionValue; 146 | ULONG SizeOfImage; 147 | ULONG SizeOfHeaders; 148 | ULONG CheckSum; 149 | USHORT Subsystem; 150 | USHORT DllCharacteristics; 151 | ULONG SizeOfStackReserve; 152 | ULONG SizeOfStackCommit; 153 | ULONG SizeOfHeapReserve; 154 | ULONG SizeOfHeapCommit; 155 | ULONG LoaderFlags; 156 | ULONG NumberOfRvaAndSizes; 157 | IMAGE_DATA_DIRECTORY DataDirectory[IMAGE_NUMBEROF_DIRECTORY_ENTRIES]; 158 | } IMAGE_OPTIONAL_HEADER32, *PIMAGE_OPTIONAL_HEADER32; 159 | 160 | typedef struct _IMAGE_ROM_OPTIONAL_HEADER { 161 | USHORT Magic; 162 | UCHAR MajorLinkerVersion; 163 | UCHAR MinorLinkerVersion; 164 | ULONG SizeOfCode; 165 | ULONG SizeOfInitializedData; 166 | ULONG SizeOfUninitializedData; 167 | ULONG AddressOfEntryPoint; 168 | ULONG BaseOfCode; 169 | ULONG BaseOfData; 170 | ULONG BaseOfBss; 171 | ULONG GprMask; 172 | ULONG CprMask[4]; 173 | ULONG GpValue; 174 | } IMAGE_ROM_OPTIONAL_HEADER, *PIMAGE_ROM_OPTIONAL_HEADER; 175 | 176 | typedef struct _IMAGE_OPTIONAL_HEADER64 { 177 | USHORT Magic; 178 | UCHAR MajorLinkerVersion; 179 | UCHAR MinorLinkerVersion; 180 | ULONG SizeOfCode; 181 | ULONG SizeOfInitializedData; 182 | ULONG SizeOfUninitializedData; 183 | ULONG 
AddressOfEntryPoint; 184 | ULONG BaseOfCode; 185 | ULONGLONG ImageBase; 186 | ULONG SectionAlignment; 187 | ULONG FileAlignment; 188 | USHORT MajorOperatingSystemVersion; 189 | USHORT MinorOperatingSystemVersion; 190 | USHORT MajorImageVersion; 191 | USHORT MinorImageVersion; 192 | USHORT MajorSubsystemVersion; 193 | USHORT MinorSubsystemVersion; 194 | ULONG Win32VersionValue; 195 | ULONG SizeOfImage; 196 | ULONG SizeOfHeaders; 197 | ULONG CheckSum; 198 | USHORT Subsystem; 199 | USHORT DllCharacteristics; 200 | ULONGLONG SizeOfStackReserve; 201 | ULONGLONG SizeOfStackCommit; 202 | ULONGLONG SizeOfHeapReserve; 203 | ULONGLONG SizeOfHeapCommit; 204 | ULONG LoaderFlags; 205 | ULONG NumberOfRvaAndSizes; 206 | IMAGE_DATA_DIRECTORY DataDirectory[IMAGE_NUMBEROF_DIRECTORY_ENTRIES]; 207 | } IMAGE_OPTIONAL_HEADER64, *PIMAGE_OPTIONAL_HEADER64; 208 | -------------------------------------------------------------------------------- /modelLang/parsers/langlex.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import ply.lex as lex 3 | import re 4 | from enum import Enum, auto, unique 5 | 6 | from ..classes import Base, Optimizations 7 | 8 | log = logging.getLogger(__name__) 9 | log.setLevel(logging.NOTSET) 10 | 11 | 12 | class Lexer: 13 | tokens = ( 14 | 'NEWLINE', 15 | 16 | # these translate to z3 functions 17 | 'OPERATOR1', 18 | 'OPERATOR2', 19 | 20 | # string comparison - a syntactic sugar 21 | 'STRCMP', 22 | 23 | 'ASSIGNSTART', 24 | 'CONDITIONNAME', 25 | 'GENCONDITIONNAME', 26 | 'LOOPSTART', 27 | 'LOOPEND', 28 | 'LOOP', 29 | 'VLOOP', 30 | 'DBG', 31 | 'COMMA', 32 | 'COLON', 33 | 'SEMICOLON', 34 | 'EXCLAMATION', 35 | 'DOT', 36 | 'TERMINATOR', 37 | 38 | # slicing 39 | 'LBRACKETS', 40 | 'RBRACKETS', 41 | 42 | # parentheses 43 | 'LPAREN', 44 | 'RPAREN', 45 | 46 | # -> 47 | 'ARROW', 48 | 49 | # # 50 | 'COMMENT', 51 | 52 | 'NUMBER', 53 | 'CHAR', 54 | 'STR', 55 | 'BOOL', 56 | 'VARIABLE', 57 | 'INPUT', 58 | 'OUTPUT', 59 | 60 | 
'LOADTYPES', 61 | 'TYPE', 62 | 'SIZEOF', 63 | 'DEFINE', 64 | 65 | 'FROMFILE', 66 | 'OPTIMIZE', 67 | ) 68 | 69 | def t_OPERATOR1(self, t): 70 | r'(NOT|Not|BITNOT|BITNot|BitNot|ISPOW2|IsPow2|isPow2|Setc|SECT|NSect|NSECT|OptHdr|OPTHDR)' 71 | t.value = t.value.upper() 72 | log.debug("OPERATOR1 token") 73 | return t 74 | 75 | def t_OPERATOR2(self, t): 76 | r"(ADD|SUB|DIV|UDIV|AND|OR|ULE|UGE|ULT|UGT|Add|Sub|Div|UDiv|And|Or|ULe|UGe|ULt|UGt|BITAND|BITAnd|BitAnd|BITOR|BITOr|BitOr|LE|Le|GE|Ge|NEQ|NEq|Neq|EQ|Eq|LT|Lt|GT|Gt|INT|Int|MOD|Mod|MUL|Mul|ALIGNUP|ALIGNDOWN|ISALIGNED|SHR|ShR|SHL|ShL|OVFLADD|OVFLAdd|OvflAdd)\s" 77 | log.debug("OPERATOR2 token") 78 | t.value = t.value[:-1].upper() 79 | return t 80 | 81 | def t_STRCMP(self, t): 82 | r"(STRCMP|STRCmp|StrCmp)" 83 | t.value = t.value[:-1].upper() 84 | return t 85 | 86 | def t_CHAR(self, t): 87 | r'"[^"]"' 88 | t.value = ord(t.value[1]) 89 | log.debug("A single char value token") 90 | return t 91 | 92 | def t_STR(self, t): 93 | r"'[^']+'" 94 | t.value = eval('"' + t.value[1:-1] + '"') 95 | return t 96 | 97 | def t_BOOL(self, t): 98 | r"(TRUE|True|true|FALSE|False|false)" 99 | val = t.value.upper() 100 | t.value = True if val == "TRUE" else False 101 | log.debug(f"Found immediate boolean value {val}") 102 | return t 103 | 104 | def t_TERMINATOR(self, t): 105 | r"term" 106 | log.debug("Terminal condition token") 107 | return t 108 | 109 | t_LBRACKETS = r'\[' 110 | t_RBRACKETS = r'\]' 111 | t_LPAREN = r'\(' 112 | t_RPAREN = r'\)' 113 | t_ARROW = r'<-' 114 | t_SEMICOLON = r';' 115 | t_EXCLAMATION = r'!' 116 | t_DOT = r'\.' 
117 | t_COMMA = r',' 118 | t_NEWLINE = r'\n' 119 | 120 | def t_COLON(self, t): 121 | r':' 122 | return t 123 | 124 | def t_INPUT(self, t): 125 | r'^(INPUT|input)' 126 | log.debug("Input variable token") 127 | return t 128 | 129 | def t_OUTPUT(self, t): 130 | r'^(OUTPUT|output)' 131 | log.debug("Output variable token") 132 | return t 133 | 134 | def t_ASSIGNSTART(self, t): 135 | r'(P|p)(?=(:|\())' 136 | log.debug("Assignement start token") 137 | t.value = t.value.lstrip() 138 | return t 139 | 140 | def t_LOOPSTART(self, t): 141 | r'(L|l)\d+(?=(:|\())' 142 | log.debug("Loop start token") 143 | v = t.value.lstrip() 144 | v = int(v[1:]) 145 | t.value = v 146 | return t 147 | 148 | def t_DBG(self, t): 149 | r'(D|d)(?=(:|\())' 150 | log.debug("Debug token") 151 | t.value = t.value.lstrip() 152 | return t 153 | 154 | def t_LOOPEND(self, t): 155 | r'(END|End|end)\s(L|l)\d+' 156 | log.debug("Loop end token") 157 | v = t.value.lstrip() 158 | v = int(v[5:]) 159 | t.value = v 160 | return t 161 | 162 | def t_LOOP(self, t): 163 | r'LOOP' 164 | return t 165 | 166 | def t_VLOOP(self, t): 167 | r'VLOOP' 168 | return t 169 | 170 | def t_CONDITIONNAME(self, t): 171 | r'(V|v)\d+' 172 | log.debug("Condition name token") 173 | return t 174 | 175 | def t_GENCONDITIONNAME(self, t): 176 | r'(G|g)\d+' 177 | log.debug("Condition name token") 178 | return t 179 | 180 | def t_LOADTYPES(self, t): 181 | r'(LOAD|Load|load)(REL|Rel|rel)?\s' 182 | if 'rel' in t.value.lower(): 183 | t.value = True 184 | else: 185 | t.value = False 186 | return t 187 | 188 | def t_TYPE(self, t): 189 | r'(AS|As|as)\s' 190 | return t 191 | 192 | def t_SIZEOF(self, t): 193 | r'(SIZEOF|SizeOf|sizeof)\s' 194 | return t 195 | 196 | def t_DEFINE(self, t): 197 | r'(DEFINE|Define|define)\s' 198 | return t 199 | 200 | def t_FROMFILE(self, t): 201 | r'FROMFILE\s' 202 | return t 203 | 204 | def t_OPTIMIZE(self, t): 205 | r'(MAXIMIZE|MINIMIZE)' 206 | if 'MAX' in t.value: 207 | t.value = Optimizations.MAXIMIZE 208 | else: 209 | 
t.value = Optimizations.MINIMIZE 210 | return t 211 | 212 | def t_VARIABLE(self, t): 213 | r"[a-zA-Z_][a-zA-Z_0-9]+" 214 | return t 215 | 216 | # A regular expression rule with some action code 217 | def t_NUMBER(self, t): 218 | r'(0(x|X)[a-fA-F0-9]+|\d+)' 219 | log.debug("Number token") 220 | try: 221 | t.value = int(t.value) 222 | except ValueError: 223 | t.value = int(t.value, 16) 224 | return t 225 | 226 | t_ignore_comments = r'\#.*' 227 | 228 | # Define a rule so we can track line numbers 229 | def t_newline(self, t): 230 | r'\n+' 231 | log.debug("New line found") 232 | t.lexer.lineno += len(t.value) 233 | 234 | # A string containing ignored characters (spaces and tabs) 235 | t_ignore = ' \t' 236 | 237 | # Error handling rule 238 | def t_error(self, t): 239 | print("Illegal character '%s'" % t.value[0]) 240 | t.lexer.skip(1) 241 | 242 | def __init__(self): 243 | # Build the lexer 244 | lexer = lex.lex(module=self) 245 | -------------------------------------------------------------------------------- /tooleval/idaplugin/idadumpmem.py.asm: -------------------------------------------------------------------------------- 1 | ; 2 | ; +-------------------------------------------------------------------------+ 3 | ; | This file has been generated by The Interactive Disassembler (IDA) | 4 | ; | Copyright (c) 2018 Hex-Rays, | 5 | ; | License info: 48-B237-7154-E0 | 6 | ; | Institut EURECOM | 7 | ; +-------------------------------------------------------------------------+ 8 | ; 9 | ; Input SHA256 : 0F18A2FFC56339EB0E4B1DBBD260CFBA380AF0846F3DFDE29A00296EADAEEEE4 10 | ; Input MD5 : CEE979E03605052C37ECF7348A2C7D35 11 | ; Input CRC32 : 375248FB 12 | 13 | ; File Name : /home/dario/phd/loaders_modeling/lang_parser/tooleval/idaplugin/idadumpmem.py 14 | ; Format : Binary file 15 | ; Base Address: 0000h Range: 0000h - 0595h Loaded length: 0595h 16 | 17 | .686p 18 | .mmx 19 | .model flat 20 | 21 | ; =========================================================================== 22 
| 23 | ; Segment type: Pure code 24 | seg000 segment byte public 'CODE' use32 25 | assume cs:seg000 26 | assume es:nothing, ss:nothing, ds:nothing, fs:nothing, gs:nothing 27 | dd 6D6F7266h, 63646920h, 706D6920h, 2074726Fh, 72660A2Ah 28 | dd 69206D6Fh, 70616164h, 6D692069h, 74726F70h, 660A2A20h 29 | dd 206D6F72h, 75616469h, 736C6974h, 706D6920h, 2074726Fh 30 | dd 6D690A2Ah, 74726F70h, 73797320h, 6F72660Ah, 736F206Dh 31 | dd 7461702Eh, 6D692068h, 74726F70h, 696F6A20h, 79730A6Eh 32 | dd 61702E73h, 612E6874h, 6E657070h, 2E222864h, 0A29222Eh 33 | dd 2E737973h, 68746170h, 7070612Eh, 28646E65h, 29222E22h 34 | dd 6C630A0Ah, 20737361h, 69466F54h, 7453656Ch, 74754F64h 35 | dd 6A626F28h, 29746365h, 20200A3Ah, 65642020h, 5F5F2066h 36 | dd 74696E69h, 73285F5Fh, 29666C65h, 20200A3Ah, 20202020h 37 | dd 65732020h, 6F2E666Ch, 69667475h, 3D20656Ch, 65706F20h 38 | dd 2F22286Eh, 2F706D74h, 6F616469h, 742E7475h, 2C227478h 39 | dd 22772220h, 20200A29h, 65642020h, 72772066h, 28657469h 40 | dd 666C6573h, 6574202Ch, 3A297478h, 2020200Ah, 20202020h 41 | dd 6C657320h, 756F2E66h, 6C696674h, 72772E65h, 28657469h 42 | dd 74786574h, 20200A29h, 65642020h, 6C662066h, 28687375h 43 | dd 666C6573h, 200A3A29h, 20202020h, 73202020h, 2E666C65h 44 | dd 6674756Fh, 2E656C69h, 73756C66h, 0A292868h, 20202020h 45 | dd 20666564h, 74617369h, 73287974h, 29666C65h, 20200A3Ah 46 | dd 20202020h, 65722020h, 6E727574h, 6C614620h, 200A6573h 47 | dd 64202020h, 5F206665h, 6C65645Fh, 73285F5Fh, 29666C65h 48 | dd 20200A3Ah, 20202020h, 65732020h, 6F2E666Ch, 69667475h 49 | dd 632E656Ch, 65736F6Ch, 730A2928h, 732E7379h, 756F6474h 50 | dd 203D2074h, 2E737973h, 65647473h, 3D207272h, 466F5420h 51 | dd 53656C69h, 754F6474h, 0A292874h, 3A797274h, 20200A0Ah 52 | dd 72662020h, 6D206D6Fh, 75646D65h, 705F706Dh, 69203262h 53 | dd 726F706Dh, 654D2074h, 79726F6Dh, 706D7544h, 654D202Ch 54 | dd 79726F6Dh, 69676552h, 0A0A6E6Fh, 20202020h, 6C206669h 55 | dd 41286E65h, 29564752h, 32203C20h, 20200A3Ah, 20202020h 56 | dd 75642020h, 6964706Dh, 
203D2072h, 6D742F22h, 200A2270h 57 | dd 65202020h, 3A65736Ch, 2020200Ah, 20202020h, 6D756420h 58 | dd 72696470h, 41203D20h, 5B564752h, 0A0A5D31h, 20202020h 59 | dd 646D656Dh, 20706D75h, 654D203Dh, 79726F6Dh, 706D7544h 60 | dd 200A2928h, 66202020h, 7620726Fh, 72646461h, 206E6920h 61 | dd 6D676553h, 73746E65h, 0A3A2928h, 2 dup(20202020h), 726D656Dh 62 | dd 6F696765h, 203D206Eh, 646D656Dh, 2E706D75h, 69676572h 63 | dd 2E736E6Fh, 28646461h, 20200A29h, 20202020h, 656D2020h 64 | dd 6765726Dh, 2E6E6F69h, 64646176h, 203D2072h, 64646176h 65 | dd 20200A72h, 20202020h, 656D2020h, 6765726Dh, 2E6E6F69h 66 | dd 7A697376h, 203D2065h, 45676553h, 7628646Eh, 72646461h 67 | dd 202D2029h, 53676553h, 74726174h, 64617628h, 0A297264h 68 | dd 2 dup(20202020h), 72747461h, 67203D20h, 735F7465h, 5F6D6765h 69 | dd 72747461h, 64617628h, 202C7264h, 41474553h, 5F525454h 70 | dd 4D524550h, 20200A29h, 20202020h, 65722020h, 3D206461h 71 | dd 74746120h, 20262072h, 50474553h, 5F4D5245h, 44414552h 72 | dd 203D2120h, 20200A30h, 20202020h, 72772020h, 20657469h 73 | dd 7461203Dh, 26207274h, 47455320h, 4D524550h, 4952575Fh 74 | dd 21204554h, 0A30203Dh, 2 dup(20202020h), 20637865h, 7461203Dh 75 | dd 26207274h, 47455320h, 4D524550h, 4558455Fh, 3D212043h 76 | dd 200A3020h, 20202020h, 6D202020h, 65726D65h, 6E6F6967h 77 | dd 7265702Eh, 7373696Dh, 206E6F69h, 2D22203Dh, 202B2022h 78 | dd 22722228h, 20666920h, 64616572h, 736C6520h, 2D222065h 79 | dd 2B202922h, 77222820h, 66692022h, 69727720h, 65206574h 80 | dd 2065736Ch, 29222D22h, 28202B20h, 20227822h, 65206669h 81 | dd 65206378h, 2065736Ch, 29222D22h, 2020200Ah, 20202020h 82 | dd 6D656D20h, 69676572h, 662E6E6Fh, 657A6973h, 6D203D20h 83 | dd 6D286E69h, 65726D65h, 6E6F6967h, 6973762Eh, 202C657Ah 84 | dd 30317830h, 29303030h, 2020200Ah, 20202020h, 6D656D20h 85 | dd 69676572h, 632E6E6Fh, 65746E6Fh, 3D20746Eh, 74656720h 86 | dd 7479625Fh, 76287365h, 72646461h, 656D202Ch, 6765726Dh 87 | dd 2E6E6F69h, 7A697366h, 200A2965h, 70202020h, 6E676F72h 88 | dd 20656D61h, 
6567203Dh, 6F725F74h, 665F746Fh, 6E656C69h 89 | dd 28656D61h, 20200A29h, 69772020h, 6F206874h, 286E6570h 90 | dd 6E696F6Ah, 6D756428h, 72696470h, 7270202Ch, 616E676Fh 91 | dd 222B656Dh, 6D75642Eh, 2C292270h, 62772220h, 61202922h 92 | dd 70662073h, 20200A3Ah, 20202020h, 70662020h, 6972772Eh 93 | dd 6D286574h, 75646D65h, 532E706Dh, 61697265h, 657A696Ch 94 | dd 74536F54h, 676E6972h, 0A292928h, 65637865h, 45207470h 95 | dd 70656378h, 6E6F6974h, 20736120h, 200A3A65h, 70202020h 96 | dd 746E6972h, 0A296528h, 20202020h, 2E636469h, 74697845h 97 | dd 0A293128h, 6364690Ah, 6978452Eh, 29302874h 98 | db 0Ah 99 | seg000 ends 100 | 101 | 102 | end 103 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # What is this about? 2 | 3 | This project provides a framework for modeling and analyzing the behavior of parsers for executable file formats, like the ones we can find in operating system loaders and reverse engineering tools. 4 | 5 | # Why? What's the goal of all this? 6 | 7 | The key problem we deal with: there is *no reference implementation* for parsing PE files, and there is *no comprehensive specification* for the PE file format. Reimplementation is the de facto rule, and there is a lot of room for implementation differences. This, in turn, leads to *discrepancies* between software that actually needs to load PE executables (e.g., Windows OS) and reverse engineering / malware analysis tools. This is a problem: these discrepancies can be used to mislead malware analysis and reverse engineering tools. 8 | 9 | Key contributions: 10 | - We developed and released an *analysis framework* to *systematically* explore this problem and *enumerate* discrepancies among different software, especially OS loaders vs. reverse engineering / malware analysis tools. 
11 | - We developed and released models for various versions of Windows (XP, 7, 10) and reverse engineering tools (ClamAV, Yara, radare2). 12 | - We can automatically *validate* and *generate* PE samples "exploiting" these discrepancies, thus tricking reverse engineering tools into extracting misleading information. 13 | 14 | 15 | # What do I find in this repo? 16 | 17 | This project ships some ready-to-use models as well as the code of the analysis framework. 18 | The models can be found in the dedicated [submodule](https://github.com/eurecom-s3/loaders-models), while the interpreter's code for the custom language is in the [modelLang](modelLang) directory. 19 | 20 | # Modeling Language 21 | 22 | The first step in the analysis consists of writing a "model" of the parser using the custom language supported by the framework. 23 | Here follows an example extracted from the models of the Windows loader. 24 | ``` 25 | INPUT HEADER 2048 as DOSHeader 26 | 27 | ## Check the MZ magic number 28 | V1: AND EQ HEADER.magic[0] "M" EQ HEADER.magic[1] "Z" term 29 | V2: ULE (ADD HEADER.e_lfanew 0xf8) FILESIZE term 30 | 31 | P: NT_HEADER <- HEADER[HEADER.e_lfanew, sizeof _IMAGE_NT_HEADERS] as _IMAGE_NT_HEADERS 32 | ## Check the PE magic number 33 | V3: EQ NT_HEADER.Signature 0x4550 term 34 | 35 | ``` 36 | For more information about the modeling language, check out [SPECIFICATIONS.md](SPECIFICATIONS.md). 37 | 38 | # Analysis Tasks & Examples 39 | 40 | ## Sample validation 41 | Given an executable and the model of a parser as input, the framework can determine whether the former meets the constraints of the latter; in other words, whether the modeled software considers the input file as a valid executable. 
42 | To check whether an executable meets the constraints of a model, you can run: 43 | ``` 44 | python3 verify.py <model> <executable> 45 | ``` 46 | For example, if you want to check whether an unknown sample can run under Windows 10 by using the ready-to-use models in this project, you can launch: 47 | ``` 48 | python3 verify.py models/windows/10/MiCreateImageFile.lmod path/to/the/sample 49 | ``` 50 | The script returns 0 if the executable is valid, or 1 otherwise (in this case, the script also prints one line pointing to the broken constraint in the model). 51 | 52 | ## Sample generation 53 | The framework can create program headers that are valid according to one or more models. 54 | The logic for generating valid samples is implemented in the `generate.py` script, which can be invoked as follows: 55 | ``` 56 | python3 generate.py -A <model> [<model> [ ... ]] 57 | ``` 58 | For example, to generate a valid test case for Windows 7, you can run the following command that combines the models of both the kernel-space and user-space portions of its loader: 59 | ``` 60 | python3 generate.py -A models/windows/7/MiCreateImageFile.lmod models/windows/7/LdrpInitializeProcess.lmod 61 | ``` 62 | The output file can be specified with the `-O` flag (default: `testcase`). 63 | 64 | ## Differential test case generation 65 | Given two or more models, the framework can create program headers that are valid according to a subset of them but invalid for the others. 
66 | `generate.py` also implements the differential test case generation and can be invoked with: 67 | ``` 68 | python3 generate.py -A <model> [<model> [...]] -N <model> [<model> [...]] 69 | ``` 70 | For example, to generate a sample that runs in Windows 10 but not in Windows 7, you can execute: 71 | ``` 72 | ./generate.py -A models/windows/10/MiCreateImageFileMap.lmod models/windows/10/LdrpInitializeProcess.lmod -N models/windows/7/MiCreateImageFileMap.lmod models/windows/7/LdrpInitializeProcess.lmod 73 | ``` 74 | 75 | ## Differences enumeration 76 | Given two models, the framework is able to create many different differential test cases, exploiting different discrepancies between the two models. 77 | 78 | The `differential.py` script implements the logic for the differences enumeration technique and can be invoked the same way as `generate.py`. 79 | 80 | ## Corner cases generation 81 | Given one or more models, the framework creates many test cases that cover all the possible configurations that the set of models considers valid. 82 | This technique is implemented in the `explore_conditions.py` script, which can be run with the following command: 83 | ``` 84 | python3 explore_conditions.py -M <model> [<model> [...]] 85 | ``` 86 | 87 | # Setup/Installation 88 | 89 | The best way to start using this project is by creating a virtual environment. 90 | ``` 91 | mkvirtualenv --python=python3 models 92 | ``` 93 | This project uses python3-specific features and, as such, is unlikely to work with python2. 94 | Most of the project dependencies can be installed using pip: 95 | ``` 96 | pip install -r requirements.txt 97 | ``` 98 | The `z3` solver needs to be installed separately. On Ubuntu 20.04, you can do that with: 99 | ``` 100 | sudo apt install z3 101 | ``` 102 | 103 | # Publications and Conference Talks 104 | 105 | This work was published at the [24th International Symposium on Research in Attacks, Intrusions and Defenses (RAID 2021)](https://raid2021.org/). 
#!/usr/bin/env python3
"""Differential testcase generation.

Builds one Z3 backend per model file, combines the asserted models with the
inverted negated models, and repeatedly asks the backend for a testcase.
After every round the terminal conditions of the negated models that the
produced testcase violates are recorded, and every not-yet-tried subset of
the conditions violated so far is used, in turn, as an extra "support"
model for the next round.
"""
import argparse
import sys
import os.path as path
import logging
from functools import reduce
from itertools import product, combinations

import coloredlogs
import z3
import pefile

log = logging.getLogger(__name__)
coloredlogs.install(level="INFO", logger=log)

from modelLang import Parser, Z3Backend


def gen_constraint_name(model, cond):
    """Return the qualified name ``<model name>_<cond>`` of a condition."""
    return f"{model.name}_{cond}"


def create_constraints_db(models):
    """Map the qualified name of every terminal condition to the condition.

    If two models produce the same qualified name, the later one wins.
    """
    return {gen_constraint_name(model, name): cond
            for model in models
            for name, cond in model.terminal_conditions.items()}


def write_testcase(testcase, constraints, fout):
    """Write *testcase* (bytes) to *fout* and the constraint names, one per
    line, to ``<fout>.constraints``."""
    with open(fout, "wb") as fp:
        fp.write(testcase)
    with open(f"{fout}.constraints", "w") as fp:
        for name in constraints:
            fp.write(f"{name}\n")


def generate(z3_models_assert, z3_models_negate, z3_model_support=None):
    """Combine the models and try to produce a testcase.

    The asserted models are combined with ``&``, then the optional support
    model, then the inversion (``~``) of every negated model.

    Returns a ``(model, testcase)`` pair; *testcase* is ``None`` when the
    backend yields no (truthy) model.
    """
    # NOTE(review): ``&=`` is applied to the first asserted backend itself;
    # whether that mutates it across calls depends on Z3Backend - confirm.
    backend = z3_models_assert[0]
    for b in z3_models_assert[1:]:
        backend &= b

    if z3_model_support:
        backend &= z3_model_support

    for b in z3_models_negate:
        backend &= ~b

    # NOTE(review): the value is unused; the attribute access is kept in case
    # ``solver`` is a property that lazily builds state - confirm.
    solver = backend.solver
    model = backend.model
    testcase = backend.generate_testcase() if model else None
    return model, testcase


def find_violations(model, z3_models_negate):
    """Return the qualified names of the negated models' terminal conditions
    that evaluate to a falsy value under *model*."""
    return {gen_constraint_name(mn, name)
            for mn in z3_models_negate
            for name, cond in mn.terminal_conditions.items()
            if not model.eval(cond)}


def next_iteration(violated_constraints, violated_once, processed, to_process,
                   constraints_db):
    """Pick the next subset of violated constraints to enforce.

    *violated_once*, *processed* and *to_process* are updated in place.
    Returns a support ``Z3Backend`` holding the chosen constraints, or
    ``None`` when every queued subset has already been processed.
    """
    if len(violated_once) != len(violated_once | violated_constraints):
        log.critical(f"New Constraints found! {violated_constraints - violated_once}")
        violated_once |= violated_constraints
        all_subsets = set()
        violated_once_sorted = sorted(violated_once)
        for i in range(1, len(violated_once)+1):
            all_subsets |= set(combinations(violated_once_sorted, i))

        new_subsets = all_subsets - processed
        # Queue in a fixed order (smallest subsets first) so that runs are
        # reproducible; plain set iteration order varies between runs.
        to_process.extend(sorted(new_subsets, key=lambda s: (len(s), s)))

    candidate = None
    while to_process:
        head = to_process.pop(0)
        if head not in processed:
            candidate = head
            break
    if candidate is None:
        return None

    processed.add(candidate)
    log.critical(f"{candidate} chosen")
    z3_model_support = Z3Backend(name="suppport")
    for constr in candidate:
        z3constr = constraints_db[constr]
        z3_model_support.terminal_conditions[constr] = z3constr
        z3_model_support.conditions[constr] = z3constr

    return z3_model_support


def _parse_define(text):
    """Parse a ``NAME:VALUE`` command-line definition into ``(name, int)``.

    Raises ``argparse.ArgumentTypeError`` so that malformed input becomes a
    regular usage error instead of a traceback.
    """
    parts = text.split(":")
    if len(parts) < 2:
        raise argparse.ArgumentTypeError(f"expected NAME:VALUE, got {text!r}")
    try:
        return parts[0], int(parts[1])
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"value in {text!r} is not an integer") from None


def _unique(groups):
    """Flatten the nested lists built by ``action="append"`` + ``nargs``,
    dropping duplicates while keeping first-seen order."""
    return list(dict.fromkeys(item for group in groups for item in group))


def _load_backends(model_files, ptype, size, defs, voi):
    """Parse every model file and execute it on a fresh ``Z3Backend``."""
    backends = []
    for model in model_files:
        modelname = path.basename(model)
        parser = Parser(ptype=ptype,
                        input_size=size,
                        custom_defs=defs)
        parser.parse_file(model)
        backend = Z3Backend(name=modelname, voi=voi)
        backend.exec_statements(parser.statements)
        backends.append(backend)
    return backends


def main():
    """Command-line entry point."""
    argparser = argparse.ArgumentParser(description="Interpret models and generate testcases")
    # At least one asserted model is needed: generate() starts from the
    # first one.
    argparser.add_argument('--asserts', '-A', action="append",
                           metavar="model", type=str, nargs="+",
                           required=True,
                           help="Model to assert")
    argparser.add_argument('--negates', '-N', action="append",
                           metavar="model", type=str, nargs="*",
                           default=[],
                           help="Model to negate")
    argparser.add_argument('--out', '-O', action="store",
                           metavar="outfile", type=str,
                           default="testcase",
                           help="Output file for testcase")
    # No ``nargs=1`` here: that would wrap a user-supplied value in a list
    # while the default stays a plain string.
    argparser.add_argument('--var', '-V', action="store",
                           metavar="variable", type=str,
                           default="HEADER",
                           help="Variable in the model to use for the testcase")
    argparser.add_argument('--size', '-B', action="store", metavar="bytes",
                           type=int, default=None,
                           help="Size in bytes of the testcase to generate")
    argparser.add_argument('--define', '-D', action="store", metavar="define",
                           type=_parse_define,
                           nargs="*",
                           help="Overwrite constant definition")

    args = argparser.parse_args()
    asserts = _unique(args.asserts)
    negates = _unique(args.negates)
    outfile = args.out
    voi = args.var
    size = args.size
    defs = dict(args.define) if args.define else {}

    Z3Backend.print_unsat = False
    z3_models_assert = _load_backends(
        asserts, Parser.ParserType.DIFFERENTIAL_ASSERT, size, defs, voi)
    z3_models_negate = _load_backends(
        negates, Parser.ParserType.DIFFERENTIAL_NEGATE, size, defs, voi)

    constraints_db = create_constraints_db((*z3_models_assert,
                                            *z3_models_negate))
    to_process = []
    processed = set()
    violated_once = set()
    z3_model_support = None
    iteration = 0
    while True:
        violated_constraints = set()
        model, testcase = generate(z3_models_assert, z3_models_negate,
                                   z3_model_support)
        if model:
            #### Find violated constraints
            violated_constraints = find_violations(model, z3_models_negate)
            log.critical(f"Violated Constraints: {violated_constraints}")
            #### Write testcase
            write_testcase(testcase, violated_constraints,
                           f"{outfile}_{iteration}")

        z3_model_support = next_iteration(violated_constraints,
                                          violated_once,
                                          processed,
                                          to_process,
                                          constraints_db)
        if not z3_model_support:
            break
        iteration += 1


if __name__ == "__main__":
    main()
By means of an SMT solver, it is possible to produce sequences of bytes (testcases) that meet all these constraints. If the model is consistent with the loader's behavior, feeding the loader with a testcase will result in the loading phase succeeding. 11 | Program validation, instead, consists in checking whether a given program respects the constraints enforced by a loader. In other words, the validation process ultimately forecasts whether a program would be successfully loaded by a specific loader or not. 12 | 13 | ## Core Concepts of the Language 14 | ### Structures 15 | Loaders usually cast part of the program headers to well-known data structures, often declared in the C language. 16 | Our language provides support for C types, which can be imported into a model by parsing C header files. 17 | ### Immediate Values 18 | Immediate values are either integers (both base-10 and base-16 numbers are allowed) or single characters. Internally, they are all parsed as integers. 19 | ### Variables 20 | Symbolic names given to expressions. 21 | ### Expressions 22 | Recursive structures that combine variables, immediate values and other expressions by means of operators.
23 | ### Operator Semantics and Arity 24 | | Operator | Arity | Signed | Sized | Meaning | Syntax | 25 | |:--------:|:-----:|:------:|:-----:|:---------------------------------------------:|:--------------------:| 26 | | ADD | 2 | Y | Y | Integer addition | | 27 | | SUB | 2 | Y | Y | Integer difference | | 28 | | MUL | 2 | Y | Y | Integer product | | 29 | | DIV | 2 | Y | Y | Integer division | | 30 | | UDIV | 2 | N | Y | Integer unsigned division | | 31 | | MOD | 2 | Y | Y | Integer modulo | | 32 | | BITOR | 2 | N | Y | Bitwise OR | | 33 | | BITAND | 2 | N | Y | Bitwise AND | | 34 | | BITNOT | 1 | N | Y | Bitwise NOT | | 35 | | OR | 2 | - | - | Logic OR | | 36 | | AND | 2 | - | - | Logic AND | | 37 | | NOT | 1 | - | - | Logic NOT | | 38 | | EQ | 2 | - | Y | Integer equality test | | 39 | | NEQ | 2 | - | Y | Integer inequality test | | 40 | | GT/GE | 2 | Y | Y | Integer greater [or equal] comparison | | 41 | | LT/LE | 2 | Y | Y | Integer less [or equal] comparison | | 42 | | UGT/UGE | 2 | N | Y | Unsigned greater [or equal] comparison | | 43 | | ULT/ULE | 2 | N | Y | Unsigned less [or equal] comparison | | 44 | | ISPOW2 | 1 | N | N | True if <arg> is a power of 2 | | 45 | | SHL/SHR | 2 | N | Y | Left/Right logical bit-shift | | 46 | | OVFLADD | 2 | - | Y | True if the sum of the two operands produces an overflow | | 47 | | Indexing | 2 | N | Y | Single byte extraction | <var>[byteindex] | 48 | | Slice | 3 | N | Y | Bytevector extraction | <var>[start, nbytes] | 49 | 50 | ### Syntactic Sugars 51 | |Expression | Meaning | Use case | 52 | |:---------------:|:---------------------------------------:|----------------------------------------------------| 53 | |STRCMP V1 I 'ME' | AND (EQ V1[I] "M") (EQ V1[ADD I 1] "E") | Comparison/Constraints involving printable strings | 54 | 55 | ## Valid Statements 56 | ### INPUT Statements 57 | #### Syntax 58 | `INPUT <name> (<size> | AS <type>)` 59 | #### Description 60 | Declare an input variable.
For testcase generation, this variable will be completely symbolic, meaning that it could assume any possible value. For program validation, input variables are fed into the process from outside. 61 | An example of an input variable is the file to produce or validate. 62 | Input variables must have a fixed size, which can be declared explicitly by adding its size in bytes after its name; or implicitly, by adding a type declaration by means of the `AS` keyword. 63 | 64 | ### DEFINE Statements 65 | #### Syntax 66 | `DEFINE <name> <value>` 67 | #### Description 68 | Syntactic sugar to give names to immediate values. Useful for those that repeat often in a model. 69 | Interpreted by the frontend parser/preprocessor. 70 | 71 | ### Soft-Constraints Statements 72 | #### Syntax 73 | `V: <expression>` 74 | #### Description 75 | Introduce a condition that can be later used for conditional statements. 76 | The value of the condition is defined by the expression on the right side of the statement. 77 | 78 | ### Validation Statements 79 | #### Syntax (unconditional) 80 | `V: <expression> TERM` 81 | #### Syntax (conditional) 82 | `V(Vn[, Vm[, Vo[...]]]): <expression> TERM` 83 | #### Description 84 | Introduce a hard constraint. 85 | During program validation, if the constraint is not met, the entire process fails. 86 | During testcase generation, the list of hard constraints is translated into SMT constraints and then fed to the backend to produce the testcase. 87 | #### Semantics of Conditional Validation 88 | A conditional validation is a hard constraint that behaves in the following way: 89 | 1. if at least one of its prior conditions is not met, its value is TRUE (meaning that the hard constraint is met) 90 | 2. if all its prior constraints are met, its value is defined by its expression 91 | Conditional hard-constraints 92 | In other words, conditional hard-constraints influence the loading process _only_ if their prior constraints are met.
93 | 94 | ### Parsing Statements 95 | #### Syntax (unconditional) 96 | `P: <output> <- <expression> [AS <type>]` 97 | #### Syntax (conditional) 98 | `P(Vn[, Vm[, Vo[...]]]): <output> <- <expression> [AS <type>]` 99 | #### Description 100 | Introduce a parsing stage in the model. 101 | This roughly corresponds to a variable assignment in procedural languages. 102 | #### Semantics of Conditional Parsing 103 | A conditional parsing statement roughly corresponds to a variable assignment in an _if-then-else_ statement in procedural languages, with a few caveats. 104 | In the first place, a variable introduced within a conditional statement will have a value of 0 if the condition is not met. The language parser will also produce a warning when this happens, since it could lead to unwanted behaviors. 105 | If, instead, the output variable of the statement was already defined (i.e., in a previous unconditional parsing statement), and if its conditions are not all met, its value is the one it held before the conditional parsing statement. 106 | 107 | ### Fixed-increment Loop Statements 108 | #### Syntax (start) 109 | `L: <output> <- LOOP(<input>, <startinoffset>, <structsize>, <count>, <maxunroll>) [AS <type>]` 110 | `<output>`: variable name 111 | `<input>`: expression 112 | `<startinoffset>`: expression 113 | `<structsize>`: immediate 114 | `<count>`: expression 115 | `<maxunroll>`: integer 116 | #### Syntax (end) 117 | `END L` 118 | #### Description 119 | This statement declares a loop iterating over an array of structures. 120 | An iteration is made up of all the statements between the start of the loop and its end. 121 | At the n-th iteration of the loop the output value takes the n-th element of the array. 122 | The `input` argument is the expression to slice in order to extract the elements of the array. 123 | The `startinoffset` is an expression that indicates the offset (in bytes) at which the array starts within the `input` variable. 124 | `structsize` represents the size (in bytes) of each element of the array. It must be an immediate. 125 | `count` is an expression representing the number of iterations of the loop.
126 | `maxunroll` is an integer used _only during testcase generation_ as an upper-bound for `count`. Its role is fundamental due to the lack of loop support in SMT solvers. In fact, loops in our language are unrolled to overcome this limitation. 127 | 128 | ### Generic Loop Statements 129 | #### Syntax (start) 130 | `L: <output> <- VLOOP(<start>, <next>, <condition>, <maxunroll>)` 131 | `<output>`: variable name 132 | `<start>`: expression 133 | `<next>`: variable name 134 | `<condition>`: condition name 135 | `<maxunroll>`: integer 136 | #### Syntax (end) 137 | `END L` 138 | #### Description 139 | Declares a generic loop to execute the same set of statements multiple times up until a certain condition is met. 140 | At each iteration the `output` variable takes a different value, according to the following scheme: 141 | 1. During the first iteration, its value is set to that of the `start` expression 142 | 2. In the following iterations, its value is set to that of the `next` variable 143 | The `next` variable must be set inside the body of the loop, by means of a `P` statement. 144 | `condition` is the identifier of a soft-constraint declared within the body of the loop. 145 | The semantics of `maxunroll` is the same as for the `fixed-increment loop` statements.
class Base(object):
    """Mixin that forwards subtraction to the object's ``symb`` attribute."""

    def __sub__(self, other):
        return self.symb - other

    def __rsub__(self, other):
        return other - self.symb


class Expression(Base):
    """Node of an expression tree: an opcode applied to its operands.

    ``OPCODES`` maps every supported opcode to the number of operands it
    takes.
    """

    OPCODES = {'VAR'       : 1,
               'IMM'       : 1,
               'ADD'       : 2,
               'SUB'       : 2,
               'MUL'       : 2,
               'DIV'       : 2,
               'UDIV'      : 2,
               'MOD'       : 2,
               'AND'       : 2,
               'OR'        : 2,
               'NOT'       : 1,
               'ULE'       : 2,
               'UGE'       : 2,
               'ULT'       : 2,
               'UGT'       : 2,
               'EQ'        : 2,
               'NEQ'       : 2,
               'GE'        : 2,
               'LE'        : 2,
               'GT'        : 2,
               'LT'        : 2,
               'BITOR'     : 2,
               'BITAND'    : 2,
               'BITNOT'    : 1,
               'SHR'       : 2,
               'SHL'       : 2,
               'Slice'     : 3,
               'Index'     : 2,
               'ISPOW2'    : 1,
               'ALIGNUP'   : 2,
               'ALIGNDOWN' : 2,
               'ISALIGNED' : 2,
               'SECT'      : 1,
               'NSECT'     : 1,
               'OPTHDR'    : 1,
               'OVFLADD'   : 2,
               'INT'       : 2,
    }

    def __init__(self, opcode, *operands):
        """Create the node.

        Raises:
            ValueError: unknown *opcode*, or wrong number of operands.
            TypeError: an operand is not a Variable, Immediate or Expression
                (subclasses of those are accepted).
        """
        if opcode not in self.OPCODES:
            raise ValueError(f"Unknown opcode {opcode}")
        self.opcode = opcode
        expected = self.OPCODES[opcode]
        if len(operands) != expected:
            raise ValueError(f"Opcode {opcode} expects {expected} operands. "
                             f"{len(operands)} provided instead.")
        # isinstance (rather than an exact type match) so that subclasses
        # such as BoolImmediate are valid operands too.
        if not all(isinstance(op, (Variable, Immediate, Expression))
                   for op in operands):
            raise TypeError("Operands of not supported types")

        self.operands = operands

    def __repr__(self):
        return f"<Expression {self.opcode} {self.operands}>"

    def pprint(self, spacing=" "):
        """Return a multi-line rendering; every operand line is prefixed
        with *spacing*.  VAR and IMM nodes render as their single operand."""
        if self.opcode in ("VAR", "IMM"):
            return self.operands[0].pprint()
        ret = ""
        ret += f"{self.opcode}(\n"
        args = ",\n".join(
            ["\n".join([(spacing + line) for line in x.pprint().split("\n")])
             for x in self.operands])
        ret += args
        ret += "\n)"
        return ret


class Immediate(object):
    """Constant value appearing in an expression."""

    def __init__(self, value):
        self.value = value

    def __repr__(self):
        return f"<Immediate {self.value}>"

    def pprint(self):
        return str(self.value)


class BoolImmediate(Immediate):
    """Immediate whose value must be a ``bool``."""

    def __init__(self, value):
        if not isinstance(value, bool):
            t = type(value)
            raise TypeError(f"expr must be of type bool. {t} found instead")
        self.value = value


class Variable(object):
    """Named variable, optionally tagged with a type."""

    def __init__(self, name, type=None):
        self.name = name
        self.type = type

    def __repr__(self):
        t = "" if not self.type else f" of type {self.type}"
        return f"<Variable {self.name}{t}>"

    def pprint(self):
        return self.name
class Loop(Statement):
    """Fixed-increment loop statement.

    Stores the loop parameters and collects the statements of its body via
    :meth:`add_statement`.
    """

    def __init__(self, loop_name, output_name, input_var, startpos, structsize, count, maxunroll, vtype=None, conditions=None):
        """Create the loop.

        Raises:
            TypeError: *input_var*, *startpos* or *count* is not an
                Expression.
        """
        super(Loop, self).__init__()
        self._loop_name = loop_name
        self.output_name = output_name
        if not isinstance(input_var, Expression):
            t = type(input_var)
            raise TypeError(f"Expected Expression for input_var."
                            f"Found {t}")
        self.input_var = input_var

        if not isinstance(startpos, Expression):
            # The parameter is called ``startpos``; reading ``start_pos``
            # here would raise NameError instead of the intended TypeError.
            t = type(startpos)
            raise TypeError(f"Expected Expression for start_pos."
                            f"Found {t}")
        self.startpos = startpos

        if not isinstance(count, Expression):
            t = type(count)
            raise TypeError(f"Expected Expression for count."
                            f"Found {t}")
        self.count = count

        self.maxunroll = maxunroll
        self.structsize = structsize
        self._statements = []
        self.vtype = vtype
        self._conditions = [] if conditions is None else conditions

    def add_statement(self, stmt):
        """Append *stmt* to the loop body.

        Raises:
            TypeError: *stmt* is not a Statement.
        """
        if not isinstance(stmt, Statement):
            t = type(stmt)
            raise TypeError(f"Expected Statement for stmt"
                            f"Found {t} instead")
        self._statements.append(stmt)

    def __repr__(self):
        s = f"<Loop {self._loop_name} -> {self.output_name}>"
        return s
class Condition(Statement):
    """Named boolean expression, optionally guarded by prior conditions.

    ``isterminal`` marks the condition as terminal; ``conditions`` is the
    list of prior :class:`Condition` objects it depends on.
    """

    def __init__(self, expr, isterminal, conditions=None, name=None):
        """Create the condition.

        *expr* is an Expression, or a bool that is wrapped into an ``IMM``
        expression.

        Raises:
            TypeError: *expr* has another type, *conditions* is not a list,
                or one of its items is not a Condition.
        """
        super(Condition, self).__init__()
        if isinstance(expr, Expression):
            self.expr = expr
        elif isinstance(expr, bool):
            self.expr = Expression("IMM", Immediate(expr))
        else:
            t = type(expr)
            raise TypeError(f"expr must be an Expression or a bool. "
                            f"It is {t} instead")
        self.isterminal = bool(isterminal)

        if conditions is None:
            conditions = []

        if not isinstance(conditions, list):
            t = type(conditions)
            raise TypeError(f"Conditions must be a list. "
                            f"It is {t} instead")
        if not all(isinstance(x, Condition) for x in conditions):
            raise TypeError("Conditions must be a list of Condition object")
        self._conditions = conditions
        self.name = name

    @property
    def conditions(self):
        """Prior conditions this condition depends on."""
        return self._conditions

    @conditions.setter
    def conditions(self, new):
        if not isinstance(new, list):
            t = type(new)
            raise TypeError(f"Conditions must be a list. "
                            f"It is {t} instead")
        if not all(isinstance(x, Condition) for x in new):
            raise TypeError("Conditions must be a list of Condition object")
        self._conditions = new

    @property
    def conditional(self):
        """True when at least one prior condition is attached."""
        return len(self._conditions) != 0

    def __repr__(self):
        s = "<"
        s += "Terminal " if self.isterminal else ""
        s += f"Condition {self.expr}"
        if len(self.conditions) != 0:
            s += f" if {self._conditions}"
        # Always close the bracket, with or without prior conditions.
        s += ">"
        return s

    def __invert__(self):
        """Return a new, unnamed condition on the NOT of this expression.

        The new condition refers to the same prior-conditions list object.
        """
        return Condition(Expression("NOT", self.expr), self.isterminal,
                         self._conditions)

    def add_prefix(self, prefix):
        """Prepend *prefix* to the name; an unnamed condition is treated as
        having an empty name (a warning is logged)."""
        if self.name is None:
            log.warning("Adding prefix to unnamed condition")
            self.name = ""
        self.name = prefix + self.name

    def clone(self):
        """Return a copy that has its own prior-conditions list and the same
        expression, terminal flag and name."""
        conditions = list(self._conditions)
        new = Condition(self.expr, self.isterminal, conditions)
        new.name = self.name
        return new


class Define(Statement):
    """Statement that gives a name to an immediate expression."""

    def __init__(self, name, value):
        """Raises TypeError unless *value* is an Expression with opcode
        ``IMM``."""
        super(Define, self).__init__()
        if not isinstance(value, Expression):
            t = type(value)
            log.error(f"value expected to be of type Expression. {t} found instead")
            raise TypeError
        if value.opcode != 'IMM':
            log.error(f"Value must be an immediate expression. {value.opcode} found instead")
            raise TypeError
        self.name = name
        self.value = value


class Optimization(Statement):
    """Statement pairing an optimization strategy with an expression."""

    def __init__(self, strategy, expression):
        """Raises TypeError unless *strategy* is an ``Optimizations``
        member."""
        super(Optimization, self).__init__()
        if not isinstance(strategy, Optimizations):
            t = type(strategy)
            log.error(f"strategy expected to be one of the supported Optimizations. {t} found instead")
            raise TypeError
        self.strategy = strategy
        self.expression = expression
def extend(value, n, signed):
    """Return the byte string *value* with *n* padding bytes appended.

    Padding is ``0x00`` unless *signed* is true and the top bit of the last
    byte is set, in which case it is ``0xff``.  An empty *value* is always
    padded with zeros.
    """
    if not signed or not value:
        return value + b'\x00'*n
    trail = b'\x00' if (value[-1] & (0x80)) == 0 else b'\xff'
    return value + trail*n

def sized(skipargs=(), skipret=False, sign=False):
    """Decorator factory that brings byte-string arguments to a common size.

    Every positional argument whose index is not in *skipargs* is padded
    with :func:`extend` to the length of the longest such argument;
    arguments listed in *skipargs* are passed through untouched, in their
    original position.  Unless *skipret* is true, the result is truncated
    or padded to that same length.  *sign* selects sign-aware padding.
    """
    def sized_outer(func):
        def sized_inner(*args):
            max_size = max((len(x) for n, x in enumerate(args)
                            if n not in skipargs), default=0)
            # Keep skipped arguments in place so the wrapped function still
            # receives its full argument list.
            args = [x if n in skipargs else extend(x, max_size - len(x), sign)
                    for n, x in enumerate(args)]
            ret = func(*args)
            if skipret:
                return ret
            lendiff = len(ret) - max_size
            if lendiff >= 0: # if output is longer than input, cut it
                return ret[:max_size]
            # if smaller, extend it (trail depends on signedness)
            return extend(ret, -lendiff, sign)
        return sized_inner
    return sized_outer
    def __init__(self):
        """Build the opcode dispatch table and configure the logger."""
        super().__init__()
        # Maps every opcode name to the method implementing it.  'Index' is
        # bound to Slice as well; Slice's ``cnt`` parameter defaults to 1.
        self.funcs = { 'ADD' : self.ADD,
                       'SUB' : self.SUB,
                       'MUL' : self.MUL,
                       'DIV' : self.DIV,
                       'UDIV' : self.UDIV,
                       'MOD' : self.MOD,
                       'AND' : self.And,
                       'OR' : self.Or,
                       'NOT' : self.Not,
                       'ULE' : self.ULE,
                       'UGE' : self.UGE,
                       'ULT' : self.ULT,
                       'UGT' : self.UGT,
                       'EQ' : self.EQ,
                       'NEQ' : self.NEQ,
                       'GE' : self.GE,
                       'LE' : self.LE,
                       'GT' : self.GT,
                       'LT' : self.LT,
                       'BITOR' : self.BITOR,
                       'BITAND' : self.BITAND,
                       'BITNOT' : self.BITNOT,
                       'Slice' : self.Slice,
                       'Index' : self.Slice,
                       'ISPOW2' : self.ISPOW2,
                       'INT' : self.INT,
                       'VAR' : self.VAR,
                       'IMM' : self.IMM,
                       'SHR' : self.SHR,
                       'SHL' : self.SHL,
                       'ALIGNUP' : self.ALIGNUP,
                       'ALIGNDOWN' : self.ALIGNDOWN,
                       'ISALIGNED' : self.ISALIGNED,
                       'OVFLADD' : self.OVFLADD
        }
        # Only CRITICAL messages of this backend are emitted.
        self.log = logging.getLogger(__name__)
        self.log.setLevel(logging.CRITICAL)
        coloredlogs.install(level="CRITICAL", logger=self.log)
        self._last_fail = None
132 | return a // b 133 | 134 | @staticmethod 135 | @sized() 136 | @unsigned() 137 | def UDIV(a, b): 138 | return a // b 139 | 140 | @staticmethod 141 | @sized(skipret=True) 142 | @unsigned(skipret=True) 143 | def EQ(a, b): 144 | return a == b 145 | 146 | @staticmethod 147 | @sized(skipret=True) 148 | @unsigned(skipret=True) 149 | def NEQ(a, b): 150 | return a != b 151 | 152 | @staticmethod 153 | @sized(sign=False) 154 | @unsigned() 155 | def BITOR(a, b): 156 | return a | b 157 | 158 | @staticmethod 159 | @sized(sign=False) 160 | @unsigned() 161 | def BITAND(a, b): 162 | return a & b 163 | 164 | @staticmethod 165 | @sized(sign=True) 166 | @signed() 167 | def BITNOT(a): 168 | return ~a 169 | 170 | @staticmethod 171 | @unsigned() 172 | def ISPOW2(a): 173 | return (a == 0) or (a & (a - 1)) == 0 174 | 175 | @staticmethod 176 | @unsigned() 177 | def ISALIGNED(a, b): 178 | return (a & (b -1)) == 0 179 | 180 | @staticmethod 181 | def And(a, b): 182 | return a and b 183 | 184 | @staticmethod 185 | def Or(a, b): 186 | return a or b 187 | 188 | @staticmethod 189 | def Not(a): 190 | return not a 191 | 192 | @staticmethod 193 | @sized(skipret=True) 194 | @unsigned(skipret=True) 195 | def ULE(a, b): 196 | return a <= b 197 | 198 | @staticmethod 199 | @sized(skipret=True) 200 | @unsigned(skipret=True) 201 | def UGE(a, b): 202 | return a >= b 203 | 204 | @staticmethod 205 | @sized(skipret=True) 206 | @unsigned(skipret=True) 207 | def ULT(a, b): 208 | return a < b 209 | 210 | @staticmethod 211 | @sized(skipret=True) 212 | @unsigned(skipret=True) 213 | def UGT(a, b): 214 | return a > b 215 | 216 | @staticmethod 217 | @sized(skipret=True) 218 | @signed(skipret=True) 219 | def GE(a, b): 220 | return a >= b 221 | 222 | @staticmethod 223 | @sized(skipret=True) 224 | @signed(skipret=True) 225 | def LE(a, b): 226 | return a <= b 227 | 228 | @staticmethod 229 | @sized(skipret=True) 230 | @signed(skipret=True) 231 | def LT(a, b): 232 | return a < b 233 | 234 | @staticmethod 235 | 
def dispatch(self, func, *args):
    """Look up opcode ``func`` in ``self.funcs`` and call it with ``args``.

    Only one to three operands are accepted; any other count is logged as
    critical and rejected with ``TypeError``. An unknown opcode surfaces
    as the ``KeyError`` raised by the table lookup.
    """
    argc = len(args)
    if argc < 1 or argc > 3:
        self.log.critical(f"Trying to dispatch function with {len(args)}"
                          " arguments")
        raise TypeError
    handler = self.funcs[func]
    return handler(*args)
left = stmt.left.name 315 | rigth = stmt.right 316 | self.variables[left] = self._eval_expression(rigth) 317 | 318 | def _exec_conditional_assignment(self, stmt): 319 | left = stmt.left.name 320 | rigth = stmt.right 321 | conditions = stmt.conditions 322 | if left not in self.variables: 323 | self.log.warning(f"Variable {left} initialized in conditional statement. Defaulting it to 0.") 324 | self.variables[left] = pack(0, "all") 325 | 326 | if all(self._eval_condition(x) for x in conditions): 327 | self.variables[left] = self._eval_expression(rigth) 328 | 329 | def _exec_assignment(self, stmt): 330 | if stmt.conditional: 331 | return self._exec_conditional_assignment(stmt) 332 | else: 333 | return self._exec_unconditional_assignment(stmt) 334 | 335 | def _eval_condition(self, condition, overwrite=False): 336 | if not overwrite and condition.name and condition.name in self.conditions: 337 | return self.conditions[condition.name] 338 | expr = lambda: self._eval_expression(condition.expr) 339 | conds = all(self._eval_condition(x) 340 | for x in condition.conditions) 341 | if condition.isterminal: 342 | if conds: 343 | return expr() 344 | else: 345 | return True 346 | return conds and expr() 347 | 348 | def _exec_condition(self, stmt): 349 | name = stmt.name 350 | if name is None: 351 | self.log.warning("Executing unnamed condition... Not sure this is intended.") 352 | res = self._eval_condition(stmt, overwrite=True) 353 | self.conditions[name] = res 354 | 355 | if not res and stmt.isterminal: 356 | self.log.critical(f"Terminal condition {name} not met. 
def _exec_vloop(self, stmt):
    """Run a variable-length loop on concrete data.

    The loop variable (``stmt.output_name``) starts at ``stmt.start``. The
    body statements run for as long as the condition named
    ``stmt.contcondition`` is true in ``self.conditions``; the body is
    expected to re-evaluate that condition. After each pass the loop
    variable is advanced to the value of ``stmt.nextname``.

    The previous version built the "advance" assignment but never executed
    it, so the loop variable kept its initial value on every iteration.
    It also evaluated the loop preconditions twice.
    """
    if not all(self._eval_condition(x) for x in stmt._conditions):
        return

    varname = Variable(stmt.output_name)
    contcondition = stmt.contcondition

    # Seed the loop variable and mark the continuation condition as true
    # so that the body runs at least once.
    self._exec_assignment(Assignment(varname, stmt.start))
    self._exec_condition(Condition(True, False, name=contcondition))

    next_assignment = Assignment(varname, Expression("VAR", stmt.nextname))
    while self.conditions[contcondition]:
        for s in stmt._statements:
            self._exec_statement(s)
        # Advance only when another iteration will follow. The Z3 backend
        # likewise guards this assignment with the updated continuation
        # condition.
        if self.conditions[contcondition]:
            self._exec_assignment(next_assignment)
@staticmethod
def SHR(a, b):
    """Logical (zero-filling) right shift of bit-vector ``a`` by ``b``.

    ``a >> b`` on z3 bit-vectors is an *arithmetic* shift that replicates
    the sign bit. SHR is listed among the unsigned operations of this
    backend, and the Python backend evaluates it on unsigned integers, so
    the logical shift ``z3.LShR`` is the matching operation.
    """
    return z3.LShR(a, b)
@staticmethod
def Slice(var, start, cnt=1):
    """Extract ``cnt`` bytes of ``var`` starting at byte offset ``start``.

    The value is shifted right by ``start * 8`` bits and its low
    ``cnt * 8`` bits are returned. ``start`` may be a plain integer or a
    bit-vector; a bit-vector offset is zero-extended to the width of
    ``var`` before being used as the shift amount. A bit-vector ``cnt``
    is converted with ``as_long()``.
    """
    if isinstance(start, z3.BitVecRef):
        bit_offset = z3.ZeroExt(var.size() - start.size(), start) * 8
    else:
        bit_offset = start * 8
    shifted = z3.LShR(var, bit_offset)

    if isinstance(cnt, z3.BitVecRef):
        cnt = cnt.as_long()
    return z3.Extract((cnt * 8) - 1, 0, shifted)
def dispatch_z3_3(self, func, *args):
    """Dispatch a three-operand opcode; only ``Slice`` is accepted.

    Raises:
        ValueError: if ``func`` is anything other than ``"Slice"``.
    """
    if func != "Slice":
        # Logger methods are lower-case; the previous ``self.log.CRITICAL``
        # raised AttributeError before the intended ValueError was reached.
        message = f"{func} not recognized as a 3-arguments function"
        self.log.critical(message)
        raise ValueError(message)
    return self.z3_funcs[func](*args)
def _exec_conditional_assignment(self, stmt):
    """Assign ``stmt.right`` to ``stmt.left`` only where the guards hold.

    The variable becomes ``If(guards, new_value, old_value)``. A variable
    that does not exist yet is first initialised to a zero bit-vector of
    the same width as the new value, so that an "old value" exists.
    """
    # The previous message was copy-pasted and said "unconditional".
    self.log.debug(f"Executing conditional assignment {stmt}")
    var = stmt.left
    expr = stmt.right
    z3expr = self._eval_expression(expr)
    size = z3expr.size()

    if var.name not in self.variables:
        self.log.warning(f"Variable {var.name} declared in a conditional assignement. Its value in case the condition is not satisfied defaults to 0")
        self.variables[var.name] = z3.BitVecVal(0, size)

    self.variables[var.name] = z3.If(
        self._eval_condition_list(stmt._conditions),
        z3expr,
        self.variables[var.name])
self._exec_statement(self._build_loop_unrool_condition(stmt)) 326 | for index in range(stmt.maxunroll): 327 | pref = cond_prefix + f"{index}_" 328 | self.log.debug(f"Unrolling loop {stmt}. Index {index}") 329 | lcond = Condition(Expression("UGT", count, Expression("IMM", Immediate(index))), False) 330 | var_assignement = Assignment(ovar, 331 | Expression("Slice", ivar, 332 | Expression("ADD", startpos, 333 | Expression("IMM", Immediate(index*structsize))), 334 | Expression("IMM", Immediate(structsize))), 335 | [*conditions, lcond]) 336 | self._exec_statement(var_assignement) 337 | for s in statements: 338 | if isinstance(s, Condition): 339 | s = s.clone() 340 | s.add_prefix(pref) 341 | s._conditions.extend(conditions) 342 | s._conditions.append(lcond) 343 | self._exec_statement(s, prev_prefix=pref) 344 | 345 | def _exec_vloop(self, stmt, prev_prefix=""): 346 | cond_prefix = f"{prev_prefix}_L{stmt._loop_name}_" 347 | statements = stmt._statements 348 | ovar = Variable(stmt.output_name) 349 | start = stmt.start 350 | nextvar = stmt.nextname 351 | condname = stmt.contcondition 352 | maxunroll = stmt.maxunroll 353 | conditions = stmt._conditions 354 | 355 | if self.prefix(condname) in self.conditions: 356 | cond = self.conditions[self.prefix(condname)] 357 | else: 358 | cond = Condition(True, isterminal=False, name=condname) 359 | self._exec_condition(cond) 360 | 361 | # Assign the first value 362 | initial_assignement = Assignment(ovar, start, [*conditions]) 363 | self._exec_assignment(initial_assignement) 364 | # Unroll 365 | for index in range(stmt.maxunroll): 366 | # Prefix for the conditions 367 | pref = cond_prefix + f"{index}_" 368 | self.log.debug(f"Unrolling loop {stmt}. Index {index}") 369 | 370 | # For each statement in the loop 371 | for s in statements: 372 | # if the statement is a condition... 373 | if isinstance(s, Condition): 374 | # ... clone it 375 | s = s.clone() 376 | # if it changes the loop condition... 
def _exec_optimization(self, stmt):
    """Register a maximize/minimize objective for the optimizer.

    The evaluated expression is appended to ``self.optimizations`` together
    with its strategy, and ``self.enable_optimizations`` is switched on.

    Raises:
        NotImplementedError: for any strategy other than MAXIMIZE/MINIMIZE.
    """
    strategy = stmt.strategy
    if strategy not in (Optimizations.MAXIMIZE, Optimizations.MINIMIZE):
        # This module defines no module-level ``log``; the previous
        # ``log.error`` raised NameError before NotImplementedError.
        message = f"Strategy {stmt.strategy} not implemented"
        self.log.error(message)
        raise NotImplementedError(message)

    self.enable_optimizations = True
    self.optimizations.append((strategy,
                               self._eval_expression(stmt.expression)))
def verify(self, test, variable="HEADER"):
    """Check whether the concrete bytes ``test`` satisfy the loaded model.

    The model statements are executed, ``variable`` is constrained to equal
    ``test`` (zero-padded to the variable's size, little-endian) and the
    solver is run.

    Returns:
        The result of ``check_sat()``: a model when satisfiable, ``None``
        otherwise.

    Raises:
        ValueError: if no statements are loaded, or if ``test`` is longer
            than the model input.
    """
    if not self._statements:
        self.log.error("Load statements before call verify()")
        raise ValueError
    self.exec_statements(self._statements)

    var = self.variables[variable]
    # size() is in bits. The previous code used it as a byte count, so the
    # length check was 8x too lenient and the test vector 8x too wide.
    nbits = var.size()
    nbytes = nbits // 8
    if len(test) > nbytes:
        self.log.critical("The file to verify is bigger than the input of the model. Aborting.")
        raise ValueError
    padded = test + b'\x00' * (nbytes - len(test))
    testvec = z3.BitVecVal(int.from_bytes(padded, "little"), nbits)
    self.variables['TEST__'] = testvec
    expr = Expression("EQ",
                      Expression("VAR", Variable(variable)),
                      Expression("VAR", Variable("TEST__")))
    constraint = Condition(expr, True, name='VTEST')
    self._exec_statement(constraint)
    self.generate_solver()
    return self.check_sat()
def __invert__(self):
    """Build a backend that is satisfied when this model is violated.

    The new backend shares this model's variable of interest and carries a
    single terminal condition, ``'negated'``: the disjunction of the
    negations of all of this backend's terminal conditions.
    """
    negated = Z3Backend(name=f"~{self.name}", voi=self.voi)
    negated.variables[f'{negated.voi}'] = self.variables[negated.voi]
    violated = [z3.Not(cond) for cond in self.terminal_conditions.values()]
    negated.terminal_conditions['negated'] = z3.Or(violated)
    return negated
def parse_file(self, fname):
    """Parse the model file ``fname`` one line at a time.

    The file name and its directory are remembered on the parser. Each line
    is passed to ``self.parser.parse`` and any truthy result is printed.

    Raises:
        ValueError: if a block is still open on ``self._block_stack`` after
            the last line.
    """
    self._fname = fname
    self._cwd = os.path.dirname(fname)
    with open(fname, "r") as source:
        lines = source.readlines()

    for lineno, text in enumerate(lines, start=1):
        if not text:
            continue
        log.debug(f"Line {lineno}: {text}")
        parsed = self.parser.parse(text)
        if parsed:
            print(parsed)

    # Check that there are no un-ended loops
    if self._block_stack:
        log.error(f"Un-ended statement: {self._block_stack[0]}")
        raise ValueError
def p_statement_cond(self, p):
    'statement : condition_stmt'
    # The docstring above is the PLY grammar rule and must stay unchanged.
    # Registers the condition under its upper-cased name, then attaches it
    # to the innermost open block, or to the top-level statement list when
    # no block is open.
    log.debug("Condition " + str(p[1]))
    name, condition = p[1]
    key = name.upper()
    self.conditions[key] = condition
    condition.name = key
    if self._block_stack:
        self._block_stack[-1].add_statement(condition)
    else:
        self.statements.append(condition)
    p[0] = condition
def p_statement_loopend(self, p):
    'statement : loopend_stmt'
    # The docstring above is the PLY grammar rule and must stay unchanged.
    # Closes the innermost open block. The closing name must match the
    # block's loop name, otherwise ValueError is raised. The finished loop
    # is attached to its parent block, or to the top-level statement list
    # when no block is open.
    closed = self._block_stack.pop()
    end_name = p[1][0]
    if closed._loop_name != end_name:
        log.critical("Loop end does not match current loop name")
        raise ValueError
    log.debug("Loop end " + str(end_name))
    if self._block_stack:
        self._block_stack[-1].add_statement(closed)
    else:
        self.statements.append(closed)
    p[0] = closed
def p_load_stmt(self, p):
    'load_stmt : LOADTYPES VARIABLE VARIABLE'
    # The docstring above is the PLY grammar rule and must stay unchanged.
    # Produces (header name, structures module name, LOADTYPES token value).
    # 'linux' is an alias for the default structures module.
    target = 'DEFAULT' if p[3] == 'linux' else p[3]
    p[0] = (p[2], target, p[1])
Defaulting to untyped variable") 249 | var = (Variable(p[2]), p[3]) 250 | else: 251 | var = (Variable(p[2], self.loaded_types[t]), p[3]) 252 | p[0] = var 253 | 254 | def p_input_stmt(self, p): 255 | 'input_stmt : INPUT VARIABLE constant' 256 | log.debug("Input statement") 257 | var = (Variable(p[2]), p[3]) 258 | p[0] = var 259 | 260 | def p_output_stmt(self, p): 261 | 'output_stmt : OUTPUT VARIABLE constant' 262 | log.debug("Output statement") 263 | var = (Variable(p[2]), p[3]) 264 | p[0] = var 265 | 266 | def p_output_stmt_type(self, p): 267 | 'output_stmt : OUTPUT VARIABLE constant TYPE VARIABLE' 268 | log.debug("Output statement") 269 | t = p[5] 270 | if t not in self.loaded_types: 271 | log.warning(f"Unknown type {t}. Defaulting to untyped variable") 272 | var = (Variable(p[2]), p[3]) 273 | else: 274 | var = (Variable(p[2], self.loaded_types[t]), p[3]) 275 | p[0] = var 276 | 277 | def p_constant_number(self, p): 278 | 'constant : NUMBER' 279 | p[0] = p[1] 280 | 281 | def p_constant_define(self, p): 282 | 'constant : VARIABLE' 283 | name = p[1] 284 | if name not in self.defines: 285 | log.error(f"{name} not defined as a constant") 286 | raise ValueError 287 | p[0] = self.defines[name].operands[0].value 288 | 289 | def p_assignment_stmt_uncond(self, p): 290 | 'assignment_stmt : ASSIGNSTART COLON assignment' 291 | assignment = p[3] 292 | p[0] = assignment 293 | 294 | def p_assignment_stmt_cond(self, p): 295 | 'assignment_stmt : ASSIGNSTART conditionlist COLON assignment' 296 | assignement = p[4] 297 | assignement.left.symb = assignement.right 298 | conditionslist = p[2] 299 | conds = [~self.conditions[c.name] if c.negated else 300 | self.conditions[c.name] 301 | for c in conditionslist] 302 | assignement.conditions = conds 303 | p[0] = assignement 304 | 305 | def p_condition_stmt_uncond(self, p): 306 | 'condition_stmt : CONDITIONNAME COLON conditionexpr' 307 | p[3].name = p[1] 308 | p[0] = (p[1], p[3]) 309 | 310 | def p_condition_stmt_cond(self, p): 311 | 
'condition_stmt : CONDITIONNAME conditionlist COLON conditionexpr' 312 | cond = p[4] 313 | cond.name = p[1] 314 | conditionslist = p[2] 315 | conds = [~self.conditions[c.name] if c.negated else 316 | self.conditions[c.name] 317 | for c in conditionslist] 318 | cond.conditions = conds 319 | p[0] = (p[1], cond) 320 | 321 | def p_condition_stmt_noexpr(self, p): 322 | 'condition_stmt : CONDITIONNAME conditionlist SEMICOLON' 323 | conditionslist = p[2] 324 | conds = [self.conditions[c] for c in conditionslist.names] 325 | cond = Condition(True, False, conds) 326 | p[0] = (p[1], cond) 327 | 328 | def p_gcondition_stmt_uncond(self, p): 329 | 'gencondition_stmt : GENCONDITIONNAME COLON conditionexpr' 330 | p[3].name = p[1] 331 | p[0] = (p[1], p[3]) 332 | 333 | def p_gcondition_stmt_cond(self, p): 334 | 'gencondition_stmt : GENCONDITIONNAME conditionlist COLON conditionexpr' 335 | cond = p[4] 336 | cond.name = p[1] 337 | conditionslist = p[2] 338 | conds = [~self.conditions[c.name] if c.negated else 339 | self.conditions[c.name] 340 | for c in conditionslist] 341 | cond.conditions = conds 342 | p[0] = (p[1], cond) 343 | 344 | def p_gcondition_stmt_noexpr(self, p): 345 | 'gencondition_stmt : GENCONDITIONNAME conditionlist SEMICOLON' 346 | conditionslist = p[2] 347 | conds = [self.conditions[c] for c in conditionslist.names] 348 | cond = Condition(True, False, conds) 349 | p[0] = (p[1], cond) 350 | 351 | def p_loopstart_stmt_typed(self, p): 352 | '''loopstart_stmt : loopstart TYPE VARIABLE 353 | | vloopstart TYPE VARIABLE''' 354 | t = p[3] 355 | if t not in self.loaded_types: 356 | raise TypeError(f"Unknown type {t}") 357 | loop = p[1] 358 | loop[1].vtype = self.loaded_types[t] 359 | p[0] = loop 360 | 361 | def p_loopstart_stmt_untyped(self, p): 362 | '''loopstart_stmt : loopstart 363 | | vloopstart''' 364 | p[0] = p[1] 365 | 366 | def p_loopstart_stmt(self, p): 367 | '''loopstart : LOOPSTART COLON VARIABLE ARROW LOOP LPAREN expression COMMA expression COMMA NUMBER COMMA 
expression COMMA NUMBER RPAREN 368 | | LOOPSTART conditionlist COLON VARIABLE ARROW LOOP LPAREN expression COMMA expression COMMA NUMBER COMMA expression COMMA NUMBER RPAREN 369 | ''' 370 | loopindex = p[1] 371 | isconditional = len(p) == 18 372 | unroll_count = None 373 | if self._type in (self.ParserType.DIFFERENTIAL_ASSERT, 374 | self.ParserType.DIFFERENTIAL_NEGATE): 375 | unroll_count = 1 376 | if not isconditional: 377 | unroll_count = unroll_count if unroll_count else p[15] 378 | loop = Loop(p[1], p[3], p[7], p[9], p[11], p[13], unroll_count) 379 | else: 380 | unroll_count = unroll_count if unroll_count else p[16] 381 | conds = [self.conditions[c] for c in p[2].names] 382 | loop = Loop(p[1], p[4], p[8], p[10], p[12], p[14], unroll_count, 383 | conditions=conds) 384 | p[0] = (loopindex, loop) 385 | 386 | def p_loopstart_stmt_2(self, p): 387 | 'loopstart : LOOPSTART COLON VARIABLE ARROW LOOP LPAREN expression COMMA expression COMMA expression COMMA expression COMMA NUMBER RPAREN' 388 | loopindex = p[1] 389 | structsize = p[11] 390 | if structsize.opcode != "IMM": 391 | raise ValueError("Struct size must be a number") 392 | structsize = structsize.operands[0].value 393 | if self._type in (self.ParserType.DIFFERENTIAL_ASSERT, 394 | self.ParserType.DIFFERENTIAL_NEGATE): 395 | unroll_count = 1 396 | else: 397 | unroll_count = p[15] 398 | loop = Loop(p[1], p[3], p[7], p[9], structsize, p[13], unroll_count) 399 | p[0] = (loopindex, loop) 400 | 401 | def p_vloopstart_stmt_variable(self, p): 402 | '''vloopstart : LOOPSTART COLON VARIABLE ARROW VLOOP LPAREN expression COMMA VARIABLE COMMA CONDITIONNAME COMMA NUMBER RPAREN 403 | | LOOPSTART conditionlist COLON VARIABLE ARROW VLOOP LPAREN expression COMMA VARIABLE COMMA CONDITIONNAME COMMA NUMBER RPAREN 404 | ''' 405 | t = [p[x] for x in range(len(p))] 406 | loopindex = t[1] 407 | if len(p) == 16: # if it's conditional 408 | conditionlist = [self.conditions[c] for c in t[2].names] 409 | del t[2] 410 | else: 411 | 
conditionlist = None 412 | newvar = t[3] 413 | start = t[7] 414 | nextname = Variable(t[9]) 415 | condition = t[11] 416 | maxunroll = t[13] 417 | if self._type in (self.ParserType.DIFFERENTIAL_ASSERT, 418 | self.ParserType.DIFFERENTIAL_NEGATE): 419 | maxunroll = 1 420 | loop = VLoop(loopindex, newvar, start, nextname, condition, maxunroll, conditions=conditionlist) 421 | p[0] = (loopindex, loop) 422 | 423 | def p_loopend_stmt(self, p): 424 | 'loopend_stmt : LOOPEND' 425 | p[0] = (p[1], ) 426 | 427 | def p_assignment_typed(self, p): 428 | 'assignment : VARIABLE ARROW expression TYPE VARIABLE' 429 | var = None 430 | t = p[5] 431 | if t not in self.loaded_types: 432 | log.warning(f"Unknown type {t}. Defaulting to untyped assignement") 433 | return p_assignment_untyped(self, p) 434 | 435 | t = self.loaded_types[t] 436 | if p[1] not in self.variables: 437 | log.debug(f"New variable found {p[1]} of type {t}") 438 | var = Variable(p[1], t) 439 | self.variables[var.name] = var 440 | else: 441 | var = self.variables[p[1]] 442 | if t != var.type: 443 | log.warning(f"Variable {var.name} already declared as {var.type}. Cannot convert it as {t}. 
Leaving it typed as {var.type}.") 444 | p[0] = Assignment(var, p[3]) 445 | 446 | def p_assignment_untyped(self, p): 447 | 'assignment : VARIABLE ARROW expression' 448 | var = None 449 | if p[1] not in self.variables: 450 | log.debug(f"New variable found {p[1]}") 451 | var = Variable(p[1]) 452 | self.variables[var.name] = var 453 | else: 454 | var = self.variables[p[1]] 455 | p[0] = Assignment(var, p[3]) 456 | 457 | def p_conditionlist(self, p): 458 | '''conditionlist : LPAREN conditionlistint RPAREN''' 459 | p[0] = p[2] 460 | 461 | def p_conditionlistint_1(self, p): 462 | 'conditionlistint : conditionlistentry' 463 | p[0] = ConditionList([p[1]]) 464 | 465 | def p_conditionlistint_2(self, p): 466 | 'conditionlistint : conditionlistint COMMA conditionlistentry' 467 | p[0] = p[1] + p[3] 468 | 469 | def p_conditionlistentry_negcondition(self, p): 470 | 'conditionlistentry : EXCLAMATION CONDITIONNAME' 471 | p[0] = ConditionListEntry(p[2], True) 472 | 473 | def p_conditionlistentry_condition(self, p): 474 | 'conditionlistentry : CONDITIONNAME' 475 | p[0] = ConditionListEntry(p[1]) 476 | 477 | def p_condition_terminal(self, p): 478 | 'conditionexpr : expression TERMINATOR' 479 | p[0] = Condition(p[1], True) 480 | 481 | def p_condition_normal(self, p): 482 | 'conditionexpr : expression' 483 | p[0] = Condition(p[1], False) 484 | 485 | def p_expression_z3operator1(self, p): 486 | 'expression : OPERATOR1 expression' 487 | p2 = p[2] 488 | p[0] = Expression(p[1], p2) 489 | 490 | def p_expression_z3operator2(self, p): 491 | 'expression : OPERATOR2 expression expression' 492 | p2 = p[2] 493 | p3 = p[3] 494 | p[0] = Expression(p[1], p2, p3) 495 | 496 | def p_expression_parens(self, p): 497 | 'expression : LPAREN expression RPAREN' 498 | p[0] = p[2] 499 | 500 | def p_expression_slice(self, p): 501 | 'expression : expression LBRACKETS expression COMMA expression RBRACKETS' 502 | p1 = p[1] 503 | p3 = p[3] 504 | p5 = p[5] 505 | p[0] = Expression('Slice', p1, p3, p5) 506 | 507 | def 
p_expression_indexing(self, p): 508 | 'expression : expression LBRACKETS expression RBRACKETS' 509 | p1 = p[1] 510 | p3 = p[3] 511 | p[0] = Expression('Index', p1, p3) 512 | 513 | def p_expression_struct_access(self, p): 514 | 'expression : VARIABLE DOT VARIABLE' 515 | varname = p[1] 516 | if varname not in self.variables: 517 | log.error(f"Unknown varaible {varname}.") 518 | raise ValueError 519 | var = self.variables[p[1]] 520 | if var.type is None: 521 | log.error(f"Variable {varname} is untyped. Cannot access sub-field {p[3]}.") 522 | raise ValueError 523 | field = p[3] 524 | if field not in var.type.fields: 525 | log.error(f"Variable of type {var.type} does not have any field named {field}") 526 | raise ValueError 527 | field_off = var.type.offsets[field] 528 | field_size = var.type.fields[field].size // 8 529 | log.debug(f"Struct access: {var}.{field} --> Slice({var}, {field_off}, {field_size}).") 530 | p[0] = Expression('Slice', Expression("VAR", var), 531 | Expression("IMM", Immediate(field_off)), 532 | Expression("IMM", Immediate(field_size))) 533 | 534 | def p_expression_sizeof(self, p): 535 | 'expression : SIZEOF VARIABLE' 536 | typename = p[2] 537 | if typename not in self.loaded_types: 538 | raise TypeError(f"Unknown type {typename}") 539 | size = self.loaded_types[typename].size // 8 540 | p[0] = Expression("IMM", Immediate(size)) 541 | 542 | def p_expression_strcmp(self, p): 543 | 'expression : STRCMP expression expression STR' 544 | s = p[4] 545 | if len(s) < 2: 546 | log.error(f"Use STRCMP only for strings longer than 1 character") 547 | raise ValueError 548 | inp = p[2] 549 | start = p[3] 550 | current = start 551 | exprs = [Expression("EQ", 552 | Expression("Index", 553 | inp, 554 | Expression("ADD", 555 | start, 556 | Expression("IMM", 557 | Immediate( 558 | index)))), 559 | Expression("IMM", Immediate(ord(char)))) 560 | for index, char in enumerate(s)] 561 | ret = Expression("AND", exprs[0], exprs[1]) 562 | for expr in exprs[2:]: 563 | ret = 
Expression("AND", ret, expr) 564 | p[0] = ret 565 | 566 | def p_expression_variable(self, p): 567 | 'expression : VARIABLE' 568 | log.debug("Found variable " + p[1]) 569 | varname = p[1] 570 | if varname not in self.variables and varname not in self.defines: 571 | log.critical("Using variable %s before assignement" % varname) 572 | raise NameError 573 | 574 | if varname in self.variables: 575 | p[0] = Expression("VAR", self.variables[varname]) 576 | else: 577 | p[0] = self.defines[varname] 578 | 579 | def p_expression_number(self, p): 580 | 'expression : NUMBER' 581 | log.debug("Found NUMBER " + str(p[1])) 582 | p[0] = Expression("IMM", Immediate(p[1])) 583 | 584 | def p_expression_string(self, p): 585 | 'expression : CHAR' 586 | p[0] = Expression("IMM", Immediate(p[1])) 587 | 588 | def p_expression_bool(self, p): 589 | 'expression : BOOL' 590 | p[0] = Expression("IMM", BoolImmediate(p[1])) 591 | 592 | # Error rule for syntax errors 593 | def p_error(self, p): 594 | if p is None: 595 | return 596 | log.critical("Syntax error in input! %s" % p) 597 | raise Exception(p) 598 | 599 | def __init__(self, pwd="", ptype=ParserType.VALIDATOR, input_size=None, 600 | custom_defs=None): 601 | self.lexer = Lexer() 602 | self.loaded_types = {} 603 | self._variables = customdefdict(lambda x: Variable(x)) 604 | self._conditions = {} 605 | self._defines = {} 606 | self._block_stack = deque() 607 | self._statements = [] 608 | self.pwd = pwd 609 | self._type = ptype 610 | self._input_size = input_size 611 | 612 | if not custom_defs: 613 | custom_defs = {} 614 | self._custom_defs = {} 615 | for var, val in custom_defs.items(): 616 | self._custom_defs[var] = Expression("IMM", Immediate(val)) 617 | 618 | try: 619 | self.parser = yacc.yacc(module=self) 620 | except yacc.YaccError as e: 621 | log.exception(e) 622 | sys.exit(1) 623 | --------------------------------------------------------------------------------