├── .gitignore ├── LICENSE ├── README.md ├── reil-parser.cabal ├── scripts └── to_reil.py └── src └── Data └── REIL ├── BasicBlock.hs ├── CFG.hs ├── InstructionSet.hs ├── Interpreter.hs └── Parse.hs /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | dist-* 3 | cabal-dev 4 | *.o 5 | *.hi 6 | *.chi 7 | *.chs.h 8 | *.dyn_o 9 | *.dyn_hi 10 | .hpc 11 | .hsenv 12 | .cabal-sandbox/ 13 | cabal.sandbox.config 14 | *.prof 15 | *.aux 16 | *.hp 17 | *.eventlog 18 | .stack-work/ 19 | cabal.project.local 20 | cabal.project.local~ 21 | .HTF/ 22 | .ghc.environment.* 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Adrian Herrera 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # REIL Parser 2 | 3 | Parser for the Reverse Engineering Intermediate Language (REIL), as used by 4 | Google's/Zynamics' [BinNavi](https://github.com/google/binnavi). 5 | 6 | ## Generating REIL code 7 | REIL code can either be written by hand (in a text file) or generated by 8 | BinNavi. The stand-alone Python script `scripts/to_reil.py` can be used to 9 | generate REIL code for a module already stored in a BinNavi database. Note 10 | that the stand-alone script must be run in [Jython](http://www.jython.org) and 11 | requires a copy of the BinNavi jar (`binnavi.jar`). 12 | 13 | Run `jython scripts/to_reil.py --help` for instructions on how to use this 14 | script to generate REIL code. 15 | -------------------------------------------------------------------------------- /reil-parser.cabal: -------------------------------------------------------------------------------- 1 | Name: reil-parser 2 | Version: 0.0.0.1 3 | License: MIT 4 | License-file: LICENSE 5 | Author: Adrian Herrera 6 | Maintainer: Adrian Herrera 7 | Category: Text 8 | Build-type: Simple 9 | Cabal-version: >=1.10 10 | Synopsis: REIL parsing library 11 | 12 | Description: 13 | A parser for the Reverse Engineering Intermediate Language (REIL) as 14 | generated by BinNavi.
15 | 16 | Source-repository head 17 | type: git 18 | location: http://github.com/adrianherrera/reil-parser 19 | 20 | Library 21 | Hs-source-dirs: src 22 | Exposed-modules: Data.REIL.Parse, 23 | Data.REIL.InstructionSet 24 | 25 | Default-language: Haskell2010 26 | 27 | Other-modules: Data.REIL.BasicBlock, 28 | Data.REIL.CFG 29 | 30 | Ghc-options: -Wall 31 | 32 | Build-depends: base >= 4 && < 5, 33 | parsec >= 3 34 | -------------------------------------------------------------------------------- /scripts/to_reil.py: -------------------------------------------------------------------------------- 1 | """ 2 | Parse a module in a BinNavi database and translate it to REIL code. 3 | 4 | Author: Adrian Herrera 5 | """ 6 | 7 | from __future__ import print_function 8 | 9 | import argparse 10 | import sys 11 | 12 | 13 | def print_error(*args): 14 | """ 15 | Print an error message to stderr. 16 | 17 | Args: 18 | args: Variable-length arguments to print in the error message 19 | """ 20 | print('ERROR:', *args, file=sys.stderr) 21 | 22 | 23 | def parse_args(): 24 | """ 25 | Parse the command-line arguments. 
26 | 27 | Returns: 28 | The parsed command-line arguments 29 | """ 30 | parser = argparse.ArgumentParser(description='Convert a BinNavi module to ' 31 | 'REIL code') 32 | parser.add_argument('-b', '--binnavi-path', action='store', required=True, 33 | help='BinNavi installation directory') 34 | parser.add_argument('-d', '--db-description', action='store', 35 | required=True, 36 | help='The description of the database') 37 | parser.add_argument('-ho', '--db-host', action='store', 38 | default='localhost', 39 | help='Host address of the database server') 40 | parser.add_argument('-n', '--db-name', action='store', required=True, 41 | help='The name of the database on the database server') 42 | parser.add_argument('-u', '--db-user', action='store', required=True, 43 | help='The user that is used to connect to the ' 44 | 'database') 45 | parser.add_argument('-p', '--db-password', action='store', required=True, 46 | help='The password that is used to connect to the ' 47 | 'database') 48 | parser.add_argument('-i', '--db-identity', action='store', required=True, 49 | help='The identity under which the current user ' 50 | 'operates') 51 | parser.add_argument('-m', '--module', action='store', help='Module name') 52 | parser.add_argument('-f', '--function', action='store', 53 | help='Function name') 54 | parser.add_argument('-o', '--output', action='store', 55 | help='Output file path') 56 | 57 | return parser.parse_args() 58 | 59 | 60 | def load_binnavi(path): 61 | """ 62 | Include the relevant JAR files that contain the BinNavi API and the 63 | database access layer and import the required modules. 
64 | 65 | Args: 66 | path (str): BinNavi JAR path 67 | 68 | Returns: 69 | The BinNavi plugin interface that can be used in standalone scripts, or 70 | `None` if BinNavi fails to load 71 | """ 72 | sys.path.append(path) 73 | 74 | try: 75 | from com.google.security.zynamics.binnavi.API.plugins import StandAlone 76 | 77 | return StandAlone.getPluginInterface() 78 | except ImportError: 79 | return None 80 | 81 | 82 | def get_database(plugin_interface, db_info): 83 | """ 84 | Get the database based on the given description. 85 | 86 | Args: 87 | plugin_interface (PluginInterface): The BinNavi plugin interface 88 | db_info (dict): A dictionary that describes the database connection 89 | 90 | Returns: 91 | The disconnected and unloaded database, or `None` if it doesn't exist 92 | """ 93 | dbm = plugin_interface.getDatabaseManager() 94 | 95 | try: 96 | return dbm.addDatabase(db_info['description'], 97 | 'org.postgresql.Driver', 98 | db_info['host'], 99 | db_info['name'], 100 | db_info['user'], 101 | db_info['password'], 102 | db_info['identity'], 103 | False, 104 | False) 105 | except: 106 | return None 107 | 108 | 109 | def get_module(database, name): 110 | """ 111 | Get the module with the given name. 112 | 113 | Args: 114 | database (Database): The connected and loaded database 115 | name (str): The module name 116 | 117 | Returns: 118 | The unloaded module, or `None` if it doesn't exist 119 | """ 120 | for module in database.getModules(): 121 | if module.getName() == name: 122 | return module 123 | 124 | return None 125 | 126 | 127 | def get_functions(module): 128 | """ 129 | Get a list of functions from the given module.
130 | 131 | Args: 132 | module (Module): The loaded module 133 | 134 | Returns: 135 | A list of unloaded functions from the loaded module 136 | """ 137 | from com.google.security.zynamics.binnavi.API.disassembly import \ 138 | FunctionType 139 | 140 | # We are not interested in functions that are dynamically imported from an 141 | # external module 142 | return [function for function in module.getFunctions() 143 | if function.getType() != FunctionType.Import] 144 | 145 | def get_function(module, name): 146 | """ 147 | Get the function with the given name. 148 | 149 | Args: 150 | module (Module): The loaded module 151 | name (str): The function name 152 | 153 | Returns: 154 | The unloaded function, or `None` if it doesn't exist 155 | """ 156 | for function in module.getFunctions(): 157 | if function.getName() == name: 158 | return function 159 | 160 | return None 161 | 162 | 163 | def get_reil(function): 164 | """ 165 | Get the REIL code for a given function. 166 | 167 | Args: 168 | function (Function): The function 169 | 170 | Returns: 171 | A tuple of the function's start address and a list of REIL nodes, 172 | sorted by address 173 | """ 174 | function.load() 175 | reil_nodes = [node for node in 176 | function.getReilCode().getGraph().getNodes()] 177 | sorted(reil_nodes, key=lambda n: n.getAddress()) 178 | function.close() 179 | print('Translated function `%s`' % function.getName()) 180 | 181 | return reil_nodes[0].getAddress(), reil_nodes 182 | 183 | 184 | def main(): 185 | """ 186 | The main function. 
187 | """ 188 | # Parse the command-line arguments 189 | args = parse_args() 190 | 191 | # Load the BinNavi plugin interface 192 | plugin_interface = load_binnavi(args.binnavi_path) 193 | if plugin_interface is None: 194 | print_error('Unable to load BinNavi from `%s`' % args.binnavi_path) 195 | sys.exit(1) 196 | 197 | # Connect to and load the required database 198 | db_info = { 199 | 'description': args.db_description, 200 | 'host': args.db_host, 201 | 'name': args.db_name, 202 | 'user': args.db_user, 203 | 'password': args.db_password, 204 | 'identity': args.db_identity, 205 | } 206 | 207 | database = get_database(plugin_interface, db_info) 208 | if database is None: 209 | print_error('Unable to load database `%s`' % db_info['name']) 210 | sys.exit(1) 211 | database.connect() 212 | database.load() 213 | 214 | # If no module was specified, list the available modules 215 | available_modules = ['`%s`' % mod.getName() for 216 | mod in database.getModules()] 217 | if args.module is None: 218 | print('No module specified. Available modules: %s' % \ 219 | ', '.join(available_modules)) 220 | sys.exit(1) 221 | 222 | # Load the required module 223 | module = get_module(database, args.module) 224 | if module is None: 225 | print_error('Unable to load module `%s`. Available modules: %s' % \ 226 | (args.module, ', '.join(available_modules))) 227 | sys.exit(1) 228 | module.load() 229 | 230 | if args.function is None: 231 | # No function specified. Translate all functions 232 | 233 | # Get a list of tuples that contain the start address and the REIL code 234 | # for a particular function 235 | functions = get_functions(module) 236 | reil_functions = [get_reil(func) for func in functions] 237 | 238 | # Sort the functions by their start address (the first element in the 239 | # tuple) 240 | sorted(reil_functions, key=lambda f: f[0]) 241 | 242 | # Join all the REIL code for each function into a single string. 
Ignore 243 | # the start address of the file 244 | reil_str = '\n'.join([str(node).strip() for _, func in reil_functions 245 | for node in func]) 246 | else: 247 | # Search for the given function and only translate it 248 | function = get_function(module, args.function) 249 | if function is None: 250 | print_error('Function `%s` does not exist' % args.function) 251 | sys.exit(1) 252 | _, reil_function = get_reil(function) 253 | 254 | reil_str = '\n'.join(str(node).strip() for node in reil_function) 255 | 256 | # Write translated REIL code to the output file 257 | if args.output is None: 258 | print('\n\n%s' % reil_str) 259 | else: 260 | with open(args.output, 'w') as out_file: 261 | out_file.write(reil_str) 262 | 263 | print('REIL code for `%s` successfully written to `%s`' % \ 264 | (args.module, args.output)) 265 | 266 | # Clean up 267 | module.close() 268 | database.close() 269 | 270 | 271 | if __name__ == '__main__': 272 | main() 273 | -------------------------------------------------------------------------------- /src/Data/REIL/BasicBlock.hs: -------------------------------------------------------------------------------- 1 | {- | 2 | Module : $Header$ 3 | Description : REIL basic block 4 | Maintainer : Adrian Herrera 5 | Stability : experimental 6 | -} 7 | 8 | {-# LANGUAGE ViewPatterns #-} 9 | 10 | module Data.REIL.BasicBlock ( 11 | -- * Statement 12 | Statement(..), 13 | 14 | -- * Basic block 15 | BasicBlock(..), 16 | empty, 17 | null, 18 | startAddress, 19 | endAddress, 20 | addStatement, 21 | ) where 22 | 23 | import Prelude hiding (null) 24 | import qualified Data.Map as M 25 | 26 | import qualified Data.REIL.InstructionSet as IS 27 | 28 | ------------------------------------------------------------------------------- 29 | -- Statement 30 | ------------------------------------------------------------------------------- 31 | 32 | -- | A statement consists of an instruction at a specific address 33 | data Statement = 34 | Statement IS.Address 
IS.Instruction 35 | 36 | instance Show Statement where 37 | show (Statement addr inst) = 38 | IS.showAddress addr ++ ": " ++ show inst 39 | 40 | -- Find an address in a statement map based on a given `find` function (e.g. 41 | -- Map.findMin or Map.findMax) 42 | findAddr :: (M.Map IS.Address IS.Instruction -> (IS.Address, IS.Instruction)) 43 | -> M.Map IS.Address IS.Instruction 44 | -> Maybe IS.Address 45 | findAddr _ (M.null -> True) = 46 | Nothing 47 | findAddr func stmts = 48 | Just $ fst $ func stmts 49 | 50 | -- Find the minimum address in a statement map 51 | findMinAddr :: M.Map IS.Address IS.Instruction -> Maybe IS.Address 52 | findMinAddr = 53 | findAddr M.findMin 54 | 55 | -- Find the maximum address in a statement map 56 | findMaxAddr :: M.Map IS.Address IS.Instruction -> Maybe IS.Address 57 | findMaxAddr = 58 | findAddr M.findMax 59 | 60 | ------------------------------------------------------------------------------- 61 | -- Basic block 62 | ------------------------------------------------------------------------------- 63 | 64 | -- | A basic block consists of a map of statements 65 | newtype BasicBlock = 66 | BasicBlock (M.Map IS.Address IS.Instruction) 67 | 68 | -- | Create an empty basic block 69 | empty :: BasicBlock 70 | empty = 71 | BasicBlock M.empty 72 | 73 | -- | Check if the basic block is empty 74 | null :: BasicBlock -> Bool 75 | null (BasicBlock stmts) = 76 | M.null stmts 77 | 78 | -- | Get the start address for a basic block. If the basic block is empty, 79 | -- return @Nothing@ 80 | startAddress :: BasicBlock -> Maybe IS.Address 81 | startAddress (BasicBlock stmts) = 82 | findMinAddr stmts 83 | 84 | -- | Get the end address for a basic block. If the basic block is empty, return 85 | -- @Nothing@ 86 | endAddress :: BasicBlock -> Maybe IS.Address 87 | endAddress (BasicBlock stmts) = 88 | findMaxAddr stmts 89 | 90 | -- | Add a statement to the end of a basic block. 
The statement is only added 91 | -- if its address is greater than the address of the last statement in the 92 | -- basic block 93 | addStatement :: BasicBlock -> Statement -> BasicBlock 94 | addStatement (null -> True) (Statement addr inst) = 95 | BasicBlock $ M.singleton addr inst 96 | addStatement (BasicBlock stmts) (Statement addr inst) 97 | | checkStatement = BasicBlock $ M.insert addr inst stmts 98 | | otherwise = error "Unable to add the statement to the basic block" 99 | -- Check that the address of the instruction that we are about to add to 100 | -- the basic block does not already exist. 101 | -- 102 | -- Also check that the address of the instruction that we are about to add 103 | -- to the basic block is greater than the last statement in the basic block 104 | where checkStatement = M.notMember addr stmts && 105 | addr > (fst $ M.findMax stmts) 106 | -------------------------------------------------------------------------------- /src/Data/REIL/CFG.hs: -------------------------------------------------------------------------------- 1 | {- | 2 | Module : $Header$ 3 | Description : REIL control flow graph 4 | Maintainer : Adrian Herrera 5 | Stability : experimental 6 | -} 7 | 8 | module Data.REIL.CFG ( 9 | ) where -------------------------------------------------------------------------------- /src/Data/REIL/InstructionSet.hs: -------------------------------------------------------------------------------- 1 | {- | 2 | Module : $Header$ 3 | Description : REIL instruction set 4 | Maintainer : Adrian Herrera 5 | Stability : experimental 6 | -} 7 | 8 | -- | This module defines the REIL instruction set. 
The documentation is 9 | -- primarily sourced from 10 | -- http://www.zynamics.com/binnavi/manual/html/reil_language.htm 11 | 12 | module Data.REIL.InstructionSet ( 13 | -- * Address 14 | Address, 15 | showAddress, 16 | 17 | -- * Register 18 | RegisterName, 19 | 20 | -- * Operand 21 | Operand(..), 22 | OperandSize(..), 23 | 24 | -- * Instruction 25 | Instruction(..), 26 | getInstOp1, 27 | getInstOp2, 28 | getInstOp3, 29 | ) where 30 | 31 | import Numeric (showHex) 32 | 33 | -- | For our purposes an address is simply an integer 34 | type Address = Int 35 | 36 | -- | Pretty-print an address 37 | showAddress :: Address -> String 38 | showAddress addr = 39 | "0x" ++ showHex addr "" 40 | 41 | -- | A register name is just a string 42 | type RegisterName = String 43 | 44 | -- | The size of a REIL operand is either b1 (1 byte), b2 (2 bytes), b4 (4 45 | -- bytes), b8 (8 bytes) or b16 (16 bytes). 46 | -- 47 | -- Note that these terms don't appear to be used by BinNavi, which instead uses 48 | -- byte, word, dword, qword and oword respectively 49 | data OperandSize = 50 | Byte 51 | | Word 52 | | DWord 53 | | QWord 54 | | OWord 55 | 56 | instance Show OperandSize where 57 | show Byte = "BYTE" 58 | show Word = "WORD" 59 | show DWord = "DWORD" 60 | show QWord = "QWORD" 61 | show OWord = "OWORD" 62 | 63 | -- | REIL operands are defined by their size and their type. Valid types for 64 | -- REIL operands are 'integer literal', 'register', and 'REIL offset'. An 65 | -- operand can also be empty. 66 | data Operand = 67 | Empty 68 | | IntegerLiteral Int OperandSize 69 | | Register RegisterName OperandSize 70 | | Offset Address 71 | 72 | instance Show Operand where 73 | show Empty = 74 | "EMPTY" 75 | show (IntegerLiteral lit size) = 76 | show size ++ " " ++ show lit 77 | show (Register reg size) = 78 | show size ++ " " ++ reg 79 | show (Offset off) = 80 | "ADDRESS " ++ showAddress off 81 | 82 | -- | The REIL instruction set knows only 17 different instructions. 
Each 83 | -- instruction calculates at most one result (multiple effects like setting 84 | -- flags are not allowed) and has exactly three operands (although some 85 | -- operands can be empty). 86 | data Instruction = 87 | -- | Addition 88 | Add Operand Operand Operand 89 | -- | Binary and 90 | | And Operand Operand Operand 91 | -- | Boolean is-zero 92 | | Bisz Operand Operand Operand 93 | -- | Binary shift 94 | | Bsh Operand Operand Operand 95 | -- | Unsigned division 96 | | Div Operand Operand Operand 97 | -- | Jump conditional 98 | | Jcc Operand Operand Operand 99 | -- | Load from memory 100 | | Ldm Operand Operand Operand 101 | -- | Modulo 102 | | Mod Operand Operand Operand 103 | -- | Unsigned multiplication 104 | | Mul Operand Operand Operand 105 | -- | Nop operation 106 | | Nop Operand Operand Operand 107 | -- | Bitwise or 108 | | Or Operand Operand Operand 109 | -- | Store to memory 110 | | Stm Operand Operand Operand 111 | -- | Store to register 112 | | Str Operand Operand Operand 113 | -- | Subtract 114 | | Sub Operand Operand Operand 115 | -- | Undefines a register 116 | | Undef Operand Operand Operand 117 | -- | Unknown instruction 118 | | Unkn Operand Operand Operand 119 | -- | Bitwise xor 120 | | Xor Operand Operand Operand 121 | 122 | instance Show Instruction where 123 | show inst = 124 | showInst inst ++ " " ++ showInstOp1 inst ++ ", " ++ 125 | showInstOp2 inst ++ ", " ++ 126 | showInstOp3 inst 127 | 128 | -- | Get an instruction's first operand 129 | getInstOp1 :: Instruction -> Operand 130 | getInstOp1 (Add op _ _) = op 131 | getInstOp1 (And op _ _) = op 132 | getInstOp1 (Bisz op _ _) = op 133 | getInstOp1 (Bsh op _ _) = op 134 | getInstOp1 (Div op _ _) = op 135 | getInstOp1 (Jcc op _ _) = op 136 | getInstOp1 (Ldm op _ _) = op 137 | getInstOp1 (Mod op _ _) = op 138 | getInstOp1 (Mul op _ _) = op 139 | getInstOp1 (Nop op _ _) = op 140 | getInstOp1 (Or op _ _) = op 141 | getInstOp1 (Stm op _ _) = op 142 | getInstOp1 (Str op _ _) = op 143 | 
getInstOp1 (Sub op _ _) = op 144 | getInstOp1 (Undef op _ _) = op 145 | getInstOp1 (Unkn op _ _) = op 146 | getInstOp1 (Xor op _ _) = op 147 | 148 | -- | Get an instruction's second operand 149 | getInstOp2 :: Instruction -> Operand 150 | getInstOp2 (Add _ op _) = op 151 | getInstOp2 (And _ op _) = op 152 | getInstOp2 (Bisz _ op _) = op 153 | getInstOp2 (Bsh _ op _) = op 154 | getInstOp2 (Div _ op _) = op 155 | getInstOp2 (Jcc _ op _) = op 156 | getInstOp2 (Ldm _ op _) = op 157 | getInstOp2 (Mod _ op _) = op 158 | getInstOp2 (Mul _ op _) = op 159 | getInstOp2 (Nop _ op _) = op 160 | getInstOp2 (Or _ op _) = op 161 | getInstOp2 (Stm _ op _) = op 162 | getInstOp2 (Str _ op _) = op 163 | getInstOp2 (Sub _ op _) = op 164 | getInstOp2 (Undef _ op _) = op 165 | getInstOp2 (Unkn _ op _) = op 166 | getInstOp2 (Xor _ op _) = op 167 | 168 | -- | Get an instruction's third operand 169 | getInstOp3 :: Instruction -> Operand 170 | getInstOp3 (Add _ _ op) = op 171 | getInstOp3 (And _ _ op) = op 172 | getInstOp3 (Bisz _ _ op) = op 173 | getInstOp3 (Bsh _ _ op) = op 174 | getInstOp3 (Div _ _ op) = op 175 | getInstOp3 (Jcc _ _ op) = op 176 | getInstOp3 (Ldm _ _ op) = op 177 | getInstOp3 (Mod _ _ op) = op 178 | getInstOp3 (Mul _ _ op) = op 179 | getInstOp3 (Nop _ _ op) = op 180 | getInstOp3 (Or _ _ op) = op 181 | getInstOp3 (Stm _ _ op) = op 182 | getInstOp3 (Str _ _ op) = op 183 | getInstOp3 (Sub _ _ op) = op 184 | getInstOp3 (Undef _ _ op) = op 185 | getInstOp3 (Unkn _ _ op) = op 186 | getInstOp3 (Xor _ _ op) = op 187 | 188 | -- Helper functions for showing an instruction 189 | showInst :: Instruction -> String 190 | showInst (Add _ _ _) = "add" 191 | showInst (And _ _ _) = "and" 192 | showInst (Bisz _ _ _) = "bisz" 193 | showInst (Bsh _ _ _) = "bsh" 194 | showInst (Div _ _ _) = "div" 195 | showInst (Jcc _ _ _) = "jcc" 196 | showInst (Ldm _ _ _) = "ldm" 197 | showInst (Mod _ _ _) = "mod" 198 | showInst (Mul _ _ _) = "mul" 199 | showInst (Nop _ _ _) = "nop" 200 | showInst (Or _ _ 
_) = "or" 201 | showInst (Stm _ _ _) = "stm" 202 | showInst (Str _ _ _) = "str" 203 | showInst (Sub _ _ _) = "sub" 204 | showInst (Undef _ _ _) = "undef" 205 | showInst (Unkn _ _ _) = "unkn" 206 | showInst (Xor _ _ _) = "xor" 207 | 208 | -- | Show an instruction's first operand 209 | showInstOp1 :: Instruction -> String 210 | showInstOp1 = 211 | show . getInstOp1 212 | 213 | -- | Show an instruction's second operand 214 | showInstOp2 :: Instruction -> String 215 | showInstOp2 = 216 | show . getInstOp2 217 | 218 | -- | Show an instruction's third operand 219 | showInstOp3 :: Instruction -> String 220 | showInstOp3 = 221 | show . getInstOp3 222 | -------------------------------------------------------------------------------- /src/Data/REIL/Interpreter.hs: -------------------------------------------------------------------------------- 1 | {- | 2 | Module : $Header$ 3 | Description : REIL interpreter and its environment 4 | Maintainer : Adrian Herrera 5 | Stability : experimental 6 | -} 7 | 8 | module Data.REIL.Interpreter ( 9 | -- * Environment 10 | Environment(..), 11 | newEnvironment, 12 | readRegister, 13 | writeRegister, 14 | readMemory, 15 | writeMemory, 16 | 17 | -- * Interpreter 18 | Interpreter(..), 19 | ) where 20 | 21 | import Data.Bits 22 | import qualified Data.Map as M 23 | 24 | import qualified Data.REIL.InstructionSet as IS 25 | 26 | -- | The environment describes a program's "state". It consists of data stored 27 | -- in both registers and memory. 28 | data Environment a = 29 | Environment { 30 | -- | Map of register names to the value the register contains 31 | registers :: M.Map IS.RegisterName a, 32 | -- | Map of addresses to the value stored at that address 33 | memory :: M.Map IS.Address a 34 | } 35 | 36 | -- | Create a new, empty environment 37 | newEnvironment :: Environment a 38 | newEnvironment = 39 | Environment { 40 | registers = M.empty, 41 | memory = M.empty 42 | } 43 | 44 | -- | Read a register's value. 
Calls 'error' if the register is undefined (see 45 | -- the @undef@ instruction). 46 | readRegister :: IS.RegisterName -> Environment a -> a 47 | readRegister reg env = 48 | case M.lookup reg (registers env) of 49 | Just val -> val 50 | Nothing -> error $ "Register " ++ show reg ++ " is not defined" 51 | 52 | -- | Write a value to a register 53 | writeRegister :: IS.RegisterName -> a -> Environment a -> Environment a 54 | writeRegister reg val env = 55 | Environment { 56 | registers = M.insert reg val (registers env), 57 | memory = memory env 58 | } 59 | 60 | -- | Read a value stored at a memory address, or a default value if it hasn't 61 | -- previously been set 62 | readMemory :: IS.Address -> a -> Environment a -> a 63 | readMemory addr dflt env = 64 | case M.lookup addr (memory env) of 65 | Just val -> val 66 | Nothing -> dflt 67 | 68 | -- | Write a value to a memory address 69 | writeMemory :: IS.Address -> a -> Environment a -> Environment a 70 | writeMemory addr val env = 71 | Environment { 72 | registers = registers env, 73 | memory = M.insert addr val (memory env) 74 | } 75 | 76 | -- | An interpreter "executes" instructions and updates its environment as a 77 | -- result 78 | class Interpreter a where 79 | -- | Given an environment, "execute" an instruction and return a new, 80 | -- updated environment. This function should describe the language's big 81 | -- step operational semantics 82 | execute :: IS.Instruction -> Environment a -> Environment a 83 | 84 | -- | This interpreter instance essentially describes an idealised version of 85 | -- REIL's concrete semantics. We ignore issues such as overflow, etc. 86 | -- 87 | -- TODO how to handle the operand size? 
88 | instance Interpreter Int where 89 | -- Add instruction 90 | execute (IS.Add (IS.IntegerLiteral i1 _) 91 | (IS.IntegerLiteral i2 _) 92 | (IS.Register r _)) env = 93 | writeRegister r (i1 + i2) env 94 | execute (IS.Add (IS.IntegerLiteral i _) 95 | (IS.Register r1 _) 96 | (IS.Register r2 _)) env = 97 | writeRegister r2 (i + readRegister r1 env) env 98 | execute (IS.Add (IS.Register r1 _) 99 | (IS.IntegerLiteral i _) 100 | (IS.Register r2 _)) env = 101 | writeRegister r2 (readRegister r1 env + i) env 102 | execute (IS.Add (IS.Register r1 _) 103 | (IS.Register r2 _) 104 | (IS.Register r3 _)) env = 105 | writeRegister r3 (readRegister r1 env + readRegister r2 env) env 106 | -- And instruction 107 | execute (IS.And (IS.IntegerLiteral i1 _) 108 | (IS.IntegerLiteral i2 _) 109 | (IS.Register r _)) env = 110 | writeRegister r (i1 .&. i2) env 111 | execute (IS.And (IS.IntegerLiteral i _) 112 | (IS.Register r1 _) 113 | (IS.Register r2 _)) env = 114 | writeRegister r2 (i .&. readRegister r1 env) env 115 | execute (IS.And (IS.Register r1 _) 116 | (IS.IntegerLiteral i _) 117 | (IS.Register r2 _)) env = 118 | writeRegister r2 (readRegister r1 env .&. i) env 119 | execute (IS.And (IS.Register r1 _) 120 | (IS.Register r2 _) 121 | (IS.Register r3 _)) env = 122 | writeRegister r3 (readRegister r1 env .&. 
readRegister r2 env) env 123 | -- Bisz instruction 124 | execute (IS.Bisz (IS.IntegerLiteral i _) 125 | _ 126 | (IS.Register r _)) env 127 | | i == 0 = writeRegister r 1 env 128 | | otherwise = writeRegister r 0 env 129 | execute (IS.Bisz (IS.Register r1 _) 130 | _ 131 | (IS.Register r2 _)) env 132 | | val == 0 = writeRegister r2 1 env 133 | | otherwise = writeRegister r2 0 env 134 | where val = readRegister r1 env 135 | -- Bsh instruction 136 | execute (IS.Bsh (IS.IntegerLiteral i1 _) 137 | (IS.IntegerLiteral i2 _) 138 | (IS.Register r _)) env = 139 | writeRegister r (i1 `shift` i2) env 140 | execute (IS.Bsh (IS.IntegerLiteral i _) 141 | (IS.Register r1 _) 142 | (IS.Register r2 _)) env = 143 | writeRegister r2 (i `shift` readRegister r1 env) env 144 | execute (IS.Bsh (IS.Register r1 _) 145 | (IS.IntegerLiteral i _) 146 | (IS.Register r2 _)) env = 147 | writeRegister r2 (readRegister r1 env `shift` i) env 148 | execute (IS.Bsh (IS.Register r1 _) 149 | (IS.Register r2 _) 150 | (IS.Register r3 _)) env = 151 | writeRegister r3 (readRegister r1 env `shift` readRegister r2 env) env 152 | -- Div instruction 153 | execute (IS.Div (IS.IntegerLiteral i1 _) 154 | (IS.IntegerLiteral i2 _) 155 | (IS.Register r _)) env = 156 | writeRegister r (i1 `quot` i2) env 157 | execute (IS.Div (IS.IntegerLiteral i _) 158 | (IS.Register r1 _) 159 | (IS.Register r2 _)) env = 160 | writeRegister r2 (i `quot` readRegister r1 env) env 161 | execute (IS.Div (IS.Register r1 _) 162 | (IS.IntegerLiteral i _) 163 | (IS.Register r2 _)) env = 164 | writeRegister r2 (readRegister r1 env `quot` i) env 165 | execute (IS.Div (IS.Register r1 _) 166 | (IS.Register r2 _) 167 | (IS.Register r3 _)) env = 168 | writeRegister r3 (readRegister r1 env `quot` readRegister r2 env) env 169 | -- Jcc instruction 170 | execute (IS.Jcc (IS.IntegerLiteral i1 _) 171 | _ 172 | (IS.IntegerLiteral i2 _)) env = 173 | undefined 174 | execute (IS.Jcc (IS.IntegerLiteral i _) 175 | _ 176 | (IS.Register r _)) env = 177 | undefined 
178 | execute (IS.Jcc (IS.IntegerLiteral i _) 179 | _ 180 | (IS.Offset o)) env = 181 | undefined 182 | execute (IS.Jcc (IS.Register r _) 183 | _ 184 | (IS.IntegerLiteral i _)) env = 185 | undefined 186 | execute (IS.Jcc (IS.Register r1 _) 187 | _ 188 | (IS.Register r2 _)) env = 189 | undefined 190 | execute (IS.Jcc (IS.Register r _) 191 | _ 192 | (IS.Offset o)) env = 193 | undefined 194 | -- Ldm instruction 195 | execute (IS.Ldm (IS.IntegerLiteral i _) 196 | _ 197 | (IS.Register r _)) env = 198 | writeRegister r (readMemory i 0 env) env 199 | execute (IS.Ldm (IS.Register r1 _) 200 | _ 201 | (IS.Register r2 _)) env = 202 | writeRegister r2 (readMemory (readRegister r1 env) 0 env) env 203 | -- Mod instruction 204 | execute (IS.Mod (IS.IntegerLiteral i1 _) 205 | (IS.IntegerLiteral i2 _) 206 | (IS.Register r _)) env = 207 | writeRegister r (i1 `mod` i2) env 208 | execute (IS.Mod (IS.IntegerLiteral i _) 209 | (IS.Register r1 _) 210 | (IS.Register r2 _)) env = 211 | writeRegister r2 (i `mod` readRegister r1 env) env 212 | execute (IS.Mod (IS.Register r1 _) 213 | (IS.IntegerLiteral i _) 214 | (IS.Register r2 _)) env = 215 | writeRegister r2 (readRegister r1 env `mod` i) env 216 | execute (IS.Mod (IS.Register r1 _) 217 | (IS.Register r2 _) 218 | (IS.Register r3 _)) env = 219 | writeRegister r3 (readRegister r1 env `mod` readRegister r2 env) env 220 | -- Mul instruction 221 | execute (IS.Mul (IS.IntegerLiteral i1 _) 222 | (IS.IntegerLiteral i2 _) 223 | (IS.Register r _)) env = 224 | writeRegister r (i1 * i2) env 225 | execute (IS.Mul (IS.IntegerLiteral i _) 226 | (IS.Register r1 _) 227 | (IS.Register r2 _)) env = 228 | writeRegister r2 (i * readRegister r1 env) env 229 | execute (IS.Mul (IS.Register r1 _) 230 | (IS.IntegerLiteral i _) 231 | (IS.Register r2 _)) env = 232 | writeRegister r2 (readRegister r1 env * i) env 233 | execute (IS.Mul (IS.Register r1 _) 234 | (IS.Register r2 _) 235 | (IS.Register r3 _)) env = 236 | writeRegister r3 (readRegister r1 env * readRegister 
r2 env) env 237 | -- Nop instruction 238 | execute (IS.Nop _ _ _) env = 239 | env 240 | -- Or instruction 241 | execute (IS.Or (IS.IntegerLiteral i1 _) 242 | (IS.IntegerLiteral i2 _) 243 | (IS.Register r _)) env = 244 | writeRegister r (i1 .|. i2) env 245 | execute (IS.Or (IS.IntegerLiteral i _) 246 | (IS.Register r1 _) 247 | (IS.Register r2 _)) env = 248 | writeRegister r2 (i .|. readRegister r1 env) env 249 | execute (IS.Or (IS.Register r1 _) 250 | (IS.IntegerLiteral i _) 251 | (IS.Register r2 _)) env = 252 | writeRegister r2 (readRegister r1 env .|. i) env 253 | execute (IS.Or (IS.Register r1 _) 254 | (IS.Register r2 _) 255 | (IS.Register r3 _)) env = 256 | writeRegister r3 (readRegister r1 env .|. readRegister r2 env) env 257 | -- Stm instruction 258 | execute (IS.Stm (IS.IntegerLiteral i1 _) 259 | _ 260 | (IS.IntegerLiteral i2 _)) env = 261 | writeMemory i2 i1 env 262 | execute (IS.Stm (IS.IntegerLiteral i _) 263 | _ 264 | (IS.Register r _)) env = 265 | writeMemory (readRegister r env) i env 266 | execute (IS.Stm (IS.Register r _) 267 | _ 268 | (IS.IntegerLiteral i _)) env = 269 | writeMemory i (readRegister r env) env 270 | execute (IS.Stm (IS.Register r1 _) 271 | _ 272 | (IS.Register r2 _)) env = 273 | writeMemory (readRegister r2 env) (readRegister r1 env) env 274 | -- Str instruction 275 | execute (IS.Str (IS.IntegerLiteral i _) 276 | _ 277 | (IS.Register r _)) env = 278 | writeRegister r i env 279 | execute (IS.Str (IS.Register r1 _) 280 | _ 281 | (IS.Register r2 _)) env = 282 | writeRegister r2 (readRegister r1 env) env 283 | -- Sub instruction 284 | execute (IS.Sub (IS.IntegerLiteral i1 _) 285 | (IS.IntegerLiteral i2 _) 286 | (IS.Register r _)) env = 287 | writeRegister r (i1 - i2) env 288 | execute (IS.Sub (IS.IntegerLiteral i _) 289 | (IS.Register r1 _) 290 | (IS.Register r2 _)) env = 291 | writeRegister r2 (i - readRegister r1 env) env 292 | execute (IS.Sub (IS.Register r1 _) 293 | (IS.IntegerLiteral i _) 294 | (IS.Register r2 _)) env = 295 | 
writeRegister r2 (readRegister r1 env - i) env 296 | execute (IS.Sub (IS.Register r1 _) 297 | (IS.Register r2 _) 298 | (IS.Register r3 _)) env = 299 | writeRegister r3 (readRegister r1 env - readRegister r2 env) env 300 | -- Undef instruction 301 | execute (IS.Undef _ _ (IS.Register r _)) env = 302 | Environment { 303 | registers = M.delete r (registers env), 304 | memory = memory env 305 | } 306 | -- Unkn instruction 307 | execute (IS.Unkn _ _ _) env = 308 | env 309 | -- Xor instruction 310 | execute (IS.Xor (IS.IntegerLiteral i1 _) 311 | (IS.IntegerLiteral i2 _) 312 | (IS.Register r _)) env = 313 | writeRegister r (i1 `xor` i2) env 314 | execute (IS.Xor (IS.IntegerLiteral i _) 315 | (IS.Register r1 _) 316 | (IS.Register r2 _)) env = 317 | writeRegister r2 (i `xor` readRegister r1 env) env 318 | execute (IS.Xor (IS.Register r1 _) 319 | (IS.IntegerLiteral i _) 320 | (IS.Register r2 _)) env = 321 | writeRegister r2 (readRegister r1 env `xor` i) env 322 | execute (IS.Xor (IS.Register r1 _) 323 | (IS.Register r2 _) 324 | (IS.Register r3 _)) env = 325 | writeRegister r3 (readRegister r1 env `xor` readRegister r2 env) env 326 | -- Error 327 | execute inst _ = 328 | error $ "Invalid instruction: " ++ show inst 329 | -------------------------------------------------------------------------------- /src/Data/REIL/Parse.hs: -------------------------------------------------------------------------------- 1 | {- | 2 | Module : $Header$ 3 | Description : Parses a REIL text file 4 | Maintainer : Adrian Herrera 5 | Stability : experimental 6 | -} 7 | 8 | -- | This module parses a text file containing REIL instructions. 

module Data.REIL.Parse (
    -- * Types
    InstructionConstructor,

    -- * Statements
    statement,
    statements,
    addressInstructionSep,

    -- * Addresses and instructions
    address,
    instruction,
    instructionConstructor,

    -- * Operands
    operand,
    operandSize,
    emptyOperand,
    integerLiteralOperand,
    registerOperand,
    offsetOperand
) where

import qualified Data.Map as M

import Text.Parsec
import Text.Parsec.String

import Control.Applicative hiding ((<|>), many)

import Numeric (readHex)

import qualified Data.REIL.InstructionSet as IS
import qualified Data.REIL.BasicBlock as BB
import qualified Data.REIL.CFG as CFG

-------------------------------------------------------------------------------
-- Helper functions
-------------------------------------------------------------------------------

-- Applicative cons: run the first action to obtain a head element, run the
-- second action to obtain a tail list, and prepend the head to the tail.
-- Internal helper; deliberately not exported.
(<:>) :: Applicative f => f a -> f [a] -> f [a]
(<:>) =
    liftA2 (:)

-- Parse 1 or more decimal digits and return them as a string.
-- Internal helper; deliberately not exported.
digits :: Parser String
digits =
    many1 digit

-------------------------------------------------------------------------------

-- | Parse an address. An address is represented as a hexadecimal integer. E.g.
-- @0000100015C20700@
address :: Parser IS.Address
address =
    do
        hexDigits <- many1 hexDigit
        -- 'many1 hexDigit' only yields characters that 'readHex' accepts, so
        -- the empty-list case is not expected to occur; it is handled as a
        -- parse failure rather than through a partial pattern match.
        case readHex hexDigits of
            (hexInt, _) : _ -> return hexInt
            []              -> parserFail $
                                   "invalid hexadecimal address: " ++ hexDigits

-- Type synonym for convenience: every instruction constructor takes three
-- operands and produces an instruction.
type InstructionConstructor =
    IS.Operand -> IS.Operand -> IS.Operand -> IS.Instruction

-- | Valid REIL instructions. Parses an instruction mnemonic and returns the
-- matching instruction constructor, which still has to be applied to its three
-- operands.
instructionConstructor :: Parser InstructionConstructor
instructionConstructor =
    -- Each alternative is wrapped in 'try' so that a mnemonic sharing a prefix
    -- with another one (e.g. "stm" and "str") does not consume input when it
    -- fails. The alternatives are tried in the order listed.
    choice [ctor <$ try (string mnemonic) | (mnemonic, ctor) <- mnemonics]
        <?> "a valid instruction"
    where mnemonics =
              [ ("add",   IS.Add)
              , ("and",   IS.And)
              , ("bisz",  IS.Bisz)
              , ("bsh",   IS.Bsh)
              , ("div",   IS.Div)
              , ("jcc",   IS.Jcc)
              , ("ldm",   IS.Ldm)
              , ("mod",   IS.Mod)
              , ("mul",   IS.Mul)
              , ("nop",   IS.Nop)
              , ("or",    IS.Or)
              , ("stm",   IS.Stm)
              , ("str",   IS.Str)
              , ("sub",   IS.Sub)
              , ("undef", IS.Undef)
              , ("unkn",  IS.Unkn)
              , ("xor",   IS.Xor)
              ]

-- | Parse a REIL instruction.
-- A REIL instruction has the format
-- @inst [SIZE op1, SIZE op2, SIZE op3]@, where @inst@ is one of the valid REIL
-- instructions, @SIZE@ is the operand size and @op@ is an operand
instruction :: Parser IS.Instruction
instruction =
    do
        inst <- instructionConstructor
        _ <- spaces >> char '['
        op1 <- operand
        _ <- operandSeparator
        op2 <- operand
        _ <- operandSeparator
        op3 <- operand
        _ <- char ']'
        return $ inst op1 op2 op3
    -- Operands are separated by a comma followed by optional whitespace
    where operandSeparator = char ',' >> spaces

-- | Parse a single REIL operand. The operand kinds are tried in order (empty,
-- integer literal, register, offset); each attempt is wrapped in 'try' so a
-- failed attempt does not consume input before the next kind is tried.
operand :: Parser IS.Operand
operand =
    try emptyOperand
    <|> try integerLiteralOperand
    <|> try registerOperand
    <|> try offsetOperand
    <?> "a valid operand type"

-- | Parse a REIL operand size. An operand size is one of the strings "BYTE",
-- "WORD", "DWORD", "QWORD" or "OWORD"
operandSize :: Parser IS.OperandSize
operandSize =
    -- No 'try' is used here: the five keywords start with distinct letters.
    (IS.Byte <$ string "BYTE")
    <|> (IS.Word <$ string "WORD")
    <|> (IS.DWord <$ string "DWORD")
    <|> (IS.QWord <$ string "QWORD")
    <|> (IS.OWord <$ string "OWORD")
    <?> "a valid operand size"

-- | Parse the empty operand. The empty operand is simply the string "EMPTY".
-- Any whitespace following the keyword is consumed as well.
emptyOperand :: Parser IS.Operand
emptyOperand =
    IS.Empty <$ (string "EMPTY" >> spaces)

-- | Parse an integer literal operand. An integer literal operand is an operand
-- size followed by a base-10 integer with an optional leading minus sign
integerLiteralOperand :: Parser IS.Operand
integerLiteralOperand =
    flip IS.IntegerLiteral <$> operandSize <* spaces <*> intLit
    where intLit = read <$> (sign <:> digits)
          -- When no minus sign is present a '0' is prepended instead, so the
          -- string handed to 'read' is either "-ddd" or "0ddd"
          sign = option '0' (char '-')

-- | Parse a register operand.
-- A register name must start with a letter, followed by
-- one or more alphanumeric characters. The name is preceded by an operand size
registerOperand :: Parser IS.Operand
registerOperand =
    do
        size <- operandSize
        spaces
        name <- letter <:> many1 alphaNum
        return $ IS.Register name size

-- | Parse a REIL offset operand. An offset has the format "ADDRESS xxx.yyy",
-- where "ADDRESS" marks the operand as an offset and @xxx.yyy@ is the address
-- in decimal notation. The absolute address is computed as
-- @xxx * 0x100 + yyy@
offsetOperand :: Parser IS.Operand
offsetOperand =
    do
        _ <- string "ADDRESS"
        spaces
        base <- digits
        _ <- char '.'
        sub <- digits
        return . IS.Offset $ read base * 0x100 + read sub

-- | Parse the separator between an address and an instruction: a colon
-- followed by optional whitespace (@: @). Nothing is returned
addressInstructionSep :: Parser ()
addressInstructionSep =
    char ':' *> spaces

-- | Parse a statement, i.e. an address, the address/instruction separator and
-- an instruction
statement :: Parser BB.Statement
statement =
    do
        addr <- address
        addressInstructionSep
        inst <- instruction
        return $ BB.Statement addr inst

-- | Parse multiple statements (separated by a new line) into a map from
-- address to instruction
statements :: Parser (M.Map IS.Address IS.Instruction)
statements =
    do
        stmts <- statement `sepEndBy` newline
        -- The fold runs from the right, so when an address occurs more than
        -- once the earliest statement in the input is the one that is kept
        return $ foldr addStmt M.empty stmts
    where addStmt (BB.Statement addr inst) =
              M.insert addr inst

--------------------------------------------------------------------------------