├── .gitignore
├── Interpreter.hs
├── LICENSE
├── PCode.hs
├── Parser.hs
├── README.md
├── Types.hs
└── WordString.hs


/.gitignore:
--------------------------------------------------------------------------------
 1 | dist
 2 | dist-*
 3 | cabal-dev
 4 | *.o
 5 | *.hi
 6 | *.hie
 7 | *.chi
 8 | *.chs.h
 9 | *.dyn_o
10 | *.dyn_hi
11 | .hpc
12 | .hsenv
13 | .cabal-sandbox/
14 | cabal.sandbox.config
15 | *.prof
16 | *.aux
17 | *.hp
18 | *.eventlog
19 | .stack-work/
20 | cabal.project.local
21 | cabal.project.local~
22 | .HTF/
23 | .ghc.environment.*
24 | 


--------------------------------------------------------------------------------
/Interpreter.hs:
--------------------------------------------------------------------------------
  1 | {-|
  2 | Module      : Interpreter
  3 | Description : PCode interpreter
  4 | Copyright   : (c) Nico Naus, 2022
  5 | Maintainer  : niconaus@vt.edu
  6 | Stability   : experimental
  7 | This module defines a simple interpreter for Ghidra P-Code
  8 | -}
  9 | module Interpreter where
 10 | 
 11 | import qualified Data.Map as M
 12 | import Types
 13 | import Parser ( pFile )
 14 | import qualified Text.ParserCombinators.Parsec as P
 15 | import Data.Word ( Word8, Word16, Word32, Word64 )
 16 | import Data.Binary.IEEE754
 17 |     ( doubleToWord, floatToWord, wordToDouble, wordToFloat )
 18 | import Data.ByteString.Builder ()
 19 | import qualified Data.ByteString.Lazy as BS
 20 | import qualified Text.Hex as Hex
 21 | import qualified Data.Text as Text
 22 | import Data.Bits
 23 |     ( Bits(shiftR, popCount, complement, (.&.), (.|.), shift, xor,
 24 |            shiftL) )
 25 | import GHC.Float
 26 |     ( double2Float,
 27 |       float2Double,
 28 |       negateDouble,
 29 |       negateFloat )
 30 | import Data.Maybe (fromMaybe)
 31 | import WordString
 32 | 
 33 | ---------------------
 34 | --- Register definitions
 35 | ---------------------
 36 | 
 37 | retAddr, retVal, arg0, arg1, arg2, arg3, arg4, arg5 :: Addr
 38 | retAddr = [0,0,0,0,0,0,3,231]
 39 | retVal  = [0,0,0,0,0,0,3,120]
 40 | arg0 = [0,0,0,0,0,0,0,56]
 41 | arg1 = [0,0,0,0,0,0,0,48]
 42 | arg2 = [0,0,0,0,0,0,0,16]
 43 | arg3 = [0,0,0,0,0,0,0,8]
 44 | arg4 = [0,0,0,0,0,0,0,128]
 45 | arg5 = [0,0,0,0,0,0,0,136]
 46 | 
 47 | ---------------------
 48 | --- EVALUATION FUNCTIONS
 49 | ---------------------
 50 | -- This function takes a PCode program, together with an entry point, and returns a state
 51 | run :: PCode -> State -> Fname -> Addr -> State
 52 | run prog s f a = case M.lookup a blocks of
 53 |     Nothing -> error "block not found"
 54 |     Just b -> evalB blocks' a b s
 55 |   where blocks = fromMaybe (error $ "function not found: " ++ show f) (M.lookup f prog)
 56 |         blocks' = M.unions $ map snd $ M.toList prog
 57 | 
 58 | -- a is the current block address, so next can be calculated
 59 | evalB :: PBlocks -> Addr -> PBlock -> State -> State
 60 | --- no terminator seen, advance to next block
 61 | evalB p a [] (mem,r,var) = evalB p next (getBlock p next) s
 62 |   where next = getNext p a
 63 |         s    = (mem, setReg r retAddr a,var)
 64 | --- terminator instructions
 65 | evalB p a [BRANCH vn] (m,r,v) = evalB p next (getBlock p next) s
 66 |   where next = getAddress vn
 67 |         s    = (m, setReg r retAddr a,v)
 68 |    -- NOTE: there is an error in CBRANCH on Ghidra's side. We assume that this is fixed by dumping script
 69 | evalB p a [BRANCHIND vn] state = evalB p a [BRANCH (Ram (getVN state vn) 8)] state
 70 | evalB p a [CBRANCH vn1 vn2 vn3] (m,r,v) | toBool (decodeWord8 $ getVN (m,r,v) vn3) = let next = getAddress vn1 in evalB p next (getBlock p next) s
 71 |                                         | otherwise = let next = getAddress vn2 in evalB p next (getBlock p next) s
 72 |   where s = (m, setReg r retAddr a,v)
 73 | evalB _ a [RETURN _ Nothing]   (m,r,v) = (m,r,v)
 74 | evalB _ a [RETURN _ (Just vn)] (m,r,v) = (m,setReg r retVal (getVN (m,r,v) vn),v)
 75 | -- --- sequential evaluation
 76 | evalB p a (x:xs) s = evalB p a xs (evalI p a x s)
 77 | 
 78 | evalI :: PBlocks -> Addr -> PInstr -> State -> State
 79 | evalI _ a (STORE _ output input)      (mem,r,vars) = (writeMem mem (getVN (mem,r,vars) input)( encodeWord64 $ getVN64 (mem,r,vars) output),r,vars)  -- again, assuming 64 bits
 80 | evalI p _ (Do call)                   state        = fst $ evalS p call 8 state
 81 | evalI p _ (PCAss (Reg n s) call)      state        = (\((m',r',v'),res) -> (m',setReg r' n res,v')) (evalS p call s state)
 82 | evalI p _ (PCAss (Ram a s) call)      state        = (\((m',r',v'),res) -> (writeMem m' res a,r',v')) (evalS p call s state)
 83 | evalI p _ (PCAss (Variable n s) call) state        = (\((m',r',v'),res) -> (m',r',setVar v' n res)) (evalS p call s state)
 84 | evalI _ a (PAss (Reg n s) i)          (mem,reg,vars) = (\e -> if sizeToInt s == length e then (mem,setReg reg n e,vars) else error "error") (evalO i s (mem,reg,vars))
 85 | evalI _ a ins@(PAss (Variable n s) i) (mem,reg,vars) = (\e -> if sizeToInt s == length e then (mem,reg,setVar vars n e) else error $ "error" ++ show ins ++ " " ++ show e) (evalO i s (mem,reg,vars))
 86 | evalI _ a (PAss (Ram r s) i)          (mem,reg,vars) = (\e -> if sizeToInt s == length e then (writeMem mem e r, reg,vars) else error "error") (evalO i s (mem,reg,vars))
 87 | evalI _ _ (PAss (Const _ _) _)  _ = error "assignment into a constant. There is something wrong with your P-Code"
 88 | evalI _ _ (PCAss (Const _ _) _) _ = error "assignment into a constant. There is something wrong with your P-Code"
 89 | evalI _ _ (BRANCH _)            _ = error "BRANCH instruction should be handled at evalB level"
 90 | evalI _ _ CBRANCH {}            _ = error "CBRANCH instruction should be handled at evalB level"
 91 | evalI _ _ (BRANCHIND _)         _ = error "BRANCHIND instruction should be handled at evalB level"
 92 | evalI _ _ (RETURN _ _)          _ = error "RETURN instruction should be handled at evalB level"
 93 | 
 94 | evalS :: PBlocks -> PCall -> Size -> State -> (State,[Word8])
 95 | evalS p (CALL vn args) s (m,r,v) = assemble res
 96 |   where reg = foldr (\(vn',a') r' -> setReg r' a' (getVN (m,r,v) vn')) r (zip args [arg0,arg1,arg2,arg3,arg4,arg5])
 97 |         dest = getAddress vn
 98 |         res = evalB p dest (getBlock p dest) (m,resetReg reg retAddr 8,M.empty)
 99 |         restoreRegisters cr = foldr (\a r' -> setReg r' a (getReg r a 8)) cr [arg0,arg1,arg2,arg3,arg4,arg5,retAddr] --TODO: what about ret vale?
100 |         assemble = \(m',r',_) -> ((m',restoreRegisters r',v), getReg r' retVal s)
101 | evalS p (CALLIND vn args) s state = evalS p (CALL (Ram (getVN state vn) 8) args) s state
102 | evalS _ (EXTCALL _) _ _ = error "External calls not supported for now"
103 | evalS _ (CALLOTHER _ _) _ _ = error "External calls not supported for now"
104 | 
105 | evalO :: POp -> Size -> State -> [Word8]
106 | evalO (COPY vn)                 i  s = if i == vnSize vn then getVN s vn else error "error!"
107 | evalO (LOAD _ vn)        s (mem,reg,vars) = readMem mem (getVN (mem,reg,vars) vn) s
108 | evalO (PIECE vn1 vn2)           _  s = getVN s vn1 ++ getVN s vn2
109 | evalO (SUBPIECE vn1 vn2)        _  s = case vnSize vn2 of
110 |         8 -> drop (sizeToInt (vnSize vn1) - 8 - l) (take l (getVN s vn1)) where l = fromEnum $ getVN64 s vn2
111 |         _ -> error "case for this size not defined"
112 | evalO (POPCOUNT vn)             n s = zeroExtend n [toEnum $ sum (map popCount (getVN s vn))]
113 | -- INTEGER OPERATIONS
114 | evalO (INT_EQUAL vn1 vn2)       1 s | vnSize vn1 == 8 = if getVN64 s vn1 == getVN64 s vn2 then [trueW] else [falseW]
115 |                                     | vnSize vn1 == 4 = if getVN32 s vn1 == getVN32 s vn2 then [trueW] else [falseW]
116 |                                     | vnSize vn1 == 2 = if getVN16 s vn1 == getVN16 s vn2 then [trueW] else [falseW]
117 |                                     | vnSize vn1 == 1 = if getVN8  s vn1 == getVN8  s vn2 then [trueW] else [falseW]
118 | evalO (INT_EQUAL _ _ )          _  _ = error "Boolean operation requested for a size larger than 1"
119 | evalO (INT_NOTEQUAL vn1 vn2)    1 s | vnSize vn1 == 8 = if getVN64 s vn1 /= getVN64 s vn2 then [trueW] else [falseW]
120 |                                     | vnSize vn1 == 4 = if getVN32 s vn1 /= getVN32 s vn2 then [trueW] else [falseW]
121 |                                     | vnSize vn1 == 2 = if getVN16 s vn1 /= getVN16 s vn2 then [trueW] else [falseW]
122 |                                     | vnSize vn1 == 1 = if getVN8 s vn1  /= getVN8  s vn2 then [trueW] else [falseW]
123 | evalO (INT_NOTEQUAL _ _ )       _  _ = error "Boolean operation requested for a size larger than 1"
124 | evalO (INT_LESS vn1 vn2)        1 s | vnSize vn1 == 8 = if getVN64 s vn1 < getVN64 s vn2 then [trueW] else [falseW]
125 |                                     | vnSize vn1 == 4 = if getVN32 s vn1 < getVN32 s vn2 then [trueW] else [falseW]
126 |                                     | vnSize vn1 == 2 = if getVN16 s vn1 < getVN16 s vn2 then [trueW] else [falseW]
127 |                                     | vnSize vn1 == 1 = if getVN8  s vn1 < getVN8  s vn2 then [trueW] else [falseW]
128 | evalO (INT_LESS _ _ )           _ _ = error "Boolean operation requested for a size larger than 1"
129 | evalO (INT_SLESS vn1 vn2)       1 s = if bs2i (getVN s vn1) < bs2i (getVN s vn2) then [trueW] else [falseW]
130 | evalO (INT_SLESS _ _ )          _ _ = error "Boolean operation requested for a size larger than 1"
131 | evalO (INT_LESSEQUAL vn1 vn2)   1 s = if getVN64 s vn1 <= getVN64 s vn2 then [trueW] else [falseW]
132 | evalO (INT_LESSEQUAL _ _ )      _ _ = error "Boolean operation requested for a size larger than 1"
133 | evalO (INT_SLESSEQUAL vn1 vn2)  1 s = if bs2i (getVN s vn1) <= bs2i (getVN s vn2) then [trueW] else [falseW]
134 | evalO (INT_SLESSEQUAL _ _ )     _ _ = error "Boolean operation requested for a size larger than 1"
135 | evalO (INT_ZEXT vn)             i s = zeroExtend i (getVN s vn)
136 | evalO (INT_SEXT vn)             i s = signExtend i (getVN s vn)
137 | evalO (INT_ADD vn1 vn2)         1 s = operate8  (getVN s vn1) (getVN s vn2) (+)
138 | evalO (INT_ADD vn1 vn2)         2 s = operate16 (getVN s vn1) (getVN s vn2) (+)
139 | evalO (INT_ADD vn1 vn2)         4 s = operate32 (getVN s vn1) (getVN s vn2) (+)
140 | evalO (INT_ADD vn1 vn2)         8 s = operate64 (getVN s vn1) (getVN s vn2) (+)
141 | evalO (INT_ADD _ _)             _ _ = error "Cannot perform addition on irregular shaped bytestring"
142 | evalO (INT_SUB vn1 vn2)         1 s = operate8  (getVN s vn1) (getVN s vn2) (-)
143 | evalO (INT_SUB vn1 vn2)         2 s = operate16 (getVN s vn1) (getVN s vn2) (-)
144 | evalO (INT_SUB vn1 vn2)         4 s = operate32 (getVN s vn1) (getVN s vn2) (-)
145 | evalO (INT_SUB vn1 vn2)         8 s = operate64 (getVN s vn1) (getVN s vn2) (-)
146 | evalO (INT_SUB _ _)             _ _ = error "Cannot perform subtraction on irregular shaped bytestring"
147 | evalO (INT_CARRY vn1 vn2)       1 s | vnSize vn1 == 8 = if (a + b) < max a b then [trueW] else [falseW]
148 |                                       where a = getVN64 s vn1
149 |                                             b = getVN64 s vn2
150 | evalO (INT_CARRY _ _ )          _ _ = error "Boolean operation requested for a size larger than 1"
151 | evalO (INT_SCARRY _ _)          _ _ = undefined
152 | evalO (INT_SBORROW _ _)         _ _ = undefined
153 | evalO (INT_2COMP vn)            1 s = encodeWord8  $ complement (getVN8 s vn)  + 1
154 | evalO (INT_2COMP vn)            2 s = encodeWord16 $ complement (getVN16 s vn) + 1
155 | evalO (INT_2COMP vn)            4 s = encodeWord32 $ complement (getVN32 s vn) + 1
156 | evalO (INT_2COMP vn)            8 s = encodeWord64 $ complement (getVN64 s vn) + 1
157 | evalO (INT_2COMP _)             _ _ = error "Cannot perform complement on irregular shaped bytestring"
158 | evalO (INT_NEGATE vn)           1 s = encodeWord8  $ complement (getVN8 s vn)
159 | evalO (INT_NEGATE vn)           2 s = encodeWord16 $ complement (getVN16 s vn)
160 | evalO (INT_NEGATE vn)           4 s = encodeWord32 $ complement (getVN32 s vn)
161 | evalO (INT_NEGATE vn)           8 s = encodeWord64 $ complement (getVN64 s vn)
162 | evalO (INT_NEGATE _)             _ _ = error "Cannot perform negation on irregular shaped bytestring"
163 | evalO (INT_XOR vn1 vn2)         1 s = operate8  (getVN s vn1) (getVN s vn2) xor
164 | evalO (INT_XOR vn1 vn2)         2 s = operate16 (getVN s vn1) (getVN s vn2) xor
165 | evalO (INT_XOR vn1 vn2)         4 s = operate32 (getVN s vn1) (getVN s vn2) xor
166 | evalO (INT_XOR vn1 vn2)         8 s = operate64 (getVN s vn1) (getVN s vn2) xor
167 | evalO (INT_XOR _ _)             _ _ = error "Cannot perform xor on irregular shaped bytestring"
168 | evalO (INT_AND vn1 vn2)         1 s = operate8  (getVN s vn1) (getVN s vn2) (.&.)
169 | evalO (INT_AND vn1 vn2)         2 s = operate16 (getVN s vn1) (getVN s vn2) (.&.)
170 | evalO (INT_AND vn1 vn2)         4 s = operate32 (getVN s vn1) (getVN s vn2) (.&.)
171 | evalO (INT_AND vn1 vn2)         8 s = operate64 (getVN s vn1) (getVN s vn2) (.&.)
172 | evalO (INT_AND _ _)             _ _ = error "Cannot perform AND on irregular shaped bytestring"
173 | evalO (INT_OR vn1 vn2)          1 s = operate8  (getVN s vn1) (getVN s vn2) (.|.)
174 | evalO (INT_OR vn1 vn2)          2 s = operate16 (getVN s vn1) (getVN s vn2) (.|.)
175 | evalO (INT_OR vn1 vn2)          4 s = operate32 (getVN s vn1) (getVN s vn2) (.|.)
176 | evalO (INT_OR vn1 vn2)          8 s = operate64 (getVN s vn1) (getVN s vn2) (.|.)
177 | evalO (INT_OR _ _)              _ _ = error "Cannot perform or on irregular shaped bytestring"
178 | evalO (INT_LEFT vn1 vn2)        1 s = encodeWord8  $ shift (decodeWord8  (getVN s vn1)) (fromEnum $ getVN8  s vn2)
179 | evalO (INT_LEFT vn1 vn2)        2 s = encodeWord16 $ shift (decodeWord16 (getVN s vn1)) (fromEnum $ getVN16 s vn2)
180 | evalO (INT_LEFT vn1 vn2)        4 s = encodeWord32 $ shift (decodeWord32 (getVN s vn1)) (fromEnum $ getVN32 s vn2)
181 | evalO (INT_LEFT vn1 vn2)        8 s = encodeWord64 $ shift (decodeWord64 (getVN s vn1)) (fromEnum $ getVN64 s vn2)
182 | evalO (INT_LEFT _ _)            _ _ = error "Cannot perform left shift on irregular shaped bytestring"
183 | evalO (INT_RIGHT vn1 vn2)       1 s = encodeWord8  $ shiftR (decodeWord8  (getVN s vn1)) (fromEnum $ getVN8 s vn2)
184 | evalO (INT_RIGHT vn1 vn2)       2 s = encodeWord16 $ shiftR (decodeWord16 (getVN s vn1)) (fromEnum $ getVN16 s vn2)
185 | evalO (INT_RIGHT vn1 vn2)       4 s = encodeWord32 $ shiftR (decodeWord32 (getVN s vn1)) (fromEnum $ getVN32 s vn2)
186 | evalO (INT_RIGHT vn1 vn2)       8 s = encodeWord64 $ shiftR (decodeWord64 (getVN s vn1)) (fromEnum $ getVN64 s vn2)
187 | evalO (INT_RIGHT _ _)           _ _ = error "Cannot perform right shift on irregular shaped bytestring"
188 | evalO (INT_SRIGHT vn1 vn2)      1 s = BS.unpack $ i2bs $ shiftR (bs2i (getVN s vn1)) (fromEnum $ getVN8 s vn2)
189 | evalO (INT_SRIGHT vn1 vn2)      2 s = BS.unpack $ i2bs $ shiftR (bs2i (getVN s vn1)) (fromEnum $ getVN16 s vn2)
190 | evalO (INT_SRIGHT vn1 vn2)      4 s = BS.unpack $ i2bs $ shiftR (bs2i (getVN s vn1)) (fromEnum $ getVN32 s vn2)
191 | evalO (INT_SRIGHT vn1 vn2)      8 s = BS.unpack $ i2bs $ shiftR (bs2i (getVN s vn1)) (fromEnum $ getVN64 s vn2)
192 | evalO (INT_SRIGHT _ _)          _ _ = error "Cannot perform right shift on irregular shaped bytestring"
193 | evalO (INT_MULT vn1 vn2)        1 s = operate8  (getVN s vn1) (getVN s vn2) (*)
194 | evalO (INT_MULT vn1 vn2)        2 s = operate16 (getVN s vn1) (getVN s vn2) (*)
195 | evalO (INT_MULT vn1 vn2)        4 s = operate32 (getVN s vn1) (getVN s vn2) (*)
196 | evalO (INT_MULT vn1 vn2)        8 s = operate64 (getVN s vn1) (getVN s vn2) (*)
197 | evalO (INT_MULT _ _)            _ _ = error "Cannot perform multiplication on irregular shaped bytestring"
198 | evalO (INT_DIV vn1 vn2)         1 s = operate8  (getVN s vn1) (getVN s vn2) div
199 | evalO (INT_DIV vn1 vn2)         2 s = operate16 (getVN s vn1) (getVN s vn2) div
200 | evalO (INT_DIV vn1 vn2)         4 s = operate32 (getVN s vn1) (getVN s vn2) div
201 | evalO (INT_DIV vn1 vn2)         8 s = operate64 (getVN s vn1) (getVN s vn2) div
202 | evalO (INT_DIV _ _)             _ _ = error "Cannot perform division on irregular shaped bytestring"
203 | evalO (INT_REM vn1 vn2)         1 s = operate8  (getVN s vn1) (getVN s vn2) rem
204 | evalO (INT_REM vn1 vn2)         2 s = operate16 (getVN s vn1) (getVN s vn2) rem
205 | evalO (INT_REM vn1 vn2)         4 s = operate32 (getVN s vn1) (getVN s vn2) rem
206 | evalO (INT_REM vn1 vn2)         8 s = operate64 (getVN s vn1) (getVN s vn2) rem
207 | evalO (INT_REM _ _)             _ _ = error "Cannot perform rem on irregular shaped bytestring"
208 | evalO (INT_SDIV vn1 vn2)        i  s = signExtend i $ BS.unpack $ i2bs (div (bs2i $ getVN s vn1) (bs2i $ getVN s vn2))
209 | evalO (INT_SREM vn1 vn2)        i  s = signExtend i $ BS.unpack $ i2bs $ rem (bs2i $ getVN s vn1) (bs2i $ getVN s vn2)
210 | -- BOOLEAN OPERATIONS
211 | evalO (BOOL_NEGATE vn)          1 s = encodeWord8 $ fromBool (not $ toBool (decodeWord8 $ getVN s vn))
212 | evalO (BOOL_NEGATE _)           _ _ = error "Boolean operation requested for a size larger than 1"
213 | evalO (BOOL_XOR vn1 vn2)        1 s = encodeWord8 (fromBool (xor (toBool (decodeWord8 (getVN s vn1))) (toBool (decodeWord8 $ getVN s vn2))))
214 | evalO(BOOL_XOR _ _)             _ _ = error "Boolean operation requested for a size larger than 1"
215 | evalO (BOOL_AND vn1 vn2)        1 s = encodeWord8 (fromBool (toBool (decodeWord8 (getVN s vn1)) && toBool (decodeWord8 $ getVN s vn2)))
216 | evalO(BOOL_AND _ _)             _ _ = error "Boolean operation requested for a size larger than 1"
217 | evalO (BOOL_OR vn1 vn2)         1 s = encodeWord8 (fromBool (toBool (decodeWord8 (getVN s vn1)) || toBool (decodeWord8 $ getVN s vn2)))
218 | evalO(BOOL_OR _ _)              _ _ = error "Boolean operation requested for a size larger than 1"
219 | -- FLOATING POINT NUMBER OPERATIONS
220 | evalO (FLOAT_EQUAL vn1 vn2)     1 s | vnSize vn1 == 8 = if wordToDouble (getVN64 s vn1) == wordToDouble (getVN64 s vn2) then [trueW] else [falseW]
221 |                                     | vnSize vn1 == 4 = if wordToFloat (getVN32 s vn1) == wordToFloat (getVN32 s vn2) then [trueW] else [falseW]
222 |                                     | otherwise = error "I don't think this is a float"
223 | evalO(FLOAT_EQUAL _ _)          _ _ = error "Boolean operation requested for a size larger than 1"
224 | evalO (FLOAT_NOTEQUAL vn1 vn2)  1 s | vnSize vn1 == 8 = if wordToDouble (getVN64 s vn1) /= wordToDouble (getVN64 s vn2) then [trueW] else [falseW]
225 |                                     | vnSize vn1 == 4 = if wordToFloat (getVN32 s vn1) /= wordToFloat (getVN32 s vn2) then [trueW] else [falseW]
226 |                                     | otherwise = error "I don't think this is a float"
227 | evalO(FLOAT_NOTEQUAL _ _)       _ _ = error "Boolean operation requested for a size larger than 1"
228 | evalO (FLOAT_LESS vn1 vn2)      1 s | vnSize vn1 == 8 = if wordToDouble (getVN64 s vn1) < wordToDouble (getVN64 s vn2) then [trueW] else [falseW]
229 |                                     | vnSize vn1 == 4 = if wordToFloat (getVN32 s vn1) < wordToFloat (getVN32 s vn2) then [trueW] else [falseW]
230 |                                     | otherwise = error "I don't think this is a float"
231 | evalO(FLOAT_LESS _ _)           _ _ = error "Boolean operation requested for a size larger than 1"
232 | evalO (FLOAT_LESSEQUAL vn1 vn2) 1 s | vnSize vn1 == 8 = if wordToDouble (getVN64 s vn1) <= wordToDouble (getVN64 s vn2) then [trueW] else [falseW]
233 |                                     | vnSize vn1 == 4 = if wordToFloat (getVN32 s vn1) <= wordToFloat (getVN32 s vn2) then [trueW] else [falseW]
234 |                                     | otherwise = error "I don't think this is a float"
235 | evalO(FLOAT_LESSEQUAL _ _)      _ _ = error "Boolean operation requested for a size larger than 1"
236 | evalO (FLOAT_ADD vn1 vn2)       8 s = encodeWord64$ doubleToWord (wordToDouble (getVN64 s vn1) + wordToDouble (getVN64 s vn2))
237 | evalO (FLOAT_ADD vn1 vn2)       4 s = encodeWord32$ floatToWord (wordToFloat (getVN32 s vn1) + wordToFloat (getVN32 s vn2))
238 | evalO (FLOAT_ADD _ _)           _ _ = error "I don't think this is a float"
239 | evalO (FLOAT_SUB vn1 vn2)       8 s = encodeWord64$ doubleToWord (wordToDouble (getVN64 s vn1) - wordToDouble (getVN64 s vn2))
240 | evalO (FLOAT_SUB vn1 vn2)       4 s = encodeWord32$ floatToWord (wordToFloat (getVN32 s vn1) - wordToFloat (getVN32 s vn2))
241 | evalO (FLOAT_SUB _ _)           _ _ = error "I don't think this is a float"
242 | evalO (FLOAT_MULT vn1 vn2)      8 s = encodeWord64 $ doubleToWord (wordToDouble (getVN64 s vn1) * wordToDouble (getVN64 s vn2))
243 | evalO (FLOAT_MULT vn1 vn2)      4 s = encodeWord32 $ floatToWord (wordToFloat (getVN32 s vn1) * wordToFloat (getVN32 s vn2))
244 | evalO (FLOAT_MULT _ _)          _ _ = error "I don't think this is a float"
245 | evalO (FLOAT_DIV vn1 vn2)       8 s = encodeWord64$ doubleToWord (wordToDouble (getVN64 s vn1) / wordToDouble (getVN64 s vn2))
246 | evalO (FLOAT_DIV vn1 vn2)       4 s = encodeWord32$ floatToWord (wordToFloat (getVN32 s vn1) / wordToFloat (getVN32 s vn2))
247 | evalO (FLOAT_DIV _ _)           _ _ = error "I don't think this is a float"
248 | evalO (FLOAT_NEG vn)            8 s = encodeWord64$ doubleToWord (negateDouble (wordToDouble $ getVN64 s vn))
249 | evalO (FLOAT_NEG vn)            4 s = encodeWord32 $ floatToWord (negateFloat  (wordToFloat  $ getVN32 s vn))
250 | evalO (FLOAT_NEG _)             _ _ = error "I don't think this is a float"
251 | evalO (FLOAT_ABS vn)            8 s = encodeWord64 $ doubleToWord (abs (wordToDouble $ getVN64 s vn))
252 | evalO (FLOAT_ABS vn)            4 s = encodeWord32 $ floatToWord  (abs (wordToFloat  $ getVN32 s vn))
253 | evalO (FLOAT_ABS _)             _ _ = error "I don't think this is a float"
254 | evalO (FLOAT_SQRT vn)           8 s = encodeWord64 $ doubleToWord (sqrt (wordToDouble $ getVN64 s vn))
255 | evalO (FLOAT_SQRT vn)           4 s = encodeWord32 $ floatToWord  (sqrt (wordToFloat  $ getVN32 s vn))
256 | evalO (FLOAT_SQRT _)            _ _ = error "I don't think this is a float"
257 | evalO (FLOAT_CEIL vn)           8 s = encodeWord64 (ceiling (wordToDouble $ getVN64 s vn))
258 | evalO (FLOAT_CEIL vn)           4 s = encodeWord32 (ceiling (wordToFloat  $ getVN32 s vn))
259 | evalO (FLOAT_CEIL _ )           _ _ = error "I don't think this is a float"
260 | evalO (FLOAT_FLOOR vn)          8 s = encodeWord64 (floor (wordToDouble $ getVN64 s vn))
261 | evalO (FLOAT_FLOOR vn)          4 s = encodeWord32 (floor (wordToFloat  $ getVN32 s vn))
262 | evalO (FLOAT_FLOOR _)           _ _ = error "I don't think this is a float"
263 | evalO (FLOAT_ROUND vn)          8 s = encodeWord64 (round (wordToDouble $ getVN64 s vn))
264 | evalO (FLOAT_ROUND vn)          4 s = encodeWord32 (round (wordToFloat  $ getVN32 s vn))
265 | evalO (FLOAT_ROUND _)           _ _ = error "I don't think this is a float"
266 | evalO (FLOAT_NAN vn)            1 s | vnSize vn == 8 = encodeWord8 $ fromBool (isNaN (wordToDouble $ getVN64 s vn))
267 |                                     | vnSize vn == 4 = encodeWord8 $ fromBool (isNaN (wordToFloat  $ getVN32 s vn))
268 |                                     | otherwise = error "I don't think this is a float"
269 | evalO (FLOAT_NAN _ )            _ _ = error "Boolean operation requested for a size larger than 1"
270 | evalO (INT2FLOAT vn)            8 s = encodeWord64 $ doubleToWord $ toEnum $ fromEnum $ getVN64 s vn
271 | evalO (INT2FLOAT vn)            4 s = encodeWord32 $ floatToWord $ toEnum $ fromEnum $ getVN32 s vn
272 | evalO (INT2FLOAT _ )            _ _ = error "Destination size too small"
273 | evalO (FLOAT2FLOAT vn)          i s = case (vnSize vn,i) of
274 |   (4,8) -> encodeWord64$ doubleToWord (float2Double (wordToFloat $ getVN32 s vn))
275 |   (8,4) -> encodeWord32$ floatToWord (double2Float (wordToDouble $ getVN64 s vn))
276 |   _     -> error "Float destination and input have to be of different sizes and should be either 4 or 8 bytes"
277 | evalO (TRUNC vn)                8 s = encodeWord64 $ toEnum $ fromEnum (wordToDouble $ getVN64 s vn)
278 | evalO (TRUNC vn)                4 s = encodeWord32 $ toEnum $ fromEnum (wordToFloat $ getVN32 s vn)
279 | evalO (TRUNC _ )                _ _ = error "I don't think this is a float"
280 | -- Special operations
281 | evalO (INDIRECT vn1 _)          _ s = getVN s vn1 -- We need to do something special here; vn1 might be the value, it might not be
282 |     -- NOTE: Often times, a varnode is not set in MULTIEQUAL, so we can discard that path if that is the case
283 | evalO (MULTIEQUAL vns)    _ (m,r,v) = if not (null vns') then head vns' else error "no suitable varNode found in multiequal"
284 |   where a = getVN (m,r,v) (Reg retAddr 8)
285 |         vns' = [getVN (m,r,v) vn | (vn,a') <- vns, a'==a]
286 | evalO (PTRSUB vn1 vn2)          i s = evalO (INT_ADD vn1 vn2) i s
287 | evalO (PTRADD vn1 vn2 vn3)      i s = (\x -> evalO (INT_ADD vn1 (Const x i)) i s) (evalO (INT_MULT vn2 vn3) i s)
288 | evalO (CAST vn)                 _ s = getVN s vn
289 | 
290 | 
291 | 
292 | 
293 | 
294 | 
295 | ---------------------
296 | --- MEMORY FUNCTIONS
297 | ---------------------
298 | getVN64 ::State -> VarNode -> Word64
299 | getVN64 s v = if length (getVN s v) == 8 then  decodeWord64 $ getVN s v else error $ "Tried to make a Word64 of varnode " ++ show v
300 | 
301 | getVN32 ::State -> VarNode -> Word32
302 | getVN32 s v = if length (getVN s v) == 4 then  decodeWord32 $ getVN s v else error $ "Tried to make a Word32 of varnode " ++ show v ++ ", which contains " ++ show (getVN s v) ++ ", and state contains: " ++ show s
303 | 
304 | getVN16 ::State -> VarNode -> Word16
305 | getVN16 s v = if length (getVN s v) == 2 then  decodeWord16 $ getVN s v else error $ "Tried to make a Word16 of varnode " ++ show v ++ ", which contains " ++ show (getVN s v) ++ ", and state contains: " ++ show s
306 | 
307 | getVN8 ::State -> VarNode -> Word8
308 | getVN8 s v = if length (getVN s v) == 1 then  decodeWord8 $ getVN s v else error $ "Tried to make a Word8 of varnode " ++ show v ++ ", which contains " ++ show (getVN s v) ++ ", and state contains: " ++ show s
309 | 
310 | 
311 | getVN :: State -> VarNode -> [Word8]
312 | getVN (mem,_,_)  (Ram a n)    = readMem mem a n
313 | getVN (_,reg,_)  (Reg r s)    = getReg reg r s
314 | getVN _          (Const i _)  = i
315 | getVN (_,_,vars) (Variable n _) = getVar vars n
316 | 
317 | getAddress :: VarNode -> [Word8]
318 | getAddress (Ram a _) = a
319 | getAddress _ = error "unexpected address notation"
320 | 
321 | writeMem :: Mem -> [Word8] -> Addr -> Mem
322 | writeMem mem x a = foldr (\(v,i) mem' -> M.insert (encodeWord64 (decodeWord64 a + decodeWord64 (zeroExtend 8 [i]))) v mem') mem (zip x [0..])
323 | 
324 | readMem :: Mem -> Addr -> Size -> [Word8]
325 | readMem mem a n = reverse $ map (\i -> read (encodeWord64 (decodeWord64 a + decodeWord64 (zeroExtend 8 [i])))) [0..(n-1)]
326 |     where read a1 = fromMaybe err (M.lookup a1 mem)
327 |           err = error $ "memory location "++ show a ++ " not initialized" ++ show mem
328 | 
329 | setReg :: Regs -> Addr -> [Word8] -> Regs
330 | setReg reg a x = foldr (\(v,i) reg' -> M.insert (encodeWord64 (decodeWord64 a + decodeWord64 (zeroExtend 8 [i]))) v reg') reg (zip (reverse x) [0..])
331 | 
332 | getReg :: Regs -> Addr -> Size -> [Word8]
333 | getReg reg a n = reverse $ map (\i -> read (encodeWord64 (decodeWord64 a + decodeWord64 (zeroExtend 8 [i])))) [0..(n-1)]
334 |   where read a1 = fromMaybe err (M.lookup a1 reg)
335 |         err = error $ "register location "++ show a ++ " not initialized" ++ show reg ++ " of size " ++ show n
336 | 
337 | -- size in BYTES! So a size of 1 returns 8 bits
338 | 
339 | 
340 | getVar :: Vars -> String -> [Word8]
341 | getVar v s = case M.lookup s v of
342 |   Nothing -> error $ "variable " ++ s ++ " undefined. Memory contains: " ++ show v
343 |   Just a -> a
344 | 
345 | setVar :: Vars -> String -> [Word8] -> Vars
346 | setVar v s w = M.insert s w v
347 | 
348 | resetReg :: Regs -> Addr -> Size -> Regs
349 | resetReg reg a s = foldr (\i reg' -> M.delete (encodeWord64 (decodeWord64 a + decodeWord64 (zeroExtend 8 [i]))) reg') reg [0..(s-1)]
350 | 
351 | 
352 | getNext :: PBlocks -> Addr -> Addr
353 | getNext p a = nextA a (blockL p) []
354 | 
355 | blockL :: PBlocks -> [Addr]
356 | blockL pcode = map fst $ M.toList pcode
357 | 
358 | -- we assume the address list to be sorted
359 | nextA :: Addr -> [Addr] -> [Addr] -> Addr
360 | nextA x [] xs = error $ "I was unable to find the next block, there is no such thing." ++ show x ++ " next not in " ++ show xs
361 | nextA a (x:xs) i | x > a = x
362 |                  | otherwise = nextA a xs (x:i)
363 | 
364 | -- REGISTER functions
365 | 
366 | restoreReg :: Regs -> Regs -> Regs
367 | restoreReg old new = M.unions [newFresh,old1,old2,old3]
368 |  where newFresh = resetReg (resetReg (resetReg new [0,0,0,0,0,0,0,8] 16) [0,0,0,0,0,0,0,48] 16) [0,0,0,0,0,0,0,128] 16
369 |        old1 = setReg M.empty [0,0,0,0,0,0,0,8] (getReg old [0,0,0,0,0,0,0,8] 16)
370 |        old2 = setReg M.empty [0,0,0,0,0,0,0,48] (getReg old [0,0,0,0,0,0,0,48] 16)
371 |        old3 = setReg M.empty [0,0,0,0,0,0,0,128] (getReg old [0,0,0,0,0,0,0,128] 16)
372 | 
373 | emptyReg :: Regs
374 | emptyReg = foldr (\(Reg i _) m -> setReg m i [0,0,0,0,0,0,0,0]) M.empty argRegs -- M.fromList [("RSP",BS.unpack $ encode (64 :: Word64)),("EDI",(signExtend 4 $ BS.unpack $ i2bs 55 ))]
375 | 
376 | argRegs :: [VarNode]
377 | argRegs = [Reg arg0 8,Reg arg1 8
378 |           ,Reg arg2 8,Reg arg3 8
379 |           ,Reg arg4 8,Reg arg5 8]
380 | ---------------------
381 | --- PCODE ACCESS FUNCTIONS
382 | ---------------------
383 | 
384 | getBlock :: PBlocks -> Addr -> PBlock
385 | getBlock p a = case M.lookup a p of
386 |   Nothing -> error $ "block " ++ show a ++ " not found in " ++ show p
387 |   Just b -> b
388 | 
389 | ---------------------
390 | --- OPERATIONS
391 | ---------------------
392 | 
393 | trueW, falseW :: Word8
394 | trueW = toEnum 1
395 | falseW = toEnum 0
396 | 
397 | boolNegate :: Word8 -> Word8
398 | boolNegate xs | xs == 0 = 1
399 |               | otherwise = 0
400 | 
401 | ---------------------
402 | --- Helpers
403 | ---------------------
404 | 
405 | fromFloat :: Float -> Word32
406 | fromFloat = floatToWord
407 | 
408 | --We use Big endian encoding... This is kinda arbitrary
409 | 
410 | operate8 :: [Word8] -> [Word8] -> (Word8 -> Word8 -> Word8) -> [Word8]
411 | operate8 wx wy f = encodeWord8(f (decodeWord8 wx) (decodeWord8 wy))
412 | 
413 | operate16 :: [Word8] -> [Word8] -> (Word16 -> Word16 -> Word16) -> [Word8]
414 | operate16 wx wy f = encodeWord16(f (decodeWord16 wx) (decodeWord16 wy))
415 | 
416 | operate32 :: [Word8] -> [Word8] -> (Word32 -> Word32 -> Word32) -> [Word8]
417 | operate32 wx wy f = encodeWord32(f (decodeWord32 wx) (decodeWord32 wy))
418 | 
419 | operate64 :: [Word8] -> [Word8] -> (Word64 -> Word64 -> Word64) -> [Word8]
420 | operate64 wx wy f = encodeWord64(f (decodeWord64 wx) (decodeWord64 wy))
421 | 
422 | floatOperate64 :: [Word8] -> [Word8] -> (Word64 -> Word64 -> Word64) -> [Word8]
423 | floatOperate64 wx wy f = encodeWord64(f (decodeWord64 wx) (decodeWord64 wy))
424 | 
425 | fromHex :: String -> [Word8]
426 | fromHex s = case (Hex.decodeHex . Text.pack) s of
427 |   Nothing -> error $ "could not read hex string " ++ s
428 |   Just w -> (BS.unpack . Hex.lazyByteString) w
429 | 
430 | --we assume 0 is false and everything else is true
431 | toBool :: Word8 -> Bool
432 | toBool = (/=) 0
433 | 
434 | fromBool :: Bool -> Word8
435 | fromBool True = 1
436 | fromBool False = 0
437 | 
438 | -- Two's complement conversion functions
439 | -- source: https://stackoverflow.com/questions/15047191/read-write-haskell-integer-in-twos-complement-representation
440 | bs2i :: [Word8] -> Integer
441 | bs2i bs
442 |    | sign = go b - 2 ^ (BS.length b * 8)
443 |    | otherwise = go b
444 |    where
445 |       b = BS.pack bs
446 |       go = BS.foldl' (\i b' -> (i `shiftL` 8) + fromIntegral b') 0
447 |       sign = BS.index b 0 > 127
448 | 
449 | i2bs :: Integer -> BS.ByteString
450 | i2bs x
451 |    | x == 0 = BS.singleton 0
452 |    | x < 0 = i2bs $ 2 ^ (8 * bytes) + x
453 |    | otherwise = BS.reverse $ BS.unfoldr go x
454 |    where
455 |       bytes = (integerLogBase 2 (abs x) + 1) `quot` 8 + 1
456 |       go i = if i == 0 then Nothing
457 |                        else Just (fromIntegral i, i `shiftR` 8)
458 | 
459 | integerLogBase :: Integer -> Integer -> Int
460 | integerLogBase b i =
461 |      if i < b then
462 |         0
463 |      else
464 |         let l = 2 * integerLogBase (b*b) i
465 |             doDiv :: Integer -> Int -> Int
466 |             doDiv i l = if i < b then l else doDiv (i `div` b) (l+1)
467 |         in  doDiv (i `div` (b^l)) l


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/PCode.hs:
--------------------------------------------------------------------------------
 1 | {-|
 2 | Module      : PCode
 3 | Description : PCode file
 4 | Copyright   : (c) Nico Naus, 2022
 5 | Maintainer  : niconaus@vt.edu
 6 | Stability   : experimental
 7 | This module defines interfaces for interpreter and parser of P-Code
 8 | -}
 9 | 
10 | module PCode where
11 | 
12 | import qualified Text.ParserCombinators.Parsec as P
13 | import qualified Data.Map as M
14 | import Parser ( pFile )
15 | import Interpreter
16 | import WordString ( decodeWord32, encodeWord64 )
17 | 
18 | main :: IO ()
19 | main = do
20 |     putStrLn "P-Code interpreter \nPlease input P-Code file"
21 |     src <- getLine
22 |     let src' = if last src == ' ' then init src else src
23 |     s <- readFile src'
24 |     case P.parse pFile "(unknown)" s of
25 |         Right x -> do
26 |                 putStrLn "Enter function address"
27 |                 entry <- getLine
28 |                 let entryAddr = encodeWord64 $ toEnum (read entry :: Int)
29 |                 putStrLn "Enter arguments"
30 |                 args <- getLine
31 |                 let numbers = map read (words args) :: [Int]
32 |                 let regs = foldr (\(a,v) r -> setReg r a v) emptyReg (zip [arg0,arg1,arg2,arg3,arg4,arg5] (map (encodeWord64 . toEnum) numbers))
33 |                 let (_,regs',_) = run (fst x) (M.empty,regs,M.empty) entryAddr entryAddr
34 |                 --print regs'
35 |                 let ret = decodeWord32 $ getReg regs' retVal 4
36 |                 putStrLn "Return value of called function is:"
37 |                 print ret
38 |                 -- print (run (fst x) (M.empty,regs,M.empty) (show entryAddr) entryAddr) -- 4294971152
39 |                 return ()
40 |         Left _ -> do
41 |                 putStrLn "Parse error"
42 |                 return ()
43 | 


--------------------------------------------------------------------------------
/Parser.hs:
--------------------------------------------------------------------------------
  1 | {-|
  2 | Module      : PCode
  3 | Description : Ghidra P-Code language definitions
  4 | Copyright   : (c) Nico Naus, 2022
  5 | Maintainer  : niconaus@vt.edu
  6 | Stability   : experimental
  7 | This module defines the datatypes and translation of Ghidra P-Code
  8 | -}
  9 | module Parser where
 10 | 
 11 | import qualified Data.Map as M
 12 | import Data.Maybe ( fromMaybe )
 13 | import Data.List ( foldl', elemIndex )
 14 | import Text.ParserCombinators.Parsec
 15 | import Types
 16 | import Data.Word ( Word8 )
 17 | import Text.Hex ( decodeHex )
 18 | import qualified Data.ByteString as BS
 19 | import qualified Data.Text as T
 20 | 
 21 | runParse :: IO ()
 22 | runParse = do
 23 |   s <- readFile "tests/nearestPrime.txt"
 24 |   case parse pFile "(unknown)" s of
 25 |     Right x -> do
 26 |                 putStrLn $ prettyPF $ fst x
 27 |                 --putStrLn $ show $ snd x
 28 |                 return ()
 29 |     Left er -> do
 30 |       print er
 31 |       return ()
 32 | -- Parser stuff
 33 | 
 34 | pFile :: GenParser Char st (PCode,Mem)
 35 | pFile = do
 36 |   prog <- M.fromList <$> many1 (try pFunction)
 37 |   _    <- string "MEMORY\n"
 38 |   mem  <- M.fromList <$> many1 (try pMemory) <* eof
 39 |   return (prog,mem)
 40 | 
 41 | pMemory :: GenParser Char st (Addr,Word8)
 42 | pMemory = do
 43 |   a <- pHex' 8 <$> many1 (oneOf "0123456789abcdef") <* char ' '
 44 |   w <- toEnum <$> (read <$> many1 digit) <* char '\n'
 45 |   return (a,w)
 46 | 
 47 | pFunction :: GenParser Char st (Fname,PBlocks)
 48 | pFunction = do
 49 |   name <- many1 (noneOf "\n") <* char '\n'  -- reads the function name
 50 |   let prefix = if take 4 name == "EXT_" then "EXT_" else ""
 51 |   addr <- pHex' 8 <$> many1 (oneOf "0123456789abcdef") <* char '\n' -- reads the HEX address that the function is stored at
 52 |   block <- many1 (try pInstr) -- parse the first block manually, since we consumed its address
 53 |   blocks <- pBlocks
 54 |   return (addr, M.insert addr block blocks)
 55 | 
 56 | -- gets the first address as an integer input, since it is consumed by pFunction
 57 | pBlocks :: GenParser Char st PBlocks
 58 | pBlocks = do
 59 |   result <- many $ try pEntry
 60 |   return (M.fromList result)
 61 | 
 62 | 
 63 | pEntry :: GenParser Char st (Addr,PBlock)
 64 | pEntry = do
 65 |   addr <- pAddress
 66 |   block <- many1 (try pInstr)
 67 |   return (addr,block)
 68 | 
 69 | pAddress :: GenParser Char st Addr
 70 | pAddress = pHex' 8 <$> many1 (oneOf "0123456789abcdef") <* many (noneOf "\n") <* char '\n'
 71 | 
 72 | pInstr :: GenParser Char st PInstr
 73 | pInstr = do
 74 |   try pStore <|> try pExtCall <|> try pBr <|> try pInstr1m <|> try pInstr11 <|> try pPAss
 75 | 
 76 | pPAss :: GenParser Char st PInstr
 77 | pPAss = do
 78 |   node <- pVarNode <* space
 79 |   PAss node <$> pOp
 80 | 
 81 | pStore :: GenParser Char st PInstr
 82 | pStore = do
 83 |   _ <- string " ---  "
 84 |   instr <- choice $ map (\(x,y) -> try $ string x *> return y) [("STORE", STORE),("CBRANCH",CBRANCH)]
 85 |   node1 <- string " " *> pVarNode
 86 |   node2 <- string " , "*> pVarNode
 87 |   node3 <- (string " , "*> pVarNode) <* char '\n'
 88 |   return $ instr node1 node2 node3
 89 | 
 90 | pBr :: GenParser Char st PInstr
 91 | pBr = do
 92 |     _ <- string " ---  "
 93 |     instr <- choice $ map (\(x,y) -> try $ string x *> return y) [("BRANCHIND",BRANCHIND),("BRANCH*",BRANCH),("BRANCH",BRANCH)]
 94 |     node <- space *> pVarNode <* char '\n'
 95 |     return $ instr node
 96 | 
 97 | pInstr1m :: GenParser Char st PInstr
 98 | pInstr1m = do
 99 |     _ <- string " ---  "
100 |     instr <- choice $ map (\(x,y) -> try $ string x *> return y) [("RETURN",RETURN)]
101 |     _ <- space
102 |     node0 <- pVarNode
103 |     node1 <- choice [Just <$> (string " , "*> try pVarNode),return Nothing]
104 |     _ <- char '\n'
105 |     return $ instr node0 node1
106 | 
107 |   -- Parses instructions of the form: CODE NODE [NODE]
108 | pInstr11 :: GenParser Char st PInstr
109 | pInstr11 = do
110 |   constructor <- try (string " ---  " *> return Do) <|> try ( PCAss <$> (pVarNode <* space))
111 |   instr <- choice $ map (try . string)["CALLIND ","CALLOTHER ","CALL "]
112 |   node <- pVarNode
113 |   nodes <- many (try (string " , " *> pVarNode)) <* char '\n'
114 |   return $ constructor $ match instr node nodes
115 |     where
116 |       match "CALL " = CALL
117 |       match "CALLIND " = CALLIND
118 |       match "CALLOTHER " = CALLOTHER
119 |       match x = error $ "this should not happen. pInstr1: " ++ show x
120 | 
121 | pExtCall :: GenParser Char st PInstr
122 | pExtCall = do
123 |     _ <- string " ---  "
124 |     name <- string "EXTCALL " *> many1 (noneOf "\n") <* char '\n'
125 |     return $ Do $ EXTCALL name
126 | 
127 | pOp :: GenParser Char st POp
128 | pOp = choice $ map try [pInstr1,pInstr3,pInstrMult,pInstrStore]
129 | 
130 | pInstr1 :: GenParser Char st POp
131 | pInstr1 = do
132 |     instr <- choice $ map (\(x,y) -> try $ string x *> return y) [("INT2FLOAT",INT2FLOAT)
133 |                                                                  ,("FLOAT2FLOAT",FLOAT2FLOAT),("FLOAT_NEG",FLOAT_NEG)
134 |                                                                  ,("COPY",COPY),("POPCOUNT",POPCOUNT)
135 |                                                                  ,("BOOL_NEGATE",BOOL_NEGATE)
136 |                                                                  ,("INT_ZEXT",INT_ZEXT),("INT_SEXT",INT_SEXT)
137 |                                                                  ,("TRUNC",TRUNC),("POPCOUNT",POPCOUNT)
138 |                                                                  ,("INT_NEGATE",INT_NEGATE),("INT_2COMP",INT_2COMP)
139 |                                                                  ,("CAST",CAST),("FLOAT_SQRT",FLOAT_SQRT)]
140 |     _ <- space
141 |     node <- pVarNode <* char '\n'
142 |     return $ instr node
143 | 
144 | pInstrStore :: GenParser Char st POp
145 | pInstrStore = do
146 |   instr <- choice $ map (\(x,y) -> try $ string x *> return y) [("PTRADD",PTRADD)]
147 |   node1 <- string " " *> pVarNode
148 |   node2 <- string " , "*> pVarNode
149 |   node3 <- (string " , "*> pVarNode) <* char '\n'
150 |   return $ instr node1 node2 node3
151 | 
152 | -- Parses instructions of the form: CODE NODE [NODE]
153 | pInstrMult :: GenParser Char st POp
154 | pInstrMult = do
155 |   _ <- string "MULTIEQUAL" <* space
156 |   node <- pVarNode
157 |   lab <- string " , " *> many1 (oneOf "0123456789abcdef")
158 |   let lab' = pHex' 8 lab
159 |   nodes <- many (try getNodeLabel) <* char '\n'
160 |   return $ MULTIEQUAL ((node,lab'):nodes)
161 |   where
162 |     getNodeLabel = do
163 |       node <- string " , " *> pVarNode
164 |       lab <- string " , " *> many1 (oneOf "0123456789abcdef")
165 |       let lab' = pHex' 8 lab
166 |       return (node,lab')
167 | 
168 | pInstr3 :: GenParser Char st POp
169 | pInstr3 = do
170 |   instr <- choice $ map (\(x,y) -> try $ string x *> return y)
171 |                                               [("LOAD",LOAD),("INT_SUB",INT_SUB),("INT_ADD",INT_ADD)
172 |                                               ,("INT_AND",INT_AND),("INT_SLESSEQUAL",INT_SLESSEQUAL),("INT_SLESS",INT_SLESS)
173 |                                               ,("INT_EQUAL",INT_EQUAL),("INT_LESSEQUAL",INT_LESSEQUAL),("INT_LESS",INT_LESS)
174 |                                               ,("INT_SBORROW",INT_SBORROW),("INT_NOTEQUAL",INT_NOTEQUAL)
175 |                                               ,("BOOL_OR",BOOL_OR),("INT_MULT",INT_MULT)
176 |                                               ,("INT_SCARRY",INT_SCARRY),("INT_CARRY",INT_CARRY)
177 |                                               ,("INDIRECT",INDIRECT),("SUBPIECE",SUBPIECE)
178 |                                               ,("PIECE",PIECE),("INT_OR",INT_OR)
179 |                                               ,("INT_SREM",INT_SREM),("INT_SDIV",INT_SDIV)
180 |                                               ,("INT_SRIGHT",INT_SRIGHT),("INT_RIGHT",INT_RIGHT)
181 |                                               ,("FLOAT_NOTEQUAL",FLOAT_NOTEQUAL),("FLOAT_EQUAL",FLOAT_EQUAL)
182 |                                               ,("FLOAT_LESSEQUAL",FLOAT_LESSEQUAL),("FLOAT_LESS",FLOAT_LESS),("BOOL_AND",BOOL_AND)
183 |                                               ,("INT_DIV",INT_DIV),("FLOAT_MULT",FLOAT_MULT)
184 |                                               ,("FLOAT_ADD",FLOAT_ADD),("FLOAT_DIV",FLOAT_DIV)
185 |                                               ,("FLOAT_SUB",FLOAT_SUB),("INT_LEFT",INT_LEFT)
186 |                                               ,("INT_REM",INT_REM),("INT_XOR",INT_XOR)
187 |                                               ,("BOOL_XOR",BOOL_XOR),("PTRSUB",PTRSUB)]
188 |   node1 <- space *> pVarNode <* string " , "
189 |   node2 <- pVarNode <* char '\n'
190 |   return $ instr node1 node2
191 | 
192 | ------------------------------------------------------------
193 | -- VarNode parsers -----------------------------------------
194 | ------------------------------------------------------------
195 | 
196 | pVarNode ::GenParser Char st VarNode
197 | pVarNode = choice $ map try [pReg,pMem,pVal,pStack,pUnique,pVar]
198 | 
199 | pReg :: GenParser Char st VarNode
200 | pReg = do
201 |   -- name <- string "(register, " *> many1 (oneOf "ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890") <* string ", "
202 |   name <- string "(register, 0x" *> many1 (oneOf "0123456789abcdef") <* string ", "
203 |   size <- read <$> many1 digit <* char ')'
204 |   let name' = pHex' 8 name
205 |   return $ Reg name' (intToSize size)
206 | 
207 | pMem :: GenParser Char st VarNode
208 | pMem = do
209 |   addr <- string "(ram, 0x" *> many1 (oneOf "0123456789abcdef") <* string ", "
210 |   size <- read <$> many1 digit <* char ')'
211 |   let addr' = pHex' 8 addr
212 |   return $ Ram addr' (intToSize size)
213 | 
214 | pVal :: GenParser Char st VarNode
215 | pVal = do
216 |   val <- string "(const, 0x" *> many1 (oneOf "0123456789abcdef") <* string ", "
217 |   size <- read <$> many1 digit <* char ')'
218 |   let val' = pHex' size val
219 |   return (Const val' (intToSize size))
220 | 
221 | pStack :: GenParser Char st VarNode
222 | pStack = do
223 |   addr <- string "(stack, 0x" *> many1 (oneOf "ABCDEF1234567890abcdef") <* string ", "
224 |   size <- read <$> many1 digit <* char ')'
225 |   return (Variable ("stack_" ++ addr) (intToSize size))
226 | 
227 | pUnique :: GenParser Char st VarNode
228 | pUnique = do
229 |   addr <- (string "(unique, 0x" *> pHex) <* string ", "
230 |   size <- read <$> many1 digit <* char ')'
231 |   return (Variable ("u_" ++ show addr) (intToSize size))
232 | 
233 | pVar :: GenParser Char st VarNode
234 | pVar = do
235 |   addr <- (string "(VARIABLE, 0x" *> pHex) <* string ", "
236 |   size <- read <$> many1 digit <* char ')'
237 |   return (Variable ("var_" ++ show addr) (intToSize size))
238 | 
239 | ------------------------------------------------------------
240 | -- Hex parser -----------------------------------------
241 | ------------------------------------------------------------
242 | 
243 | pHex' :: Int -> String -> [Word8]
244 | pHex' i val = let val' = replicate ((i*2)-length val) '0' ++ val in
245 |     BS.unpack $ fromMaybe (error "illegal character in hex string") (decodeHex (T.pack val'))
246 | 
247 | pHex :: GenParser Char st Int
248 | pHex = do
249 |   val <- many1 (oneOf "0123456789abcdef")
250 |   return $ parseHex val
251 | 
252 | hexChar :: Char -> Int
253 | hexChar ch = fromMaybe (error $ "illegal char " ++ [ch]) $ elemIndex ch "0123456789abcdef"
254 | 
255 | parseHex :: Foldable t => t Char -> Int
256 | parseHex = foldl' f 0 where f n c = 16*n + hexChar c
257 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # P-Code interpreter
 2 | 
 3 | This repository contains the code for the high P-Code interpreter described in the paper "[A Formal Semantics for P-Code](https://link.springer.com/chapter/10.1007/978-3-031-25803-9_7)" by Naus, Verbeek and Ravindran.
 4 | 
 5 | The code in this repo is intended for use with our custom P-Code dumping script, which can be found here: https://github.com/niconaus/PCode-Dump
 6 | 
 7 | Make sure that the Ghidra option "Decompiler Parameter ID" is enabled, otherwise you might run into intra-procedural type issues.
 8 | 
 9 | ## Usage
10 | 
11 | Load the PCode module in GHCI, or compile with GCH.
12 | Enter the file name of the P-Code that you want to interpret.
13 | Enter the function address as Integral number.
14 | Enter the function arguments, using " " to separate them.
15 | 
16 | The return value of the called function will be printed as an integer value.
17 | 
18 | Example:
19 | 
20 | ```
21 | PCode> main
22 | P-Code interpreter 
23 | Please input P-Code file
24 | p.txt
25 | Enter function address
26 | 4294983536
27 | Enter arguments
28 | 4 5
29 | Return value of called function is:
30 | 9
31 | ```
32 | 
33 | #### Acknowledgements:
34 | 
35 | This material is based upon work supported by the Defense Advanced Research Projects Agency (DARPA) and Naval Information Warfare Center Pacific (NIWC Pacific) under contract N6600121C4028 and Agreement No. HR.00112090028, and the US Office of Naval Research (ONR) under grant N00014-17-1-2297.
36 | 
37 | Any opinions, findings and conclusions or recommendations expressed in this material are those of the author(s) and do not necessarily reflect the views of DARPA or NIWC Pacific, or ONR. 
38 | 
39 | Special thanks to [harrisonwl](https://github.com/harrisonwl "harrisonwl") for his feedback on this codebase
40 | 


--------------------------------------------------------------------------------
/Types.hs:
--------------------------------------------------------------------------------
  1 | {-|
  2 | Module      : PCode
  3 | Description : Ghidra P-Code language definitions
  4 | Copyright   : (c) Nico Naus, 2022
  5 | Maintainer  : niconaus@vt.edu
  6 | Stability   : experimental
  7 | This module defines the datatypes and translation of Ghidra P-Code
  8 | -}
  9 | module Types where
 10 | 
 11 | import qualified Data.Map as M
 12 | import Data.Word ( Word8 )
 13 | import qualified Data.ByteString as BS
 14 | 
 15 | -- Program in P-code
 16 | 
 17 | type PCode = M.Map Fname PBlocks
 18 | 
 19 | type PBlocks = M.Map Addr PBlock
 20 | 
 21 | -- Memory types
 22 | 
 23 | type State = (Mem,Regs,Vars)
 24 | 
 25 | type Mem = M.Map Addr Word8
 26 | -- Variables "return" and "last" are reserved and should not be used in PCode
 27 | type Vars = M.Map String [Word8]
 28 | type Regs = M.Map Addr Word8
 29 | 
 30 | -- Block address
 31 | type Addr = [Word8]
 32 | 
 33 | type Fname = [Word8] -- functions are identified by the starting address, since that is how they are called
 34 | 
 35 | type PBlock = [PInstr]
 36 | 
 37 | data PInstr = STORE VarNode VarNode VarNode | BRANCH VarNode
 38 |             | CBRANCH VarNode VarNode VarNode
 39 |             | BRANCHIND VarNode
 40 |             | RETURN VarNode (Maybe VarNode)
 41 |             | Do PCall | PCAss VarNode PCall | PAss VarNode POp deriving Show
 42 | 
 43 | data PCall = CALL VarNode [VarNode]
 44 |            | CALLIND VarNode [VarNode]
 45 |            | CALLOTHER VarNode [VarNode]
 46 |            -- EXTCALL is an artifical instruction, to encode which external function is called from this point
 47 |            | EXTCALL String deriving Show
 48 | 
 49 | data POp = COPY VarNode
 50 |          | LOAD VarNode VarNode
 51 |          | PIECE VarNode VarNode
 52 |          | SUBPIECE VarNode VarNode
 53 |          | POPCOUNT VarNode
 54 |          -- INTEGER OPERATIONS
 55 |          | INT_EQUAL VarNode VarNode
 56 |          | INT_NOTEQUAL VarNode VarNode
 57 |          | INT_LESS VarNode VarNode
 58 |          | INT_SLESS VarNode VarNode
 59 |          | INT_LESSEQUAL VarNode VarNode
 60 |          | INT_SLESSEQUAL VarNode VarNode
 61 |          | INT_ZEXT VarNode
 62 |          | INT_SEXT VarNode
 63 |          | INT_ADD VarNode VarNode
 64 |          | INT_SUB VarNode VarNode
 65 |          | INT_CARRY VarNode VarNode
 66 |          | INT_SCARRY VarNode VarNode
 67 |          | INT_SBORROW VarNode VarNode
 68 |          | INT_2COMP VarNode
 69 |          | INT_NEGATE VarNode
 70 |          | INT_XOR VarNode VarNode
 71 |          | INT_AND VarNode VarNode
 72 |          | INT_OR VarNode VarNode
 73 |          | INT_LEFT VarNode VarNode
 74 |          | INT_RIGHT VarNode VarNode
 75 |          | INT_SRIGHT VarNode VarNode
 76 |          | INT_MULT VarNode VarNode
 77 |          | INT_DIV VarNode VarNode
 78 |          | INT_REM VarNode VarNode
 79 |          | INT_SDIV VarNode VarNode
 80 |          | INT_SREM VarNode VarNode
 81 |          -- BOOLEAN OPERATIONS
 82 |          | BOOL_NEGATE VarNode
 83 |          | BOOL_XOR VarNode VarNode
 84 |          | BOOL_AND VarNode VarNode
 85 |          | BOOL_OR VarNode VarNode
 86 |          -- FLOATING POINT NUMBER OPERATIONS
 87 |          | FLOAT_EQUAL VarNode VarNode
 88 |          | FLOAT_NOTEQUAL VarNode VarNode
 89 |          | FLOAT_LESS VarNode VarNode
 90 |          | FLOAT_LESSEQUAL VarNode VarNode
 91 |          | FLOAT_ADD VarNode VarNode
 92 |          | FLOAT_SUB VarNode VarNode
 93 |          | FLOAT_MULT VarNode VarNode
 94 |          | FLOAT_DIV VarNode VarNode
 95 |          | FLOAT_NEG VarNode
 96 |          | FLOAT_ABS VarNode
 97 |          | FLOAT_SQRT VarNode
 98 |          | FLOAT_CEIL VarNode
 99 |          | FLOAT_FLOOR VarNode
100 |          | FLOAT_ROUND VarNode
101 |          | FLOAT_NAN VarNode
102 |          | INT2FLOAT VarNode
103 |          | FLOAT2FLOAT VarNode
104 |          -- OTHER OPERATIONS
105 |          | TRUNC VarNode
106 |          -- UNDOCUMENTED INSTRUCTIONS
107 |          -- | CALLOTHER VarNode [VarNode]-- I have no idea what this instruction does...
108 |          -- ADDITIONAL INSTRUCTIONS
109 |          | MULTIEQUAL [(VarNode,Addr)]
110 |          | INDIRECT VarNode VarNode
111 |          | PTRADD VarNode VarNode VarNode
112 |          | PTRSUB VarNode VarNode
113 |          | CAST VarNode deriving Show
114 | 
115 | data VarNode = Reg Addr Size
116 |              | Ram Addr Size
117 |              | Variable String Size
118 |              | Const [Word8] Size deriving Show  -- String is hex representation
119 | 
120 | type Size = Word8
121 | 
122 | -- COMMON OPERATIONS ON PCODE TYPES
123 | 
124 | vnSize :: VarNode -> Size
125 | vnSize (Ram _ s) = s
126 | vnSize (Reg _ s) = s
127 | vnSize (Const _ s) = s
128 | vnSize (Variable _ s) = s
129 | 
130 | sizeToInt :: Size -> Int
131 | sizeToInt = fromEnum
132 | 
133 | intToSize :: Int -> Size
134 | intToSize = toEnum
135 | 
136 | -- REGISTER MAPPING
137 | -- this mapping is established by experimental results
138 | showReg :: Addr -> Size -> String
139 | showReg [0,0,0,0,0,0,0,0] 8 = "RAX"
140 | showReg [0,0,0,0,0,0,0,0] 4 = "EAX"
141 | showReg [0,0,0,0,0,0,0,0] 2 = "AX"
142 | showReg [0,0,0,0,0,0,0,0] 1 = "AL"
143 | showReg [0,0,0,0,0,0,0,1] 1 = "AH"
144 | showReg [0,0,0,0,0,0,0,8] 8 = "RCX"
145 | showReg [0,0,0,0,0,0,0,8] 4 = "ECX"
146 | showReg [0,0,0,0,0,0,0,8] 2 = "CX"
147 | showReg [0,0,0,0,0,0,0,8] 1 = "BL"
148 | showReg [0,0,0,0,0,0,0,9] 1 = "BH"
149 | showReg [0,0,0,0,0,0,0,16] 8 = "RDX"
150 | showReg [0,0,0,0,0,0,0,16] 4 = "EDX"
151 | showReg [0,0,0,0,0,0,0,24] 8 = "RBX"
152 | showReg [0,0,0,0,0,0,0,32] 8 = "RSP"
153 | showReg [0,0,0,0,0,0,0,40] 8 = "RBP"
154 | showReg [0,0,0,0,0,0,0,40] 4 = "EBP"
155 | showReg [0,0,0,0,0,0,0,48] 8 = "RSI"
156 | showReg [0,0,0,0,0,0,0,48] 4 = "ESI"
157 | showReg [0,0,0,0,0,0,0,56] 8 = "RDI"
158 | showReg [0,0,0,0,0,0,0,56] 4 = "EDI"
159 | 
160 | showReg [0,0,0,0,0,0,0,128] 8 = "R8"
161 | showReg [0,0,0,0,0,0,0,136] 8 = "R9"
162 | showReg [0,0,0,0,0,0,0,144] 8 = "R10"
163 | -- showReg 152 8 = "R11"
164 | -- showReg 160 8 = "R12"
165 | -- showReg 168 8 = "R13"
166 | -- showReg 176 8 = "R14"
167 | -- showReg 176 4 = "R14D"
168 | -- showReg 184 8 = "R15"
169 | -- showReg 184 4 = "R15D"
170 | --
171 | -- showReg 512 1 = "CF"
172 | -- showReg 514 1 = "PF"
173 | showReg [0,0,0,0,0,0,2,6] 1 = "AF"
174 | showReg [0,0,0,0,0,0,2,8] 1 = "ZF"
175 | showReg [0,0,0,0,0,0,2,9] 1 = "SF"
176 | showReg [0,0,0,0,0,0,2,10] 1 = "TF"
177 | showReg [0,0,0,0,0,0,2,11] 1 = "IF"
178 | showReg [0,0,0,0,0,0,2,12] 1 = "DF"
179 | showReg [0,0,0,0,0,0,2,13] 1 = "OF"
180 | --
181 | -- showReg 1200 8 = "XMM0_Qa"
182 | 
183 | showReg a s = "UnmatchedReg " ++ show a ++ ":" ++ show s
184 | 
185 | 
186 | -- PRETTY PRINTER for programs
187 | 
188 | prettyPF :: PCode -> String
189 | prettyPF funs = concatMap (\(fl,blocks) -> "Function " ++ show fl ++ "\n" ++ prettyPBs blocks) (M.toList funs)
190 | 
191 | prettyPBs :: PBlocks -> String
192 | prettyPBs blocks = concatMap (\(l,block) -> "   " ++ show l ++ "\n" ++ prettyPB block) (M.toList blocks)
193 | 
194 | prettyPB :: PBlock -> String
195 | prettyPB [] = ""
196 | prettyPB (x:xs) = "      " ++ show x ++ "\n" ++ prettyPB xs
197 | 


--------------------------------------------------------------------------------
/WordString.hs:
--------------------------------------------------------------------------------
 1 | {-|
 2 | Module      : WordString
 3 | Description : Word8 string helper functions
 4 | Copyright   : (c) Nico Naus, 2022
 5 | Maintainer  : niconaus@vt.edu
 6 | Stability   : experimental
 7 | This module defines several helper functions to perform operations on word8 strings
 8 | -}
 9 | module WordString where
10 | 
11 | import Data.Binary ( Word8, Word16, Word32, Word64 )
12 | import Data.ByteString.Builder
13 |     ( toLazyByteString, word16BE, word32BE, word64BE )
14 | import qualified Data.ByteString.Lazy as BS
15 | import Data.Binary.Get
16 |     ( getWord16be, getWord32be, getWord64be, runGet )
17 | import Data.Bits ( Bits(testBit) )
18 | 
19 | -- Unsigned addition
20 | uAdd :: [Word8] -> [Word8] -> [Word8]
21 | uAdd w1 w2 | length w1 == 1 = encodeWord8  $ decodeWord8 w1  + decodeWord8 w2
22 |            | length w1 == 2 = encodeWord16 $ decodeWord16 w1 + decodeWord16 w2
23 |            | length w1 == 4 = encodeWord32 $ decodeWord32 w1 + decodeWord32 w2
24 |            | length w1 == 8 = encodeWord64 $ decodeWord64 w1 + decodeWord64 w2
25 | uAdd _ _ = error "addition not defined for this length"
26 | 
27 | 
28 | 
29 | --Sign extension
30 | signExtend :: Word8 -> [Word8] -> [Word8]
31 | signExtend i wx | length wx > fromEnum i = error "sign extend cannot shorten word"
32 |                 | otherwise = concat $ replicate (fromEnum i-length wx) (if getSign wx then [255] else [0]) ++ [wx]
33 | 
34 | zeroExtend :: Word8 -> [Word8] -> [Word8]
35 | zeroExtend i wx | length wx > fromEnum i = error "unsigned extend cannot shorten word"
36 |                 | otherwise = replicate (fromEnum i-length wx) 0 ++ wx
37 | 
38 | getSign :: [Word8] -> Bool
39 | getSign xs = testBit (last xs) 7
40 | 
41 | -- From and to Word
42 | 
43 | encodeWord8 :: Word8 -> [Word8]
44 | encodeWord8 x = [x]
45 | 
46 | decodeWord8 :: [Word8] -> Word8
47 | decodeWord8 [x] = x
48 | decodeWord8 xs = error $ "Bytestring too long or short to be a word8: " ++ show xs
49 | 
50 | encodeWord16 :: Word16 -> [Word8]
51 | encodeWord16 = BS.unpack . toLazyByteString . word16BE
52 | 
53 | decodeWord16 :: [Word8] -> Word16
54 | decodeWord16 xs = runGet getWord16be (BS.pack xs)
55 | 
56 | encodeWord32 :: Word32 -> [Word8]
57 | encodeWord32 = BS.unpack . toLazyByteString . word32BE
58 | 
59 | decodeWord32 :: [Word8] -> Word32
60 | decodeWord32 xs = runGet getWord32be (BS.pack xs)
61 | 
62 | encodeWord64 :: Word64 -> [Word8]
63 | encodeWord64 = BS.unpack . toLazyByteString . word64BE
64 | 
65 | decodeWord64 :: [Word8] -> Word64
66 | decodeWord64 x = runGet getWord64be (BS.pack x)
67 | 


--------------------------------------------------------------------------------