├── .gitignore ├── AppList.hs ├── Eval.hs ├── Expr.hs ├── Instances.hs ├── Main.hs ├── Memlog.hs ├── Options.hs ├── Pretty.hs ├── README.md ├── Setup.lhs ├── atoi.c ├── euclid.c ├── invsqrt.c ├── symbolic-trace.cabal ├── twentytwo.c └── types ├── Data └── RESET │ ├── Expr.hs │ ├── Memlog.hs │ ├── Message.hs │ └── Types.hs └── reset-types.cabal /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | cabal-dev 3 | *.o 4 | *.hi 5 | *.chi 6 | *.chs.h 7 | .virthualenv 8 | *.swp 9 | /Eval 10 | -------------------------------------------------------------------------------- /AppList.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts #-} 2 | module AppList(AppList, mkAppList, singleAppList, unAppList, (+:), suffix) where 3 | 4 | newtype AppList a = MkAppList [a] 5 | deriving (Eq, Ord) 6 | 7 | instance Show a => Show (AppList a) where 8 | show (MkAppList l) = "AppList " ++ show (reverse l) 9 | 10 | (+:) :: AppList a -> a -> AppList a 11 | MkAppList l +: x = MkAppList (x : l) 12 | infixr 5 +: 13 | 14 | mkAppList :: AppList a 15 | mkAppList = MkAppList [] 16 | 17 | singleAppList :: a -> AppList a 18 | singleAppList x = MkAppList [x] 19 | 20 | unAppList :: AppList a -> [a] 21 | unAppList (MkAppList l) = reverse l 22 | 23 | suffix :: Int -> AppList a -> [a] 24 | suffix n (MkAppList l) = reverse $ take n l 25 | -------------------------------------------------------------------------------- /Eval.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleInstances, FlexibleContexts, UndecidableInstances, MultiParamTypeClasses, StandaloneDeriving #-} 2 | -- Symbolic evaluator for basic blocks 3 | 4 | module Eval(Symbolic(..), SymbolicState(..), noSymbolicState, runBlocks, messages, messagesByIP, showWarning) where 5 | 6 | import Data.LLVM.Types 7 | import qualified Data.List as L 8 | import qualified Data.Map as M 9 | import qualified Data.Map.Strict as MS 10 | import qualified Data.Set as S 11 | import qualified Data.Text as T 12 | import qualified Data.Bits as Bits 13 | import Data.Word 14 | import Data.Maybe 15 | import Debug.Trace 16 | import Control.Applicative 17 | import Control.Monad 18 | import Control.Monad.State.Lazy 19 | import Control.Monad.Trans.Class(lift, MonadTrans) 20 | import Control.Monad.Trans.Maybe 21 | -- For progress bar tracking 22 | import System.IO.Unsafe(unsafePerformIO) 23 | import Text.Printf(printf) 24 | 25 | import Data.RESET.Types 26 | import AppList 27 | import Expr 28 | import Memlog 29 | import Options 30 | import Pretty 31 | 32 | data LocInfo = LocInfo{ 33 | locExpr :: Expr, 34 | -- Guest instruction address where loc originated 35 | locOrigin :: Maybe Word64 36 | } deriving (Eq, Ord, Show) 37 | 38 | noLocInfo :: LocInfo 39 | noLocInfo = LocInfo{ 40 | locExpr = IrrelevantExpr, 41 | locOrigin = Nothing 42 | } 43 | 44 | deriving instance (Show a) => Show (Message a) 45 | 46 | -- Representation of our [partial] knowledge of machine state. 47 | type Info = M.Map Loc LocInfo 48 | data SymbolicState = SymbolicState { 49 | symbolicInfo :: Info, 50 | symbolicPreviousBlock :: Maybe BasicBlock, 51 | symbolicFunction :: Function, 52 | -- Map of names for free variables: loads from uninitialized memory 53 | symbolicVarNameMap :: M.Map (ExprT, AddrEntry) String, 54 | symbolicCurrentIP :: Maybe Word64, 55 | symbolicMessages :: AppList (Maybe Word64, Message Expr), 56 | symbolicMessagesByIP :: MS.Map Word64 (AppList (Message Expr)), 57 | symbolicSkipRest :: Bool, 58 | symbolicRetVal :: Maybe Expr, 59 | symbolicInstCount :: Integer, 60 | symbolicInstTotal :: Integer, 61 | symbolicOptions :: Options 62 | } deriving Show 63 | 64 | messages :: SymbolicState -> [(Maybe Word64, Message Expr)] 65 | messages = unAppList . symbolicMessages 66 | 67 | messagesByIP :: Word64 -> SymbolicState -> [Message Expr] 68 | messagesByIP ip SymbolicState{ symbolicMessagesByIP = msgMap } 69 | = unAppList $ MS.findWithDefault mkAppList ip msgMap 70 | 71 | -- Symbolic is our fundamental monad: it holds state about control flow and 72 | -- holds our knowledge of machine state. 73 | type Symbolic = State SymbolicState 74 | 75 | class (MonadState SymbolicState m, Functor m) => Symbolicish m where { } 76 | instance (MonadState SymbolicState m, Functor m) => Symbolicish m 77 | 78 | -- Atomic operations inside Symbolic. 79 | getInfo :: Symbolicish m => m Info 80 | getInfo = symbolicInfo <$> get 81 | getPreviousBlock :: Symbolicish m => m (Maybe BasicBlock) 82 | getPreviousBlock = symbolicPreviousBlock <$> get 83 | getCurrentFunction :: Symbolicish m => m Function 84 | getCurrentFunction = symbolicFunction <$> get 85 | getCurrentIP :: Symbolicish m => m (Maybe Word64) 86 | getCurrentIP = symbolicCurrentIP <$> get 87 | getSkipRest :: Symbolicish m => m Bool 88 | getSkipRest = symbolicSkipRest <$> get 89 | getRetVal :: Symbolicish m => m (Maybe Expr) 90 | getRetVal = symbolicRetVal <$> get 91 | putInfo :: Symbolicish m => Info -> m () 92 | putInfo info = modify (\s -> s{ symbolicInfo = info }) 93 | putPreviousBlock :: Symbolicish m => Maybe BasicBlock -> m () 94 | putPreviousBlock block = modify (\s -> s{ symbolicPreviousBlock = block }) 95 | putCurrentFunction :: Symbolicish m => Function -> m () 96 | putCurrentFunction f = modify (\s -> s{ symbolicFunction = f }) 97 | putCurrentIP :: Symbolicish m => Maybe Word64 -> m () 98 | putCurrentIP newIP = modify (\s -> s{ symbolicCurrentIP = newIP }) 99 | putRetVal retVal = modify (\s -> s{ symbolicRetVal = retVal }) 100 | 101 | getOption :: Symbolicish m => (Options -> a) -> m a 102 | getOption projection = projection <$> symbolicOptions <$> get 103 | 104 | whenDebugIP :: Symbolicish m => m () -> m () 105 | whenDebugIP action = do 106 | currentIP <- getCurrentIP 107 | debugIP <- getOption optDebugIP 108 | case (currentIP, debugIP) of 109 | (Just ip, Just ip') 110 | | ip == ip' -> action 111 | _ -> return () 112 | 113 | skipRest :: Symbolicish m => m () 114 | skipRest = modify (\s -> s{ symbolicSkipRest = True }) 115 | clearSkipRest :: Symbolicish m => m () 116 | clearSkipRest = modify (\s -> s{ symbolicSkipRest = False }) 117 | 118 | printIP :: Maybe Word64 -> String 119 | printIP (Just realIP) = printf "%x" realIP 120 | printIP Nothing = "unkown" 121 | 122 | getStringIP :: Symbolicish m => m String 123 | getStringIP = printIP <$> getCurrentIP 124 | 125 | generateName :: Symbolicish m => ExprT -> AddrEntry -> m (Maybe String) 126 | generateName typ addr@AddrEntry{ addrType = MAddr, addrVal = val } = do 127 | varNameMap <- getVarNameMap 128 | case M.lookup (typ, addr) varNameMap of 129 | Just name -> return $ Just name 130 | Nothing -> do 131 | let newName = printf "%s_%04x_%d" (pretty typ) (val `rem` (2 ^ 12)) (M.size varNameMap) 132 | putVarNameMap $ M.insert (typ, addr) newName varNameMap 133 | return $ Just newName 134 | where getVarNameMap = symbolicVarNameMap <$> get 135 | putVarNameMap m = modify (\s -> s{ symbolicVarNameMap = m }) 136 | generateName _ _ = return Nothing 137 | 138 | whenM :: Monad m => m Bool -> m () -> m () 139 | whenM cond action = cond >>= (flip when) action 140 | 141 | inUserCode :: Symbolicish m => m Bool 142 | inUserCode = do 143 | maybeCurrentIP <- getCurrentIP 144 | return $ case maybeCurrentIP of 145 | Just currentIP 146 | | currentIP >= 2 ^ 32 -> False 147 | _ -> True 148 | 149 | message :: Symbolicish m => Message Expr -> m () 150 | message msg = do 151 | --whenDebugIP $ trace (printf "\t\tMESSAGE: %s" $ show msg) $ return () 152 | maybeIP <- getCurrentIP 153 | modify (\s -> s{ symbolicMessages = symbolicMessages s +: (maybeIP, msg) }) 154 | case maybeIP of 155 | Just ip -> do 156 | modify (\s -> s{ 157 | symbolicMessagesByIP = MS.alter addMsg ip $ symbolicMessagesByIP s 158 | }) 159 | Nothing -> return () 160 | where addMsg (Just msgs) = Just $ msgs +: msg 161 | addMsg Nothing = Just $ singleAppList msg 162 | 163 | warning :: Symbolicish m => String -> m a 164 | warning warn = do 165 | ip <- getCurrentIP 166 | fail $ showWarning (ip, warn) 167 | 168 | showWarning :: (Maybe Word64, String) -> String 169 | showWarning (ip, s) = printf "(%s) %s" (printIP ip) s 170 | 171 | locInfoInsert :: Symbolicish m => Loc -> LocInfo -> m () 172 | locInfoInsert key locInfo = do 173 | info <- getInfo 174 | putInfo $ M.insert key locInfo info 175 | exprInsert :: Symbolicish m => Loc -> Expr -> m () 176 | exprInsert key expr = do 177 | currentIP <- getCurrentIP 178 | locInfoInsert key LocInfo{ locExpr = expr, locOrigin = currentIP } 179 | exprFindInfo :: Symbolicish m => Expr -> Loc -> m Expr 180 | exprFindInfo def key = locExpr <$> M.findWithDefault defLocInfo key <$> getInfo 181 | where defLocInfo = noLocInfo{ locExpr = def } 182 | 183 | noSymbolicState :: SymbolicState 184 | noSymbolicState = SymbolicState{ 185 | symbolicInfo = M.empty, 186 | symbolicPreviousBlock = Nothing, 187 | symbolicFunction = error "No function.", 188 | symbolicVarNameMap = M.empty, 189 | symbolicCurrentIP = Nothing, 190 | symbolicMessages = mkAppList, 191 | symbolicMessagesByIP = M.empty, 192 | symbolicSkipRest = False, 193 | symbolicRetVal = Nothing, 194 | symbolicInstCount = 0, 195 | symbolicInstTotal = error "No inst total", 196 | symbolicOptions = defaultOptions 197 | } 198 | 199 | valueAt :: Symbolicish m => Loc -> m Expr 200 | valueAt loc = exprFindInfo (InputExpr Int64T loc) loc 201 | 202 | maybeToM :: (Monad m) => Maybe a -> MaybeT m a 203 | maybeToM (Just x) = return x 204 | maybeToM (Nothing) = fail "" 205 | 206 | identifierToExpr :: Identifier -> Symbolic Expr 207 | identifierToExpr name = do 208 | func <- getCurrentFunction 209 | value <- valueAt (idLoc func name) 210 | case value of 211 | IrrelevantExpr -> return IrrelevantExpr -- HACK!!! figure out why this is happening 212 | e -> return e 213 | 214 | valueToExpr :: Value -> Symbolic Expr 215 | valueToExpr (ConstantC UndefValue{}) = return UndefinedExpr 216 | valueToExpr (ConstantC (ConstantFP _ _ value)) = return $ FLitExpr value 217 | valueToExpr (ConstantC (ConstantInt _ _ value)) = return $ ILitExpr value 218 | valueToExpr (ConstantC ConstantPointerNull{}) = return $ ILitExpr 0 219 | valueToExpr (ConstantC (ConstantValue{ constantInstruction = inst })) 220 | = fromMaybe (error "non-expr inst in valueToExpr" ) $ instToExpr (inst, Nothing) 221 | valueToExpr (InstructionC inst) = do 222 | name <- case instructionName inst of 223 | Just n -> return n 224 | Nothing -> warning "No name for inst" 225 | identifierToExpr name 226 | valueToExpr (ArgumentC (Argument{ argumentName = name, 227 | argumentType = argType })) = do 228 | func <- getCurrentFunction 229 | identifierToExpr name 230 | valueToExpr (GlobalVariableC GlobalVariable{ globalVariableName = name, 231 | globalVariableType = varType }) = do 232 | func <- getCurrentFunction 233 | return $ InputExpr (typeToExprT varType) (idLoc func name) 234 | valueToExpr (ExternalValueC ExternalValue{ externalValueName = name, 235 | externalValueType = valType }) = do 236 | func <- getCurrentFunction 237 | return $ InputExpr (typeToExprT valType) (idLoc func name) 238 | valueToExpr val = warning ("Couldn't find expr for " ++ show val) 239 | 240 | lookupValue :: Value -> Symbolic Expr 241 | lookupValue val = do 242 | expr <- valueToExpr val 243 | loc <- case expr of 244 | InputExpr _ loc' -> return loc' 245 | _ -> warning "Expr not an InputExpr!" 246 | valueAt loc 247 | 248 | -- Decide whether or not to tell the user about a load or a store. 249 | interestingOp :: Expr -> AddrEntry -> Bool 250 | interestingOp _ AddrEntry{ addrFlag = IrrelevantFlag } = False 251 | interestingOp _ AddrEntry{ addrType = GReg, addrVal = reg } 252 | | reg >= 16 = False 253 | interestingOp _ _ = True 254 | 255 | findIncomingValue :: BasicBlock -> [(Value, Value)] -> Value 256 | findIncomingValue prevBlock valList 257 | = pairListFind test (error err) $ map swap valList 258 | where err = printf "Couldn't find block in list:\n%s" (show valList) 259 | swap (a, b) = (b, a) 260 | test (BasicBlockC block) = block == prevBlock 261 | test _ = False 262 | 263 | typeBytes :: Type -> Integer 264 | typeBytes (TypePointer _ _) = 8 265 | typeBytes (TypeInteger bits) = fromIntegral bits `quot` 8 266 | typeBytes (TypeArray count t) = fromIntegral count * typeBytes t 267 | typeBytes (TypeStruct _ ts _) = sum $ map typeBytes ts 268 | typeBytes t = error $ printf "Unsupported type %s" (show t) 269 | 270 | modifyAt :: Int -> a -> [a] -> [a] 271 | modifyAt 0 v (_ : xs) = v : xs 272 | modifyAt n v (x : xs) = x : modifyAt (n - 1) v xs 273 | 274 | binaryInstToExpr :: (ExprT -> Expr -> Expr -> Expr) -> Instruction -> Maybe (Symbolic Expr) 275 | binaryInstToExpr constructor inst = Just $ constructor (exprTOfInst inst) 276 | <$> valueToExpr (binaryLhs inst) <*> valueToExpr (binaryRhs inst) 277 | 278 | castInstToExpr :: (ExprT -> Expr -> Expr) -> Instruction -> Maybe (Symbolic Expr) 279 | castInstToExpr constructor inst 280 | = Just $ constructor (exprTOfInst inst) <$> valueToExpr (castedValue inst) 281 | 282 | instToExpr :: (Instruction, Maybe MemlogOp) -> Maybe (Symbolic Expr) 283 | instToExpr (inst@AddInst{}, _) = binaryInstToExpr AddExpr inst 284 | instToExpr (inst@SubInst{}, _) = binaryInstToExpr SubExpr inst 285 | instToExpr (inst@MulInst{}, _) = binaryInstToExpr MulExpr inst 286 | instToExpr (inst@DivInst{}, _) = binaryInstToExpr DivExpr inst 287 | instToExpr (inst@RemInst{}, _) = binaryInstToExpr RemExpr inst 288 | instToExpr (inst@ShlInst{}, _) = binaryInstToExpr ShlExpr inst 289 | instToExpr (inst@LshrInst{}, _) = binaryInstToExpr LshrExpr inst 290 | instToExpr (inst@AshrInst{}, _) = binaryInstToExpr AshrExpr inst 291 | instToExpr (inst@AndInst{}, _) = binaryInstToExpr AndExpr inst 292 | instToExpr (inst@OrInst{}, _) = binaryInstToExpr OrExpr inst 293 | instToExpr (inst@XorInst{}, _) = binaryInstToExpr XorExpr inst 294 | instToExpr (inst@TruncInst{}, _) = castInstToExpr TruncExpr inst 295 | instToExpr (inst@ZExtInst{}, _) = castInstToExpr ZExtExpr inst 296 | instToExpr (inst@SExtInst{}, _) = castInstToExpr SExtExpr inst 297 | instToExpr (inst@FPTruncInst{}, _) = castInstToExpr FPTruncExpr inst 298 | instToExpr (inst@FPExtInst{}, _) = castInstToExpr FPExtExpr inst 299 | instToExpr (inst@FPToSIInst{}, _) = castInstToExpr FPToSIExpr inst 300 | instToExpr (inst@FPToUIInst{}, _) = castInstToExpr FPToUIExpr inst 301 | instToExpr (inst@SIToFPInst{}, _) = castInstToExpr SIToFPExpr inst 302 | instToExpr (inst@UIToFPInst{}, _) = castInstToExpr UIToFPExpr inst 303 | instToExpr (inst@PtrToIntInst{}, _) = castInstToExpr PtrToIntExpr inst 304 | instToExpr (inst@IntToPtrInst{}, _) = castInstToExpr IntToPtrExpr inst 305 | instToExpr (inst@BitcastInst{}, _) = castInstToExpr BitcastExpr inst 306 | instToExpr (PhiNode{ phiIncomingValues = valList }, _) = Just $ do 307 | maybePrevBlock <- getPreviousBlock 308 | let prevBlock = fromMaybe (error "No previous block!") maybePrevBlock 309 | valueToExpr (findIncomingValue prevBlock valList) 310 | instToExpr (GetElementPtrInst{}, _) = Just $ return GEPExpr 311 | instToExpr (inst@CallInst{ callFunction = ExternalFunctionC func, 312 | callArguments = argValuePairs }, _) 313 | | externalIsIntrinsic func = Just $ do 314 | args <- mapM valueToExpr $ map fst argValuePairs 315 | return $ IntrinsicExpr (exprTOfInst inst) func args 316 | instToExpr (inst@InsertValueInst{ insertValueAggregate = aggr, 317 | insertValueValue = val, 318 | insertValueIndices = [idx] }, _) = Just $ do 319 | aggrExpr <- valueToExpr aggr 320 | insertExpr <- valueToExpr val 321 | let typ = exprTOfInst inst 322 | case aggrExpr of 323 | UndefinedExpr -> case typ of 324 | StructT ts -> return $ StructExpr typ $ 325 | modifyAt idx insertExpr $ replicate (length ts) UndefinedExpr 326 | _ -> warning "Bad result type!" 327 | StructExpr t es -> return $ StructExpr t $ modifyAt idx insertExpr es 328 | _ -> warning (printf "Unrecognized expr at inst '%s'" (show inst)) 329 | instToExpr (inst@ExtractValueInst{ extractValueAggregate = aggr, 330 | extractValueIndices = [idx] }, _) = Just $ do 331 | aggrExpr <- valueToExpr aggr 332 | return $ ExtractExpr (exprTOfInst inst) idx aggrExpr 333 | instToExpr (inst@ICmpInst{ cmpPredicate = pred, 334 | cmpV1 = val1, 335 | cmpV2 = val2 }, _) 336 | = Just $ ICmpExpr pred <$> valueToExpr val1 <*> valueToExpr val2 337 | instToExpr (inst@LoadInst{ loadAddress = origin }, Just (MemoryOp LoadOp addr)) 338 | = Just $ loadToExpr (exprTOfInst inst) origin addr 339 | instToExpr (inst@SelectInst{ selectTrueValue = trueVal, 340 | selectFalseValue = falseVal }, 341 | Just (SelectOp selection)) 342 | = Just $ valueToExpr (if selection == 0 then trueVal else falseVal) 343 | instToExpr _ = Nothing 344 | 345 | loadToExpr :: ExprT -> Value -> AddrEntry -> Symbolic Expr 346 | loadToExpr typ originVal addr = do 347 | info <- getInfo 348 | expr <- case locExpr <$> M.lookup (MemLoc addr) info of 349 | Just expr -> return expr 350 | Nothing -> LoadExpr typ addr <$> generateName typ addr 351 | stringIP <- getStringIP 352 | origin <- deIntToPtr <$> valueToExpr originVal 353 | when (interestingOp expr addr) $ 354 | message $ MemoryMessage LoadOp (pretty addr) expr (Just origin) 355 | return expr 356 | 357 | deIntToPtr :: Expr -> Expr 358 | deIntToPtr (IntToPtrExpr _ e) = e 359 | deIntToPtr e = e 360 | 361 | storeUpdate :: Value -> Value -> AddrEntry -> Symbolic () 362 | storeUpdate val originVal addr = do 363 | value <- valueToExpr val 364 | origin <- deIntToPtr <$> valueToExpr originVal 365 | when (interestingOp value addr) $ 366 | message $ MemoryMessage StoreOp (pretty addr) value (Just origin) 367 | exprInsert (MemLoc addr) value 368 | 369 | exprUpdate :: Instruction -> Expr -> Symbolic () 370 | exprUpdate inst expr = do 371 | let id = fromMaybe (error "No instruction name") $ instructionName inst 372 | func <- getCurrentFunction 373 | exprInsert (idLoc func id) expr 374 | 375 | otherUpdate :: (Instruction, Maybe MemlogOp) -> Symbolic () 376 | otherUpdate (AllocaInst{}, _) = return () 377 | otherUpdate (inst@CallInst{ callFunction = ExternalFunctionC func }, _) 378 | | T.pack "log_dynval" == name = return () 379 | | T.pack "rr_" `T.isPrefixOf` name = return () 380 | where name = identifierContent $ externalFunctionName func 381 | otherUpdate (inst@CallInst{ callArguments = argsWithAttrs, 382 | callFunction = ExternalFunctionC func }, 383 | Just (MemoryOp op addr)) = do 384 | let args = map fst argsWithAttrs 385 | case (op, args) of 386 | (LoadOp, val : _) 387 | | T.pack "__ld" `T.isPrefixOf` name -> 388 | loadToExpr (exprTOfInst inst) val addr >>= exprUpdate inst 389 | (StoreOp, val : loc : _) 390 | | T.pack "__st" `T.isPrefixOf` name -> storeUpdate val loc addr 391 | _ -> error $ printf "Bad call load/store: %s" (show inst) 392 | where name = identifierContent $ externalFunctionName func 393 | otherUpdate (inst@CallInst{ callFunction = FunctionC func }, 394 | Just (HelperFuncOp _)) 395 | | T.pack "cpu_x86_update_cr3_llvm" == name = return () -- skip CR3 updates 396 | where name = identifierContent $ functionName func 397 | otherUpdate (inst@CallInst{ callArguments = argVals, 398 | callFunction = FunctionC func }, 399 | Just (HelperFuncOp memlog)) = do 400 | -- Call stack abstraction; store current function so we can restore it later 401 | currentFunc <- getCurrentFunction 402 | -- Pass arguments through 403 | argExprs <- mapM (valueToExpr . fst) argVals 404 | let argNames = map argumentName $ functionParameters func 405 | let locs = map (idLoc func) argNames 406 | let argLocInfos = [ noLocInfo{ locExpr = e } | e <- argExprs ] 407 | zipWithM locInfoInsert locs argLocInfos 408 | -- Run and grab return value 409 | maybeRetVal <- runBlocks memlog 410 | -- Understand return value 411 | runMaybeT $ optional $ do 412 | val <- maybeToM $ maybeRetVal 413 | id <- maybeToM $ instructionName inst 414 | currentIP <- getCurrentIP 415 | let locInfo = noLocInfo{ locExpr = val, locOrigin = currentIP } 416 | locInfoInsert (idLoc currentFunc id) locInfo 417 | -- Restore old function 418 | putCurrentFunction currentFunc 419 | otherUpdate instOp@(inst, _) 420 | | isJust maybeMexpr = fromJust maybeMexpr >>= exprUpdate inst 421 | where maybeMexpr = instToExpr instOp 422 | otherUpdate (inst@StoreInst{ storeIsVolatile = False, 423 | storeValue = val, 424 | storeAddress = addrValue }, 425 | (Just (MemoryOp StoreOp addr))) = storeUpdate val addrValue addr 426 | -- This will trigger twice with each IP update, but that's okay because the 427 | -- second one is the one we want. 428 | otherUpdate (StoreInst{ storeIsVolatile = True, 429 | storeValue = val }, _) = do 430 | ip <- case valueContent val of 431 | ConstantC (ConstantInt{ constantIntValue = ipVal }) -> return ipVal 432 | _ -> warning "Failed to update IP" 433 | putCurrentIP $ Just $ fromIntegral $ ip 434 | otherUpdate (RetInst{ retInstValue = Just val }, _) = do 435 | expr <- valueToExpr val 436 | putRetVal $ Just expr 437 | otherUpdate (RetInst{}, _) = return () 438 | otherUpdate (UnconditionalBranchInst{}, _) 439 | = message UnconditionalBranchMessage 440 | otherUpdate (BranchInst{ branchTrueTarget = trueTarget, 441 | branchFalseTarget = falseTarget, 442 | branchCondition = cond }, 443 | Just (BranchOp idx)) = void $ do 444 | condExpr <- valueToExpr cond 445 | message $ BranchMessage condExpr (idx == 0) 446 | otherUpdate (SwitchInst{}, _) = return () 447 | otherUpdate (CallInst{ callFunction = ExternalFunctionC func, 448 | callAttrs = attrs }, _) 449 | | FANoReturn `elem` externalFunctionAttrs func = skipRest 450 | | FANoReturn `elem` attrs = skipRest 451 | | T.pack "cpu_loop_exit" == identifierContent (externalFunctionName func) 452 | = skipRest 453 | -- FIXME: Implement a more reasonable model for "real" memcpy/memset 454 | -- (i.e. those that are for arrays, not structs) 455 | otherUpdate (CallInst{ callFunction = ExternalFunctionC func, 456 | callArguments = [_, (value, _), (lenValue, _), _, _] }, 457 | Just (MemsetOp addr)) = do 458 | val <- valueToExpr value 459 | lenExpr <- valueToExpr lenValue 460 | len <- case lenExpr of 461 | ILitExpr len' -> return len' 462 | _ -> warning "Can't extract memset length" 463 | currentExpr <- valueAt (MemLoc addr) 464 | case currentExpr of 465 | StructExpr{} -> warning "Zeroing struct" 466 | _ 467 | | len > 16 -> warning "Array memset" 468 | | otherwise -> return () 469 | exprInsert (MemLoc addr) val 470 | otherUpdate (CallInst{ callFunction = ExternalFunctionC func, 471 | callArguments = [_, _, (lenValue, _), _, _] }, 472 | Just (MemcpyOp src dest)) = do 473 | lenExpr <- valueToExpr lenValue 474 | len <- case lenExpr of 475 | ILitExpr len' -> return len' 476 | _ -> warning "Can't extract memcpy length" 477 | srcExpr <- valueAt $ MemLoc src 478 | case srcExpr of 479 | StructExpr{} -> return () 480 | _ 481 | | len > 16 -> warning "Array memcpy" 482 | | otherwise -> return () 483 | exprInsert (MemLoc dest) srcExpr 484 | otherUpdate (UnreachableInst{}, _) = warning "UNREACHABLE INSTRUCTION!" 485 | otherUpdate instOp = warnInstOp instOp 486 | 487 | warnInstOp :: Symbolicish m => (Instruction, Maybe MemlogOp) -> m () 488 | warnInstOp (inst, op) 489 | = warning $ printf "Couldn't process inst '%s' with op %s" 490 | (show inst) (show op) 491 | 492 | traceInstOp :: (Instruction, Maybe MemlogOp) -> a -> a 493 | traceInstOp (inst, Just (HelperFuncOp _)) 494 | = trace $ printf "%s\n=============\nHELPER FUNCTION:" (show inst) 495 | traceInstOp (inst, Just op) = trace $ printf "%s\n\t\t%s" (show inst) (show op) 496 | traceInstOp (inst, Nothing) = traceShow inst 497 | 498 | progress :: Monad m => Float -> m () 499 | progress f = seq (unsafePerformIO $ putStr $ printf "\r%.2f%%" $ 100 * f) $ return () 500 | 501 | countInst :: Symbolic () 502 | countInst = do 503 | count <- symbolicInstCount <$> get 504 | total <- symbolicInstTotal <$> get 505 | when (count `rem` (total `quot` 10000) == 0) $ 506 | progress $ fromIntegral count / fromIntegral total 507 | modify (\s -> s{ symbolicInstCount = count + 1 }) 508 | 509 | updateInfo :: (Instruction, Maybe MemlogOp) -> Symbolic () 510 | updateInfo instOp = do 511 | --whenDebugIP $ traceInstOp instOp $ return () 512 | skip <- getSkipRest 513 | unless skip $ void $ countInst >> otherUpdate instOp 514 | 515 | runBlock :: (BasicBlock, InstOpList) -> Symbolic (Maybe Expr) 516 | runBlock (block, instOpList) = do 517 | putCurrentFunction $ basicBlockFunction block 518 | putRetVal Nothing 519 | clearSkipRest 520 | mapM updateInfo instOpList 521 | putPreviousBlock $ Just block 522 | getRetVal 523 | 524 | isMemLoc :: Loc -> Bool 525 | isMemLoc MemLoc{} = True 526 | isMemLoc _ = False 527 | 528 | runBlocks :: MemlogList -> Symbolic (Maybe Expr) 529 | runBlocks blocks = do 530 | retVals <- mapM runBlock blocks 531 | return $ last retVals 532 | 533 | showInfo :: Info -> String 534 | showInfo = unlines . map showEach . filter doShow . M.toList 535 | where showEach (key, val) = printf "%s %s-> %s" (pretty key) origin (show (locExpr val)) 536 | where origin = fromMaybe "" $ printf "(from %x) " <$> locOrigin val 537 | doShow (IdLoc{}, LocInfo{ locExpr = expr }) = doShowExpr expr 538 | doShow (MemLoc{}, LocInfo{ locExpr = IrrelevantExpr }) = False 539 | doShow _ = True 540 | doShowExpr IrrelevantExpr = False 541 | doShowExpr ILitExpr{} = False 542 | doShowExpr LoadExpr{} = False 543 | doShowExpr InputExpr{} = True 544 | doShowExpr expr = True 545 | -------------------------------------------------------------------------------- /Expr.hs: -------------------------------------------------------------------------------- 1 | module Expr(simplify, exprTOfInst, typeToExprT, idLoc, renderExpr) where 2 | 3 | import Debug.Trace 4 | 5 | import Control.Applicative((<$>)) 6 | import Control.Monad.Reader 7 | import Control.Monad.State 8 | import Control.Monad.RWS(RWS, evalRWS) 9 | import Data.Bits((.&.), (.|.), xor, shiftL, shiftR) 10 | import Data.LLVM.Types 11 | import Data.Word(Word8, Word16, Word32, Word64) 12 | import Text.PrettyPrint 13 | import Text.Printf(printf) 14 | 15 | import Data.RESET.Types 16 | import Memlog(AddrEntry(..), AddrEntryType(..)) 17 | import Pretty(Pretty, pretty) 18 | 19 | import qualified Data.List as L 20 | 21 | idLoc :: Function -> Identifier -> Loc 22 | idLoc f id = IdLoc (functionName f) id 23 | 24 | instance Show Loc where 25 | show (IdLoc f id) = printf "IdLoc %s %s" (show f) (show id) 26 | show (MemLoc addr) = printf "MemLoc (%s)" (show addr) 27 | 28 | instance Pretty Loc where 29 | pretty (IdLoc f id) = printf "%s: %s" (show f) (show id) 30 | pretty (MemLoc addr) = pretty addr 31 | 32 | instance Pretty ExprT where 33 | pretty VoidT = "void" 34 | pretty PtrT = "ptr" 35 | pretty Int1T = "i1" 36 | pretty Int8T = "i8" 37 | pretty Int16T = "i16" 38 | pretty Int32T = "i32" 39 | pretty Int64T = "i64" 40 | pretty FloatT = "flt" 41 | pretty DoubleT = "dbl" 42 | 43 | instance Pretty CmpPredicate where 44 | pretty ICmpEq = "==" 45 | pretty ICmpNe = "!=" 46 | pretty ICmpSgt = ">s" 47 | pretty ICmpSge = ">=s" 48 | pretty ICmpSlt = " Expr -> String 57 | renderExpr opts e = render $ fst $ evalRWS (sh $ repeatf 50 simplify e) opts 0 58 | where render = renderStyle style{ mode = OneLineMode } 59 | 60 | instance Show Expr where 61 | show = renderExpr defaultExprOptions 62 | 63 | -- Inside ShowExpr, can read ExprOptions and set precedence. 64 | type ShowExpr = RWS ExprOptions () Int 65 | 66 | -- Convenience operators for monadic Docs 67 | (<<+>>) = liftM2 (<+>) 68 | (<<>>) = liftM2 (<>) 69 | a <+>> mb = return a <<+>> mb 70 | ma <<+> b = ma <<+>> return b 71 | a <>> mb = return a <<>> mb 72 | ma <<> b = ma <<>> return b 73 | 74 | -- Render an expression inside a different precedence context 75 | withPrec :: Int -> ShowExpr a -> ShowExpr a 76 | withPrec prec act = do 77 | contextPrec <- get 78 | put prec 79 | result <- act 80 | put contextPrec 81 | return result 82 | 83 | -- Parenthesize if precedence requires 84 | -- Type is SE Doc -> SE Doc for syntactical convenience 85 | parensPrec :: Int -> ShowExpr Doc -> ShowExpr Doc 86 | parensPrec prec mdoc = do 87 | doc <- mdoc 88 | contextPrec <- get 89 | withPrec prec $ if prec >= contextPrec 90 | then mdoc 91 | else parens <$> mdoc 92 | 93 | -- Make parentheses and reset precedence 94 | parens0 :: ShowExpr Doc -> ShowExpr Doc 95 | parens0 = parensPrec 0 96 | 97 | -- Associate left, right, always, or never (i.e., where to parenthesize operands) 98 | data Infix = AssocL | AssocR | AssocA | AssocN 99 | 100 | -- Binary operator: expr1 op expr2 101 | bin :: Infix -> Int -> String -> Expr -> Expr -> ShowExpr Doc 102 | bin inf prec op e1 e2 = parensPrec prec $ shl e1 <<+> text op <<+>> shr e2 103 | where (shl, shr) = case inf of 104 | AssocL -> (sh, shHighPrec) 105 | AssocR -> (shHighPrec, sh) 106 | AssocA -> (sh, sh) 107 | AssocN -> (shHighPrec, shHighPrec) 108 | 109 | -- Unary operator: func(expr) 110 | un :: String -> Expr -> ShowExpr Doc 111 | un func e = text func <>> (parens0 $ sh e) 112 | 113 | -- Same as un, but show only if exprShowCasts is true. 114 | -- Otherwise just show casted expression 115 | cast :: String -> Expr -> ShowExpr Doc 116 | cast func e = do 117 | showCasts <- asks exprShowCasts 118 | if showCasts then un func e else sh e 119 | 120 | -- e1, e2, e3, ..., e9 121 | -- We don't want to parenthesize interior expressions, so we use shNoPrec 122 | commas :: [Expr] -> ShowExpr Doc 123 | commas es = hsep <$> punctuate (text ",") <$> mapM shNoPrec es 124 | 125 | -- (e1, e2, e3, ..., e9) 126 | tuple :: [Expr] -> ShowExpr Doc 127 | tuple es = parens0 $ commas es 128 | 129 | -- Guarantee to not parenthesize the inner expression. 130 | shNoPrec :: Expr -> ShowExpr Doc 131 | shNoPrec = withPrec 0 . sh 132 | 133 | -- Guarantee to parenthesize inner expression 134 | shHighPrec :: Expr -> ShowExpr Doc 135 | shHighPrec = withPrec 10000 . sh 136 | 137 | sh :: Expr -> ShowExpr Doc 138 | sh (AddExpr _ e1 e2) = bin AssocA 10 "+" e1 e2 139 | sh (SubExpr _ e1 e2) = bin AssocL 10 "-" e1 e2 140 | sh (MulExpr _ e1 e2) = bin AssocA 20 "*" e1 e2 141 | sh (DivExpr _ e1 e2) = bin AssocL 20 "/" e1 e2 142 | sh (RemExpr _ e1 e2) = bin AssocL 15 "%%" e1 e2 143 | sh (ShlExpr _ e1 e2) = bin AssocN 40 "<<" e1 e2 144 | sh (LshrExpr _ e1 e2) = bin AssocN 40 "L>>" e1 e2 145 | sh (AshrExpr _ e1 e2) = bin AssocN 40 "A>>" e1 e2 146 | sh (AndExpr _ e1 e2) = bin AssocN 35 "&" e1 e2 147 | sh (OrExpr _ e1 e2) = bin AssocN 30 "|" e1 e2 148 | sh (XorExpr _ e1 e2) = bin AssocN 25 "^" e1 e2 149 | sh (TruncExpr t e) = cast (printf "T%d" (bits t)) e 150 | sh (ZExtExpr t e) = cast (printf "ZX%d" (bits t)) e 151 | sh (SExtExpr t e) = cast (printf "SX%d" (bits t)) e 152 | sh (FPTruncExpr _ e) = cast "FPTrunc" e 153 | sh (FPExtExpr _ e) = cast "FPExt" e 154 | sh (FPToSIExpr _ e) = cast "FPToSI" e 155 | sh (FPToUIExpr _ e) = cast "FPToUI" e 156 | sh (SIToFPExpr _ e) = cast "SIToFP" e 157 | sh (UIToFPExpr _ e) = cast "UIToFP" e 158 | sh (PtrToIntExpr _ e) = cast "PtrToInt" e 159 | sh (IntToPtrExpr _ e) = cast "IntToPtr" e 160 | sh (BitcastExpr t e) = un (printf "Bitcast%s" (show t)) e 161 | sh (LoadExpr _ _ (Just name)) = return $ text "%" <> text name 162 | sh (LoadExpr _ addr@AddrEntry{ addrType = GReg } _) = return $ text $ pretty addr 163 | sh (LoadExpr _ addr _) = return $ text "*" <> text (pretty addr) 164 | sh (ICmpExpr pred e1 e2) = bin AssocL 0 (pretty pred) e1 e2 165 | -- Print in hex if >=256 (probably an address); otherwise print in decimal 166 | sh (ILitExpr i) = return $ if i >= 256 then text $ printf "0x%x" i else integer i 167 | sh (FLitExpr f) = return $ text $ printf "%.1ff" f 168 | sh (InputExpr _ loc) = return $ parens $ text $ show loc 169 | sh (StubExpr _ f es) = text f <>> tuple es 170 | sh (IntrinsicExpr _ f es) = text (show $ externalFunctionName f) <>> tuple es 171 | sh (ExtractExpr _ idx e) = shHighPrec e <<> brackets (int idx) 172 | sh (StructExpr _ es) = braces <$> commas es 173 | sh (UndefinedExpr) = return $ text "Undef" 174 | sh (GEPExpr) = return $ text "GEP" 175 | sh (IrrelevantExpr) = return $ text "IRRELEVANT" 176 | 177 | bits :: ExprT -> Int 178 | bits Int1T = 1 179 | bits Int8T = 8 180 | bits Int16T = 16 181 | bits Int32T = 32 182 | bits Int64T = 64 183 | bits t = trace ("Unexpected argument to bits: " ++ show t) 64 184 | 185 | repeatf :: (Eq a) => Int -> (a -> a) -> a -> a 186 | repeatf 0 f x = trace "repeatf overflow. bailing." x 187 | repeatf lim f x 188 | | fx == x = x 189 | | otherwise = repeatf (lim - 1) f fx 190 | where fx = f x 191 | 192 | simplify :: Expr -> Expr 193 | simplify (AddExpr t e1 (ILitExpr 0)) = simplify e1 194 | simplify (AddExpr t (ILitExpr 0) e2) = simplify e2 195 | simplify (AddExpr t (ILitExpr a) (ILitExpr b)) = ILitExpr $ a + b 196 | simplify (AddExpr ta (MulExpr tm e1 e2) e3) 197 | | e1 == e3 = simplify $ MulExpr ta e1 (AddExpr tm e2 (ILitExpr 1)) 198 | simplify (AddExpr t (AddExpr _ e1 (ILitExpr a)) (ILitExpr b)) 199 | = simplify $ AddExpr t e1 (ILitExpr $ a + b) 200 | simplify (AddExpr _ (SubExpr _ e1 e2) e3) 201 | | e2 == e3 = simplify e1 202 | simplify (AddExpr t e1 e2) 203 | | e1 == e2 = simplify $ MulExpr t e1 (ILitExpr 2) 204 | simplify (AddExpr t e1 e2) = AddExpr t (simplify e1) (simplify e2) 205 | simplify (SubExpr t (ILitExpr a) (ILitExpr b)) = ILitExpr $ a - b 206 | simplify (SubExpr t e1 (ILitExpr b)) = simplify $ AddExpr t e1 (ILitExpr $ -b) 207 | simplify (SubExpr t e1 e2) 208 | | e1 == e2 = ILitExpr 0 209 | simplify (SubExpr ta (MulExpr tm e1 e2) e3) 210 | | e1 == e3 = simplify $ MulExpr ta e1 (SubExpr tm e2 (ILitExpr 1)) 211 | simplify (SubExpr t (AddExpr _ e1 e2) (AddExpr _ e3 e4)) 212 | | e1 == e3 = simplify $ SubExpr t e2 e4 213 | simplify (SubExpr t e1 e2) = SubExpr t (simplify e1) (simplify e2) 214 | simplify (MulExpr t (ILitExpr a) (ILitExpr b)) = ILitExpr $ a * b 215 | simplify (MulExpr t e (ILitExpr 1)) = simplify e 216 | simplify (MulExpr t e1 e2) = MulExpr t (simplify e1) (simplify e2) 217 | simplify (DivExpr t e1 e2) = DivExpr t (simplify e1) (simplify e2) 218 | simplify (RemExpr t e1 e2) = RemExpr t (simplify e1) (simplify e2) 219 | --simplify (ShlExpr t e1 (ILitExpr i)) 220 | -- | i >= 0 = simplify $ MulExpr t e1 (ILitExpr $ 2 ^ i) 221 | simplify (ShlExpr t (ILitExpr a) (ILitExpr b)) 222 | = ILitExpr $ (shiftL a $ fromIntegral b) `rem` (2 ^ bits t) 223 | simplify (ShlExpr t e1 e2) = ShlExpr t (simplify e1) (simplify e2) 224 | simplify (LshrExpr t (ILitExpr a) (ILitExpr b)) 225 | = ILitExpr $ shiftR (a `rem` (2 ^ bits t)) $ fromIntegral b 226 | simplify (LshrExpr t e1 e2) = LshrExpr t (simplify e1) (simplify e2) 227 | simplify (AshrExpr _ (ILitExpr 0) _) = ILitExpr 0 228 | simplify (AshrExpr t (ILitExpr a) (ILitExpr b)) 229 | = ILitExpr $ case t of 230 | Int8T -> fromIntegral $ shiftR a8 $ fromIntegral b 231 | Int16T -> fromIntegral $ shiftR a16 $ fromIntegral b 232 | Int32T -> fromIntegral $ shiftR a32 $ fromIntegral b 233 | Int64T -> fromIntegral $ shiftR a64 $ fromIntegral b 234 | where a64 = (fromIntegral a) :: Word64 235 | a32 = (fromIntegral a) :: Word32 236 | a16 = (fromIntegral a) :: Word16 237 | a8 = (fromIntegral a) :: Word8 238 | simplify (AshrExpr t e1 e2) = AshrExpr t (simplify e1) (simplify e2) 239 | simplify (AndExpr t (ILitExpr a) (ILitExpr b)) = ILitExpr $ (a .&. b) `rem` (2 ^ bits t) 240 | simplify (AndExpr _ (ZExtExpr _ e@(LoadExpr Int8T _ _)) (ILitExpr 255)) = simplify e 241 | simplify (AndExpr Int32T e (ILitExpr 0xFFFFFFFF)) = simplify e 242 | simplify (AndExpr Int64T e (ILitExpr 0xFFFFFFFF)) 243 | = simplify $ ZExtExpr Int64T $ TruncExpr Int32T e 244 | simplify (AndExpr t e1 e2) = AndExpr t (simplify e1) (simplify e2) 245 | simplify 246 | (OrExpr t 247 | (LshrExpr _ e1 r@(ILitExpr a)) 248 | (ShlExpr _ e2 l@(ILitExpr b))) 249 | | e1 == e2 && a + b == fromIntegral (bits t) 250 | = simplify $ case compare a b of 251 | LT -> StubExpr t "RotR" [e1, r] 252 | _ -> StubExpr t "RotL" [e2, l] 253 | simplify 254 | (OrExpr t 255 | (ShlExpr _ e2 l@(ILitExpr b)) 256 | (LshrExpr _ e1 r@(ILitExpr a))) 257 | | e1 == e2 && a + b == fromIntegral (bits t) 258 | = simplify $ case compare a b of 259 | LT -> StubExpr t "RotR" [e1, r] 260 | _ -> StubExpr t "RotL" [e2, l] 261 | simplify 262 | (OrExpr t 263 | (ShlExpr _ e1 (ILitExpr 24)) 264 | (OrExpr _ 265 | (OrExpr _ 266 | (LshrExpr _ e2 (ILitExpr 24)) 267 | (AndExpr _ 268 | (LshrExpr _ e3 (ILitExpr 8)) 269 | (ILitExpr 0xFF00))) 270 | (AndExpr _ 271 | (ShlExpr _ e4 (ILitExpr 8)) 272 | (ILitExpr 0xFF0000)))) 273 | | e1 == e2 && e2 == e3 && e3 == e4 274 | = simplify $ StubExpr t "Byteswap32" [e1] 275 | simplify (OrExpr t (ILitExpr a) (ILitExpr b)) = ILitExpr $ (a .|. b) `rem` (2 ^ bits t) 276 | simplify (OrExpr t e (ILitExpr 0)) = simplify e 277 | simplify (OrExpr t (ILitExpr 0) e) = simplify e 278 | simplify (OrExpr t e1 e2) = OrExpr t (simplify e1) (simplify e2) 279 | simplify (XorExpr t (ILitExpr a) (ILitExpr b)) = ILitExpr $ (a `xor` b) `rem` (2 ^ bits t) 280 | simplify (XorExpr t (ILitExpr 0) e) = simplify e 281 | simplify (XorExpr t e1 e2) = XorExpr t (simplify e1) (simplify e2) 282 | -- FIXME: HACK!!!! 283 | --simplify (ZExtExpr _ e) = simplify e 284 | --simplify (SExtExpr _ e) = simplify e 285 | --simplify (TruncExpr _ e) = simplify e 286 | simplify (TruncExpr t1 (TruncExpr t2 e)) 287 | | bits t1 <= bits t2 = simplify $ TruncExpr t1 e 288 | simplify (TruncExpr t1 (ZExtExpr t2 e)) 289 | | t1 == t2 = simplify e 290 | | bits t1 < bits t2 = simplify $ TruncExpr t1 e 291 | simplify (TruncExpr t1 (SExtExpr t2 e)) 292 | | t1 == t2 = simplify e 293 | | bits t1 < bits t2 = simplify $ TruncExpr t1 e 294 | simplify expr@(TruncExpr t e@(ILitExpr int)) 295 | | int < 2 ^ bits t = e 296 | | otherwise = expr 297 | simplify (TruncExpr t e) = simplify e 298 | simplify (ZExtExpr t e@ILitExpr{}) = e 299 | simplify (ZExtExpr t1 (TruncExpr t2 e)) 300 | | t1 == t2 = simplify $ TruncExpr t2 e 301 | simplify (ZExtExpr t e) = ZExtExpr t (simplify e) 302 | simplify (SExtExpr t e@ILitExpr{}) = e -- FIXME: add typing to lits 303 | simplify (SExtExpr t e) = SExtExpr t (simplify e) 304 | simplify (FPTruncExpr t e) = FPTruncExpr t (simplify e) 305 | simplify (FPExtExpr t e) = FPExtExpr t (simplify e) 306 | simplify (FPToSIExpr t e) = FPToSIExpr t (simplify e) 307 | simplify (FPToUIExpr t e) = FPToUIExpr t (simplify e) 308 | simplify (SIToFPExpr t e) = SIToFPExpr t (simplify e) 309 | simplify (UIToFPExpr t e) = UIToFPExpr t (simplify e) 310 | simplify (PtrToIntExpr t1 (IntToPtrExpr t2 e)) = simplify e 311 | simplify (IntToPtrExpr t1 (PtrToIntExpr Int64T e)) = simplify e 312 | simplify (PtrToIntExpr t e) = PtrToIntExpr t (simplify e) 313 | simplify (IntToPtrExpr t e) = IntToPtrExpr t (simplify e) 314 | simplify (BitcastExpr t e) = BitcastExpr t (simplify e) 315 | simplify (ICmpExpr p (SubExpr _ e1 e2) (ILitExpr 0)) 316 | = simplify $ ICmpExpr p e1 e2 317 | simplify (ICmpExpr ICmpEq (XorExpr _ e1 e2) (ILitExpr 0)) 318 | = simplify $ ICmpExpr ICmpEq e1 e2 319 | simplify (ICmpExpr p (AndExpr _ e1 e2) (ILitExpr 0)) 320 | | e1 == e2 && (p == ICmpEq || p == ICmpNe) 321 | = simplify $ ICmpExpr ICmpEq e1 (ILitExpr 0) 322 | simplify (ICmpExpr ICmpEq (ILitExpr a) (ILitExpr b)) 323 | | a == b = ILitExpr 1 324 | | otherwise = ILitExpr 0 325 | simplify (ICmpExpr p e1 e2) = ICmpExpr p (simplify e1) (simplify e2) 326 | simplify (ExtractExpr t 0 (IntrinsicExpr _ f [e1, e2])) 327 | | "llvm.uadd.with.overflow" `L.isPrefixOf` 328 | identifierAsString (externalFunctionName f) 329 | = simplify $ AddExpr t e1 e2 330 | simplify (ExtractExpr _ 1 (IntrinsicExpr (StructT [_, t]) 331 | f [ILitExpr a, ILitExpr b])) 332 | | "llvm.uadd.with.overflow" `L.isPrefixOf` 333 | identifierAsString (externalFunctionName f) 334 | = case compare (a + b) (2 ^ bits t) of 335 | LT -> ILitExpr 0 336 | _ -> ILitExpr 1 337 | simplify (StubExpr t f es) = StubExpr t f $ map simplify es 338 | simplify (IntrinsicExpr t f es) = IntrinsicExpr t f $ map simplify es 339 | simplify (ExtractExpr _ idx (StructExpr _ es)) = simplify $ es !! idx 340 | simplify (ExtractExpr t idx e) = ExtractExpr t idx (simplify e) 341 | simplify (StructExpr t es) = StructExpr t $ map simplify es 342 | simplify e = e 343 | 344 | -- Simple type system 345 | typeToExprT :: Type -> ExprT 346 | typeToExprT (TypeInteger 1) = Int1T 347 | typeToExprT (TypeInteger 8) = Int8T 348 | typeToExprT (TypeInteger 16) = Int16T 349 | typeToExprT (TypeInteger 32) = Int32T 350 | typeToExprT (TypeInteger 64) = Int32T 351 | typeToExprT (TypePointer _ _) = PtrT 352 | typeToExprT (TypeFloat) = FloatT 353 | typeToExprT (TypeDouble) = DoubleT 354 | typeToExprT (TypeStruct _ ts _) = StructT $ map typeToExprT ts 355 | typeToExprT t = trace (printf "making VoidT from %s" (show t)) VoidT 356 | 357 | exprTOfInst :: Instruction -> ExprT 358 | exprTOfInst = typeToExprT . instructionType 359 | -------------------------------------------------------------------------------- /Instances.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE StandaloneDeriving #-} 2 | module Instances where 3 | 4 | import Data.LLVM.Types 5 | 6 | deriving instance Show Constant 7 | deriving instance Show ExternalValue 8 | deriving instance Show GlobalAlias 9 | 10 | deriving instance Ord CmpPredicate 11 | -------------------------------------------------------------------------------- /Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE StandaloneDeriving, OverloadedStrings #-} 2 | module Main where 3 | 4 | import Data.LLVM.Types 5 | import LLVM.Parse 6 | 7 | import Control.Applicative 8 | import Control.DeepSeq 9 | import Control.Monad 10 | import Control.Monad.Reader 11 | import Control.Monad.State.Lazy 12 | import Control.Monad.Trans.Maybe 13 | import Data.Aeson 14 | import Data.Maybe 15 | import Data.Word 16 | import Debug.Trace 17 | import Network 18 | import System.Console.GetOpt 19 | import System.Directory(setCurrentDirectory, canonicalizePath) 20 | import System.Environment(getArgs) 21 | import System.Exit(ExitCode(..), exitFailure) 22 | import System.FilePath(()) 23 | import System.IO 24 | import System.IO.Error 25 | import Text.Printf 26 | 27 | import qualified Data.ByteString.Char8 as BS 28 | import qualified Data.ByteString.Lazy.Char8 as BSL 29 | import qualified Data.List as L 30 | import qualified Data.Map as M 31 | import qualified Data.Map.Strict as MS 32 | import qualified Data.Text as T 33 | import qualified Data.Text.Encoding as TE 34 | import qualified System.Process as P 35 | 36 | import Data.RESET.Types 37 | import Eval 38 | import Expr 39 | import Memlog 40 | import Options 41 | 42 | deriving instance Show Command 43 | deriving instance Show Response 44 | 45 | type SymbReader = ReaderT SymbolicState IO 46 | 47 | processCmd :: String -> IO Response 48 | processCmd s = case parseCmd s of 49 | Left err -> do 50 | putStrLn $ printf "Parse error on %s:\n %s" (show s) err 51 | return $ ErrorResponse err 52 | Right cmd -> do 53 | putStrLn $ printf "executing command: %s" (show cmd) 54 | respond cmd 55 | where parseCmd = eitherDecode . BSL.pack :: String -> Either String Command 56 | 57 | respond :: Command -> IO Response 58 | respond WatchIP{ commandIP = ip, 59 | commandLimit = limit, 60 | commandExprOptions = opts } 61 | = MessagesResponse <$> map (messageMap $ renderExpr opts) <$> 62 | take limit <$> messagesByIP ip <$> (parseOptions >>= symbolic ip) 63 | 64 | process :: (Handle, HostName, PortNumber) -> IO () 65 | process (handle, _, _) = do 66 | putStrLn "Client connected." 67 | commands <- lines <$> hGetContents handle 68 | mapM_ (BSL.hPutStrLn handle <=< liftM encode . processCmd) commands 69 | 70 | -- Command line arguments 71 | opts :: [OptDescr (Options -> Options)] 72 | opts = 73 | [ Option [] ["debug-ip"] 74 | (ReqArg (\a o -> o{ optDebugIP = Just $ read a }) "Need IP") 75 | "Run in debug mode on a given IP; write out trace at that IP." 76 | , Option ['q'] ["qemu-dir"] 77 | (ReqArg (\a o -> o{ optQemuDir = a }) "Need dir") 78 | "Run QEMU on specified program." 79 | , Option ['t'] ["qemu-target"] 80 | (ReqArg (\a o -> o{ optQemuTarget = a }) "Need triple") $ 81 | "Run specified QEMU target. Default i386-linux-user for user mode " ++ 82 | "and i386-softmmu for whole-system mode." 83 | , Option ['c'] ["qemu-cr3"] 84 | (ReqArg (\a o -> o{ optQemuCr3 = Just $ read a }) "Need CR3") 85 | "Run QEMU with filtering on a given CR3 (in whole-system mode)." 86 | , Option ['r'] ["qemu-replay"] 87 | (ReqArg (\a o -> o{ optQemuReplay = Just a }) "Need replay") 88 | "Run specified replay in QEMU (exclude filename extension)." 89 | , Option [] ["qemu-qcows"] 90 | (ReqArg (\a o -> o{ optQemuQcows = Just a }) "Need qcows") 91 | "Use specified Qcows2 with QEMU." 92 | , Option ['d'] ["log-dir"] 93 | (ReqArg (\a o -> o{ optLogDir = a }) "Need dir") 94 | "Place or look for QEMU LLVM logs in a given dir." 95 | ] 96 | 97 | data WholeSystemArgs = WSA 98 | { wsaCr3 :: Word64 99 | , wsaReplay :: FilePath 100 | , wsaQcows :: FilePath 101 | } 102 | 103 | getWSA :: Options -> Maybe WholeSystemArgs 104 | getWSA Options{ optQemuCr3 = Just cr3, 105 | optQemuReplay = Just replay, 106 | optQemuQcows = Just qcows } 107 | = Just $ WSA{ wsaCr3 = cr3, wsaReplay = replay, wsaQcows = qcows } 108 | getWSA _ = Nothing 109 | 110 | runQemu :: FilePath -> String -> FilePath -> Word64 -> Maybe WholeSystemArgs -> [String] -> IO () 111 | runQemu dir target logdir trigger wsArgs prog = do 112 | arch <- case map T.unpack $ T.splitOn "-" (T.pack target) of 113 | [arch, _, _] -> return arch 114 | [arch, "softmmu"] -> return arch 115 | _ -> putStrLn "Bad target triple." >> exitFailure 116 | -- Make sure we run prog relative to old working dir. 117 | progShifted <- case prog of 118 | progName : progArgs -> do 119 | progPath <- canonicalizePath progName 120 | return $ progPath : progArgs 121 | _ -> return $ error "Need a program to run." 122 | let qemu = dir target 123 | if isJust wsArgs -- if in whole-system mode 124 | then printf "qemu-system-%s" arch 125 | else printf "qemu-%s" arch 126 | otherArgs = ["-tubtf", "-monitor", "tcp:localhost:4444,server,nowait"] 127 | findPlugin = target "panda_plugins" "panda_findeip.so" 128 | findArgs = 129 | ["-panda-plugin", findPlugin, 130 | "-panda-arg", printf "findeip:eip=%x" trigger] 131 | runArgs = case wsArgs of 132 | Nothing -> progShifted -- user mode 133 | Just (WSA cr3 replay qcows) -> -- whole-system mode 134 | ["-m", "2048", qcows, "-replay", replay] 135 | qemuFindArgs = otherArgs ++ findArgs ++ runArgs 136 | 137 | putStrLn $ printf "Running QEMU at %s with args %s..." qemu (show qemuFindArgs) 138 | -- Don't pass an environment, and use our stdin/stdout 139 | (_, Just out, _, procHandle) <- P.createProcess $ 140 | (P.proc qemu qemuFindArgs){ P.cwd = Just dir, P.std_out = P.CreatePipe } 141 | exitCode <- P.waitForProcess procHandle 142 | output <- lines <$> hGetContents out 143 | 144 | let fracS = last $ catMaybes $ map (L.stripPrefix "REPLAYFRAC=") output 145 | tracePlugin = target "panda_plugins" "panda_llvm_trace.so" 146 | traceArgs = 147 | ["-panda-plugin", tracePlugin, 148 | "-panda-arg", printf "llvm_trace:base=%s" logdir, 149 | "-panda-arg", printf "llvm_trace:rfrac=%s" fracS] 150 | ++ case wsArgs of 151 | Just (WSA cr3 _ _) -> 152 | ["-panda-arg", printf "llvm_trace:cr3=%x" cr3] 153 | Nothing -> [] 154 | qemuTraceArgs = otherArgs ++ traceArgs ++ runArgs 155 | 156 | putStrLn $ printf "Running QEMU at %s with args %s..." qemu (show qemuTraceArgs) 157 | (_, _, _, procHandle2) <- P.createProcess $ 158 | (P.proc qemu qemuTraceArgs){ P.cwd = Just dir } 159 | exitCode2 <- P.waitForProcess procHandle2 160 | 161 | case exitCode of 162 | ExitFailure code -> 163 | putStrLn $ printf "\nQEMU exited with return code %d." code 164 | ExitSuccess -> putStrLn "Done running QEMU." 165 | 166 | -- Run a round of symbolic evaluation 167 | symbolic :: Word64 -> (Options, [String]) -> IO SymbolicState 168 | symbolic trigger (options, nonOptions) = do 169 | let logDir = optLogDir options 170 | dir = optQemuDir options 171 | 172 | -- Run QEMU if necessary 173 | if isJust $ optDebugIP options 174 | then return () 175 | else 176 | runQemu dir (optQemuTarget options) logDir trigger 177 | (getWSA options) nonOptions 178 | 179 | -- Load LLVM files and dynamic logs 180 | let llvmMod = logDir "llvm-mod.bc" 181 | printf "Loading LLVM module from %s.\n" llvmMod 182 | theMod <- parseLLVMFile defaultParserOptions llvmMod 183 | 184 | -- Align dynamic log with execution history 185 | putStrLn "Loading dynamic log." 186 | memlog <- parseMemlog $ optLogDir options "tubtf.log" 187 | putStr "Aligning dynamic log data..." 188 | let (associated, instCount) = associateFuncs memlog theMod 189 | putStrLn $ printf " done.\nRunning symbolic execution analysis with %d instructions." instCount 190 | 191 | -- Run symbolic execution analysis 192 | let initialState = noSymbolicState{ 193 | symbolicInstTotal = instCount, 194 | symbolicOptions = options 195 | } 196 | let (_, state) = runState (runBlocks associated) initialState 197 | seq state $ return state 198 | 199 | parseOptions :: IO (Options, [String]) 200 | parseOptions = do 201 | args <- getArgs 202 | let (optionFs, nonOptions, optionErrs) = getOpt RequireOrder opts args 203 | case optionErrs of 204 | [] -> return () 205 | _ -> mapM putStrLn optionErrs >> exitFailure 206 | return $ (foldl (flip ($)) defaultOptions optionFs, nonOptions) 207 | 208 | -- Serve requests for data from analysis 209 | server :: IO () 210 | server = do 211 | let addr = PortNumber 22022 212 | sock <- listenOn addr 213 | putStrLn $ printf "Listening on %s." (show addr) 214 | forever $ catchIOError (accept sock >>= process) $ \e -> print e 215 | 216 | main :: IO () 217 | main = do 218 | hSetBuffering stdout NoBuffering 219 | 220 | (opts, _) <- parseOptions 221 | case optDebugIP opts of 222 | Nothing -> server 223 | Just ip -> do 224 | response <- respond WatchIP{ commandIP = ip, 225 | commandLimit = 10, 226 | commandExprOptions = defaultExprOptions } 227 | printf "\n%s\n" $ show response 228 | -------------------------------------------------------------------------------- /Memlog.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts, FlexibleInstances, UndecidableInstances, CPP #-} 2 | module Memlog(MemlogOp(..), AddrOp(..), AddrEntry(..), AddrEntryType(..), AddrFlag(..), parseMemlog, associateFuncs, shouldIgnoreInst, pairListFind, InstOpList, MemlogList) where 3 | 4 | import Control.Applicative 5 | import Control.Monad(liftM) 6 | #ifdef DEBUG 7 | import Control.Monad.Error 8 | #endif 9 | import Control.Monad.State 10 | import Control.Monad.Trans(lift) 11 | import Control.Monad.Trans.Maybe 12 | import Data.Binary.Get(Get, runGet, getWord32host, getWord64host, skip, getLazyByteString) 13 | import Data.Bits(shiftR, (.&.)) 14 | import Data.LLVM.Types 15 | import Data.Maybe(isJust, fromMaybe, catMaybes) 16 | import Data.Word(Word8, Word32, Word64) 17 | import Text.Printf(printf) 18 | import Debug.Trace 19 | 20 | import qualified Data.ByteString.Lazy as B 21 | import qualified Data.Char as C 22 | import qualified Data.List as L 23 | import qualified Data.Map as M 24 | import qualified Data.Set as S 25 | import qualified Data.Text as T 26 | 27 | import Data.RESET.Types 28 | import AppList 29 | import Instances 30 | import Pretty 31 | 32 | instance Pretty AddrEntry where 33 | pretty AddrEntry{ addrType = MAddr, addrVal = val } 34 | = printf "0x%08x" val 35 | pretty AddrEntry{ addrType = GReg, addrVal = reg } = case reg of 36 | 0 -> "EAX" 37 | 1 -> "ECX" 38 | 2 -> "EDX" 39 | 3 -> "EBX" 40 | 4 -> "ESP" 41 | 5 -> "EBP" 42 | 6 -> "ESI" 43 | 7 -> "EDI" 44 | _ -> "Reg" ++ show reg 45 | pretty addr = show addr 46 | 47 | parseMemlog :: FilePath -> IO [MemlogOp] 48 | parseMemlog file = runGet (getTubtfHeader >> many getMemlogEntry) <$> B.readFile file 49 | 50 | getTubtfHeader :: Get () 51 | getTubtfHeader = do 52 | skip 4 -- tubtf version 53 | colw <- getWord32host 54 | case colw of 55 | 0 -> error "Can't process 32-bit tubtf log" 56 | 1 -> return () -- 64-bit, so we're good. 57 | skip 8 -- FIXME: we currently ignore what elements are present - we should check that. 58 | skip 4 -- number of rows; we'll just read until EOF. 59 | 60 | word :: Get Word64 61 | word = getWord64host 62 | 63 | getMemlogEntry :: Get MemlogOp 64 | getMemlogEntry = do 65 | cr3 <- word 66 | eip <- word 67 | entryType <- word 68 | out <- case entryType of 69 | 30 -> BeginBlockOp eip <$> word <* skip 24 70 | 31 -> MemoryOp LoadOp <$> getAddrEntry 71 | 32 -> MemoryOp StoreOp <$> getAddrEntry 72 | 33 -> BranchOp <$> word <* skip 24 73 | 34 -> SelectOp <$> word <* skip 24 74 | 35 -> SwitchOp <$> word <* skip 24 75 | 36 -> ExceptionOp <$ skip 32 76 | _ -> do 77 | nextBytes <- getLazyByteString 32 78 | error $ printf "Unknown entry type %d; next bytes %s" entryType 79 | (L.concatMap (printf "%02X " :: Word8 -> String) $ B.unpack nextBytes) 80 | return out 81 | 82 | getAddrEntry :: Get AddrEntry 83 | getAddrEntry = do 84 | metadata <- word 85 | let typ = toEnum $ fromIntegral $ metadata .&. 0xff 86 | flagNum = metadata `shiftR` 8 .&. 0xff 87 | flag <- case flagNum of 88 | 5 -> return IrrelevantFlag 89 | 0 -> return NoFlag 90 | 1 -> return ExceptionFlag 91 | 2 -> return ReadlogFlag 92 | 3 -> return FuncargFlag 93 | f -> error ("Parse error in dynamic log: Unexpected flag " ++ show f) 94 | val <- word 95 | skip 16 96 | return $ AddrEntry typ val flag 97 | 98 | type MemlogAppList = AppList (BasicBlock, InstOpList) 99 | 100 | -- Monads for doing the association. 101 | 102 | data MemlogState = MemlogState{ 103 | memlogOpStream :: [MemlogOp], 104 | -- Work already done in current block. We use this instead of a mapM so we 105 | -- can do better error reporting. 106 | memlogCurrentAssociated :: AppList (Instruction, Maybe MemlogOp), 107 | -- Already associated blocks. If the Maybe is Nothing, we don't keep track 108 | -- (i.e. this is loading code, not something we're actually interested in) 109 | memlogAssociatedBlocks :: Maybe MemlogAppList, 110 | memlogNextBlock :: Maybe BasicBlock, -- Next block to process 111 | memlogSkipRest :: Bool, 112 | memlogBlockMap :: M.Map Word64 Function, 113 | memlogInstCount :: Integer 114 | } 115 | 116 | noMemlogState :: MemlogState 117 | noMemlogState = MemlogState{ 118 | memlogOpStream = [], 119 | memlogCurrentAssociated = mkAppList, 120 | memlogAssociatedBlocks = Just mkAppList, 121 | memlogNextBlock = Nothing, 122 | memlogSkipRest = False, 123 | memlogBlockMap = M.empty, 124 | memlogInstCount = 0 125 | } 126 | 127 | class (Functor m, Monad m, MonadState MemlogState m) => Memlogish m where 128 | instance (Functor m, Monad m, MonadState MemlogState m) => Memlogish m 129 | 130 | type MemlogContext = State MemlogState 131 | 132 | #ifdef DEBUG 133 | type FuncOpContext = ErrorT String MemlogContext 134 | #else 135 | type FuncOpContext = MemlogContext 136 | 137 | runErrorT = liftM Right 138 | throwError s = fail s 139 | catchError c h = c 140 | #endif 141 | 142 | getOpStream :: Memlogish m => m [MemlogOp] 143 | getOpStream = memlogOpStream <$> get 144 | putOpStream :: Memlogish m => [MemlogOp] -> m () 145 | putOpStream stream = modify (\s -> s{ memlogOpStream = stream }) 146 | 147 | memlogPopMaybe :: Memlogish m => m (Maybe MemlogOp) 148 | memlogPopMaybe = do 149 | stream <- getOpStream 150 | case stream of 151 | op : ops -> putOpStream ops >> return (Just op) 152 | [] -> return Nothing 153 | 154 | memlogPopErr :: Memlogish m => Instruction -> m MemlogOp 155 | memlogPopErr inst = fromMaybe err <$> memlogPopMaybe 156 | where err = error $ printf "Failed on block %s" 157 | (show $ instructionBasicBlock inst) 158 | 159 | putNextBlock :: Memlogish m => BasicBlock -> m () 160 | putNextBlock block = modify (\s -> s{ memlogNextBlock = Just block }) 161 | clearNextBlock :: Memlogish m => m () 162 | clearNextBlock = modify (\s -> s{ memlogNextBlock = Nothing }) 163 | 164 | clearCurrentAssociated :: Memlogish m => m () 165 | clearCurrentAssociated = modify (\s -> s{ memlogCurrentAssociated = mkAppList }) 166 | appendInstOp :: Memlogish m => (Instruction, Maybe MemlogOp) -> m () 167 | appendInstOp instOp 168 | = modify (\s -> s{ 169 | memlogCurrentAssociated = memlogCurrentAssociated s +: instOp }) 170 | 171 | appendAssociated :: Memlogish m => (BasicBlock, InstOpList) -> m () 172 | appendAssociated block = do 173 | associated <- memlogAssociatedBlocks <$> get 174 | case associated of 175 | Nothing -> return () 176 | Just associated' -> 177 | modify (\s -> s{ memlogAssociatedBlocks = Just $ associated' +: block }) 178 | 179 | skipRest :: Memlogish m => m () 180 | skipRest = modify (\s -> s{ memlogSkipRest = True }) 181 | 182 | countInsts :: Memlogish m => Integer -> m () 183 | countInsts n = modify (\s -> s{ memlogInstCount = memlogInstCount s + n }) 184 | 185 | t x = traceShow x x 186 | 187 | associateBasicBlock :: BasicBlock -> FuncOpContext InstOpList 188 | associateBasicBlock block = do 189 | clearCurrentAssociated 190 | clearNextBlock 191 | modify (\s -> s{ memlogSkipRest = False }) 192 | mapM associateInstWithCopy $ basicBlockInstructions block 193 | where associateInstWithCopy inst = do 194 | skip <- memlogSkipRest <$> get 195 | case skip of 196 | True -> return (inst, Nothing) 197 | False -> do 198 | maybeOp <- associateInst inst `catchError` handler inst 199 | appendInstOp (inst, maybeOp) 200 | return (inst, maybeOp) 201 | handler :: Instruction -> String -> FuncOpContext (Maybe MemlogOp) 202 | handler inst err = do 203 | ops <- getOpStream 204 | currentAssociated <- memlogCurrentAssociated <$> get 205 | Just associatedBlocks <- memlogAssociatedBlocks <$> get 206 | throwError $ printf 207 | ("Error during alignment.\n\n" ++ 208 | "Previous blocks:\n%s\n\nCurrent block:\n%s\n%s\n\n" ++ 209 | "Next ops:\n%s\n\nError: %s") 210 | (L.intercalate "\n\n" $ map showBlock $ suffix 3 associatedBlocks) 211 | (showBlock (block, unAppList currentAssociated)) 212 | (show inst) 213 | (L.intercalate "\n" $ map show $ take 5 ops) 214 | err 215 | 216 | shouldIgnoreInst :: Instruction -> Bool 217 | shouldIgnoreInst AllocaInst{} = True 218 | shouldIgnoreInst CallInst{ callFunction = ExternalFunctionC func} 219 | | (identifierContent $ externalFunctionName func) == T.pack "log_dynval" = True 220 | shouldIgnoreInst StoreInst{ storeIsVolatile = True } = True 221 | shouldIgnoreInst inst = False 222 | 223 | pairListFind :: (a -> Bool) -> b -> [(a, b)] -> b 224 | pairListFind test def list = foldr check def list 225 | where check (key, val) _ 226 | | test key = val 227 | check _ val = val 228 | 229 | findSwitchTarget :: BasicBlock -> Word64 -> [(Value, BasicBlock)] -> BasicBlock 230 | findSwitchTarget defaultTarget idx casesList 231 | = pairListFind test defaultTarget casesList 232 | where test (ConstantC (ConstantInt{ constantIntValue = int })) 233 | | int == fromIntegral idx = True 234 | test (ConstantC (ConstantArray{ constantArrayValues = array })) 235 | = test $ head array 236 | test (ConstantC (ConstantVector{ constantVectorValues = vector })) 237 | = test $ head vector 238 | test (ConstantC (ConstantAggregateZero{})) 239 | | idx == 0 = True 240 | test _ = False 241 | 242 | associateMem :: AddrOp -> Instruction -> FuncOpContext (Maybe MemlogOp) 243 | associateMem typ inst = do 244 | op <- memlogPopErr inst 245 | case op of 246 | MemoryOp typ' _ 247 | | typ == typ' -> return $ Just op 248 | _ -> throwError $ printf "Expected %s; got %s" (show typ) (show op) 249 | 250 | associateInst :: Instruction -> FuncOpContext (Maybe MemlogOp) 251 | associateInst inst 252 | | shouldIgnoreInst inst = return Nothing 253 | associateInst inst@LoadInst{} = associateMem LoadOp inst 254 | associateInst inst@StoreInst{} = associateMem StoreOp inst 255 | associateInst inst@SelectInst{} = liftM Just $ memlogPopErr inst 256 | associateInst inst@BranchInst{} = do 257 | op <- memlogPopErr inst 258 | case op of 259 | BranchOp 0 -> putNextBlock $ branchTrueTarget inst 260 | BranchOp 1 -> putNextBlock $ branchFalseTarget inst 261 | _ -> throwError $ printf "Expected branch operation; got %s" (show op) 262 | return $ Just op 263 | associateInst inst@SwitchInst{ switchDefaultTarget = defaultTarget, 264 | switchCases = casesList } = do 265 | op <- memlogPopErr inst 266 | case op of 267 | SwitchOp idx -> putNextBlock $ findSwitchTarget defaultTarget idx casesList 268 | _ -> throwError "Expected switch operation" 269 | return $ Just op 270 | associateInst inst@UnconditionalBranchInst{ unconditionalBranchTarget = target } = do 271 | op <- memlogPopErr inst 272 | case op of 273 | BranchOp 0 -> putNextBlock target 274 | _ -> throwError $ printf "Expected branch operation; got %s" (show op) 275 | return $ Just op 276 | associateInst inst@CallInst{ callFunction = ExternalFunctionC func, 277 | callAttrs = attrs } 278 | | FANoReturn `elem` externalFunctionAttrs func = skipRest >> return Nothing 279 | | FANoReturn `elem` attrs = skipRest >> return Nothing 280 | | T.pack "cpu_loop_exit" == name = skipRest >> return Nothing 281 | | T.pack "llvm.memset." `T.isPrefixOf` name = do 282 | op <- memlogPopErr inst 283 | case op of 284 | MemoryOp StoreOp addr -> return $ Just $ MemsetOp addr 285 | _ -> throwError $ printf "Expected store operation (memset)" 286 | | T.pack "llvm.memcpy." `T.isPrefixOf` name = do 287 | op1 <- memlogPopErr inst 288 | op2 <- memlogPopErr inst 289 | case (op1, op2) of 290 | (MemoryOp LoadOp src, MemoryOp StoreOp dest) -> 291 | return $ Just $ MemcpyOp src dest 292 | _ -> throwError $ printf "Expected load and store operation (memcpy)" 293 | | isMMU && T.pack "__ld" `T.isPrefixOf` name = associateMem LoadOp inst 294 | | isMMU && T.pack "__st" `T.isPrefixOf` name = associateMem StoreOp inst 295 | where name = identifierContent $ externalFunctionName func 296 | isMMU = T.pack "_mmu_panda" `T.isSuffixOf` name 297 | associateInst CallInst{ callFunction = FunctionC func } = do 298 | opStream <- getOpStream 299 | let (eitherError, memlogState) 300 | = runState (runErrorT $ associateMemlogWithFunc $ Just func) 301 | noMemlogState{ memlogOpStream = opStream } 302 | putOpStream $ memlogOpStream memlogState 303 | case eitherError of 304 | Right () -> return () 305 | Left err -> throwError err 306 | let maybeRevMemlog = memlogAssociatedBlocks memlogState 307 | let revMemlog = fromMaybe (error "no memlog!") maybeRevMemlog 308 | when (memlogSkipRest memlogState) skipRest 309 | return $ Just $ HelperFuncOp $ unAppList revMemlog 310 | associateInst RetInst{} = clearNextBlock >> return Nothing 311 | associateInst UnreachableInst{} = do 312 | skip <- memlogSkipRest <$> get 313 | throwError $ printf "Unreachable instruction; skipRest = %s" (show skip) 314 | associateInst _ = return Nothing 315 | 316 | -- If argument is Nothing, pull a func out of the OpStream 317 | associateMemlogWithFunc :: Maybe Function -> FuncOpContext () 318 | associateMemlogWithFunc maybeFunc = do 319 | func <- case maybeFunc of 320 | Just func -> return func 321 | Nothing -> do 322 | op <- fromMaybe (error "No op to begin block") <$> memlogPopMaybe 323 | blocks <- memlogAssociatedBlocks <$> get 324 | tbCount <- case op of 325 | BeginBlockOp eip tbCount -> return tbCount 326 | _ -> throwError $ printf "Expected BeginBlockOp; got %s; previous block %s" 327 | (show op) (fromMaybe "none" $ identifierAsString <$> functionName <$> 328 | basicBlockFunction <$> fst <$> head <$> suffix 1 <$> blocks) 329 | blockMap <- memlogBlockMap <$> get 330 | return $ M.findWithDefault (error $ printf "Couldn't find block with tbCount %d" tbCount) 331 | tbCount blockMap 332 | addBlock $ head $ functionBody func 333 | where addBlock :: BasicBlock -> FuncOpContext () 334 | addBlock block = do 335 | associated <- associateBasicBlock block 336 | appendAssociated (block, associated) 337 | countInsts $ fromIntegral $ length $ 338 | basicBlockInstructions $ block 339 | nextBlock <- memlogNextBlock <$> get 340 | case nextBlock of 341 | Just nextBlock' -> addBlock nextBlock' 342 | Nothing -> return () 343 | 344 | associateMemlogWithModule :: Module -> FuncOpContext () 345 | associateMemlogWithModule mod = do 346 | ops <- getOpStream 347 | unless (null ops) $ do 348 | associateMemlogWithFunc Nothing 349 | associateMemlogWithModule mod 350 | 351 | mkBlockMap :: Module -> M.Map Word64 Function 352 | mkBlockMap mod = foldl construct M.empty $ moduleDefinedFunctions mod 353 | where construct map func = case strippedName func of 354 | Just suffix -> M.insert 355 | (read $ takeWhile C.isDigit suffix) func map 356 | Nothing -> map 357 | strippedName func = L.stripPrefix "tcg-llvm-tb-" $ 358 | identifierAsString $ functionName func 359 | 360 | -- Returns list of basic blocks in execution order and total number of instructions 361 | associateFuncs :: [MemlogOp] -> Module -> (MemlogList, Integer) 362 | associateFuncs ops mod = (unAppList revMemlog, memlogInstCount memlogState) 363 | where revMemlog = fromMaybe (error "No memlog list") maybeRevMemlog 364 | maybeRevMemlog = memlogAssociatedBlocks memlogState 365 | memlogState = execState (associate mod) $ 366 | noMemlogState{ memlogOpStream = ops, 367 | memlogBlockMap = mkBlockMap mod } 368 | associate funcs = do 369 | result <- runErrorT $ associateMemlogWithModule mod 370 | case result of 371 | Right associated -> return associated 372 | Left err -> error err 373 | 374 | showAssociated :: MemlogList -> String 375 | showAssociated theList = L.intercalate "\n\n\n" $ map showBlock theList 376 | 377 | showBlock :: (BasicBlock, InstOpList) -> String 378 | showBlock (block, list) = printf "%s: %s:\n%s" 379 | (show $ functionName $ basicBlockFunction block) 380 | (show $ basicBlockName block) 381 | (L.intercalate "\n" $ map showInstOp list) 382 | where showInstOp (inst, Just op) 383 | = printf "%s =>\n\t%s" (show inst) (showOp op) 384 | showInstOp (inst, Nothing) = show inst 385 | showOp (HelperFuncOp helperMemlog) 386 | = printf "\n===HELPER===:\n%s\n===END HELPER===" $ 387 | showAssociated helperMemlog 388 | showOp op = show op 389 | -------------------------------------------------------------------------------- /Options.hs: -------------------------------------------------------------------------------- 1 | module Options(Options(..), defaultOptions) where 2 | 3 | import Data.Word(Word64) 4 | 5 | data Options = Options 6 | { optDebugIP :: Maybe Word64 7 | , optQemuDir :: FilePath 8 | , optQemuTarget :: String 9 | , optQemuCr3 :: Maybe Word64 10 | , optQemuReplay :: Maybe FilePath 11 | , optQemuQcows :: Maybe FilePath 12 | , optLogDir :: FilePath 13 | } deriving Show 14 | 15 | defaultOptions :: Options 16 | defaultOptions = Options 17 | { optDebugIP = Nothing 18 | , optQemuDir = "." 19 | , optQemuTarget = "i386-linux-user" 20 | , optQemuCr3 = Nothing 21 | , optQemuReplay = Nothing 22 | , optQemuQcows = Nothing 23 | , optLogDir = "/tmp" 24 | } 25 | -------------------------------------------------------------------------------- /Pretty.hs: -------------------------------------------------------------------------------- 1 | module Pretty where 2 | 3 | class Pretty a where 4 | pretty :: a -> String 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | RESET 2 | ===== 3 | Reverse Engineering through Symbolic Execution of Traces: Symbolic execution of LLVM IR traces for program understanding. 4 | 5 | To evaluate a trace, first run: 6 | 7 | cabal configure 8 | cabal build --ghc-options="-rtsopts" 9 | 10 | Then, to run a program, grab a trace 11 | 12 | dist/build/Eval/Eval -q 13 | 14 | The qemu\_build\_dir should be the build directory, such as ~/qemu/x86\_64-linux-user. 15 | 16 | For a trace in whole-system mode, you need to gather the trace manually. First, make a PANDA record/replay recording of the execution you want to look at. Next, use Volatility or a similar tool to find the CR3 you're looking for, and then run a command like 17 | 18 | echo "begin_replay " | ~/qemu/i386-softmmu/qemu-system-i386 -panda-plugin ~/qemu/i386-softmmu/panda_plugins/panda_llvm_trace.so -panda-arg llvm_trace:cr3=0xDEADBEEF -monitor stdio ~/win7.1.qcows2 19 | 20 | followed by 21 | 22 | dist/build/Eval/Eval 23 | 24 | You can do `Eval --help` to see a list of command line options. By default, PANDA stores trace information in `/tmp`; if you want to change this, use `Eval -d` and `qemu -panda-arg llvm_trace:base=/other/dir`. Eval will also probably run out stack space; increase that by adding the arguments `+RTS -K1G -RTS`, where the 1G specifies 1 GB of stack space. 25 | 26 | This will start a server that accepts JSON requests for symbolic execution data from the RESET IDA plugin (github.com/phulin/RESETPlugin) 27 | 28 | Files 29 | ===== 30 | * `types/`: Definitions of basic types. This is in a separate Cabal package due to GHC bug #3333 - you can't have Template Haskell code in a package that links to C++ code. We use TH for the JSON parsing; aeson provides a nice auto-serialization interface. 31 | * `AppList.hs`: Definition of a linked list type which is optimized for appending; we use this instead of normal List for pretty much everything. 32 | * `Memlog.hs`: Functions for parsing and processing the Panda dynamic log 33 | * `Instances.hs`: Miscellanous instances of Show, mostly for debugging 34 | * `Options.hs`: Definition and parsing of command-line arguments. 35 | * `Pretty.hs`: The Pretty class for pretty-printing; probably could be done in a much nicer way 36 | * `Expr.hs`: Operations for working with our expression format 37 | * `Eval.hs`: Main functions - meat of the symbolic evaluation engine 38 | * `Main.hs`: Server code and command-line argument processing, etc 39 | -------------------------------------------------------------------------------- /Setup.lhs: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env runhaskell 2 | 3 | > import Distribution.Simple 4 | > main = defaultMain 5 | -------------------------------------------------------------------------------- /atoi.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(int argc, char **argv) { 4 | char c = getchar(); 5 | char cs[2]; 6 | cs[0] = c; 7 | cs[1] = '\0'; 8 | return atoi(cs); 9 | } 10 | -------------------------------------------------------------------------------- /euclid.c: -------------------------------------------------------------------------------- 1 | int main(int argc, char **argv) { 2 | int a = (argv[1][0] - '0') * 10 + (argv[1][1] - '0'); 3 | int b = (argv[2][0] - '0') * 10 + (argv[2][1] - '0'); 4 | 5 | int h = (a > b) ? a : b; 6 | int l = (a > b) ? b : a;; 7 | while (l != 0) { 8 | h = h % l; 9 | int tmp = h; 10 | h = l; 11 | l = tmp; 12 | } 13 | return h; 14 | } 15 | -------------------------------------------------------------------------------- /invsqrt.c: -------------------------------------------------------------------------------- 1 | //#include 2 | 3 | int main(int argc, char **argv) { 4 | float x = (argv[1][0] - '0') * 100 + (argv[1][1] - '0') * 10 + (argv[1][2] - '0'); 5 | // printf("%f\n", (double)x); 6 | float xhalf = 0.5f * x; 7 | int i = *(int*)&x; 8 | i = 0x5f3759df - (i >> 1); 9 | x = *(float*)&i; 10 | x = x * (1.5f - xhalf * x * x); 11 | // printf("%f\n", (double)x); 12 | return x * 10000; 13 | } 14 | -------------------------------------------------------------------------------- /symbolic-trace.cabal: -------------------------------------------------------------------------------- 1 | Name: symbolic-trace 2 | Version: 0.0 3 | Synopsis: Symbolic execution of traces for program understanding 4 | Description: Symbolic execution of traces for program understanding 5 | Author: Patrick Hulin 6 | Maintainer: Patrick Hulin 7 | Build-Depends: base, mtl, transformers, bytestring, binary >= 0.6.0.0, containers >= 0.5.0.0, llvm-data-interop, llvm-base-types, network, aeson, text, reset-types, pretty, filepath, directory, process, zlib, deepseq 8 | 9 | Build-Type: Simple 10 | Executable: Eval 11 | Main-is: Main.hs 12 | -------------------------------------------------------------------------------- /twentytwo.c: -------------------------------------------------------------------------------- 1 | int main(int argc, char *argv[]) { 2 | int input = (argv[1][1] - '0') * 10 + (argv[1][0] - '0'); 3 | if (input == 22) 4 | return 42; 5 | else 6 | return 32; 7 | } 8 | -------------------------------------------------------------------------------- /types/Data/RESET/Expr.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE StandaloneDeriving #-} 2 | module Data.RESET.Expr where 3 | 4 | import Data.LLVM.Types 5 | import Text.Printf 6 | 7 | import Data.RESET.Memlog 8 | 9 | deriving instance Ord CmpPredicate 10 | 11 | data Loc = IdLoc Identifier Identifier | MemLoc AddrEntry 12 | deriving (Eq, Ord) 13 | 14 | data ExprT = VoidT | PtrT | Int1T | Int8T | Int16T | Int32T | Int64T | FloatT | DoubleT 15 | | StructT [ExprT] 16 | deriving (Eq, Ord, Show) 17 | 18 | data Expr = 19 | AddExpr ExprT Expr Expr | 20 | SubExpr ExprT Expr Expr | 21 | MulExpr ExprT Expr Expr | 22 | DivExpr ExprT Expr Expr | 23 | RemExpr ExprT Expr Expr | 24 | ShlExpr ExprT Expr Expr | 25 | LshrExpr ExprT Expr Expr | 26 | AshrExpr ExprT Expr Expr | 27 | AndExpr ExprT Expr Expr | 28 | OrExpr ExprT Expr Expr | 29 | XorExpr ExprT Expr Expr | 30 | TruncExpr ExprT Expr | 31 | ZExtExpr ExprT Expr | 32 | SExtExpr ExprT Expr | 33 | FPTruncExpr ExprT Expr | 34 | FPExtExpr ExprT Expr | 35 | FPToSIExpr ExprT Expr | 36 | FPToUIExpr ExprT Expr | 37 | SIToFPExpr ExprT Expr | 38 | UIToFPExpr ExprT Expr | 39 | PtrToIntExpr ExprT Expr | 40 | IntToPtrExpr ExprT Expr | 41 | BitcastExpr ExprT Expr | 42 | -- Type, dynamic address, and name. 43 | LoadExpr ExprT AddrEntry (Maybe String) | 44 | ICmpExpr CmpPredicate Expr Expr | 45 | ILitExpr Integer | -- takes any integer type 46 | FLitExpr Double | -- takes any float type 47 | InputExpr ExprT Loc | 48 | StubExpr ExprT String [Expr] | 49 | IntrinsicExpr ExprT ExternalFunction [Expr] | 50 | ExtractExpr ExprT Int Expr | 51 | StructExpr ExprT [Expr] | 52 | UndefinedExpr | 53 | GEPExpr | -- dummy expression for getelementptr instructions 54 | IrrelevantExpr 55 | deriving (Eq, Ord) 56 | 57 | data ExprOptions = ExprOptions{ exprShowCasts :: Bool } 58 | deriving (Eq, Ord, Show) 59 | defaultExprOptions :: ExprOptions 60 | defaultExprOptions = ExprOptions{ exprShowCasts = True } 61 | -------------------------------------------------------------------------------- /types/Data/RESET/Memlog.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TemplateHaskell #-} 2 | module Data.RESET.Memlog where 3 | 4 | import Control.DeepSeq.TH 5 | import Data.LLVM.Types 6 | import Data.Word 7 | 8 | type InstOpList = [(Instruction, Maybe MemlogOp)] 9 | type MemlogList = [(BasicBlock, InstOpList)] 10 | 11 | -- Haskell version of C dynamic log structs 12 | data MemlogOp = BeginBlockOp Word64 Word64 | -- Translation counter and eip 13 | MemoryOp AddrOp AddrEntry | 14 | BranchOp Word64 | -- INDEX of branch taken - 0 for true, 1 for false 15 | SelectOp Word64 | -- Index of select statement result 16 | SwitchOp Word64 | -- Index of switch statement result 17 | ExceptionOp | 18 | HelperFuncOp MemlogList | -- For calls out to helper functions 19 | MemsetOp AddrEntry | 20 | MemcpyOp AddrEntry AddrEntry 21 | deriving (Eq, Ord, Show) 22 | data AddrOp = LoadOp | StoreOp 23 | deriving (Eq, Ord, Show) 24 | data AddrEntry = AddrEntry { addrType :: AddrEntryType 25 | , addrVal :: Word64 26 | , addrFlag :: AddrFlag } 27 | deriving (Eq, Ord, Show) 28 | data AddrEntryType = HAddr | MAddr | IAddr | PAddr | LAddr | GReg | GSpec | Unk | Const | Ret 29 | deriving (Eq, Ord, Show, Enum) 30 | data AddrFlag = IrrelevantFlag | NoFlag | ExceptionFlag | ReadlogFlag | FuncargFlag 31 | deriving (Eq, Ord, Show) 32 | 33 | $(deriveNFData ''AddrFlag) 34 | $(deriveNFData ''AddrEntryType) 35 | $(deriveNFData ''AddrEntry) 36 | $(deriveNFData ''AddrOp) 37 | $(deriveNFData ''MemlogOp) 38 | -------------------------------------------------------------------------------- /types/Data/RESET/Message.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TemplateHaskell #-} 2 | module Data.RESET.Message where 3 | 4 | import Data.Aeson 5 | import Data.Aeson.TH 6 | import Data.Functor 7 | import Data.LLVM.Types 8 | import Data.Word 9 | import System.Exit(ExitCode(..)) 10 | 11 | import Data.RESET.Memlog 12 | import Data.RESET.Expr 13 | 14 | data Command = 15 | WatchIP{ commandIP :: Word64, 16 | commandLimit :: Int, 17 | commandExprOptions :: ExprOptions } 18 | deriving (Eq, Ord) 19 | 20 | data Response = 21 | ErrorResponse{ responseError :: String } | 22 | MessagesResponse{ responseMessages :: [Message String] } | 23 | ExitCodeResponse{ responseExitCode :: ExitCode } 24 | deriving (Eq, Ord) 25 | 26 | -- Parameterized over whether to represent symbolic expressions as Exprs 27 | -- or as Strings. 28 | data Message a = 29 | MemoryMessage{ messageOp :: AddrOp, 30 | messageAddr :: String, 31 | messageExpr :: a, 32 | messageOrigin :: Maybe a } | 33 | BranchMessage{ messageExpr :: a, 34 | messageTaken :: Bool } | 35 | UnconditionalBranchMessage | 36 | WarningMessage{ messageWarning :: String } 37 | deriving (Eq, Ord) 38 | 39 | messageMap :: (a -> b) -> Message a -> Message b 40 | messageMap f (MemoryMessage op addr expr origin) 41 | = MemoryMessage op addr (f expr) (f `fmap` origin) 42 | messageMap f (BranchMessage expr taken) 43 | = BranchMessage (f expr) taken 44 | messageMap _ UnconditionalBranchMessage = UnconditionalBranchMessage 45 | messageMap _ (WarningMessage w) = WarningMessage w 46 | 47 | $(deriveJSON id ''ExitCode) 48 | $(deriveJSON id ''DW_TAG) 49 | $(deriveJSON id ''DW_ATE) 50 | $(deriveJSON id ''DW_VIRTUALITY) 51 | $(deriveJSON id ''DW_LANG) 52 | $(deriveJSON id ''FunctionAttribute) 53 | $(deriveJSON id ''Metadata) 54 | $(deriveJSON id ''Identifier) 55 | $(deriveJSON id ''Type) 56 | $(deriveJSON id ''Loc) 57 | $(deriveJSON id ''CmpPredicate) 58 | $(deriveJSON id ''ExprT) 59 | $(deriveJSON id ''ExternalFunction) 60 | $(deriveJSON id ''AddrFlag) 61 | $(deriveJSON id ''AddrEntryType) 62 | $(deriveJSON id ''Expr) 63 | $(deriveJSON id ''AddrEntry) 64 | $(deriveJSON id ''AddrOp) 65 | $(deriveJSON (drop 4) ''ExprOptions) 66 | $(deriveJSON (drop 7) ''Message) 67 | $(deriveJSON (drop 7) ''Command) 68 | $(deriveJSON (drop 8) ''Response) 69 | -------------------------------------------------------------------------------- /types/Data/RESET/Types.hs: -------------------------------------------------------------------------------- 1 | module Data.RESET.Types(Loc(..), ExprT(..), Expr(..), MemlogList, InstOpList, MemlogOp(..), AddrOp(..), AddrEntry(..), AddrEntryType(..), AddrFlag(..), Command(..), Response(..), Message(..), ExprOptions(..), defaultExprOptions, messageMap) where 2 | 3 | import Data.RESET.Expr 4 | import Data.RESET.Memlog 5 | import Data.RESET.Message 6 | -------------------------------------------------------------------------------- /types/reset-types.cabal: -------------------------------------------------------------------------------- 1 | Name: reset-types 2 | Version: 0.0 3 | Build-Type: Simple 4 | Cabal-Version: >= 1.2 5 | 6 | Library 7 | Exposed-Modules: Data.RESET.Types, Data.RESET.Message, Data.RESET.Memlog, Data.RESET.Expr 8 | Build-Depends: base, aeson, llvm-base-types, deepseq-th 9 | --------------------------------------------------------------------------------